OpenStructure
ligand_scoring.py
Go to the documentation of this file.
1 import warnings
2 
3 import numpy as np
4 import numpy.ma as np_ma
5 import networkx
6 
7 from ost import mol
8 from ost import geom
9 from ost import LogError, LogWarning, LogScript, LogInfo, LogVerbose, LogDebug
10 from ost.mol.alg import chain_mapping
11 
12 
14  """ Scorer class to compute various small molecule ligand (non polymer) scores.
15 
16  .. note ::
17  Extra requirements:
18 
19  - Python modules `numpy` and `networkx` must be available
20  (e.g. use ``pip install numpy networkx``)
21 
22  At the moment, two scores are available:
23 
24  * lDDT-PLI, that looks at the conservation of protein-ligand contacts
25  with :class:`lDDT <ost.mol.alg.lddt.lDDTScorer>`.
26  * Binding-site superposed, symmetry-corrected RMSD that assesses the
27  accuracy of the ligand pose (BiSyRMSD, hereinafter referred to as RMSD).
28 
29  Both scores involve local chain mapping of the reference binding site
30  onto the model, symmetry-correction, and finally assignment (mapping)
31  of model and target ligands, as described in (Manuscript in preparation).
32 
33  The binding site is defined based on a radius around the target ligand
34  in the reference structure only. It only contains protein and nucleic
35  acid chains that pass the criteria for the
36  :class:`chain mapping <ost.mol.alg.chain_mapping>`. This means ignoring
37  other ligands, waters, short polymers as well as any incorrectly connected
38  chains that may be in proximity.
39 
40  Results are available as matrices (`(lddt_pli|rmsd)_matrix`), where every
41  target-model score is reported in a matrix; as `(lddt_pli|rmsd)` where
42  a model-target assignment has been determined (see below) and reported in
43  a dictionary; and as (`(lddt_pli|rmsd)_details`) methods, which report
44  additional details about different aspects of the scoring such as chain
45  mapping.
46 
47  The behavior of chain mapping and ligand assignment can be controlled
48  with the `global_chain_mapping` and `rmsd_assignment` arguments.
49 
50  By default, chain mapping is performed locally, ie. only within the
51  binding site. As a result, different ligand scores can correspond to
52  different chain mappings. This tends to produce more favorable scores,
53  especially in large, partially regular oligomeric complexes.
54  Setting `global_chain_mapping=True` enforces a single global chain mapping,
55  as per :meth:`ost.mol.alg.chain_mapping.ChainMapper.GetMapping`.
56  Note that this global chain mapping currently ignores non polymer entities
57  such as small ligands, and may result in overly pessimistic scores.
58 
59  By default, target-model ligand assignments are computed identically
60  for the RMSD and lDDT-PLI scores. Each model ligand is uniquely assigned
61  to a target ligand, starting from the lowest RMSD and using each target and
62  model ligand in a single assignment. If `rmsd_assignment` is set to False,
63  RMSD and lDDT-PLI are assigned separately to optimize each score, and the
64  other score is used as a tiebreaker.
65 
66  By default, only exact matches between target and model ligands are
67  considered. This is a problem when the target only contains a subset
68  of the expected atoms (for instance if atoms are missing in an
69  experimental structure, which often happens in the PDB). With
70  `substructure_match=True`, complete model ligands can be scored against
71  partial target ligands. One problem with this approach is that it is
72  very easy to find good matches to small, irrelevant ligands like EDO, CO2
73  or GOL. To counter that, the assignment algorithm considers the coverage,
74  expressed as the fraction of atoms of the model ligand atoms covered in the
75  target. Higher coverage matches are prioritized, but a match with a better
76  score will be preferred if it falls within a window of `coverage_delta`
77  (by default 0.2) of a worse-scoring match. As a result, for instance,
78  with a delta of 0.2, a low-score match with coverage 0.96 would be
79  preferred to a high-score match with coverage 0.90.
80 
81  Assumptions:
82 
83  The class generally assumes that the
84  :attr:`~ost.mol.ResidueHandle.is_ligand` property is properly set on all
85  the ligand atoms, and only ligand atoms. This is typically the case for
86  entities loaded from mmCIF (tested with mmCIF files from the PDB and
87  SWISS-MODEL). Legacy PDB files must contain `HET` headers (which is usually
88  the case for files downloaded from the PDB but not elsewhere).
89 
90  The class doesn't perform any cleanup of the provided structures.
91  It is up to the caller to ensure that the data is clean and suitable for
92  scoring. :ref:`Molck <molck>` should be used with extra
93  care, as many of the options (such as `rm_non_std` or `map_nonstd_res`) can
94  cause ligands to be removed from the structure. If cleanup with Molck is
95  needed, ligands should be kept aside and passed separately. Non-ligand residues
96  should be valid compounds with atom names following the naming conventions
97  of the component dictionary. Non-standard residues are acceptable, and if
98  the model contains a standard residue at that position, only atoms with
99  matching names will be considered.
100 
101  Unlike most of OpenStructure, this class does not assume that the ligands
102  (either in the model or the target) are part of the PDB component
103  dictionary. They may have arbitrary residue names. Residue names do not
104  have to match between the model and the target. Matching is based on
105  the calculation of isomorphisms which depend on the atom element name and
106  atom connectivity (bond order is ignored).
107  It is up to the caller to ensure that the connectivity of atoms is properly
108  set before passing any ligands to this class. Ligands with improper
109  connectivity will lead to bogus results.
110 
111  Note, however, that atom names should be unique within a residue (ie two
112  distinct atoms cannot have the same atom name).
113 
114  This only applies to the ligand. The rest of the model and target
115  structures (protein, nucleic acids) must still follow the usual rules and
116  contain only residues from the compound library.
117 
118  Although it isn't a requirement, hydrogen atoms should be removed from the
119  structures. Here is an example code snippet that will perform a reasonable
120  cleanup. Keep in mind that this is most likely not going to work as
121  expected with entities loaded from PDB files, as the `is_ligand` flag is
122  probably not set properly.
123 
124  Here is a snippet example of how to use this code::
125 
126  from ost.mol.alg.ligand_scoring import LigandScorer
127  from ost.mol.alg import Molck, MolckSettings
128 
129  # Load data
130  # Structure model in PDB format, containing the receptor only
131  model = io.LoadPDB("path_to_model.pdb")
132  # Ligand model as SDF file
133  model_ligand = io.LoadEntity("path_to_ligand.sdf", format="sdf")
134  # Target loaded from mmCIF, containing the ligand
135  target = io.LoadMMCIF("path_to_target.cif")
136 
137  # Cleanup a copy of the structures
138  cleaned_model = model.Copy()
139  cleaned_target = target.Copy()
140  molck_settings = MolckSettings(rm_unk_atoms=True,
141  rm_non_std=False,
142  rm_hyd_atoms=True,
143  rm_oxt_atoms=False,
144  rm_zero_occ_atoms=False,
145  colored=False,
146  map_nonstd_res=False,
147  assign_elem=True)
148  Molck(cleaned_model, conop.GetDefaultLib(), molck_settings)
149  Molck(cleaned_target, conop.GetDefaultLib(), molck_settings)
150 
151  # Setup scorer object and compute lDDT-PLI
152  model_ligands = [model_ligand.Select("ele != H")]
153  ls = LigandScorer(model=cleaned_model, target=cleaned_target, model_ligands=model_ligands)
154  print("lDDT-PLI:", ls.lddt_pli)
155  print("RMSD:", ls.rmsd)
156 
157  :param model: Model structure - a deep copy is available as :attr:`model`.
158  No additional processing (ie. Molck), checks,
159  stereochemistry checks or sanitization is performed on the
160  input. Hydrogen atoms are kept.
161  :type model: :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
162  :param target: Target structure - a deep copy is available as :attr:`target`.
163  No additional processing (ie. Molck), checks or sanitization
164  is performed on the input. Hydrogen atoms are kept.
165  :type target: :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
166  :param model_ligands: Model ligands, as a list of
167  :class:`~ost.mol.ResidueHandle` belonging to the model
168  entity. Can be instantiated with either a :class:list of
169  :class:`~ost.mol.ResidueHandle`/:class:`ost.mol.ResidueView`
170  or of :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`.
171  If `None`, ligands will be extracted based on the
172  :attr:`~ost.mol.ResidueHandle.is_ligand` flag (this is
173  normally set properly in entities loaded from mmCIF).
174  :type model_ligands: :class:`list`
175  :param target_ligands: Target ligands, as a list of
176  :class:`~ost.mol.ResidueHandle` belonging to the target
177  entity. Can be instantiated either a :class:list of
178  :class:`~ost.mol.ResidueHandle`/:class:`ost.mol.ResidueView`
179  or of :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
180  containing a single residue each.
181  If `None`, ligands will be extracted based on the
182  :attr:`~ost.mol.ResidueHandle.is_ligand` flag (this is
183  normally set properly in entities loaded from mmCIF).
184  :type target_ligands: :class:`list`
185  :param resnum_alignments: Whether alignments between chemically equivalent
186  chains in *model* and *target* can be computed
187  based on residue numbers. This can be assumed in
188  benchmarking setups such as CAMEO/CASP.
189  :type resnum_alignments: :class:`bool`
190  :param check_resnames: On by default. Enforces residue name matches
191  between mapped model and target residues.
192  :type check_resnames: :class:`bool`
193  :param rename_ligand_chain: If a residue with the same chain name and
194  residue number than an explicitly passed model
195  or target ligand exits in the structure,
196  and `rename_ligand_chain` is False, a
197  RuntimeError will be raised. If
198  `rename_ligand_chain` is True, the ligand will
199  be moved to a new chain instead, and the move
200  will be logged to the console with SCRIPT
201  level.
202  :type rename_ligand_chain: :class:`bool`
203  :param chain_mapper: a chain mapper initialized for the target structure.
204  If None (default), a chain mapper will be initialized
205  lazily as required.
206  :type chain_mapper: :class:`ost.mol.alg.chain_mapping.ChainMapper`
207  :param substructure_match: Set this to True to allow incomplete (ie
208  partially resolved) target ligands.
209  :type substructure_match: :class:`bool`
210  :param coverage_delta: the coverage delta for partial ligand assignment.
211  :type coverage_delta: :class:`float`
212  :param radius: Inclusion radius for the binding site. Residues with
213  atoms within this distance of the ligand will be considered
214  for inclusion in the binding site.
215  :type radius: :class:`float`
216  :param lddt_pli_radius: lDDT inclusion radius for lDDT-PLI. Should be
217  at least equal to or larger than `radius`.
218  :type lddt_pli_radius: :class:`float`
219  :param lddt_lp_radius: lDDT inclusion radius for lDDT-LP.
220  :type lddt_lp_radius: :class:`float`
221  :param model_bs_radius: inclusion radius for model binding sites.
222  Only used when full_bs_search=False, otherwise the
223  radius is effectively infinite. Only chains with
224  atoms within this distance of a model ligand will
225  be considered in the chain mapping.
226  :type model_bs_radius: :class:`float`
227  :param binding_sites_topn: maximum number of target binding site
228  representations to assess, per target ligand.
229  Ignored if `global_chain_mapping` is True.
230  :type binding_sites_topn: :class:`int`
231  :param global_chain_mapping: set to True to use a global chain mapping for
232  the polymer (protein, nucleotide) chains.
233  Defaults to False, in which case only local
234  chain mappings are allowed (where different
235  ligand may be scored against different chain
236  mappings).
237  :type global_chain_mapping: :class:`bool`
238  :param custom_mapping: Provide custom chain mapping between *model* and
239  *target* that is used as global chain mapping.
240  Dictionary with target chain names as key and model
241  chain names as value. Only has an effect if
242  *global_chain_mapping* is True.
243  :type custom_mapping: :class:`dict`
244  :param rmsd_assignment: set to False to assign lDDT-PLI and RMSD separately
245  using a combination of these two scores to
246  optimize the assignment. By default (True), only
247  RMSD is considered for the ligand assignment.
248  :type rmsd_assignment: :class:`bool`
249  :param n_max_naive: Parameter for global chain mapping. If *model* and
250  *target* have less or equal that number of chains,
251  the full
252  mapping solution space is enumerated to find the
253  the optimum. A heuristic is used otherwise.
254  :type n_max_naive: :class:`int`
255  :param max_symmetries: If more than that many isomorphisms exist for
256  a target-ligand pair, it will be ignored and reported
257  as unassigned.
258  :type max_symmetries: :class:`int`
259  :param unassigned: If True, unassigned model ligands are reported in
260  the output together with assigned ligands, with a score
261  of None, and reason for not being assigned in the
262  \\*_details matrix. Defaults to False.
263  :type unassigned: :class:`bool`
264  :param full_bs_search: If True, all potential binding sites in the model
265  are searched for each target binding site. If False,
266  the search space in the model is reduced to chains
267  around (`model_bs_radius` Å) model ligands.
268  This speeds up computations, but may result in
269  ligands not being scored if the predicted ligand
270  pose is too far from the actual binding site.
271  When that's the case, the value in the
272  `unassigned_*_ligands` property will be
273  `model_representation` and is indistinguishable from
274  cases where the binding site was not modeled at all.
275  Ignored when `global_chain_mapping` is True.
276  :type full_bs_search: :class:`bool`
277  """
278  def __init__(self, model, target, model_ligands=None, target_ligands=None,
279  resnum_alignments=False, check_resnames=True,
280  rename_ligand_chain=False, chain_mapper=None,
281  substructure_match=False, coverage_delta=0.2, radius=4.0,
282  lddt_pli_radius=6.0, lddt_lp_radius=10.0, model_bs_radius=20,
283  binding_sites_topn=100000, global_chain_mapping=False,
284  rmsd_assignment=False, n_max_naive=12, max_symmetries=1e5,
285  custom_mapping=None, unassigned=False, full_bs_search=False):
286 
287  if isinstance(model, mol.EntityView):
288  self.modelmodel = mol.CreateEntityFromView(model, False)
289  elif isinstance(model, mol.EntityHandle):
290  self.modelmodel = model.Copy()
291  else:
292  raise RuntimeError("model must be of type EntityView/EntityHandle")
293 
294  if isinstance(target, mol.EntityView):
295  self.targettarget = mol.CreateEntityFromView(target, False)
296  elif isinstance(target, mol.EntityHandle):
297  self.targettarget = target.Copy()
298  else:
299  raise RuntimeError("target must be of type EntityView/EntityHandle")
300 
301  # Extract ligands from target
302  if target_ligands is None:
303  self.target_ligandstarget_ligands = self._extract_ligands_extract_ligands(self.targettarget)
304  else:
305  self.target_ligandstarget_ligands = self._prepare_ligands_prepare_ligands(self.targettarget, target,
306  target_ligands,
307  rename_ligand_chain)
308  if len(self.target_ligandstarget_ligands) == 0:
309  LogWarning("No ligands in the target")
310 
311  # Extract ligands from model
312  if model_ligands is None:
313  self.model_ligandsmodel_ligands = self._extract_ligands_extract_ligands(self.modelmodel)
314  else:
315  self.model_ligandsmodel_ligands = self._prepare_ligands_prepare_ligands(self.modelmodel, model,
316  model_ligands,
317  rename_ligand_chain)
318  if len(self.model_ligandsmodel_ligands) == 0:
319  LogWarning("No ligands in the model")
320  if len(self.target_ligandstarget_ligands) == 0:
321  raise ValueError("No ligand in the model and in the target")
322 
323  self._chain_mapper_chain_mapper = chain_mapper
324  self.resnum_alignmentsresnum_alignments = resnum_alignments
325  self.check_resnamescheck_resnames = check_resnames
326  self.rename_ligand_chainrename_ligand_chain = rename_ligand_chain
327  self.substructure_matchsubstructure_match = substructure_match
328  self.radiusradius = radius
329  self.model_bs_radiusmodel_bs_radius = model_bs_radius
330  self.lddt_pli_radiuslddt_pli_radius = lddt_pli_radius
331  self.lddt_lp_radiuslddt_lp_radius = lddt_lp_radius
332  self.binding_sites_topnbinding_sites_topn = binding_sites_topn
333  self.global_chain_mappingglobal_chain_mapping = global_chain_mapping
334  self.rmsd_assignmentrmsd_assignment = rmsd_assignment
335  self.n_max_naiven_max_naive = n_max_naive
336  self.max_symmetriesmax_symmetries = max_symmetries
337  self.unassignedunassigned = unassigned
338  self.coverage_deltacoverage_delta = coverage_delta
339  self.full_bs_searchfull_bs_search = full_bs_search
340 
341  # scoring matrices
342  self._rmsd_matrix_rmsd_matrix = None
343  self._rmsd_full_matrix_rmsd_full_matrix = None
344  self._lddt_pli_matrix_lddt_pli_matrix = None
345  self._lddt_pli_full_matrix_lddt_pli_full_matrix = None
346 
347  # lazily computed scores
348  self._rmsd_rmsd = None
349  self._rmsd_details_rmsd_details = None
350  self._lddt_pli_lddt_pli = None
351  self._lddt_pli_details_lddt_pli_details = None
352 
353  # lazily precomputed variables
354  self.__model_mapping__model_mapping = None
355 
356  # Residues that are in contact with a ligand => binding site
357  # defined as all residues with at least one atom within self.radius
358  # key: ligand.handle.hash_code, value: EntityView of whatever
359  # entity ligand belongs to
360  self._binding_sites_binding_sites = dict()
361 
362  # lazily precomputed variables to speedup GetRepr chain mapping calls
363  # for localized GetRepr searches
364  self._chem_mapping_chem_mapping = None
365  self._chem_group_alns_chem_group_alns = None
366  self._chain_mapping_mdl_chain_mapping_mdl = None
367  self._get_repr_input_get_repr_input = dict()
368 
369  # the actual representations as returned by ChainMapper.GetRepr
370  # key: (target_ligand.handle.hash_code, model_ligand.handle.hash_code)
371  # value: list of repr results
372  self._repr_repr = dict()
373 
374  # cache for rmsd values
375  # rmsd is used as tie breaker in lddt-pli, we therefore need access to
376  # the rmsd for each target_ligand/model_ligand/repr combination
377  # key: (target_ligand.handle.hash_code, model_ligand.handle.hash_code,
378  # repr_id)
379  # value: rmsd
380  self._rmsd_cache_rmsd_cache = dict()
381 
382  # Bookkeeping of unassigned ligands
383  self._unassigned_target_ligands_unassigned_target_ligands = None
384  self._unassigned_model_ligands_unassigned_model_ligands = None
385  self._unassigned_target_ligands_reason_unassigned_target_ligands_reason = {}
386  self._unassigned_target_ligand_short_unassigned_target_ligand_short = None
387  self._unassigned_model_ligand_short_unassigned_model_ligand_short = None
388  self._unassigned_target_ligand_descriptions_unassigned_target_ligand_descriptions = None
389  self._unassigned_model_ligand_descriptions_unassigned_model_ligand_descriptions = None
390  # Keep track of symmetries/isomorphisms (regardless of scoring)
391  # 0.0: no isomorphism
392  # 1.0: isomorphic
393  # np.nan: not assessed yet - that's why we can't use a boolean
394  self._assignment_isomorphisms_assignment_isomorphisms = None
395  # Keep track of match coverage (only in case there was a score)
396  self._assignment_match_coverage_assignment_match_coverage = None
397 
398  if custom_mapping is not None:
399  self._set_custom_mapping_set_custom_mapping(custom_mapping)
400 
401  @property
402  def chain_mapper(self):
403  """ Chain mapper object for the given :attr:`target`.
404 
405  :type: :class:`ost.mol.alg.chain_mapping.ChainMapper`
406  """
407  if self._chain_mapper_chain_mapper is None:
408  self._chain_mapper_chain_mapper = chain_mapping.ChainMapper(self.targettarget,
409  n_max_naive=1e9,
410  resnum_alignments=self.resnum_alignmentsresnum_alignments)
411  return self._chain_mapper_chain_mapper
412 
413  def get_target_binding_site(self, target_ligand):
414 
415  if target_ligand.handle.hash_code not in self._binding_sites_binding_sites:
416 
417  # create view of reference binding site
418  ref_residues_hashes = set() # helper to keep track of added residues
419  ignored_residue_hashes = {target_ligand.hash_code}
420  for ligand_at in target_ligand.atoms:
421  close_atoms = self.targettarget.FindWithin(ligand_at.GetPos(), self.radiusradius)
422  for close_at in close_atoms:
423  # Skip any residue not in the chain mapping target
424  ref_res = close_at.GetResidue()
425  h = ref_res.handle.GetHashCode()
426  if h not in ref_residues_hashes and \
427  h not in ignored_residue_hashes:
428  if self.chain_mapperchain_mapper.target.ViewForHandle(ref_res).IsValid():
429  h = ref_res.handle.GetHashCode()
430  ref_residues_hashes.add(h)
431  elif ref_res.is_ligand:
432  LogWarning("Ignoring ligand %s in binding site of %s" % (
433  ref_res.qualified_name, target_ligand.qualified_name))
434  ignored_residue_hashes.add(h)
435  elif ref_res.chem_type == mol.ChemType.WATERS:
436  pass # That's ok, no need to warn
437  else:
438  LogWarning("Ignoring residue %s in binding site of %s" % (
439  ref_res.qualified_name, target_ligand.qualified_name))
440  ignored_residue_hashes.add(h)
441 
442  ref_bs = self.targettarget.CreateEmptyView()
443  if ref_residues_hashes:
444  # reason for doing that separately is to guarantee same ordering of
445  # residues as in underlying entity. (Reorder by ResNum seems only
446  # available on ChainHandles)
447  for ch in self.targettarget.chains:
448  for r in ch.residues:
449  if r.handle.GetHashCode() in ref_residues_hashes:
450  ref_bs.AddResidue(r, mol.ViewAddFlag.INCLUDE_ALL)
451  if len(ref_bs.residues) == 0:
452  raise RuntimeError("Failed to add proximity residues to "
453  "the reference binding site entity")
454  else:
455  # Flag missing binding site
456  self._unassigned_target_ligands_reason_unassigned_target_ligands_reason[target_ligand] = ("binding_site",
457  "No residue in proximity of the target ligand")
458 
459  self._binding_sites_binding_sites[target_ligand.handle.hash_code] = ref_bs
460 
461  return self._binding_sites_binding_sites[target_ligand.handle.hash_code]
462 
463  @property
464  def _model_mapping(self):
465  """Get the global chain mapping for the model."""
466  if self.__model_mapping__model_mapping is None:
467  self.__model_mapping__model_mapping = self.chain_mapperchain_mapper.GetMapping(self.modelmodel,
468  n_max_naive=self.n_max_naiven_max_naive)
469  return self.__model_mapping__model_mapping
470 
471  @staticmethod
472  def _extract_ligands(entity):
473  """Extract ligands from entity. Return a list of residues.
474 
475  Assumes that ligands have the :attr:`~ost.mol.ResidueHandle.is_ligand`
476  flag set. This is typically the case for entities loaded from mmCIF
477  (tested with mmCIF files from the PDB and SWISS-MODEL).
478  Legacy PDB files must contain `HET` headers (which is usually the
479  case for files downloaded from the PDB but not elsewhere).
480 
481  This function performs basic checks to ensure that the residues in this
482  chain are not forming polymer bonds (ie peptide/nucleotide ligands) and
483  will raise a RuntimeError if this assumption is broken.
484 
485  :param entity: the entity to extract ligands from
486  :type entity: :class:`~ost.mol.EntityHandle`
487  :rtype: :class:`list` of :class:`~ost.mol.ResidueHandle`
488 
489  """
490  extracted_ligands = []
491  for residue in entity.residues:
492  if residue.is_ligand:
493  if mol.InSequence(residue, residue.next):
494  raise RuntimeError("Residue %s connected in polymer sequen"
495  "ce %s" % (residue.qualified_name))
496  extracted_ligands.append(residue)
497  LogVerbose("Detected residue %s as ligand" % residue)
498  return extracted_ligands
499 
500  @staticmethod
501  def _prepare_ligands(new_entity, old_entity, ligands, rename_chain):
502  """Prepare the ligands given into a list of ResidueHandles which are
503  part of the copied entity, suitable for the model_ligands and
504  target_ligands properties.
505 
506  This function takes a list of ligands as (Entity|Residue)(Handle|View).
507  Entities can contain multiple ligands, which will be considered as
508  separate ligands.
509 
510  Ligands which are part of the entity are simply fetched in the new
511  copied entity. Otherwise, they are copied over to the copied entity.
512  """
513  extracted_ligands = []
514 
515  next_chain_num = 1
516  new_editor = None
517 
518  def _copy_residue(residue, rename_chain):
519  """ Copy the residue into the new chain.
520  Return the new residue handle."""
521  nonlocal next_chain_num, new_editor
522 
523  # Instantiate the editor
524  if new_editor is None:
525  new_editor = new_entity.EditXCS()
526 
527  new_chain = new_entity.FindChain(residue.chain.name)
528  if not new_chain.IsValid():
529  new_chain = new_editor.InsertChain(residue.chain.name)
530  else:
531  # Does a residue with the same name already exist?
532  already_exists = new_chain.FindResidue(residue.number).IsValid()
533  if already_exists:
534  if rename_chain:
535  chain_ext = 2 # Extend the chain name by this
536  while True:
537  new_chain_name = residue.chain.name + "_" + str(chain_ext)
538  new_chain = new_entity.FindChain(new_chain_name)
539  if new_chain.IsValid():
540  chain_ext += 1
541  continue
542  else:
543  new_chain = new_editor.InsertChain(new_chain_name)
544  break
545  LogScript("Moved ligand residue %s to new chain %s" % (
546  residue.qualified_name, new_chain.name))
547  else:
548  msg = "A residue number %s already exists in chain %s" % (
549  residue.number, residue.chain.name)
550  raise RuntimeError(msg)
551 
552  # Add the residue with its original residue number
553  new_res = new_editor.AppendResidue(new_chain, residue.name, residue.number)
554  # Add atoms
555  for old_atom in residue.atoms:
556  new_editor.InsertAtom(new_res, old_atom.name, old_atom.pos,
557  element=old_atom.element, occupancy=old_atom.occupancy,
558  b_factor=old_atom.b_factor, is_hetatm=old_atom.is_hetatom)
559  # Add bonds
560  for old_atom in residue.atoms:
561  for old_bond in old_atom.bonds:
562  new_first = new_res.FindAtom(old_bond.first.name)
563  new_second = new_res.FindAtom(old_bond.second.name)
564  new_editor.Connect(new_first, new_second)
565  return new_res
566 
567  def _process_ligand_residue(res, rename_chain):
568  """Copy or fetch the residue. Return the residue handle."""
569  new_res = None
570  if res.entity.handle == old_entity.handle:
571  # Residue is part of the old_entity handle.
572  # However, it may not be in the copied one, for instance it may have been a view
573  # We try to grab it first, otherwise we copy it
574  new_res = new_entity.FindResidue(res.chain.name, res.number)
575  if new_res and new_res.valid:
576  LogVerbose("Ligand residue %s already in entity" % res.handle.qualified_name)
577  else:
578  # Residue is not part of the entity, need to copy it first
579  new_res = _copy_residue(res, rename_chain)
580  LogVerbose("Copied ligand residue %s" % res.handle.qualified_name)
581  new_res.SetIsLigand(True)
582  return new_res
583 
584  for ligand in ligands:
585  if isinstance(ligand, mol.EntityHandle) or isinstance(ligand, mol.EntityView):
586  for residue in ligand.residues:
587  new_residue = _process_ligand_residue(residue, rename_chain)
588  extracted_ligands.append(new_residue)
589  elif isinstance(ligand, mol.ResidueHandle) or isinstance(ligand, mol.ResidueView):
590  new_residue = _process_ligand_residue(ligand, rename_chain)
591  extracted_ligands.append(new_residue)
592  else:
593  raise RuntimeError("Ligands should be given as Entity or Residue")
594 
595  if new_editor is not None:
596  new_editor.UpdateICS()
597  return extracted_ligands
598 
599  @staticmethod
600  def _build_binding_site_entity(ligand, residues, extra_residues=[]):
601  """ Build an entity with all the binding site residues in chain A
602  and the ligand in chain _. Residues are renumbered consecutively from
603  1. The ligand is assigned residue number 1 and residue name LIG.
604  Residues in extra_residues not in `residues` in the model are added
605  at the end of chain A.
606 
607  :param ligand: the Residue Handle of the ligand
608  :type ligand: :class:`~ost.mol.ResidueHandle`
609  :param residues: a list of binding site residues
610  :type residues: :class:`list` of :class:`~ost.mol.ResidueHandle`
611  :param extra_residues: an optional list with addition binding site
612  residues. Residues in this list which are not
613  in `residues` will be added at the end of chain
614  A. This allows for instance adding unmapped
615  residues missing from the model into the
616  reference binding site.
617  :type extra_residues: :class:`list` of :class:`~ost.mol.ResidueHandle`
618  :rtype: :class:`~ost.mol.EntityHandle`
619  """
620  bs_ent = mol.CreateEntity()
621  ed = bs_ent.EditXCS()
622  bs_chain = ed.InsertChain("A")
623  seen_res_qn = []
624  for resnum, old_res in enumerate(residues, 1):
625  seen_res_qn.append(old_res.qualified_name)
626  new_res = ed.AppendResidue(bs_chain, old_res.handle,
627  deep=True)
628  ed.SetResidueNumber(new_res, mol.ResNum(resnum))
629 
630  # Add extra residues at the end.
631  for extra_res in extra_residues:
632  if extra_res.qualified_name not in seen_res_qn:
633  resnum += 1
634  seen_res_qn.append(extra_res.qualified_name)
635  new_res = ed.AppendResidue(bs_chain,
636  extra_res.handle,
637  deep=True)
638  ed.SetResidueNumber(new_res, mol.ResNum(resnum))
639  # Add the ligand in chain _
640  ligand_chain = ed.InsertChain("_")
641  ligand_res = ed.AppendResidue(ligand_chain, ligand.handle,
642  deep=True)
643  ed.RenameResidue(ligand_res, "LIG")
644  ed.SetResidueNumber(ligand_res, mol.ResNum(1))
645  ed.UpdateICS()
646 
647  return bs_ent
648 
649  def _compute_scores(self):
650  """
651  Compute the RMSD and lDDT-PLI scores for every possible target-model
652  ligand pair and store the result in internal matrices.
653  """
654 
657  self._rmsd_full_matrix_rmsd_full_matrix = np.empty(
658  (len(self.target_ligandstarget_ligands), len(self.model_ligandsmodel_ligands)), dtype=dict)
659  self._lddt_pli_full_matrix_lddt_pli_full_matrix = np.empty(
660  (len(self.target_ligandstarget_ligands), len(self.model_ligandsmodel_ligands)), dtype=dict)
661  self._assignment_isomorphisms_assignment_isomorphisms = np.full(
662  (len(self.target_ligandstarget_ligands), len(self.model_ligandsmodel_ligands)), fill_value=np.nan)
663  self._assignment_match_coverage_assignment_match_coverage = np.zeros(
664  (len(self.target_ligandstarget_ligands), len(self.model_ligandsmodel_ligands)))
665 
666  for target_id, target_ligand in enumerate(self.target_ligandstarget_ligands):
667  LogVerbose("Analyzing target ligand %s" % target_ligand)
668  for model_id, model_ligand in enumerate(self.model_ligandsmodel_ligands):
669  LogVerbose("Compare to model ligand %s" % model_ligand)
670 
671 
674  try:
675  symmetries = _ComputeSymmetries(
676  model_ligand, target_ligand,
677  substructure_match=self.substructure_matchsubstructure_match,
678  by_atom_index=True,
679  max_symmetries=self.max_symmetriesmax_symmetries)
680  LogVerbose("Ligands %s and %s symmetry match" % (
681  str(model_ligand), str(target_ligand)))
682  except NoSymmetryError:
683  # Ligands are different - skip
684  LogVerbose("No symmetry between %s and %s" % (
685  str(model_ligand), str(target_ligand)))
686  self._assignment_isomorphisms_assignment_isomorphisms[target_id, model_id] = 0.
687  continue
688  except TooManySymmetriesError:
689  # Ligands are too symmetrical - skip
690  LogVerbose("Too many symmetries between %s and %s" % (
691  str(model_ligand), str(target_ligand)))
692  self._assignment_isomorphisms_assignment_isomorphisms[target_id, model_id] = -1.
693  continue
694  except DisconnectedGraphError:
695  # Disconnected graph is handled elsewhere
696  continue
697 
698 
703  rmsd_result = self._compute_rmsd_compute_rmsd(symmetries, target_ligand,
704  model_ligand)
705  lddt_pli_result = self._compute_lddtpli_compute_lddtpli(symmetries, target_ligand,
706  model_ligand)
707 
708 
711  if (rmsd_result is None) != (lddt_pli_result is None):
712  # Ligand assignment makes assumptions here, and is likely
713  # to not work properly if this differs. There is no reason
714  # it would ever do, so let's just check it
715  raise Exception("Ligand scoring bug: discrepency between "
716  "RMSD and lDDT-PLI definition.")
717  if rmsd_result is not None:
718  # Now we assume both rmsd_result and lddt_pli_result are defined
719  # Add coverage
720  substructure_match = len(symmetries[0][0]) != len(model_ligand.atoms)
721  coverage = len(symmetries[0][0]) / len(model_ligand.atoms)
722  self._assignment_match_coverage_assignment_match_coverage[target_id, model_id] = coverage
723  self._assignment_isomorphisms_assignment_isomorphisms[target_id, model_id] = 1.
724  # Add RMSD
725  rmsd_result["substructure_match"] = substructure_match
726  rmsd_result["coverage"] = coverage
727  if self.unassignedunassigned:
728  rmsd_result["unassigned"] = False
729  # Add lDDT-PLI
730  lddt_pli_result["substructure_match"] = substructure_match
731  lddt_pli_result["coverage"] = coverage
732  if self.unassignedunassigned:
733  lddt_pli_result["unassigned"] = False
734 
735 
738  self._lddt_pli_full_matrix_lddt_pli_full_matrix[target_id, model_id] = lddt_pli_result
739  self._rmsd_full_matrix_rmsd_full_matrix[target_id, model_id] = rmsd_result
740 
741 
742  def _compute_rmsd(self, symmetries, target_ligand, model_ligand):
743  best_rmsd_result = None
744  for r_i, r in enumerate(self.get_reprget_repr(target_ligand, model_ligand)):
745  rmsd = _SCRMSD_symmetries(symmetries, model_ligand,
746  target_ligand, transformation=r.transform)
747 
748  cache_key = (target_ligand.handle.hash_code,
749  model_ligand.handle.hash_code, r_i)
750  self._rmsd_cache_rmsd_cache[cache_key] = rmsd
751 
752  if best_rmsd_result is None or rmsd < best_rmsd_result["rmsd"]:
753  best_rmsd_result = {"rmsd": rmsd,
754  "lddt_lp": r.lDDT,
755  "bs_ref_res": r.substructure.residues,
756  "bs_ref_res_mapped": r.ref_residues,
757  "bs_mdl_res_mapped": r.mdl_residues,
758  "bb_rmsd": r.bb_rmsd,
759  "target_ligand": target_ligand,
760  "model_ligand": model_ligand,
761  "chain_mapping": r.GetFlatChainMapping(),
762  "transform": r.transform,
763  "inconsistent_residues": r.inconsistent_residues}
764 
765  return best_rmsd_result
766 
767 
768  def _compute_lddtpli(self, symmetries, target_ligand, model_ligand):
769  ref_bs = self.get_target_binding_siteget_target_binding_site(target_ligand)
770  best_lddt_result = None
771  for r_i, r in enumerate(self.get_reprget_repr(target_ligand, model_ligand)):
772  ref_bs_ent = self._build_binding_site_entity_build_binding_site_entity(
773  target_ligand, r.ref_residues,
774  r.substructure.residues)
775  ref_bs_ent_ligand = ref_bs_ent.FindResidue("_", 1) # by definition
776 
777  custom_compounds = {
778  ref_bs_ent_ligand.name:
779  mol.alg.lddt.CustomCompound.FromResidue(
780  ref_bs_ent_ligand)}
781  lddt_scorer = mol.alg.lddt.lDDTScorer(
782  ref_bs_ent,
783  custom_compounds=custom_compounds,
784  inclusion_radius=self.lddt_pli_radiuslddt_pli_radius)
785 
786  lddt_tot = 4 * sum([len(x) for x in lddt_scorer.ref_indices_ic])
787  if lddt_tot == 0:
788  # it's a space ship in the reference!
789  self._unassigned_target_ligands_reason_unassigned_target_ligands_reason[
790  target_ligand] = ("no_contact",
791  "No lDDT-PLI contacts in the"
792  " reference structure")
793  #continue
794  mdl_bs_ent = self._build_binding_site_entity_build_binding_site_entity(
795  model_ligand, r.mdl_residues, [])
796  mdl_bs_ent_ligand = mdl_bs_ent.FindResidue("_", 1) # by definition
797  # Now for each symmetry, loop and rename atoms according
798  # to ref.
799  mdl_editor = mdl_bs_ent.EditXCS()
800  for i, (trg_sym, mdl_sym) in enumerate(symmetries):
801  for mdl_anum, trg_anum in zip(mdl_sym, trg_sym):
802  # Rename model atoms according to symmetry
803  trg_atom = ref_bs_ent_ligand.atoms[trg_anum]
804  mdl_atom = mdl_bs_ent_ligand.atoms[mdl_anum]
805  mdl_editor.RenameAtom(mdl_atom, trg_atom.name)
806  mdl_editor.UpdateICS()
807 
808  global_lddt, local_lddt = lddt_scorer.lDDT(
809  mdl_bs_ent, chain_mapping={"A": "A", "_": "_"},
810  no_intrachain=True,
811  check_resnames=self.check_resnamescheck_resnames)
812 
813  its_awesome = (best_lddt_result is None) or \
814  (global_lddt > best_lddt_result["lddt_pli"])
815 
816  # additionally consider rmsd as tiebreaker
817  if (not its_awesome) and (global_lddt == best_lddt_result["lddt_pli"]):
818  rmsd_cache_key = (target_ligand.handle.hash_code,
819  model_ligand.handle.hash_code, r_i)
820  rmsd = self._rmsd_cache_rmsd_cache[rmsd_cache_key]
821  if rmsd < best_lddt_result["rmsd"]:
822  its_awesome = True
823 
824  if its_awesome:
825  rmsd_cache_key = (target_ligand.handle.hash_code,
826  model_ligand.handle.hash_code, r_i)
827  best_lddt_result = {"lddt_pli": global_lddt,
828  "lddt_lp": r.lDDT,
829  "lddt_pli_n_contacts": lddt_tot,
830  "rmsd": self._rmsd_cache_rmsd_cache[rmsd_cache_key],
831  "bs_ref_res": r.substructure.residues,
832  "bs_ref_res_mapped": r.ref_residues,
833  "bs_mdl_res_mapped": r.mdl_residues,
834  "bb_rmsd": r.bb_rmsd,
835  "target_ligand": target_ligand,
836  "model_ligand": model_ligand,
837  "chain_mapping": r.GetFlatChainMapping(),
838  "transform": r.transform,
839  "inconsistent_residues": r.inconsistent_residues}
840 
841  return best_lddt_result
842 
843 
844  @staticmethod
845  def _find_ligand_assignment(mat1, mat2=None, coverage=None, coverage_delta=None):
846  """ Find the ligand assignment based on mat1. If mat2 is provided, it
847  will be used to break ties in mat1. If mat2 is not provided, ties will
848  be resolved by taking the first match arbitrarily.
849 
850  Both mat1 and mat2 should "look" like RMSD - ie be between inf (bad)
851  and 0 (good).
852  """
853  # We will modify mat1 and mat2, so make copies of it first
854  mat1 = np.copy(mat1)
855  if mat2 is None:
856  mat2 = np.copy(mat1)
857  mat2[~np.isnan(mat2)] = np.inf
858  else:
859  mat2 = np.copy(mat2)
860  if coverage is None:
861  coverage = np.copy(mat1)
862  coverage[:] = 1 # Assume full coverage by default
863  else:
864  coverage = np.copy(coverage)
865 
866  assignments = []
867  if 0 in mat1.shape:
868  # No model or target ligand
869  LogDebug("No model or target ligand, returning no assignment.")
870  return assignments
871 
872  def _get_best_match(mat1_val, coverage_val):
873  """ Extract the row/column indices of the prediction matching the
874  given values."""
875  mat1_match_idx = np.argwhere((mat1 == mat1_val) & (coverage >= coverage_val))
876  # Multiple "best" - use mat2 to disambiguate
877  if len(mat1_match_idx) > 1:
878  # Get the values of mat2 at these positions
879  best_mat2_match = [mat2[tuple(x)] for x in mat1_match_idx]
880  # Find the index of the best mat2
881  # Note: argmin returns the first value which is min.
882  best_mat2_idx = np.array(best_mat2_match).argmin()
883  # Now get the original indices
884  return mat1_match_idx[best_mat2_idx]
885  else:
886  return mat1_match_idx[0]
887 
888  # First only consider top coverage matches.
889  min_coverage = np.max(coverage)
890  i = mat1.size + 1
891  while min_coverage > 0 and not np.all(np.isnan(mat1)):
892  LogVerbose("Looking for matches with coverage >= %s" % min_coverage)
893  min_mat1 = LigandScorer._nanmin_nowarn(mat1, coverage < min_coverage)
894  while not np.isnan(min_mat1):
895  max_i_trg, max_i_mdl = _get_best_match(min_mat1, min_coverage)
896 
897  # Would we have a match for this model ligand with higher score
898  # but lower coverage?
899  alternative_matches = (mat1[:, max_i_mdl] < min_mat1) & (
900  coverage[:, max_i_mdl] > (min_coverage - coverage_delta))
901  if np.any(alternative_matches):
902  # Get the scores of these matches
903  LogVerbose("Found match with lower coverage but better score")
904  min_mat1 = np.nanmin(mat1[alternative_matches])
905  max_i_trg, max_i_mdl = _get_best_match(min_mat1, min_coverage - coverage_delta)
906 
907  # Disable row and column
908  mat1[max_i_trg, :] = np.nan
909  mat1[:, max_i_mdl] = np.nan
910  mat2[max_i_trg, :] = np.nan
911  mat2[:, max_i_mdl] = np.nan
912  coverage[max_i_trg, :] = -np.inf
913  coverage[:, max_i_mdl] = -np.inf
914 
915  # Save
916  assignments.append((max_i_trg, max_i_mdl))
917 
918  # Recompute min
919  min_mat1 = LigandScorer._nanmin_nowarn(mat1, coverage < min_coverage)
920  if i < 0:
921  raise Exception("Ligand scoring bug: hit appatent infinite loop!")
922  i -= 1
923  # Recompute min_coverage
924  min_coverage = np.max(coverage)
925  if i < 0:
926  raise Exception("Ligand scoring bug: hit appatent infinite loop!")
927  i -= 1
928  return assignments
929 
930  @staticmethod
931  def _nanmin_nowarn(array, mask):
932  """Compute np.nanmin but ignore the RuntimeWarning."""
933  masked_array = np_ma.masked_array(array, mask=mask)
934  with warnings.catch_warnings(): # RuntimeWarning: All-NaN slice encountered
935  warnings.simplefilter("ignore")
936  min = np.nanmin(masked_array, )
937  if np_ma.is_masked(min):
938  return np.nan # Everything was masked
939  else:
940  return min
941 
942  @staticmethod
943  def _reverse_lddt(lddt):
944  """Reverse lDDT means turning it from a number between 0 and 1 to a
945  number between infinity and 0 (0 being better).
946 
947  In practice, this is 1/lDDT. If lDDT is 0, the result is infinity.
948  """
949  with warnings.catch_warnings(): # RuntimeWarning: divide by zero
950  warnings.simplefilter("ignore")
951  return np.float64(1) / lddt
952 
953  def _assign_ligands_rmsd(self):
954  """Assign (map) ligands between model and target.
955 
956  Sets self._rmsd and self._rmsd_details.
957  """
958  mat2 = self._reverse_lddt_reverse_lddt(self.lddt_pli_matrixlddt_pli_matrix)
959 
960  mat_tuple = self._assign_matrices_assign_matrices(self.rmsd_matrixrmsd_matrix,
961  mat2,
962  self._rmsd_full_matrix_rmsd_full_matrix,
963  "rmsd")
964  self._rmsd_rmsd = mat_tuple[0]
965  self._rmsd_details_rmsd_details = mat_tuple[1]
966  # Ignore unassigned ligands - they are dealt with in lddt_pli.
967  # So the following lines should stay commented out:
968  # self._unassigned_target_ligands = mat_tuple[2]
969  # self._unassigned_model_ligands = mat_tuple[3]
970 
971  def _assign_matrices(self, mat1, mat2, data, main_key):
972  """
973  Perform the ligand assignment, ie find the mapping between model and
974  target ligands.
975 
976  The algorithm starts by assigning the "best" mapping, and then discards
977  the target and model ligands (row, column) so that every model ligand
978  can be assigned to a single target ligand, and every target ligand
979  is only assigned to a single model ligand. Repeat until there is
980  nothing left to assign.
981 
982  In case of a tie in values in `mat1`, it uses `mat2` to break the tie.
983 
984  This algorithm doesn't guarantee a globally optimal assignment.
985 
986  Both `mat1` and `mat2` should contain values between 0 and infinity,
987  with lower values representing better scores. Use the
988  :meth:`_reverse_lddt` method to convert lDDT values to such a score.
989 
990  :param mat1: the main ligand assignment criteria (RMSD or lDDT-PLI)
991  :param mat2: the secondary ligand assignment criteria (lDDT-PLI or RMSD)
992  :param data: the data (either self._rmsd_full_matrix or self._lddt_pli_matrix)
993  :param main_key: the key of data (dictionnaries within `data`) to
994  assign into out_main.
995  :return: a tuple with 2 dictionaries of matrices containing the main
996  data, and details, respectively.
997  """
998  assignments = self._find_ligand_assignment_find_ligand_assignment(mat1, mat2,
999  self._assignment_match_coverage_assignment_match_coverage,
1000  self.coverage_deltacoverage_delta)
1001  out_main = {}
1002  out_details = {}
1003  assigned_trg = [False] * len(self.target_ligandstarget_ligands)
1004  assigned_mdl = [False] * len(self.model_ligandsmodel_ligands)
1005  for assignment in assignments:
1006  trg_idx, mdl_idx = assignment
1007  assigned_mdl[mdl_idx] = True
1008  assigned_trg[trg_idx] = True
1009  mdl_lig = self.model_ligandsmodel_ligands[mdl_idx]
1010  mdl_cname = mdl_lig.chain.name
1011  mdl_resnum = mdl_lig.number
1012  if mdl_cname not in out_main:
1013  out_main[mdl_cname] = {}
1014  out_details[mdl_cname] = {}
1015  out_main[mdl_cname][mdl_resnum] = data[
1016  trg_idx, mdl_idx][main_key]
1017  out_details[mdl_cname][mdl_resnum] = data[
1018  trg_idx, mdl_idx]
1019 
1020  unassigned_trg, unassigned_mdl = self._assign_unassigned_assign_unassigned(
1021  assigned_trg, assigned_mdl, [out_main], [out_details], [main_key])
1022  return out_main, out_details, unassigned_trg, unassigned_mdl
1023 
1024  def _assign_unassigned(self, assigned_trg, assigned_mdl,
1025  out_main, out_details, main_key):
1026  unassigned_trg = {}
1027  unassigned_mdl = {}
1028 
1029  unassigned_trg_idx = [i for i, x in enumerate(assigned_trg) if not x]
1030  unassigned_mdl_idx = [i for i, x in enumerate(assigned_mdl) if not x]
1031 
1032  for mdl_idx in unassigned_mdl_idx:
1033  mdl_lig = self.model_ligandsmodel_ligands[mdl_idx]
1034  reason = self._find_unassigned_model_ligand_reason_find_unassigned_model_ligand_reason(mdl_lig, check=False)
1035  mdl_cname = mdl_lig.chain.name
1036  mdl_resnum = mdl_lig.number
1037  if mdl_cname not in unassigned_mdl:
1038  unassigned_mdl[mdl_cname] = {}
1039  unassigned_mdl[mdl_cname][mdl_resnum] = reason
1040  if self.unassignedunassigned:
1041  for i, _ in enumerate(out_main):
1042  if mdl_cname not in out_main[i]:
1043  out_main[i][mdl_cname] = {}
1044  out_details[i][mdl_cname] = {}
1045  out_main[i][mdl_cname][mdl_resnum] = None
1046  out_details[i][mdl_cname][mdl_resnum] = {
1047  "unassigned": True,
1048  "reason_short": reason[0],
1049  "reason_long": reason[1],
1050  main_key[i]: None,
1051  }
1052  LogInfo("Model ligand %s is unassigned: %s" % (
1053  mdl_lig.qualified_name, reason[1]))
1054 
1055  for trg_idx in unassigned_trg_idx:
1056  trg_lig = self.target_ligandstarget_ligands[trg_idx]
1057  reason = self._find_unassigned_target_ligand_reason_find_unassigned_target_ligand_reason(trg_lig, check=False)
1058  trg_cname = trg_lig.chain.name
1059  trg_resnum = trg_lig.number
1060  if trg_cname not in unassigned_trg:
1061  unassigned_trg[trg_cname] = {}
1062  unassigned_trg[trg_cname][trg_resnum] = reason
1063  LogInfo("Target ligand %s is unassigned: %s" % (
1064  trg_lig.qualified_name, reason[1]))
1065 
1066  return unassigned_trg, unassigned_mdl
1067 
1068 
1069  def _assign_matrix(self, mat, data1, main_key1, data2, main_key2):
1070  """
1071  Perform the ligand assignment, ie find the mapping between model and
1072  target ligands, based on a single matrix
1073 
1074  The algorithm starts by assigning the "best" mapping, and then discards
1075  the target and model ligands (row, column) so that every model ligand
1076  can be assigned to a single target ligand, and every target ligand
1077  is only assigned to a single model ligand. Repeat until there is
1078  nothing left to assign.
1079 
1080  This algorithm doesn't guarantee a globally optimal assignment.
1081 
1082  `mat` should contain values between 0 and infinity,
1083  with lower values representing better scores. Use the
1084  :meth:`_reverse_lddt` method to convert lDDT values to such a score.
1085 
1086  :param mat: the ligand assignment criteria (RMSD or lDDT-PLI)
1087  :param data1: the first data (either self._rmsd_full_matrix or self._lddt_pli_matrix)
1088  :param main_key1: the first key of data (dictionnaries within `data`) to
1089  assign into out_main.
1090  :param data2: the second data (either self._rmsd_full_matrix or self._lddt_pli_matrix)
1091  :param main_key2: the second key of data (dictionnaries within `data`) to
1092  assign into out_main.
1093  :return: a tuple with 4 dictionaries of matrices containing the main
1094  data1, details1, main data2 and details2, respectively.
1095  """
1096  assignments = self._find_ligand_assignment_find_ligand_assignment(mat,
1097  coverage=self._assignment_match_coverage_assignment_match_coverage,
1098  coverage_delta=self.coverage_deltacoverage_delta)
1099  out_main1 = {}
1100  out_details1 = {}
1101  out_main2 = {}
1102  out_details2 = {}
1103  assigned_trg = [False] * len(self.target_ligandstarget_ligands)
1104  assigned_mdl = [False] * len(self.model_ligandsmodel_ligands)
1105  for assignment in assignments:
1106  trg_idx, mdl_idx = assignment
1107  assigned_mdl[mdl_idx] = True
1108  assigned_trg[trg_idx] = True
1109  mdl_lig = self.model_ligandsmodel_ligands[mdl_idx]
1110  mdl_cname = mdl_lig.chain.name
1111  mdl_resnum = mdl_lig.number
1112  # Data 1
1113  if mdl_cname not in out_main1:
1114  out_main1[mdl_cname] = {}
1115  out_details1[mdl_cname] = {}
1116  out_main1[mdl_cname][mdl_resnum] = data1[
1117  trg_idx, mdl_idx][main_key1]
1118  out_details1[mdl_cname][mdl_resnum] = data1[
1119  trg_idx, mdl_idx]
1120  # Data2
1121  if mdl_cname not in out_main2:
1122  out_main2[mdl_cname] = {}
1123  out_details2[mdl_cname] = {}
1124  out_main2[mdl_cname][mdl_resnum] = data2[
1125  trg_idx, mdl_idx][main_key2]
1126  out_details2[mdl_cname][mdl_resnum] = data2[
1127  trg_idx, mdl_idx]
1128 
1129  unassigned_trg, unassigned_mdl = self._assign_unassigned_assign_unassigned(
1130  assigned_trg, assigned_mdl,
1131  [out_main1, out_main2], [out_details1, out_details2],
1132  [main_key1, main_key2])
1133 
1134  return out_main1, out_details1, out_main2, out_details2, \
1135  unassigned_trg, unassigned_mdl
1136 
1137  def _assign_ligands_lddt_pli(self):
1138  """ Assign ligands based on lDDT-PLI.
1139 
1140  Sets self._lddt_pli and self._lddt_pli_details.
1141  """
1142  mat1 = self._reverse_lddt_reverse_lddt(self.lddt_pli_matrixlddt_pli_matrix)
1143 
1144  mat_tuple = self._assign_matrices_assign_matrices(mat1,
1145  self.rmsd_matrixrmsd_matrix,
1146  self._lddt_pli_full_matrix_lddt_pli_full_matrix,
1147  "lddt_pli")
1148  self._lddt_pli_lddt_pli = mat_tuple[0]
1149  self._lddt_pli_details_lddt_pli_details = mat_tuple[1]
1150  self._unassigned_target_ligands_unassigned_target_ligands = mat_tuple[2]
1151  self._unassigned_model_ligands_unassigned_model_ligands = mat_tuple[3]
1152 
1153  def _assign_ligands_rmsd_only(self):
1154  """Assign (map) ligands between model and target based on RMSD only.
1155 
1156  Sets self._rmsd, self._rmsd_details, self._lddt_pli and
1157  self._lddt_pli_details.
1158  """
1159  mat_tuple = self._assign_matrix_assign_matrix(self.rmsd_matrixrmsd_matrix,
1160  self._rmsd_full_matrix_rmsd_full_matrix,
1161  "rmsd",
1162  self._lddt_pli_full_matrix_lddt_pli_full_matrix,
1163  "lddt_pli")
1164  self._rmsd_rmsd = mat_tuple[0]
1165  self._rmsd_details_rmsd_details = mat_tuple[1]
1166  self._lddt_pli_lddt_pli = mat_tuple[2]
1167  self._lddt_pli_details_lddt_pli_details = mat_tuple[3]
1168  self._unassigned_target_ligands_unassigned_target_ligands = mat_tuple[4]
1169  self._unassigned_model_ligands_unassigned_model_ligands = mat_tuple[5]
1170 
1171  @property
1172  def rmsd_matrix(self):
1173  """ Get the matrix of RMSD values.
1174 
1175  Target ligands are in rows, model ligands in columns.
1176 
1177  NaN values indicate that no RMSD could be computed (i.e. different
1178  ligands).
1179 
1180  :rtype: :class:`~numpy.ndarray`
1181  """
1182  if self._rmsd_full_matrix_rmsd_full_matrix is None:
1183  self._compute_scores_compute_scores()
1184  if self._rmsd_matrix_rmsd_matrix is None:
1185  # convert
1186  shape = self._rmsd_full_matrix_rmsd_full_matrix.shape
1187  self._rmsd_matrix_rmsd_matrix = np.full(shape, np.nan)
1188  for i, j in np.ndindex(shape):
1189  if self._rmsd_full_matrix_rmsd_full_matrix[i, j] is not None:
1190  self._rmsd_matrix_rmsd_matrix[i, j] = self._rmsd_full_matrix_rmsd_full_matrix[
1191  i, j]["rmsd"]
1192  return self._rmsd_matrix_rmsd_matrix
1193 
1194  @property
1195  def lddt_pli_matrix(self):
1196  """ Get the matrix of lDDT-PLI values.
1197 
1198  Target ligands are in rows, model ligands in columns.
1199 
1200  NaN values indicate that no lDDT-PLI could be computed (i.e. different
1201  ligands).
1202 
1203  :rtype: :class:`~numpy.ndarray`
1204  """
1205  if self._lddt_pli_full_matrix_lddt_pli_full_matrix is None:
1206  self._compute_scores_compute_scores()
1207  if self._lddt_pli_matrix_lddt_pli_matrix is None:
1208  # convert
1209  shape = self._lddt_pli_full_matrix_lddt_pli_full_matrix.shape
1210  self._lddt_pli_matrix_lddt_pli_matrix = np.full(shape, np.nan)
1211  for i, j in np.ndindex(shape):
1212  if self._lddt_pli_full_matrix_lddt_pli_full_matrix[i, j] is not None:
1213  self._lddt_pli_matrix_lddt_pli_matrix[i, j] = self._lddt_pli_full_matrix_lddt_pli_full_matrix[
1214  i, j]["lddt_pli"]
1215  return self._lddt_pli_matrix_lddt_pli_matrix
1216 
1217  @property
1218  def coverage_matrix(self):
1219  """ Get the matrix of model ligand atom coverage in the target.
1220 
1221  Target ligands are in rows, model ligands in columns.
1222 
1223  A value of 0 indicates that there was no isomorphism between the model
1224  and target ligands. If `substructure_match=False`, only full match
1225  isomorphisms are considered, and therefore only values of 1.0 and 0.0
1226  are reported.
1227 
1228  :rtype: :class:`~numpy.ndarray`
1229  """
1230  if self._assignment_match_coverage_assignment_match_coverage is None:
1231  self._compute_scores_compute_scores()
1232  return self._assignment_match_coverage_assignment_match_coverage
1233 
1234  @property
1235  def rmsd(self):
1236  """Get a dictionary of RMSD score values, keyed by model ligand
1237  (chain name, :class:`~ost.mol.ResNum`).
1238 
1239  If the scoring object was instantiated with `unassigned=True`, some
1240  scores may be `None`.
1241 
1242  :rtype: :class:`dict`
1243  """
1244  if self._rmsd_rmsd is None:
1245  if self.rmsd_assignmentrmsd_assignment:
1246  self._assign_ligands_rmsd_only_assign_ligands_rmsd_only()
1247  else:
1248  self._assign_ligands_rmsd_assign_ligands_rmsd()
1249  return self._rmsd_rmsd
1250 
1251  @property
1252  def rmsd_details(self):
1253  """Get a dictionary of RMSD score details (dictionaries), keyed by
1254  model ligand (chain name, :class:`~ost.mol.ResNum`).
1255 
1256  The value is a dictionary. For ligands that were assigned (mapped) to
1257  the target, the dictionary contain the following information:
1258 
1259  * `rmsd`: the RMSD score value.
1260  * `lddt_lp`: the lDDT score of the ligand pocket (lDDT-LP).
1261  * `bs_ref_res`: a list of residues (:class:`~ost.mol.ResidueHandle`)
1262  that define the binding site in the reference.
1263  * `bs_ref_res_mapped`: a list of residues
1264  (:class:`~ost.mol.ResidueHandle`) in the reference binding site
1265  that could be mapped to the model.
1266  * `bs_mdl_res_mapped`: a list of residues
1267  (:class:`~ost.mol.ResidueHandle`) in the model that were mapped to
1268  the reference binding site. The residues are in the same order as
1269  `bs_ref_res_mapped`.
1270  * `bb_rmsd`: the RMSD of the binding site backbone after superposition
1271  * `target_ligand`: residue handle of the target ligand.
1272  * `model_ligand`: residue handle of the model ligand.
1273  * `chain_mapping`: local chain mapping as a dictionary, with target
1274  chain name as key and model chain name as value.
1275  * `transform`: transformation to superpose the model onto the target.
1276  * `substructure_match`: whether the score is the result of a partial
1277  (substructure) match. A value of `True` indicates that the target
1278  ligand covers only part of the model, while `False` indicates a
1279  perfect match.
1280  * `coverage`: the fraction of model atoms covered by the assigned
1281  target ligand, in the interval (0, 1]. If `substructure_match`
1282  is `False`, this will always be 1.
1283  * `inconsistent_residues`: a list of tuples of mapped residues views
1284  (:class:`~ost.mol.ResidueView`) with residue names that differ
1285  between the reference and the model, respectively.
1286  The list is empty if all residue names match, which is guaranteed
1287  if `check_resnames=True`.
1288  Note: more binding site mappings may be explored during scoring,
1289  but only inconsistencies in the selected mapping are reported.
1290  * `unassigned`: only if the scorer was instantiated with
1291  `unassigned=True`: `False`
1292 
1293  If the scoring object was instantiated with `unassigned=True`, in
1294  addition the unassigned ligands will be reported with a score of `None`
1295  and the following information:
1296 
1297  * `unassigned`: `True`,
1298  * `reason_short`: a short token of the reason, see
1299  :attr:`unassigned_model_ligands` for details and meaning.
1300  * `reason_long`: a human-readable text of the reason, see
1301  :attr:`unassigned_model_ligands` for details and meaning.
1302  * `rmsd`: `None`
1303 
1304  :rtype: :class:`dict`
1305  """
1306  if self._rmsd_details_rmsd_details is None:
1307  if self.rmsd_assignmentrmsd_assignment:
1308  self._assign_ligands_rmsd_only_assign_ligands_rmsd_only()
1309  else:
1310  self._assign_ligands_rmsd_assign_ligands_rmsd()
1311  return self._rmsd_details_rmsd_details
1312 
1313  @property
1314  def lddt_pli(self):
1315  """Get a dictionary of lDDT-PLI score values, keyed by model ligand
1316  (chain name, :class:`~ost.mol.ResNum`).
1317 
1318  If the scoring object was instantiated with `unassigned=True`, some
1319  scores may be `None`.
1320 
1321  :rtype: :class:`dict`
1322  """
1323  if self._lddt_pli_lddt_pli is None:
1324  if self.rmsd_assignmentrmsd_assignment:
1325  self._assign_ligands_rmsd_only_assign_ligands_rmsd_only()
1326  else:
1327  self._assign_ligands_lddt_pli_assign_ligands_lddt_pli()
1328  return self._lddt_pli_lddt_pli
1329 
1330  @property
1331  def lddt_pli_details(self):
1332  """Get a dictionary of lDDT-PLI score details (dictionaries), keyed by
1333  model ligand (chain name, :class:`~ost.mol.ResNum`).
1334 
1335  Each sub-dictionary contains the following information:
1336 
1337  * `lddt_pli`: the lDDT-PLI score value.
1338  * `rmsd`: the RMSD score value corresponding to the lDDT-PLI
1339  chain mapping and assignment. This may differ from the RMSD-based
1340  assignment. Note that a different isomorphism than `lddt_pli` may
1341  be used.
1342  * `lddt_lp`: the lDDT score of the ligand pocket (lDDT-LP).
1343  * `lddt_pli_n_contacts`: number of total contacts used in lDDT-PLI,
1344  summed over all thresholds. Can be divided by 8 to obtain the number
1345  of atomic contacts.
1346  * `bs_ref_res`: a list of residues (:class:`~ost.mol.ResidueHandle`)
1347  that define the binding site in the reference.
1348  * `bs_ref_res_mapped`: a list of residues
1349  (:class:`~ost.mol.ResidueHandle`) in the reference binding site
1350  that could be mapped to the model.
1351  * `bs_mdl_res_mapped`: a list of residues
1352  (:class:`~ost.mol.ResidueHandle`) in the model that were mapped to
1353  the reference binding site. The residues are in the same order as
1354  `bs_ref_res_mapped`.
1355  * `bb_rmsd`: the RMSD of the binding site backbone after superposition.
1356  Note: not used for lDDT-PLI computation.
1357  * `target_ligand`: residue handle of the target ligand.
1358  * `model_ligand`: residue handle of the model ligand.
1359  * `chain_mapping`: local chain mapping as a dictionary, with target
1360  chain name as key and model chain name as value.
1361  * `transform`: transformation to superpose the model onto the target
1362  (for RMSD only).
1363  * `substructure_match`: whether the score is the result of a partial
1364  (substructure) match. A value of `True` indicates that the target
1365  ligand covers only part of the model, while `False` indicates a
1366  perfect match.
1367  * `inconsistent_residues`: a list of tuples of mapped residues views
1368  (:class:`~ost.mol.ResidueView`) with residue names that differ
1369  between the reference and the model, respectively.
1370  The list is empty if all residue names match, which is guaranteed
1371  if `check_resnames=True`.
1372  Note: more binding site mappings may be explored during scoring,
1373  but only inconsistencies in the selected mapping are reported.
1374  * `unassigned`: only if the scorer was instantiated with
1375  `unassigned=True`: `False`
1376 
1377  If the scoring object was instantiated with `unassigned=True`, in
1378  addition the unmapped ligands will be reported with a score of `None`
1379  and the following information:
1380 
1381  * `unassigned`: `True`,
1382  * `reason_short`: a short token of the reason, see
1383  :attr:`unassigned_model_ligands` for details and meaning.
1384  * `reason_long`: a human-readable text of the reason, see
1385  :attr:`unassigned_model_ligands` for details and meaning.
1386  * `lddt_pli`: `None`
1387 
1388  :rtype: :class:`dict`
1389  """
1390  if self._lddt_pli_details_lddt_pli_details is None:
1391  if self.rmsd_assignmentrmsd_assignment:
1392  self._assign_ligands_rmsd_only_assign_ligands_rmsd_only()
1393  else:
1394  self._assign_ligands_lddt_pli_assign_ligands_lddt_pli()
1395  return self._lddt_pli_details_lddt_pli_details
1396 
1397  @property
1399  """Get a dictionary of target ligands not assigned to any model ligand,
1400  keyed by target ligand (chain name, :class:`~ost.mol.ResNum`).
1401 
1402  The assignment for the lDDT-PLI score is used (and is controlled
1403  by the `rmsd_assignment` argument).
1404 
1405  Each item contains a string from a controlled dictionary
1406  about the reason for the absence of assignment.
1407  A human-readable description can be obtained from the
1408  :attr:`unassigned_target_ligand_descriptions` property.
1409 
1410  Currently, the following reasons are reported:
1411 
1412  * `no_ligand`: there was no ligand in the model.
1413  * `disconnected`: the ligand graph was disconnected.
1414  * `binding_site`: no residues were in proximity of the ligand.
1415  * `model_representation`: no representation of the reference binding
1416  site was found in the model. (I.e. the binding site was not modeled,
1417  or the model ligand was positioned too far in combination with
1418  `full_bs_search=False`)
1419  * `identity`: the ligand was not found in the model (by graph
1420  isomorphism). Check your ligand connectivity, and enable the
1421  `substructure_match` option if the target ligand is incomplete.
1422  * `stoichiometry`: there was a possible assignment in the model, but
1423  the model ligand was already assigned to a different target ligand.
1424  This indicates different stoichiometries.
1425  * `symmetries`: too many symmetries were found (by graph isomorphisms).
1426  Increase `max_symmetries`.
1427  * `no_contact`: there were no lDDT contacts between the binding site
1428  and the ligand, and lDDT-PLI is undefined. Increase the value of
1429  `lddt_pli_radius` to at least the value of the binding site `radius`.
1430 
1431  Some of these reasons can be overlapping, but a single reason will be
1432  reported.
1433 
1434  :rtype: :class:`dict`
1435  """
1436  if self._unassigned_target_ligand_short_unassigned_target_ligand_short is None:
1437  if self.rmsd_assignmentrmsd_assignment:
1438  self._assign_ligands_rmsd_only_assign_ligands_rmsd_only()
1439  else:
1440  self._assign_ligands_lddt_pli_assign_ligands_lddt_pli()
1441  self._unassigned_target_ligand_short_unassigned_target_ligand_short = {}
1442  self._unassigned_target_ligand_descriptions_unassigned_target_ligand_descriptions = {}
1443  for cname, res in self._unassigned_target_ligands_unassigned_target_ligands.items():
1444  self._unassigned_target_ligand_short_unassigned_target_ligand_short[cname] = {}
1445  for resnum, val in res.items():
1446  self._unassigned_target_ligand_short_unassigned_target_ligand_short[cname][resnum] = val[0]
1447  self._unassigned_target_ligand_descriptions_unassigned_target_ligand_descriptions[val[0]] = val[1]
1448  return self._unassigned_target_ligand_short_unassigned_target_ligand_short
1449 
1450  @property
1452  """Get a human-readable description of why target ligands were
1453  unassigned, as a dictionary keyed by the controlled dictionary
1454  from :attr:`unassigned_target_ligands`.
1455  """
1456  if self._unassigned_target_ligand_descriptions_unassigned_target_ligand_descriptions is None:
1457  _ = self.unassigned_target_ligandsunassigned_target_ligands # assigned there
1458  return self._unassigned_target_ligand_descriptions_unassigned_target_ligand_descriptions
1459 
1460  @property
1462  """Get a dictionary of model ligands not assigned to any target ligand,
1463  keyed by model ligand (chain name, :class:`~ost.mol.ResNum`).
1464 
1465  The assignment for the lDDT-PLI score is used (and is controlled
1466  by the `rmsd_assignment` argument).
1467 
1468  Each item contains a string from a controlled dictionary
1469  about the reason for the absence of assignment.
1470  A human-readable description can be obtained from the
1471  :attr:`unassigned_model_ligand_descriptions` property.
1472  Currently, the following reasons are reported:
1473 
1474  * `no_ligand`: there was no ligand in the target.
1475  * `disconnected`: the ligand graph is disconnected.
1476  * `binding_site`: a potential assignment was found in the target, but
1477  there were no polymer residues in proximity of the ligand in the
1478  target.
1479  * `model_representation`: a potential assignment was found in the target,
1480  but no representation of the binding site was found in the model.
1481  (I.e. the binding site was not modeled, or the model ligand was
1482  positioned too far in combination with `full_bs_search=False`)
1483  * `identity`: the ligand was not found in the target (by graph
1484  isomorphism). Check your ligand connectivity, and enable the
1485  `substructure_match` option if the target ligand is incomplete.
1486  * `stoichiometry`: there was a possible assignment in the target, but
1487  the model target was already assigned to a different model ligand.
1488  This indicates different stoichiometries.
1489  * `symmetries`: too many symmetries were found (by graph isomorphisms).
1490  Increase `max_symmetries`.
1491  * `no_contact`: there were no lDDT contacts between the binding site
1492  and the ligand in the target structure, and lDDT-PLI is undefined.
1493  Increase the value of `lddt_pli_radius` to at least the value of the
1494  binding site `radius`.
1495 
1496  Some of these reasons can be overlapping, but a single reason will be
1497  reported.
1498 
1499  :rtype: :class:`dict`
1500  """
1501  if self._unassigned_model_ligand_short_unassigned_model_ligand_short is None:
1502  if self.rmsd_assignmentrmsd_assignment:
1503  self._assign_ligands_rmsd_only_assign_ligands_rmsd_only()
1504  else:
1505  self._assign_ligands_lddt_pli_assign_ligands_lddt_pli()
1506  self._unassigned_model_ligand_short_unassigned_model_ligand_short = {}
1507  self._unassigned_model_ligand_descriptions_unassigned_model_ligand_descriptions = {}
1508  for cname, res in self._unassigned_model_ligands_unassigned_model_ligands.items():
1509  self._unassigned_model_ligand_short_unassigned_model_ligand_short[cname] = {}
1510  for resnum, val in res.items():
1511  self._unassigned_model_ligand_short_unassigned_model_ligand_short[cname][resnum] = val[0]
1512  self._unassigned_model_ligand_descriptions_unassigned_model_ligand_descriptions[val[0]] = val[1]
1513  return self._unassigned_model_ligand_short_unassigned_model_ligand_short
1514 
1515  @property
1517  """Get a human-readable description of why model ligands were
1518  unassigned, as a dictionary keyed by the controlled dictionary
1519  from :attr:`unassigned_model_ligands`.
1520  """
1521  if self._unassigned_model_ligand_descriptions_unassigned_model_ligand_descriptions is None:
1522  _ = self.unassigned_model_ligandsunassigned_model_ligands # assigned there
1523  return self._unassigned_model_ligand_descriptions_unassigned_model_ligand_descriptions
1524 
1525 
1526  def _set_custom_mapping(self, mapping):
1527  """ sets self.__model_mapping with a full blown MappingResult object
1528 
1529  :param mapping: mapping with trg chains as key and mdl ch as values
1530  :type mapping: :class:`dict`
1531  """
1532  chain_mapper = self.chain_mapperchain_mapper
1533  chem_mapping, chem_group_alns, mdl = \
1534  chain_mapper.GetChemMapping(self.modelmodel)
1535 
1536  # now that we have a chem mapping, lets do consistency checks
1537  # - check whether chain names are unique and available in structures
1538  # - check whether the mapped chains actually map to the same chem groups
1539  if len(mapping) != len(set(mapping.keys())):
1540  raise RuntimeError(f"Expect unique trg chain names in mapping. Got "
1541  f"{mapping.keys()}")
1542  if len(mapping) != len(set(mapping.values())):
1543  raise RuntimeError(f"Expect unique mdl chain names in mapping. Got "
1544  f"{mapping.values()}")
1545 
1546  trg_chains = set([ch.GetName() for ch in chain_mapper.target.chains])
1547  mdl_chains = set([ch.GetName() for ch in mdl.chains])
1548  for k,v in mapping.items():
1549  if k not in trg_chains:
1550  raise RuntimeError(f"Target chain \"{k}\" is not available "
1551  f"in target processed for chain mapping "
1552  f"({trg_chains})")
1553  if v not in mdl_chains:
1554  raise RuntimeError(f"Model chain \"{v}\" is not available "
1555  f"in model processed for chain mapping "
1556  f"({mdl_chains})")
1557 
1558  for trg_ch, mdl_ch in mapping.items():
1559  trg_group_idx = None
1560  mdl_group_idx = None
1561  for idx, group in enumerate(chain_mapper.chem_groups):
1562  if trg_ch in group:
1563  trg_group_idx = idx
1564  break
1565  for idx, group in enumerate(chem_mapping):
1566  if mdl_ch in group:
1567  mdl_group_idx = idx
1568  break
1569  if trg_group_idx is None or mdl_group_idx is None:
1570  raise RuntimeError("Could not establish a valid chem grouping "
1571  "of chain names provided in custom mapping.")
1572 
1573  if trg_group_idx != mdl_group_idx:
1574  raise RuntimeError(f"Chem group mismatch in custom mapping: "
1575  f"target chain \"{trg_ch}\" groups with the "
1576  f"following chemically equivalent target "
1577  f"chains: "
1578  f"{chain_mapper.chem_groups[trg_group_idx]} "
1579  f"but model chain \"{mdl_ch}\" maps to the "
1580  f"following target chains: "
1581  f"{chain_mapper.chem_groups[mdl_group_idx]}")
1582 
1583  pairs = set([(trg_ch, mdl_ch) for trg_ch, mdl_ch in mapping.items()])
1584  ref_mdl_alns = \
1585  chain_mapping._GetRefMdlAlns(chain_mapper.chem_groups,
1586  chain_mapper.chem_group_alignments,
1587  chem_mapping,
1588  chem_group_alns,
1589  pairs = pairs)
1590 
1591  # translate mapping format
1592  final_mapping = list()
1593  for ref_chains in chain_mapper.chem_groups:
1594  mapped_mdl_chains = list()
1595  for ref_ch in ref_chains:
1596  if ref_ch in mapping:
1597  mapped_mdl_chains.append(mapping[ref_ch])
1598  else:
1599  mapped_mdl_chains.append(None)
1600  final_mapping.append(mapped_mdl_chains)
1601 
1602  alns = dict()
1603  for ref_group, mdl_group in zip(chain_mapper.chem_groups,
1604  final_mapping):
1605  for ref_ch, mdl_ch in zip(ref_group, mdl_group):
1606  if ref_ch is not None and mdl_ch is not None:
1607  aln = ref_mdl_alns[(ref_ch, mdl_ch)]
1608  trg_view = chain_mapper.target.Select(f"cname={mol.QueryQuoteName(ref_ch)}")
1609  mdl_view = mdl.Select(f"cname={mol.QueryQuoteName(mdl_ch)}")
1610  aln.AttachView(0, trg_view)
1611  aln.AttachView(1, mdl_view)
1612  alns[(ref_ch, mdl_ch)] = aln
1613 
1614  self.__model_mapping__model_mapping = chain_mapping.MappingResult(chain_mapper.target, mdl,
1615  chain_mapper.chem_groups,
1616  chem_mapping,
1617  final_mapping, alns)
1618 
1619  def _find_unassigned_model_ligand_reason(self, ligand, assignment="lddt_pli", check=True):
1620  # Is this a model ligand?
1621  try:
1622  ligand_idx = self.model_ligandsmodel_ligands.index(ligand)
1623  except ValueError:
1624  # Raise with a better error message
1625  raise ValueError("Ligand %s is not in self.model_ligands" % ligand)
1626 
1627  # Ensure we are unassigned
1628  if check:
1629  details = getattr(self, assignment + "_details")
1630  if ligand.chain.name in details and ligand.number in details[ligand.chain.name]:
1631  ligand_details = details[ligand.chain.name][ligand.number]
1632  if not ("unassigned" in ligand_details and ligand_details["unassigned"]):
1633  raise RuntimeError("Ligand %s is mapped to %s" % (ligand, ligand_details["target_ligand"]))
1634 
1635  # Were there any ligands in the target?
1636  if len(self.target_ligandstarget_ligands) == 0:
1637  return ("no_ligand", "No ligand in the target")
1638 
1639  # Is the ligand disconnected?
1640  graph = _ResidueToGraph(ligand)
1641  if not networkx.is_connected(graph):
1642  return ("disconnected", "Ligand graph is disconnected")
1643 
1644  # Do we have isomorphisms with the target?
1645  for trg_lig_idx, assigned in enumerate(self._assignment_isomorphisms_assignment_isomorphisms[:, ligand_idx]):
1646  if np.isnan(assigned):
1647  try:
1648  _ComputeSymmetries(
1649  self.model_ligandsmodel_ligands[ligand_idx],
1650  self.target_ligandstarget_ligands[trg_lig_idx],
1651  substructure_match=self.substructure_matchsubstructure_match,
1652  by_atom_index=True,
1653  return_symmetries=False)
1654  except (NoSymmetryError, DisconnectedGraphError):
1655  assigned = 0.
1656  except TooManySymmetriesError:
1657  assigned = -1.
1658  else:
1659  assigned = 1.
1660  self._assignment_isomorphisms_assignment_isomorphisms[trg_lig_idx,ligand_idx] = assigned
1661  if assigned == 1.:
1662  # Could have been assigned
1663  # So what's up with this target ligand?
1664  assignment_matrix = getattr(self, assignment + "_matrix")
1665  all_nan = np.all(np.isnan(assignment_matrix[:, ligand_idx]))
1666  if all_nan:
1667  # The assignment matrix is all nans so we have a problem
1668  # with the binding site or the representation
1669  trg_ligand = self.target_ligandstarget_ligands[trg_lig_idx]
1670  return self._unassigned_target_ligands_reason_unassigned_target_ligands_reason[trg_ligand]
1671  else:
1672  # Ligand was already assigned
1673  return ("stoichiometry",
1674  "Ligand was already assigned to an other "
1675  "model ligand (different stoichiometry)")
1676  elif assigned == -1:
1677  # Symmetries / isomorphisms exceeded limit
1678  return ("symmetries",
1679  "Too many symmetries were found.")
1680 
1681  # Could not be assigned to any ligand - must be different
1682  if self.substructure_matchsubstructure_match:
1683  iso = "subgraph isomorphism"
1684  else:
1685  iso = "full graph isomorphism"
1686  return ("identity", "Ligand was not found in the target (by %s)" % iso)
1687 
1688  def _find_unassigned_target_ligand_reason(self, ligand, assignment="lddt_pli", check=True):
1689  # Is this a target ligand?
1690  try:
1691  ligand_idx = self.target_ligandstarget_ligands.index(ligand)
1692  except ValueError:
1693  # Raise with a better error message
1694  raise ValueError("Ligand %s is not in self.target_ligands" % ligand)
1695 
1696  # Ensure we are unassigned
1697  if check:
1698  details = getattr(self, assignment + "_details")
1699  for cname, chain_ligands in details.items():
1700  for rnum, details in chain_ligands.items():
1701  if "unassigned" in details and details["unassigned"]:
1702  continue
1703  if details['target_ligand'] == ligand:
1704  raise RuntimeError("Ligand %s is mapped to %s.%s" % (
1705  ligand, cname, rnum))
1706 
1707  # Were there any ligands in the model?
1708  if len(self.model_ligandsmodel_ligands) == 0:
1709  return ("no_ligand", "No ligand in the model")
1710 
1711  # Is the ligand disconnected?
1712  graph = _ResidueToGraph(ligand)
1713  if not networkx.is_connected(graph):
1714  return ("disconnected", "Ligand graph is disconnected")
1715 
1716  # Is it because there was no valid binding site or no representation?
1717  if ligand in self._unassigned_target_ligands_reason_unassigned_target_ligands_reason:
1718  return self._unassigned_target_ligands_reason_unassigned_target_ligands_reason[ligand]
1719  # Or because no symmetry?
1720  for model_lig_idx, assigned in enumerate(
1721  self._assignment_isomorphisms_assignment_isomorphisms[ligand_idx, :]):
1722  if np.isnan(assigned):
1723  try:
1724  _ComputeSymmetries(
1725  self.model_ligandsmodel_ligands[model_lig_idx],
1726  self.target_ligandstarget_ligands[ligand_idx],
1727  substructure_match=self.substructure_matchsubstructure_match,
1728  by_atom_index=True,
1729  return_symmetries=False)
1730  except (NoSymmetryError, DisconnectedGraphError):
1731  assigned = 0.
1732  except TooManySymmetriesError:
1733  assigned = -1.
1734  else:
1735  assigned = 1.
1736  self._assignment_isomorphisms_assignment_isomorphisms[ligand_idx,model_lig_idx] = assigned
1737  if assigned == 1:
1738  # Could have been assigned but was assigned to a different ligand
1739  return ("stoichiometry",
1740  "Ligand was already assigned to an other "
1741  "target ligand (different stoichiometry)")
1742  elif assigned == -1:
1743  # Symmetries / isomorphisms exceeded limit
1744  return ("symmetries",
1745  "Too many symmetries were found.")
1746 
1747  # Could not be assigned to any ligand - must be different
1748  if self.substructure_matchsubstructure_match:
1749  iso = "subgraph isomorphism"
1750  else:
1751  iso = "full graph isomorphism"
1752  return ("identity", "Ligand was not found in the model (by %s)" % iso)
1753 
1754  @property
1755  def chem_mapping(self):
1756  if self._chem_mapping_chem_mapping is None:
1757  self._chem_mapping_chem_mapping, self._chem_group_alns_chem_group_alns, \
1758  self._chain_mapping_mdl_chain_mapping_mdl = \
1759  self.chain_mapperchain_mapper.GetChemMapping(self.modelmodel)
1760  return self._chem_mapping_chem_mapping
1761 
1762  @property
1763  def chem_group_alns(self):
1764  if self._chem_group_alns_chem_group_alns is None:
1765  self._chem_mapping_chem_mapping, self._chem_group_alns_chem_group_alns, \
1766  self._chain_mapping_mdl_chain_mapping_mdl = \
1767  self.chain_mapperchain_mapper.GetChemMapping(self.modelmodel)
1768  return self._chem_group_alns_chem_group_alns
1769 
1770  @property
1772  if self._chain_mapping_mdl_chain_mapping_mdl is None:
1773  self._chem_mapping_chem_mapping, self._chem_group_alns_chem_group_alns, \
1774  self._chain_mapping_mdl_chain_mapping_mdl = \
1775  self.chain_mapperchain_mapper.GetChemMapping(self.modelmodel)
1776  return self._chain_mapping_mdl_chain_mapping_mdl
1777 
1778  def get_get_repr_input(self, mdl_ligand):
1779  if mdl_ligand.handle.hash_code not in self._get_repr_input_get_repr_input:
1780 
1781  # figure out what chains in the model are in contact with the ligand
1782  # that may give a non-zero contribution to lDDT in
1783  # chain_mapper.GetRepr
1784  radius = self.model_bs_radiusmodel_bs_radius
1785  chains = set()
1786  for at in mdl_ligand.atoms:
1787  close_atoms = self.chain_mapping_mdlchain_mapping_mdl.FindWithin(at.GetPos(),
1788  radius)
1789  for close_at in close_atoms:
1790  chains.add(close_at.GetChain().GetName())
1791 
1792  if len(chains) > 0:
1793 
1794  # the chain mapping model which only contains close chains
1795  query = "cname="
1796  query += ','.join([mol.QueryQuoteName(x) for x in chains])
1797  mdl = self.chain_mapping_mdlchain_mapping_mdl.Select(query)
1798 
1799  # chem mapping which is reduced to the respective chains
1800  chem_mapping = list()
1801  for m in self.chem_mappingchem_mapping:
1802  chem_mapping.append([x for x in m if x in chains])
1803 
1804  self._get_repr_input_get_repr_input[mdl_ligand.handle.hash_code] = \
1805  (mdl, chem_mapping)
1806 
1807  else:
1808  self._get_repr_input_get_repr_input[mdl_ligand.handle.hash_code] = \
1809  (self.chain_mapping_mdlchain_mapping_mdl.CreateEmptyView(),
1810  [list() for _ in self.chem_mappingchem_mapping])
1811 
1812  return (self._get_repr_input_get_repr_input[mdl_ligand.hash_code][1],
1813  self.chem_group_alnschem_group_alns,
1814  self._get_repr_input_get_repr_input[mdl_ligand.hash_code][0])
1815 
1816 
1817  def get_repr(self, target_ligand, model_ligand):
1818 
1819  key = None
1820  if self.full_bs_searchfull_bs_search:
1821  # all possible binding sites, independent from actual model ligand
1822  key = (target_ligand.handle.hash_code, 0)
1823  else:
1824  key = (target_ligand.handle.hash_code, model_ligand.handle.hash_code)
1825 
1826  if key not in self._repr_repr:
1827  ref_bs = self.get_target_binding_siteget_target_binding_site(target_ligand)
1828  if self.global_chain_mappingglobal_chain_mapping:
1829  reprs = self.chain_mapperchain_mapper.GetRepr(
1830  ref_bs, self.modelmodel, inclusion_radius=self.lddt_lp_radiuslddt_lp_radius,
1831  global_mapping = self._model_mapping_model_mapping)
1832  elif self.full_bs_searchfull_bs_search:
1833  reprs = self.chain_mapperchain_mapper.GetRepr(
1834  ref_bs, self.modelmodel, inclusion_radius=self.lddt_lp_radiuslddt_lp_radius,
1835  topn=self.binding_sites_topnbinding_sites_topn)
1836  else:
1837  reprs = self.chain_mapperchain_mapper.GetRepr(ref_bs, self.modelmodel,
1838  inclusion_radius=self.lddt_lp_radiuslddt_lp_radius,
1839  topn=self.binding_sites_topnbinding_sites_topn,
1840  chem_mapping_result = self.get_get_repr_inputget_get_repr_input(model_ligand))
1841  self._repr_repr[key] = reprs
1842  if len(reprs) == 0:
1843  # whatever is in there already has precedence
1844  if target_ligand not in self._unassigned_target_ligands_reason_unassigned_target_ligands_reason:
1845  self._unassigned_target_ligands_reason_unassigned_target_ligands_reason[target_ligand] = (
1846  "model_representation",
1847  "No representation of the reference binding site was "
1848  "found in the model")
1849 
1850  return self._repr_repr[key]
1851 
1852 
1853 def _ResidueToGraph(residue, by_atom_index=False):
1854  """Return a NetworkX graph representation of the residue.
1855 
1856  :param residue: the residue from which to derive the graph
1857  :type residue: :class:`ost.mol.ResidueHandle` or
1858  :class:`ost.mol.ResidueView`
1859  :param by_atom_index: Set this parameter to True if you need the nodes to
1860  be labeled by atom index (within the residue).
1861  Otherwise, if False, the nodes will be labeled by
1862  atom names.
1863  :type by_atom_index: :class:`bool`
1864  :rtype: :class:`~networkx.classes.graph.Graph`
1865 
1866  Nodes are labeled with the Atom's uppercase :attr:`~ost.mol.AtomHandle.element`.
1867  """
1868  nxg = networkx.Graph()
1869 
1870  for atom in residue.atoms:
1871  nxg.add_node(atom.name, element=atom.element.upper())
1872 
1873  # This will list all edges twice - once for every atom of the pair.
1874  # But as of NetworkX 3.0 adding the same edge twice has no effect, so we're good.
1875  nxg.add_edges_from([(
1876  b.first.name,
1877  b.second.name) for a in residue.atoms for b in a.GetBondList()])
1878 
1879  if by_atom_index:
1880  nxg = networkx.relabel_nodes(nxg,
1881  {a: b for a, b in zip(
1882  [a.name for a in residue.atoms],
1883  range(len(residue.atoms)))},
1884  True)
1885  return nxg
1886 
1887 
1888 def SCRMSD(model_ligand, target_ligand, transformation=geom.Mat4(),
1889  substructure_match=False, max_symmetries=1e6):
1890  """Calculate symmetry-corrected RMSD.
1891 
1892  Binding site superposition must be computed separately and passed as
1893  `transformation`.
1894 
1895  :param model_ligand: The model ligand
1896  :type model_ligand: :class:`ost.mol.ResidueHandle` or
1897  :class:`ost.mol.ResidueView`
1898  :param target_ligand: The target ligand
1899  :type target_ligand: :class:`ost.mol.ResidueHandle` or
1900  :class:`ost.mol.ResidueView`
1901  :param transformation: Optional transformation to apply on each atom
1902  position of model_ligand.
1903  :type transformation: :class:`ost.geom.Mat4`
1904  :param substructure_match: Set this to True to allow partial target
1905  ligand.
1906  :type substructure_match: :class:`bool`
1907  :param max_symmetries: If more than that many isomorphisms exist, raise
1908  a :class:`TooManySymmetriesError`. This can only be assessed by
1909  generating at least that many isomorphisms and can take some time.
1910  :type max_symmetries: :class:`int`
1911  :rtype: :class:`float`
1912  :raises: :class:`NoSymmetryError` when no symmetry can be found,
1913  :class:`DisconnectedGraphError` when ligand graph is disconnected,
1914  :class:`TooManySymmetriesError` when more than `max_symmetries`
1915  isomorphisms are found.
1916  """
1917 
1918  symmetries = _ComputeSymmetries(model_ligand, target_ligand,
1919  substructure_match=substructure_match,
1920  by_atom_index=True,
1921  max_symmetries=max_symmetries)
1922  return _SCRMSD_symmetries(symmetries, model_ligand, target_ligand,
1923  transformation)
1924 
1925 
1926 def _SCRMSD_symmetries(symmetries, model_ligand, target_ligand,
1927  transformation):
1928  """Compute SCRMSD with pre-computed symmetries. Internal. """
1929 
1930  best_rmsd = np.inf
1931  for i, (trg_sym, mdl_sym) in enumerate(symmetries):
1932  # Prepare Entities for RMSD
1933  trg_lig_rmsd_ent = mol.CreateEntity()
1934  trg_lig_rmsd_editor = trg_lig_rmsd_ent.EditXCS()
1935  trg_lig_rmsd_chain = trg_lig_rmsd_editor.InsertChain("_")
1936  trg_lig_rmsd_res = trg_lig_rmsd_editor.AppendResidue(trg_lig_rmsd_chain, "LIG")
1937 
1938  mdl_lig_rmsd_ent = mol.CreateEntity()
1939  mdl_lig_rmsd_editor = mdl_lig_rmsd_ent.EditXCS()
1940  mdl_lig_rmsd_chain = mdl_lig_rmsd_editor.InsertChain("_")
1941  mdl_lig_rmsd_res = mdl_lig_rmsd_editor.AppendResidue(mdl_lig_rmsd_chain, "LIG")
1942 
1943  for mdl_anum, trg_anum in zip(mdl_sym, trg_sym):
1944  # Rename model atoms according to symmetry
1945  trg_atom = target_ligand.atoms[trg_anum]
1946  mdl_atom = model_ligand.atoms[mdl_anum]
1947  # Add atoms in the correct order to the RMSD entities
1948  trg_lig_rmsd_editor.InsertAtom(trg_lig_rmsd_res, trg_atom.name, trg_atom.pos)
1949  mdl_lig_rmsd_editor.InsertAtom(mdl_lig_rmsd_res, mdl_atom.name, mdl_atom.pos)
1950 
1951  trg_lig_rmsd_editor.UpdateICS()
1952  mdl_lig_rmsd_editor.UpdateICS()
1953 
1954  rmsd = mol.alg.CalculateRMSD(mdl_lig_rmsd_ent.CreateFullView(),
1955  trg_lig_rmsd_ent.CreateFullView(),
1956  transformation)
1957  if rmsd < best_rmsd:
1958  best_rmsd = rmsd
1959 
1960  return best_rmsd
1961 
1962 
1963 def _ComputeSymmetries(model_ligand, target_ligand, substructure_match=False,
1964  by_atom_index=False, return_symmetries=True,
1965  max_symmetries=1e6):
1966  """Return a list of symmetries (isomorphisms) of the model onto the target
1967  residues.
1968 
1969  :param model_ligand: The model ligand
1970  :type model_ligand: :class:`ost.mol.ResidueHandle` or
1971  :class:`ost.mol.ResidueView`
1972  :param target_ligand: The target ligand
1973  :type target_ligand: :class:`ost.mol.ResidueHandle` or
1974  :class:`ost.mol.ResidueView`
1975  :param substructure_match: Set this to True to allow partial ligands
1976  in the reference.
1977  :type substructure_match: :class:`bool`
1978  :param by_atom_index: Set this parameter to True if you need the symmetries
1979  to refer to atom index (within the residue).
1980  Otherwise, if False, the symmetries refer to atom
1981  names.
1982  :type by_atom_index: :class:`bool`
1983  :type return_symmetries: If Truthy, return the mappings, otherwise simply
1984  return True if a mapping is found (and raise if
1985  no mapping is found). This is useful to quickly
1986  find out if a mapping exist without the expensive
1987  step to find all the mappings.
1988  :type return_symmetries: :class:`bool`
1989  :param max_symmetries: If more than that many isomorphisms exist, raise
1990  a :class:`TooManySymmetriesError`. This can only be assessed by
1991  generating at least that many isomorphisms and can take some time.
1992  :type max_symmetries: :class:`int`
1993  :raises: :class:`NoSymmetryError` when no symmetry can be found;
1994  :class:`TooManySymmetriesError` when more than `max_symmetries`
1995  isomorphisms are found.
1996 
1997  """
1998 
1999  # Get the Graphs of the ligands
2000  model_graph = _ResidueToGraph(model_ligand, by_atom_index=by_atom_index)
2001  target_graph = _ResidueToGraph(target_ligand, by_atom_index=by_atom_index)
2002 
2003  if not networkx.is_connected(model_graph):
2004  raise DisconnectedGraphError("Disconnected graph for model ligand %s" % model_ligand)
2005  if not networkx.is_connected(target_graph):
2006  raise DisconnectedGraphError("Disconnected graph for target ligand %s" % target_ligand)
2007 
2008  # Note the argument order (model, target) which differs from spyrmsd.
2009  # This is because a subgraph of model is isomorphic to target - but not the opposite
2010  # as we only consider partial ligands in the reference.
2011  # Make sure to generate the symmetries correctly in the end
2012  gm = networkx.algorithms.isomorphism.GraphMatcher(
2013  model_graph, target_graph, node_match=lambda x, y:
2014  x["element"] == y["element"])
2015  if gm.is_isomorphic():
2016  if not return_symmetries:
2017  return True
2018  symmetries = []
2019  for i, isomorphism in enumerate(gm.isomorphisms_iter()):
2020  if i >= max_symmetries:
2021  raise TooManySymmetriesError(
2022  "Too many symmetries between %s and %s" % (
2023  str(model_ligand), str(target_ligand)))
2024  symmetries.append((list(isomorphism.values()), list(isomorphism.keys())))
2025  assert len(symmetries) > 0
2026  LogDebug("Found %s isomorphic mappings (symmetries)" % len(symmetries))
2027  elif gm.subgraph_is_isomorphic() and substructure_match:
2028  if not return_symmetries:
2029  return True
2030  symmetries = []
2031  for i, isomorphism in enumerate(gm.subgraph_isomorphisms_iter()):
2032  if i >= max_symmetries:
2033  raise TooManySymmetriesError(
2034  "Too many symmetries between %s and %s" % (
2035  str(model_ligand), str(target_ligand)))
2036  symmetries.append((list(isomorphism.values()), list(isomorphism.keys())))
2037  assert len(symmetries) > 0
2038  # Assert that all the atoms in the target are part of the substructure
2039  assert len(symmetries[0][0]) == len(target_ligand.atoms)
2040  LogDebug("Found %s subgraph isomorphisms (symmetries)" % len(symmetries))
2041  elif gm.subgraph_is_isomorphic():
2042  LogDebug("Found subgraph isomorphisms (symmetries), but"
2043  " ignoring because substructure_match=False")
2044  raise NoSymmetryError("No symmetry between %s and %s" % (
2045  str(model_ligand), str(target_ligand)))
2046  else:
2047  LogDebug("Found no isomorphic mappings (symmetries)")
2048  raise NoSymmetryError("No symmetry between %s and %s" % (
2049  str(model_ligand), str(target_ligand)))
2050 
2051  return symmetries
2052 
2053 class NoSymmetryError(ValueError):
2054  """Exception raised when no symmetry can be found.
2055  """
2056  pass
2057 
2058 
2059 class TooManySymmetriesError(ValueError):
2060  """Exception raised when too many symmetries are found.
2061  """
2062  pass
2063 
2064 class DisconnectedGraphError(Exception):
2065  """Exception raised when the ligand graph is disconnected.
2066  """
2067  pass
2068 
2069 
2070 __all__ = ["LigandScorer", "SCRMSD", "NoSymmetryError",
2071  "TooManySymmetriesError", "DisconnectedGraphError"]
Protein or molecule.
definition of EntityView
Definition: entity_view.hh:86
def _find_unassigned_target_ligand_reason(self, ligand, assignment="lddt_pli", check=True)
def _build_binding_site_entity(ligand, residues, extra_residues=[])
def _assign_matrices(self, mat1, mat2, data, main_key)
def _find_unassigned_model_ligand_reason(self, ligand, assignment="lddt_pli", check=True)
def _prepare_ligands(new_entity, old_entity, ligands, rename_chain)
def get_target_binding_site(self, target_ligand)
def _compute_lddtpli(self, symmetries, target_ligand, model_ligand)
def _find_ligand_assignment(mat1, mat2=None, coverage=None, coverage_delta=None)
def _assign_matrix(self, mat, data1, main_key1, data2, main_key2)
def get_repr(self, target_ligand, model_ligand)
def _compute_rmsd(self, symmetries, target_ligand, model_ligand)
def _assign_unassigned(self, assigned_trg, assigned_mdl, out_main, out_details, main_key)
def __init__(self, model, target, model_ligands=None, target_ligands=None, resnum_alignments=False, check_resnames=True, rename_ligand_chain=False, chain_mapper=None, substructure_match=False, coverage_delta=0.2, radius=4.0, lddt_pli_radius=6.0, lddt_lp_radius=10.0, model_bs_radius=20, binding_sites_topn=100000, global_chain_mapping=False, rmsd_assignment=False, n_max_naive=12, max_symmetries=1e5, custom_mapping=None, unassigned=False, full_bs_search=False)
def SCRMSD(model_ligand, target_ligand, transformation=geom.Mat4(), substructure_match=False, max_symmetries=1e6)