4 import numpy.ma
as np_ma
9 from ost
import LogError, LogWarning, LogScript, LogInfo, LogVerbose, LogDebug
14 """ Scorer class to compute various small molecule ligand (non polymer) scores.
19 - Python modules `numpy` and `networkx` must be available
20 (e.g. use ``pip install numpy networkx``)
22 At the moment, two scores are available:
24 * lDDT-PLI, that looks at the conservation of protein-ligand contacts
25 with :class:`lDDT <ost.mol.alg.lddt.lDDTScorer>`.
26 * Binding-site superposed, symmetry-corrected RMSD that assesses the
27 accuracy of the ligand pose (BiSyRMSD, hereinafter referred to as RMSD).
29 Both scores involve local chain mapping of the reference binding site
30 onto the model, symmetry-correction, and finally assignment (mapping)
31 of model and target ligands, as described in (Manuscript in preparation).
33 The binding site is defined based on a radius around the target ligand
34 in the reference structure only. It only contains protein and nucleic
35 acid chains that pass the criteria for the
36 :class:`chain mapping <ost.mol.alg.chain_mapping>`. This means ignoring
37 other ligands, waters, short polymers as well as any incorrectly connected
38 chains that may be in proximity.
40 Results are available as matrices (`(lddt_pli|rmsd)_matrix`), where every
41 target-model score is reported in a matrix; as `(lddt_pli|rmsd)` where
42 a model-target assignment has been determined (see below) and reported in
43 a dictionary; and as (`(lddt_pli|rmsd)_details`) methods, which report
44 additional details about different aspects of the scoring such as chain
47 The behavior of chain mapping and ligand assignment can be controlled
48 with the `global_chain_mapping` and `rmsd_assignment` arguments.
50 By default, chain mapping is performed locally, ie. only within the
51 binding site. As a result, different ligand scores can correspond to
52 different chain mappings. This tends to produce more favorable scores,
53 especially in large, partially regular oligomeric complexes.
54 Setting `global_chain_mapping=True` enforces a single global chain mapping,
55 as per :meth:`ost.mol.alg.chain_mapping.ChainMapper.GetMapping`.
56 Note that this global chain mapping currently ignores non polymer entities
57 such as small ligands, and may result in overly pessimistic scores.
59 By default, target-model ligand assignments are computed identically
60 for the RMSD and lDDT-PLI scores. Each model ligand is uniquely assigned
61 to a target ligand, starting from the lowest RMSD and using each target and
62 model ligand in a single assignment. If `rmsd_assignment` is set to False,
63 RMSD and lDDT-PLI are assigned separately to optimize each score, and the
64 other score is used as a tiebreaker.
66 By default, only exact matches between target and model ligands are
67 considered. This is a problem when the target only contains a subset
68 of the expected atoms (for instance if atoms are missing in an
69 experimental structure, which often happens in the PDB). With
70 `substructure_match=True`, complete model ligands can be scored against
71 partial target ligands. One problem with this approach is that it is
72 very easy to find good matches to small, irrelevant ligands like EDO, CO2
73 or GOL. To counter that, the assignment algorithm considers the coverage,
74 expressed as the fraction of atoms of the model ligand atoms covered in the
75 target. Higher coverage matches are prioritized, but a match with a better
76 score will be preferred if it falls within a window of `coverage_delta`
77 (by default 0.2) of a worse-scoring match. As a result, for instance,
78 with a delta of 0.2, a low-score match with coverage 0.96 would be
79 preferred to a high-score match with coverage 0.90.
83 The class generally assumes that the
84 :attr:`~ost.mol.ResidueHandle.is_ligand` property is properly set on all
85 the ligand atoms, and only ligand atoms. This is typically the case for
86 entities loaded from mmCIF (tested with mmCIF files from the PDB and
87 SWISS-MODEL). Legacy PDB files must contain `HET` headers (which is usually
88 the case for files downloaded from the PDB but not elsewhere).
90 The class doesn't perform any cleanup of the provided structures.
91 It is up to the caller to ensure that the data is clean and suitable for
92 scoring. :ref:`Molck <molck>` should be used with extra
93 care, as many of the options (such as `rm_non_std` or `map_nonstd_res`) can
94 cause ligands to be removed from the structure. If cleanup with Molck is
95 needed, ligands should be kept aside and passed separately. Non-ligand residues
96 should be valid compounds with atom names following the naming conventions
97 of the component dictionary. Non-standard residues are acceptable, and if
98 the model contains a standard residue at that position, only atoms with
99 matching names will be considered.
101 Unlike most of OpenStructure, this class does not assume that the ligands
102 (either in the model or the target) are part of the PDB component
103 dictionary. They may have arbitrary residue names. Residue names do not
104 have to match between the model and the target. Matching is based on
105 the calculation of isomorphisms which depend on the atom element name and
106 atom connectivity (bond order is ignored).
107 It is up to the caller to ensure that the connectivity of atoms is properly
108 set before passing any ligands to this class. Ligands with improper
109 connectivity will lead to bogus results.
111 Note, however, that atom names should be unique within a residue (ie two
112 distinct atoms cannot have the same atom name).
114 This only applies to the ligand. The rest of the model and target
115 structures (protein, nucleic acids) must still follow the usual rules and
116 contain only residues from the compound library.
118 Although it isn't a requirement, hydrogen atoms should be removed from the
119 structures. Here is an example code snippet that will perform a reasonable
120 cleanup. Keep in mind that this is most likely not going to work as
121 expected with entities loaded from PDB files, as the `is_ligand` flag is
122 probably not set properly.
124 Here is a snippet example of how to use this code::
126 from ost.mol.alg.ligand_scoring import LigandScorer
127 from ost.mol.alg import Molck, MolckSettings
130 # Structure model in PDB format, containing the receptor only
131 model = io.LoadPDB("path_to_model.pdb")
132 # Ligand model as SDF file
133 model_ligand = io.LoadEntity("path_to_ligand.sdf", format="sdf")
134 # Target loaded from mmCIF, containing the ligand
135 target = io.LoadMMCIF("path_to_target.cif")
137 # Cleanup a copy of the structures
138 cleaned_model = model.Copy()
139 cleaned_target = target.Copy()
140 molck_settings = MolckSettings(rm_unk_atoms=True,
144 rm_zero_occ_atoms=False,
146 map_nonstd_res=False,
148 Molck(cleaned_model, conop.GetDefaultLib(), molck_settings)
149 Molck(cleaned_target, conop.GetDefaultLib(), molck_settings)
151 # Setup scorer object and compute lDDT-PLI
152 model_ligands = [model_ligand.Select("ele != H")]
153 ls = LigandScorer(model=cleaned_model, target=cleaned_target, model_ligands=model_ligands)
154 print("lDDT-PLI:", ls.lddt_pli)
155 print("RMSD:", ls.rmsd)
157 :param model: Model structure - a deep copy is available as :attr:`model`.
158 No additional processing (ie. Molck), checks,
159 stereochemistry checks or sanitization is performed on the
160 input. Hydrogen atoms are kept.
161 :type model: :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
162 :param target: Target structure - a deep copy is available as :attr:`target`.
163 No additional processing (ie. Molck), checks or sanitization
164 is performed on the input. Hydrogen atoms are kept.
165 :type target: :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
166 :param model_ligands: Model ligands, as a list of
167 :class:`~ost.mol.ResidueHandle` belonging to the model
168 entity. Can be instantiated with either a :class:list of
169 :class:`~ost.mol.ResidueHandle`/:class:`ost.mol.ResidueView`
170 or of :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`.
171 If `None`, ligands will be extracted based on the
172 :attr:`~ost.mol.ResidueHandle.is_ligand` flag (this is
173 normally set properly in entities loaded from mmCIF).
174 :type model_ligands: :class:`list`
175 :param target_ligands: Target ligands, as a list of
176 :class:`~ost.mol.ResidueHandle` belonging to the target
177 entity. Can be instantiated either a :class:list of
178 :class:`~ost.mol.ResidueHandle`/:class:`ost.mol.ResidueView`
179 or of :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
180 containing a single residue each.
181 If `None`, ligands will be extracted based on the
182 :attr:`~ost.mol.ResidueHandle.is_ligand` flag (this is
183 normally set properly in entities loaded from mmCIF).
184 :type target_ligands: :class:`list`
185 :param resnum_alignments: Whether alignments between chemically equivalent
186 chains in *model* and *target* can be computed
187 based on residue numbers. This can be assumed in
188 benchmarking setups such as CAMEO/CASP.
189 :type resnum_alignments: :class:`bool`
190 :param check_resnames: On by default. Enforces residue name matches
191 between mapped model and target residues.
192 :type check_resnames: :class:`bool`
193 :param rename_ligand_chain: If a residue with the same chain name and
194 residue number than an explicitly passed model
195 or target ligand exits in the structure,
196 and `rename_ligand_chain` is False, a
197 RuntimeError will be raised. If
198 `rename_ligand_chain` is True, the ligand will
199 be moved to a new chain instead, and the move
200 will be logged to the console with SCRIPT
202 :type rename_ligand_chain: :class:`bool`
203 :param chain_mapper: a chain mapper initialized for the target structure.
204 If None (default), a chain mapper will be initialized
206 :type chain_mapper: :class:`ost.mol.alg.chain_mapping.ChainMapper`
207 :param substructure_match: Set this to True to allow incomplete (ie
208 partially resolved) target ligands.
209 :type substructure_match: :class:`bool`
210 :param coverage_delta: the coverage delta for partial ligand assignment.
211 :type coverage_delta: :class:`float`
212 :param radius: Inclusion radius for the binding site. Residues with
213 atoms within this distance of the ligand will be considered
214 for inclusion in the binding site.
215 :type radius: :class:`float`
216 :param lddt_pli_radius: lDDT inclusion radius for lDDT-PLI. Should be
217 at least equal to or larger than `radius`.
218 :type lddt_pli_radius: :class:`float`
219 :param lddt_lp_radius: lDDT inclusion radius for lDDT-LP.
220 :type lddt_lp_radius: :class:`float`
221 :param model_bs_radius: inclusion radius for model binding sites.
222 Only used when full_bs_search=False, otherwise the
223 radius is effectively infinite. Only chains with
224 atoms within this distance of a model ligand will
225 be considered in the chain mapping.
226 :type model_bs_radius: :class:`float`
227 :param binding_sites_topn: maximum number of target binding site
228 representations to assess, per target ligand.
229 Ignored if `global_chain_mapping` is True.
230 :type binding_sites_topn: :class:`int`
231 :param global_chain_mapping: set to True to use a global chain mapping for
232 the polymer (protein, nucleotide) chains.
233 Defaults to False, in which case only local
234 chain mappings are allowed (where different
235 ligand may be scored against different chain
237 :type global_chain_mapping: :class:`bool`
238 :param custom_mapping: Provide custom chain mapping between *model* and
239 *target* that is used as global chain mapping.
240 Dictionary with target chain names as key and model
241 chain names as value. Only has an effect if
242 *global_chain_mapping* is True.
243 :type custom_mapping: :class:`dict`
244 :param rmsd_assignment: set to False to assign lDDT-PLI and RMSD separately
245 using a combination of these two scores to
246 optimize the assignment. By default (True), only
247 RMSD is considered for the ligand assignment.
248 :type rmsd_assignment: :class:`bool`
249 :param n_max_naive: Parameter for global chain mapping. If *model* and
250 *target* have less or equal that number of chains,
252 mapping solution space is enumerated to find the
253 the optimum. A heuristic is used otherwise.
254 :type n_max_naive: :class:`int`
255 :param max_symmetries: If more than that many isomorphisms exist for
256 a target-ligand pair, it will be ignored and reported
258 :type max_symmetries: :class:`int`
259 :param unassigned: If True, unassigned model ligands are reported in
260 the output together with assigned ligands, with a score
261 of None, and reason for not being assigned in the
262 \\*_details matrix. Defaults to False.
263 :type unassigned: :class:`bool`
264 :param full_bs_search: If True, all potential binding sites in the model
265 are searched for each target binding site. If False,
266 the search space in the model is reduced to chains
267 around (`model_bs_radius` Å) model ligands.
268 This speeds up computations, but may result in
269 ligands not being scored if the predicted ligand
270 pose is too far from the actual binding site.
271 When that's the case, the value in the
272 `unassigned_*_ligands` property will be
273 `model_representation` and is indistinguishable from
274 cases where the binding site was not modeled at all.
275 Ignored when `global_chain_mapping` is True.
276 :type full_bs_search: :class:`bool`
278 def __init__(self, model, target, model_ligands=None, target_ligands=None,
279 resnum_alignments=False, check_resnames=True,
280 rename_ligand_chain=False, chain_mapper=None,
281 substructure_match=False, coverage_delta=0.2, radius=4.0,
282 lddt_pli_radius=6.0, lddt_lp_radius=10.0, model_bs_radius=20,
283 binding_sites_topn=100000, global_chain_mapping=False,
284 rmsd_assignment=False, n_max_naive=12, max_symmetries=1e5,
285 custom_mapping=None, unassigned=False, full_bs_search=False):
288 self.
modelmodel = mol.CreateEntityFromView(model,
False)
290 self.
modelmodel = model.Copy()
292 raise RuntimeError(
"model must be of type EntityView/EntityHandle")
295 self.
targettarget = mol.CreateEntityFromView(target,
False)
297 self.
targettarget = target.Copy()
299 raise RuntimeError(
"target must be of type EntityView/EntityHandle")
302 if target_ligands
is None:
309 LogWarning(
"No ligands in the target")
312 if model_ligands
is None:
319 LogWarning(
"No ligands in the model")
321 raise ValueError(
"No ligand in the model and in the target")
348 self.
_rmsd_rmsd =
None
372 self.
_repr_repr = dict()
398 if custom_mapping
is not None:
403 """ Chain mapper object for the given :attr:`target`.
405 :type: :class:`ost.mol.alg.chain_mapping.ChainMapper`
415 if target_ligand.handle.hash_code
not in self.
_binding_sites_binding_sites:
418 ref_residues_hashes = set()
419 ignored_residue_hashes = {target_ligand.hash_code}
420 for ligand_at
in target_ligand.atoms:
421 close_atoms = self.
targettarget.FindWithin(ligand_at.GetPos(), self.
radiusradius)
422 for close_at
in close_atoms:
424 ref_res = close_at.GetResidue()
425 h = ref_res.handle.GetHashCode()
426 if h
not in ref_residues_hashes
and \
427 h
not in ignored_residue_hashes:
428 if self.
chain_mapperchain_mapper.target.ViewForHandle(ref_res).IsValid():
429 h = ref_res.handle.GetHashCode()
430 ref_residues_hashes.add(h)
431 elif ref_res.is_ligand:
432 LogWarning(
"Ignoring ligand %s in binding site of %s" % (
433 ref_res.qualified_name, target_ligand.qualified_name))
434 ignored_residue_hashes.add(h)
435 elif ref_res.chem_type == mol.ChemType.WATERS:
438 LogWarning(
"Ignoring residue %s in binding site of %s" % (
439 ref_res.qualified_name, target_ligand.qualified_name))
440 ignored_residue_hashes.add(h)
442 ref_bs = self.
targettarget.CreateEmptyView()
443 if ref_residues_hashes:
447 for ch
in self.
targettarget.chains:
448 for r
in ch.residues:
449 if r.handle.GetHashCode()
in ref_residues_hashes:
450 ref_bs.AddResidue(r, mol.ViewAddFlag.INCLUDE_ALL)
451 if len(ref_bs.residues) == 0:
452 raise RuntimeError(
"Failed to add proximity residues to "
453 "the reference binding site entity")
457 "No residue in proximity of the target ligand")
459 self.
_binding_sites_binding_sites[target_ligand.handle.hash_code] = ref_bs
461 return self.
_binding_sites_binding_sites[target_ligand.handle.hash_code]
464 def _model_mapping(self):
465 """Get the global chain mapping for the model."""
472 def _extract_ligands(entity):
473 """Extract ligands from entity. Return a list of residues.
475 Assumes that ligands have the :attr:`~ost.mol.ResidueHandle.is_ligand`
476 flag set. This is typically the case for entities loaded from mmCIF
477 (tested with mmCIF files from the PDB and SWISS-MODEL).
478 Legacy PDB files must contain `HET` headers (which is usually the
479 case for files downloaded from the PDB but not elsewhere).
481 This function performs basic checks to ensure that the residues in this
482 chain are not forming polymer bonds (ie peptide/nucleotide ligands) and
483 will raise a RuntimeError if this assumption is broken.
485 :param entity: the entity to extract ligands from
486 :type entity: :class:`~ost.mol.EntityHandle`
487 :rtype: :class:`list` of :class:`~ost.mol.ResidueHandle`
490 extracted_ligands = []
491 for residue
in entity.residues:
492 if residue.is_ligand:
493 if mol.InSequence(residue, residue.next):
494 raise RuntimeError(
"Residue %s connected in polymer sequen"
495 "ce %s" % (residue.qualified_name))
496 extracted_ligands.append(residue)
497 LogVerbose(
"Detected residue %s as ligand" % residue)
498 return extracted_ligands
501 def _prepare_ligands(new_entity, old_entity, ligands, rename_chain):
502 """Prepare the ligands given into a list of ResidueHandles which are
503 part of the copied entity, suitable for the model_ligands and
504 target_ligands properties.
506 This function takes a list of ligands as (Entity|Residue)(Handle|View).
507 Entities can contain multiple ligands, which will be considered as
510 Ligands which are part of the entity are simply fetched in the new
511 copied entity. Otherwise, they are copied over to the copied entity.
513 extracted_ligands = []
518 def _copy_residue(residue, rename_chain):
519 """ Copy the residue into the new chain.
520 Return the new residue handle."""
521 nonlocal next_chain_num, new_editor
524 if new_editor
is None:
525 new_editor = new_entity.EditXCS()
527 new_chain = new_entity.FindChain(residue.chain.name)
528 if not new_chain.IsValid():
529 new_chain = new_editor.InsertChain(residue.chain.name)
532 already_exists = new_chain.FindResidue(residue.number).IsValid()
537 new_chain_name = residue.chain.name +
"_" + str(chain_ext)
538 new_chain = new_entity.FindChain(new_chain_name)
539 if new_chain.IsValid():
543 new_chain = new_editor.InsertChain(new_chain_name)
545 LogScript(
"Moved ligand residue %s to new chain %s" % (
546 residue.qualified_name, new_chain.name))
548 msg =
"A residue number %s already exists in chain %s" % (
549 residue.number, residue.chain.name)
550 raise RuntimeError(msg)
553 new_res = new_editor.AppendResidue(new_chain, residue.name, residue.number)
555 for old_atom
in residue.atoms:
556 new_editor.InsertAtom(new_res, old_atom.name, old_atom.pos,
557 element=old_atom.element, occupancy=old_atom.occupancy,
558 b_factor=old_atom.b_factor, is_hetatm=old_atom.is_hetatom)
560 for old_atom
in residue.atoms:
561 for old_bond
in old_atom.bonds:
562 new_first = new_res.FindAtom(old_bond.first.name)
563 new_second = new_res.FindAtom(old_bond.second.name)
564 new_editor.Connect(new_first, new_second)
567 def _process_ligand_residue(res, rename_chain):
568 """Copy or fetch the residue. Return the residue handle."""
570 if res.entity.handle == old_entity.handle:
574 new_res = new_entity.FindResidue(res.chain.name, res.number)
575 if new_res
and new_res.valid:
576 LogVerbose(
"Ligand residue %s already in entity" % res.handle.qualified_name)
579 new_res = _copy_residue(res, rename_chain)
580 LogVerbose(
"Copied ligand residue %s" % res.handle.qualified_name)
581 new_res.SetIsLigand(
True)
584 for ligand
in ligands:
586 for residue
in ligand.residues:
587 new_residue = _process_ligand_residue(residue, rename_chain)
588 extracted_ligands.append(new_residue)
590 new_residue = _process_ligand_residue(ligand, rename_chain)
591 extracted_ligands.append(new_residue)
593 raise RuntimeError(
"Ligands should be given as Entity or Residue")
595 if new_editor
is not None:
596 new_editor.UpdateICS()
597 return extracted_ligands
600 def _build_binding_site_entity(ligand, residues, extra_residues=[]):
601 """ Build an entity with all the binding site residues in chain A
602 and the ligand in chain _. Residues are renumbered consecutively from
603 1. The ligand is assigned residue number 1 and residue name LIG.
604 Residues in extra_residues not in `residues` in the model are added
605 at the end of chain A.
607 :param ligand: the Residue Handle of the ligand
608 :type ligand: :class:`~ost.mol.ResidueHandle`
609 :param residues: a list of binding site residues
610 :type residues: :class:`list` of :class:`~ost.mol.ResidueHandle`
611 :param extra_residues: an optional list with addition binding site
612 residues. Residues in this list which are not
613 in `residues` will be added at the end of chain
614 A. This allows for instance adding unmapped
615 residues missing from the model into the
616 reference binding site.
617 :type extra_residues: :class:`list` of :class:`~ost.mol.ResidueHandle`
618 :rtype: :class:`~ost.mol.EntityHandle`
620 bs_ent = mol.CreateEntity()
621 ed = bs_ent.EditXCS()
622 bs_chain = ed.InsertChain(
"A")
624 for resnum, old_res
in enumerate(residues, 1):
625 seen_res_qn.append(old_res.qualified_name)
626 new_res = ed.AppendResidue(bs_chain, old_res.handle,
628 ed.SetResidueNumber(new_res,
mol.ResNum(resnum))
631 for extra_res
in extra_residues:
632 if extra_res.qualified_name
not in seen_res_qn:
634 seen_res_qn.append(extra_res.qualified_name)
635 new_res = ed.AppendResidue(bs_chain,
638 ed.SetResidueNumber(new_res,
mol.ResNum(resnum))
640 ligand_chain = ed.InsertChain(
"_")
641 ligand_res = ed.AppendResidue(ligand_chain, ligand.handle,
643 ed.RenameResidue(ligand_res,
"LIG")
644 ed.SetResidueNumber(ligand_res,
mol.ResNum(1))
649 def _compute_scores(self):
651 Compute the RMSD and lDDT-PLI scores for every possible target-model
652 ligand pair and store the result in internal matrices.
666 for target_id, target_ligand
in enumerate(self.
target_ligandstarget_ligands):
667 LogVerbose(
"Analyzing target ligand %s" % target_ligand)
668 for model_id, model_ligand
in enumerate(self.
model_ligandsmodel_ligands):
669 LogVerbose(
"Compare to model ligand %s" % model_ligand)
675 symmetries = _ComputeSymmetries(
676 model_ligand, target_ligand,
680 LogVerbose(
"Ligands %s and %s symmetry match" % (
681 str(model_ligand), str(target_ligand)))
682 except NoSymmetryError:
684 LogVerbose(
"No symmetry between %s and %s" % (
685 str(model_ligand), str(target_ligand)))
688 except TooManySymmetriesError:
690 LogVerbose(
"Too many symmetries between %s and %s" % (
691 str(model_ligand), str(target_ligand)))
694 except DisconnectedGraphError:
703 rmsd_result = self.
_compute_rmsd_compute_rmsd(symmetries, target_ligand,
705 lddt_pli_result = self.
_compute_lddtpli_compute_lddtpli(symmetries, target_ligand,
711 if (rmsd_result
is None) != (lddt_pli_result
is None):
715 raise Exception(
"Ligand scoring bug: discrepency between "
716 "RMSD and lDDT-PLI definition.")
717 if rmsd_result
is not None:
720 substructure_match = len(symmetries[0][0]) != len(model_ligand.atoms)
721 coverage = len(symmetries[0][0]) / len(model_ligand.atoms)
725 rmsd_result[
"substructure_match"] = substructure_match
726 rmsd_result[
"coverage"] = coverage
728 rmsd_result[
"unassigned"] =
False
730 lddt_pli_result[
"substructure_match"] = substructure_match
731 lddt_pli_result[
"coverage"] = coverage
733 lddt_pli_result[
"unassigned"] =
False
742 def _compute_rmsd(self, symmetries, target_ligand, model_ligand):
743 best_rmsd_result =
None
744 for r_i, r
in enumerate(self.
get_reprget_repr(target_ligand, model_ligand)):
745 rmsd = _SCRMSD_symmetries(symmetries, model_ligand,
746 target_ligand, transformation=r.transform)
748 cache_key = (target_ligand.handle.hash_code,
749 model_ligand.handle.hash_code, r_i)
752 if best_rmsd_result
is None or rmsd < best_rmsd_result[
"rmsd"]:
753 best_rmsd_result = {
"rmsd": rmsd,
755 "bs_ref_res": r.substructure.residues,
756 "bs_ref_res_mapped": r.ref_residues,
757 "bs_mdl_res_mapped": r.mdl_residues,
758 "bb_rmsd": r.bb_rmsd,
759 "target_ligand": target_ligand,
760 "model_ligand": model_ligand,
761 "chain_mapping": r.GetFlatChainMapping(),
762 "transform": r.transform,
763 "inconsistent_residues": r.inconsistent_residues}
765 return best_rmsd_result
768 def _compute_lddtpli(self, symmetries, target_ligand, model_ligand):
770 best_lddt_result =
None
771 for r_i, r
in enumerate(self.
get_reprget_repr(target_ligand, model_ligand)):
773 target_ligand, r.ref_residues,
774 r.substructure.residues)
775 ref_bs_ent_ligand = ref_bs_ent.FindResidue(
"_", 1)
778 ref_bs_ent_ligand.name:
779 mol.alg.lddt.CustomCompound.FromResidue(
783 custom_compounds=custom_compounds,
786 lddt_tot = 4 * sum([len(x)
for x
in lddt_scorer.ref_indices_ic])
790 target_ligand] = (
"no_contact",
791 "No lDDT-PLI contacts in the"
792 " reference structure")
795 model_ligand, r.mdl_residues, [])
796 mdl_bs_ent_ligand = mdl_bs_ent.FindResidue(
"_", 1)
799 mdl_editor = mdl_bs_ent.EditXCS()
800 for i, (trg_sym, mdl_sym)
in enumerate(symmetries):
801 for mdl_anum, trg_anum
in zip(mdl_sym, trg_sym):
803 trg_atom = ref_bs_ent_ligand.atoms[trg_anum]
804 mdl_atom = mdl_bs_ent_ligand.atoms[mdl_anum]
805 mdl_editor.RenameAtom(mdl_atom, trg_atom.name)
806 mdl_editor.UpdateICS()
808 global_lddt, local_lddt = lddt_scorer.lDDT(
809 mdl_bs_ent, chain_mapping={
"A":
"A",
"_":
"_"},
813 its_awesome = (best_lddt_result
is None)
or \
814 (global_lddt > best_lddt_result[
"lddt_pli"])
817 if (
not its_awesome)
and (global_lddt == best_lddt_result[
"lddt_pli"]):
818 rmsd_cache_key = (target_ligand.handle.hash_code,
819 model_ligand.handle.hash_code, r_i)
821 if rmsd < best_lddt_result[
"rmsd"]:
825 rmsd_cache_key = (target_ligand.handle.hash_code,
826 model_ligand.handle.hash_code, r_i)
827 best_lddt_result = {
"lddt_pli": global_lddt,
829 "lddt_pli_n_contacts": lddt_tot,
830 "rmsd": self.
_rmsd_cache_rmsd_cache[rmsd_cache_key],
831 "bs_ref_res": r.substructure.residues,
832 "bs_ref_res_mapped": r.ref_residues,
833 "bs_mdl_res_mapped": r.mdl_residues,
834 "bb_rmsd": r.bb_rmsd,
835 "target_ligand": target_ligand,
836 "model_ligand": model_ligand,
837 "chain_mapping": r.GetFlatChainMapping(),
838 "transform": r.transform,
839 "inconsistent_residues": r.inconsistent_residues}
841 return best_lddt_result
845 def _find_ligand_assignment(mat1, mat2=None, coverage=None, coverage_delta=None):
846 """ Find the ligand assignment based on mat1. If mat2 is provided, it
847 will be used to break ties in mat1. If mat2 is not provided, ties will
848 be resolved by taking the first match arbitrarily.
850 Both mat1 and mat2 should "look" like RMSD - ie be between inf (bad)
857 mat2[~np.isnan(mat2)] = np.inf
861 coverage = np.copy(mat1)
864 coverage = np.copy(coverage)
869 LogDebug(
"No model or target ligand, returning no assignment.")
872 def _get_best_match(mat1_val, coverage_val):
873 """ Extract the row/column indices of the prediction matching the
875 mat1_match_idx = np.argwhere((mat1 == mat1_val) & (coverage >= coverage_val))
877 if len(mat1_match_idx) > 1:
879 best_mat2_match = [mat2[tuple(x)]
for x
in mat1_match_idx]
882 best_mat2_idx = np.array(best_mat2_match).argmin()
884 return mat1_match_idx[best_mat2_idx]
886 return mat1_match_idx[0]
889 min_coverage = np.max(coverage)
891 while min_coverage > 0
and not np.all(np.isnan(mat1)):
892 LogVerbose(
"Looking for matches with coverage >= %s" % min_coverage)
893 min_mat1 = LigandScorer._nanmin_nowarn(mat1, coverage < min_coverage)
894 while not np.isnan(min_mat1):
895 max_i_trg, max_i_mdl = _get_best_match(min_mat1, min_coverage)
899 alternative_matches = (mat1[:, max_i_mdl] < min_mat1) & (
900 coverage[:, max_i_mdl] > (min_coverage - coverage_delta))
901 if np.any(alternative_matches):
903 LogVerbose(
"Found match with lower coverage but better score")
904 min_mat1 = np.nanmin(mat1[alternative_matches])
905 max_i_trg, max_i_mdl = _get_best_match(min_mat1, min_coverage - coverage_delta)
908 mat1[max_i_trg, :] = np.nan
909 mat1[:, max_i_mdl] = np.nan
910 mat2[max_i_trg, :] = np.nan
911 mat2[:, max_i_mdl] = np.nan
912 coverage[max_i_trg, :] = -np.inf
913 coverage[:, max_i_mdl] = -np.inf
916 assignments.append((max_i_trg, max_i_mdl))
919 min_mat1 = LigandScorer._nanmin_nowarn(mat1, coverage < min_coverage)
921 raise Exception(
"Ligand scoring bug: hit appatent infinite loop!")
924 min_coverage = np.max(coverage)
926 raise Exception(
"Ligand scoring bug: hit appatent infinite loop!")
931 def _nanmin_nowarn(array, mask):
932 """Compute np.nanmin but ignore the RuntimeWarning."""
933 masked_array = np_ma.masked_array(array, mask=mask)
934 with warnings.catch_warnings():
935 warnings.simplefilter(
"ignore")
936 min = np.nanmin(masked_array, )
937 if np_ma.is_masked(min):
943 def _reverse_lddt(lddt):
944 """Reverse lDDT means turning it from a number between 0 and 1 to a
945 number between infinity and 0 (0 being better).
947 In practice, this is 1/lDDT. If lDDT is 0, the result is infinity.
949 with warnings.catch_warnings():
950 warnings.simplefilter(
"ignore")
951 return np.float64(1) / lddt
953 def _assign_ligands_rmsd(self):
954 """Assign (map) ligands between model and target.
956 Sets self._rmsd and self._rmsd_details.
964 self.
_rmsd_rmsd = mat_tuple[0]
971 def _assign_matrices(self, mat1, mat2, data, main_key):
973 Perform the ligand assignment, ie find the mapping between model and
976 The algorithm starts by assigning the "best" mapping, and then discards
977 the target and model ligands (row, column) so that every model ligand
978 can be assigned to a single target ligand, and every target ligand
979 is only assigned to a single model ligand. Repeat until there is
980 nothing left to assign.
982 In case of a tie in values in `mat1`, it uses `mat2` to break the tie.
984 This algorithm doesn't guarantee a globally optimal assignment.
986 Both `mat1` and `mat2` should contain values between 0 and infinity,
987 with lower values representing better scores. Use the
988 :meth:`_reverse_lddt` method to convert lDDT values to such a score.
990 :param mat1: the main ligand assignment criteria (RMSD or lDDT-PLI)
991 :param mat2: the secondary ligand assignment criteria (lDDT-PLI or RMSD)
992 :param data: the data (either self._rmsd_full_matrix or self._lddt_pli_matrix)
993 :param main_key: the key of data (dictionnaries within `data`) to
994 assign into out_main.
995 :return: a tuple with 2 dictionaries of matrices containing the main
996 data, and details, respectively.
1004 assigned_mdl = [
False] * len(self.
model_ligandsmodel_ligands)
1005 for assignment
in assignments:
1006 trg_idx, mdl_idx = assignment
1007 assigned_mdl[mdl_idx] =
True
1008 assigned_trg[trg_idx] =
True
1010 mdl_cname = mdl_lig.chain.name
1011 mdl_resnum = mdl_lig.number
1012 if mdl_cname
not in out_main:
1013 out_main[mdl_cname] = {}
1014 out_details[mdl_cname] = {}
1015 out_main[mdl_cname][mdl_resnum] = data[
1016 trg_idx, mdl_idx][main_key]
1017 out_details[mdl_cname][mdl_resnum] = data[
1021 assigned_trg, assigned_mdl, [out_main], [out_details], [main_key])
1022 return out_main, out_details, unassigned_trg, unassigned_mdl
1024 def _assign_unassigned(self, assigned_trg, assigned_mdl,
1025 out_main, out_details, main_key):
1029 unassigned_trg_idx = [i
for i, x
in enumerate(assigned_trg)
if not x]
1030 unassigned_mdl_idx = [i
for i, x
in enumerate(assigned_mdl)
if not x]
1032 for mdl_idx
in unassigned_mdl_idx:
1035 mdl_cname = mdl_lig.chain.name
1036 mdl_resnum = mdl_lig.number
1037 if mdl_cname
not in unassigned_mdl:
1038 unassigned_mdl[mdl_cname] = {}
1039 unassigned_mdl[mdl_cname][mdl_resnum] = reason
1041 for i, _
in enumerate(out_main):
1042 if mdl_cname
not in out_main[i]:
1043 out_main[i][mdl_cname] = {}
1044 out_details[i][mdl_cname] = {}
1045 out_main[i][mdl_cname][mdl_resnum] =
None
1046 out_details[i][mdl_cname][mdl_resnum] = {
1048 "reason_short": reason[0],
1049 "reason_long": reason[1],
1052 LogInfo(
"Model ligand %s is unassigned: %s" % (
1053 mdl_lig.qualified_name, reason[1]))
1055 for trg_idx
in unassigned_trg_idx:
1058 trg_cname = trg_lig.chain.name
1059 trg_resnum = trg_lig.number
1060 if trg_cname
not in unassigned_trg:
1061 unassigned_trg[trg_cname] = {}
1062 unassigned_trg[trg_cname][trg_resnum] = reason
1063 LogInfo(
"Target ligand %s is unassigned: %s" % (
1064 trg_lig.qualified_name, reason[1]))
1066 return unassigned_trg, unassigned_mdl
1069 def _assign_matrix(self, mat, data1, main_key1, data2, main_key2):
1071 Perform the ligand assignment, ie find the mapping between model and
1072 target ligands, based on a single matrix
1074 The algorithm starts by assigning the "best" mapping, and then discards
1075 the target and model ligands (row, column) so that every model ligand
1076 can be assigned to a single target ligand, and every target ligand
1077 is only assigned to a single model ligand. Repeat until there is
1078 nothing left to assign.
1080 This algorithm doesn't guarantee a globally optimal assignment.
1082 `mat` should contain values between 0 and infinity,
1083 with lower values representing better scores. Use the
1084 :meth:`_reverse_lddt` method to convert lDDT values to such a score.
1086 :param mat: the ligand assignment criteria (RMSD or lDDT-PLI)
1087 :param data1: the first data (either self._rmsd_full_matrix or self._lddt_pli_matrix)
1088 :param main_key1: the first key of data (dictionnaries within `data`) to
1089 assign into out_main.
1090 :param data2: the second data (either self._rmsd_full_matrix or self._lddt_pli_matrix)
1091 :param main_key2: the second key of data (dictionnaries within `data`) to
1092 assign into out_main.
1093 :return: a tuple with 4 dictionaries of matrices containing the main
1094 data1, details1, main data2 and details2, respectively.
1104 assigned_mdl = [
False] * len(self.
model_ligandsmodel_ligands)
1105 for assignment
in assignments:
1106 trg_idx, mdl_idx = assignment
1107 assigned_mdl[mdl_idx] =
True
1108 assigned_trg[trg_idx] =
True
1110 mdl_cname = mdl_lig.chain.name
1111 mdl_resnum = mdl_lig.number
1113 if mdl_cname
not in out_main1:
1114 out_main1[mdl_cname] = {}
1115 out_details1[mdl_cname] = {}
1116 out_main1[mdl_cname][mdl_resnum] = data1[
1117 trg_idx, mdl_idx][main_key1]
1118 out_details1[mdl_cname][mdl_resnum] = data1[
1121 if mdl_cname
not in out_main2:
1122 out_main2[mdl_cname] = {}
1123 out_details2[mdl_cname] = {}
1124 out_main2[mdl_cname][mdl_resnum] = data2[
1125 trg_idx, mdl_idx][main_key2]
1126 out_details2[mdl_cname][mdl_resnum] = data2[
1130 assigned_trg, assigned_mdl,
1131 [out_main1, out_main2], [out_details1, out_details2],
1132 [main_key1, main_key2])
1134 return out_main1, out_details1, out_main2, out_details2, \
1135 unassigned_trg, unassigned_mdl
1137 def _assign_ligands_lddt_pli(self):
1138 """ Assign ligands based on lDDT-PLI.
1140 Sets self._lddt_pli and self._lddt_pli_details.
1153 def _assign_ligands_rmsd_only(self):
1154 """Assign (map) ligands between model and target based on RMSD only.
1156 Sets self._rmsd, self._rmsd_details, self._lddt_pli and
1157 self._lddt_pli_details.
1164 self.
_rmsd_rmsd = mat_tuple[0]
1173 """ Get the matrix of RMSD values.
1175 Target ligands are in rows, model ligands in columns.
1177 NaN values indicate that no RMSD could be computed (i.e. different
1180 :rtype: :class:`~numpy.ndarray`
1188 for i, j
in np.ndindex(shape):
1196 """ Get the matrix of lDDT-PLI values.
1198 Target ligands are in rows, model ligands in columns.
1200 NaN values indicate that no lDDT-PLI could be computed (i.e. different
1203 :rtype: :class:`~numpy.ndarray`
1211 for i, j
in np.ndindex(shape):
1219 """ Get the matrix of model ligand atom coverage in the target.
1221 Target ligands are in rows, model ligands in columns.
1223 A value of 0 indicates that there was no isomorphism between the model
1224 and target ligands. If `substructure_match=False`, only full match
1225 isomorphisms are considered, and therefore only values of 1.0 and 0.0
1228 :rtype: :class:`~numpy.ndarray`
1236 """Get a dictionary of RMSD score values, keyed by model ligand
1237 (chain name, :class:`~ost.mol.ResNum`).
1239 If the scoring object was instantiated with `unassigned=True`, some
1240 scores may be `None`.
1242 :rtype: :class:`dict`
1244 if self.
_rmsd_rmsd
is None:
1249 return self.
_rmsd_rmsd
1253 """Get a dictionary of RMSD score details (dictionaries), keyed by
1254 model ligand (chain name, :class:`~ost.mol.ResNum`).
1256 The value is a dictionary. For ligands that were assigned (mapped) to
1257 the target, the dictionary contain the following information:
1259 * `rmsd`: the RMSD score value.
1260 * `lddt_lp`: the lDDT score of the ligand pocket (lDDT-LP).
1261 * `bs_ref_res`: a list of residues (:class:`~ost.mol.ResidueHandle`)
1262 that define the binding site in the reference.
1263 * `bs_ref_res_mapped`: a list of residues
1264 (:class:`~ost.mol.ResidueHandle`) in the reference binding site
1265 that could be mapped to the model.
1266 * `bs_mdl_res_mapped`: a list of residues
1267 (:class:`~ost.mol.ResidueHandle`) in the model that were mapped to
1268 the reference binding site. The residues are in the same order as
1269 `bs_ref_res_mapped`.
1270 * `bb_rmsd`: the RMSD of the binding site backbone after superposition
1271 * `target_ligand`: residue handle of the target ligand.
1272 * `model_ligand`: residue handle of the model ligand.
1273 * `chain_mapping`: local chain mapping as a dictionary, with target
1274 chain name as key and model chain name as value.
1275 * `transform`: transformation to superpose the model onto the target.
1276 * `substructure_match`: whether the score is the result of a partial
1277 (substructure) match. A value of `True` indicates that the target
1278 ligand covers only part of the model, while `False` indicates a
1280 * `coverage`: the fraction of model atoms covered by the assigned
1281 target ligand, in the interval (0, 1]. If `substructure_match`
1282 is `False`, this will always be 1.
1283 * `inconsistent_residues`: a list of tuples of mapped residues views
1284 (:class:`~ost.mol.ResidueView`) with residue names that differ
1285 between the reference and the model, respectively.
1286 The list is empty if all residue names match, which is guaranteed
1287 if `check_resnames=True`.
1288 Note: more binding site mappings may be explored during scoring,
1289 but only inconsistencies in the selected mapping are reported.
1290 * `unassigned`: only if the scorer was instantiated with
1291 `unassigned=True`: `False`
1293 If the scoring object was instantiated with `unassigned=True`, in
1294 addition the unassigned ligands will be reported with a score of `None`
1295 and the following information:
1297 * `unassigned`: `True`,
1298 * `reason_short`: a short token of the reason, see
1299 :attr:`unassigned_model_ligands` for details and meaning.
1300 * `reason_long`: a human-readable text of the reason, see
1301 :attr:`unassigned_model_ligands` for details and meaning.
1304 :rtype: :class:`dict`
1315 """Get a dictionary of lDDT-PLI score values, keyed by model ligand
1316 (chain name, :class:`~ost.mol.ResNum`).
1318 If the scoring object was instantiated with `unassigned=True`, some
1319 scores may be `None`.
1321 :rtype: :class:`dict`
1332 """Get a dictionary of lDDT-PLI score details (dictionaries), keyed by
1333 model ligand (chain name, :class:`~ost.mol.ResNum`).
1335 Each sub-dictionary contains the following information:
1337 * `lddt_pli`: the lDDT-PLI score value.
1338 * `rmsd`: the RMSD score value corresponding to the lDDT-PLI
1339 chain mapping and assignment. This may differ from the RMSD-based
1340 assignment. Note that a different isomorphism than `lddt_pli` may
1342 * `lddt_lp`: the lDDT score of the ligand pocket (lDDT-LP).
1343 * `lddt_pli_n_contacts`: number of total contacts used in lDDT-PLI,
1344 summed over all thresholds. Can be divided by 8 to obtain the number
1346 * `bs_ref_res`: a list of residues (:class:`~ost.mol.ResidueHandle`)
1347 that define the binding site in the reference.
1348 * `bs_ref_res_mapped`: a list of residues
1349 (:class:`~ost.mol.ResidueHandle`) in the reference binding site
1350 that could be mapped to the model.
1351 * `bs_mdl_res_mapped`: a list of residues
1352 (:class:`~ost.mol.ResidueHandle`) in the model that were mapped to
1353 the reference binding site. The residues are in the same order as
1354 `bs_ref_res_mapped`.
1355 * `bb_rmsd`: the RMSD of the binding site backbone after superposition.
1356 Note: not used for lDDT-PLI computation.
1357 * `target_ligand`: residue handle of the target ligand.
1358 * `model_ligand`: residue handle of the model ligand.
1359 * `chain_mapping`: local chain mapping as a dictionary, with target
1360 chain name as key and model chain name as value.
1361 * `transform`: transformation to superpose the model onto the target
1363 * `substructure_match`: whether the score is the result of a partial
1364 (substructure) match. A value of `True` indicates that the target
1365 ligand covers only part of the model, while `False` indicates a
1367 * `inconsistent_residues`: a list of tuples of mapped residues views
1368 (:class:`~ost.mol.ResidueView`) with residue names that differ
1369 between the reference and the model, respectively.
1370 The list is empty if all residue names match, which is guaranteed
1371 if `check_resnames=True`.
1372 Note: more binding site mappings may be explored during scoring,
1373 but only inconsistencies in the selected mapping are reported.
1374 * `unassigned`: only if the scorer was instantiated with
1375 `unassigned=True`: `False`
1377 If the scoring object was instantiated with `unassigned=True`, in
1378 addition the unmapped ligands will be reported with a score of `None`
1379 and the following information:
1381 * `unassigned`: `True`,
1382 * `reason_short`: a short token of the reason, see
1383 :attr:`unassigned_model_ligands` for details and meaning.
1384 * `reason_long`: a human-readable text of the reason, see
1385 :attr:`unassigned_model_ligands` for details and meaning.
1386 * `lddt_pli`: `None`
1388 :rtype: :class:`dict`
1399 """Get a dictionary of target ligands not assigned to any model ligand,
1400 keyed by target ligand (chain name, :class:`~ost.mol.ResNum`).
1402 The assignment for the lDDT-PLI score is used (and is controlled
1403 by the `rmsd_assignment` argument).
1405 Each item contains a string from a controlled dictionary
1406 about the reason for the absence of assignment.
1407 A human-readable description can be obtained from the
1408 :attr:`unassigned_target_ligand_descriptions` property.
1410 Currently, the following reasons are reported:
1412 * `no_ligand`: there was no ligand in the model.
1413 * `disconnected`: the ligand graph was disconnected.
1414 * `binding_site`: no residues were in proximity of the ligand.
1415 * `model_representation`: no representation of the reference binding
1416 site was found in the model. (I.e. the binding site was not modeled,
1417 or the model ligand was positioned too far in combination with
1418 `full_bs_search=False`)
1419 * `identity`: the ligand was not found in the model (by graph
1420 isomorphism). Check your ligand connectivity, and enable the
1421 `substructure_match` option if the target ligand is incomplete.
1422 * `stoichiometry`: there was a possible assignment in the model, but
1423 the model ligand was already assigned to a different target ligand.
1424 This indicates different stoichiometries.
1425 * `symmetries`: too many symmetries were found (by graph isomorphisms).
1426 Increase `max_symmetries`.
1427 * `no_contact`: there were no lDDT contacts between the binding site
1428 and the ligand, and lDDT-PLI is undefined. Increase the value of
1429 `lddt_pli_radius` to at least the value of the binding site `radius`.
1431 Some of these reasons can be overlapping, but a single reason will be
1434 :rtype: :class:`dict`
1445 for resnum, val
in res.items():
1452 """Get a human-readable description of why target ligands were
1453 unassigned, as a dictionary keyed by the controlled dictionary
1454 from :attr:`unassigned_target_ligands`.
1462 """Get a dictionary of model ligands not assigned to any target ligand,
1463 keyed by model ligand (chain name, :class:`~ost.mol.ResNum`).
1465 The assignment for the lDDT-PLI score is used (and is controlled
1466 by the `rmsd_assignment` argument).
1468 Each item contains a string from a controlled dictionary
1469 about the reason for the absence of assignment.
1470 A human-readable description can be obtained from the
1471 :attr:`unassigned_model_ligand_descriptions` property.
1472 Currently, the following reasons are reported:
1474 * `no_ligand`: there was no ligand in the target.
1475 * `disconnected`: the ligand graph is disconnected.
1476 * `binding_site`: a potential assignment was found in the target, but
1477 there were no polymer residues in proximity of the ligand in the
1479 * `model_representation`: a potential assignment was found in the target,
1480 but no representation of the binding site was found in the model.
1481 (I.e. the binding site was not modeled, or the model ligand was
1482 positioned too far in combination with `full_bs_search=False`)
1483 * `identity`: the ligand was not found in the target (by graph
1484 isomorphism). Check your ligand connectivity, and enable the
1485 `substructure_match` option if the target ligand is incomplete.
1486 * `stoichiometry`: there was a possible assignment in the target, but
1487 the model target was already assigned to a different model ligand.
1488 This indicates different stoichiometries.
1489 * `symmetries`: too many symmetries were found (by graph isomorphisms).
1490 Increase `max_symmetries`.
1491 * `no_contact`: there were no lDDT contacts between the binding site
1492 and the ligand in the target structure, and lDDT-PLI is undefined.
1493 Increase the value of `lddt_pli_radius` to at least the value of the
1494 binding site `radius`.
1496 Some of these reasons can be overlapping, but a single reason will be
1499 :rtype: :class:`dict`
1510 for resnum, val
in res.items():
1517 """Get a human-readable description of why model ligands were
1518 unassigned, as a dictionary keyed by the controlled dictionary
1519 from :attr:`unassigned_model_ligands`.
1526 def _set_custom_mapping(self, mapping):
1527 """ sets self.__model_mapping with a full blown MappingResult object
1529 :param mapping: mapping with trg chains as key and mdl ch as values
1530 :type mapping: :class:`dict`
1533 chem_mapping, chem_group_alns, mdl = \
1534 chain_mapper.GetChemMapping(self.
modelmodel)
1539 if len(mapping) != len(set(mapping.keys())):
1540 raise RuntimeError(f
"Expect unique trg chain names in mapping. Got "
1541 f
"{mapping.keys()}")
1542 if len(mapping) != len(set(mapping.values())):
1543 raise RuntimeError(f
"Expect unique mdl chain names in mapping. Got "
1544 f
"{mapping.values()}")
1546 trg_chains = set([ch.GetName()
for ch
in chain_mapper.target.chains])
1547 mdl_chains = set([ch.GetName()
for ch
in mdl.chains])
1548 for k,v
in mapping.items():
1549 if k
not in trg_chains:
1550 raise RuntimeError(f
"Target chain \"{k}\" is not available "
1551 f
"in target processed for chain mapping "
1553 if v
not in mdl_chains:
1554 raise RuntimeError(f
"Model chain \"{v}\" is not available "
1555 f
"in model processed for chain mapping "
1558 for trg_ch, mdl_ch
in mapping.items():
1559 trg_group_idx =
None
1560 mdl_group_idx =
None
1561 for idx, group
in enumerate(chain_mapper.chem_groups):
1565 for idx, group
in enumerate(chem_mapping):
1569 if trg_group_idx
is None or mdl_group_idx
is None:
1570 raise RuntimeError(
"Could not establish a valid chem grouping "
1571 "of chain names provided in custom mapping.")
1573 if trg_group_idx != mdl_group_idx:
1574 raise RuntimeError(f
"Chem group mismatch in custom mapping: "
1575 f
"target chain \"{trg_ch}\" groups with the "
1576 f
"following chemically equivalent target "
1578 f
"{chain_mapper.chem_groups[trg_group_idx]} "
1579 f
"but model chain \"{mdl_ch}\" maps to the "
1580 f
"following target chains: "
1581 f
"{chain_mapper.chem_groups[mdl_group_idx]}")
1583 pairs = set([(trg_ch, mdl_ch)
for trg_ch, mdl_ch
in mapping.items()])
1585 chain_mapping._GetRefMdlAlns(chain_mapper.chem_groups,
1586 chain_mapper.chem_group_alignments,
1592 final_mapping = list()
1593 for ref_chains
in chain_mapper.chem_groups:
1594 mapped_mdl_chains = list()
1595 for ref_ch
in ref_chains:
1596 if ref_ch
in mapping:
1597 mapped_mdl_chains.append(mapping[ref_ch])
1599 mapped_mdl_chains.append(
None)
1600 final_mapping.append(mapped_mdl_chains)
1603 for ref_group, mdl_group
in zip(chain_mapper.chem_groups,
1605 for ref_ch, mdl_ch
in zip(ref_group, mdl_group):
1606 if ref_ch
is not None and mdl_ch
is not None:
1607 aln = ref_mdl_alns[(ref_ch, mdl_ch)]
1608 trg_view = chain_mapper.target.Select(f
"cname={mol.QueryQuoteName(ref_ch)}")
1609 mdl_view = mdl.Select(f
"cname={mol.QueryQuoteName(mdl_ch)}")
1610 aln.AttachView(0, trg_view)
1611 aln.AttachView(1, mdl_view)
1612 alns[(ref_ch, mdl_ch)] = aln
1615 chain_mapper.chem_groups,
1617 final_mapping, alns)
1619 def _find_unassigned_model_ligand_reason(self, ligand, assignment="lddt_pli", check=True):
1625 raise ValueError(
"Ligand %s is not in self.model_ligands" % ligand)
1629 details = getattr(self, assignment +
"_details")
1630 if ligand.chain.name
in details
and ligand.number
in details[ligand.chain.name]:
1631 ligand_details = details[ligand.chain.name][ligand.number]
1632 if not (
"unassigned" in ligand_details
and ligand_details[
"unassigned"]):
1633 raise RuntimeError(
"Ligand %s is mapped to %s" % (ligand, ligand_details[
"target_ligand"]))
1637 return (
"no_ligand",
"No ligand in the target")
1640 graph = _ResidueToGraph(ligand)
1641 if not networkx.is_connected(graph):
1642 return (
"disconnected",
"Ligand graph is disconnected")
1646 if np.isnan(assigned):
1653 return_symmetries=
False)
1654 except (NoSymmetryError, DisconnectedGraphError):
1656 except TooManySymmetriesError:
1664 assignment_matrix = getattr(self, assignment +
"_matrix")
1665 all_nan = np.all(np.isnan(assignment_matrix[:, ligand_idx]))
1673 return (
"stoichiometry",
1674 "Ligand was already assigned to an other "
1675 "model ligand (different stoichiometry)")
1676 elif assigned == -1:
1678 return (
"symmetries",
1679 "Too many symmetries were found.")
1683 iso =
"subgraph isomorphism"
1685 iso =
"full graph isomorphism"
1686 return (
"identity",
"Ligand was not found in the target (by %s)" % iso)
1688 def _find_unassigned_target_ligand_reason(self, ligand, assignment="lddt_pli", check=True):
1694 raise ValueError(
"Ligand %s is not in self.target_ligands" % ligand)
1698 details = getattr(self, assignment +
"_details")
1699 for cname, chain_ligands
in details.items():
1700 for rnum, details
in chain_ligands.items():
1701 if "unassigned" in details
and details[
"unassigned"]:
1703 if details[
'target_ligand'] == ligand:
1704 raise RuntimeError(
"Ligand %s is mapped to %s.%s" % (
1705 ligand, cname, rnum))
1709 return (
"no_ligand",
"No ligand in the model")
1712 graph = _ResidueToGraph(ligand)
1713 if not networkx.is_connected(graph):
1714 return (
"disconnected",
"Ligand graph is disconnected")
1720 for model_lig_idx, assigned
in enumerate(
1722 if np.isnan(assigned):
1729 return_symmetries=
False)
1730 except (NoSymmetryError, DisconnectedGraphError):
1732 except TooManySymmetriesError:
1739 return (
"stoichiometry",
1740 "Ligand was already assigned to an other "
1741 "target ligand (different stoichiometry)")
1742 elif assigned == -1:
1744 return (
"symmetries",
1745 "Too many symmetries were found.")
1749 iso =
"subgraph isomorphism"
1751 iso =
"full graph isomorphism"
1752 return (
"identity",
"Ligand was not found in the model (by %s)" % iso)
1779 if mdl_ligand.handle.hash_code
not in self.
_get_repr_input_get_repr_input:
1786 for at
in mdl_ligand.atoms:
1789 for close_at
in close_atoms:
1790 chains.add(close_at.GetChain().GetName())
1796 query +=
','.join([mol.QueryQuoteName(x)
for x
in chains])
1800 chem_mapping = list()
1802 chem_mapping.append([x
for x
in m
if x
in chains])
1822 key = (target_ligand.handle.hash_code, 0)
1824 key = (target_ligand.handle.hash_code, model_ligand.handle.hash_code)
1826 if key
not in self.
_repr_repr:
1841 self.
_repr_repr[key] = reprs
1846 "model_representation",
1847 "No representation of the reference binding site was "
1848 "found in the model")
1850 return self.
_repr_repr[key]
1853 def _ResidueToGraph(residue, by_atom_index=False):
1854 """Return a NetworkX graph representation of the residue.
1856 :param residue: the residue from which to derive the graph
1857 :type residue: :class:`ost.mol.ResidueHandle` or
1858 :class:`ost.mol.ResidueView`
1859 :param by_atom_index: Set this parameter to True if you need the nodes to
1860 be labeled by atom index (within the residue).
1861 Otherwise, if False, the nodes will be labeled by
1863 :type by_atom_index: :class:`bool`
1864 :rtype: :class:`~networkx.classes.graph.Graph`
1866 Nodes are labeled with the Atom's uppercase :attr:`~ost.mol.AtomHandle.element`.
1868 nxg = networkx.Graph()
1870 for atom
in residue.atoms:
1871 nxg.add_node(atom.name, element=atom.element.upper())
1875 nxg.add_edges_from([(
1877 b.second.name)
for a
in residue.atoms
for b
in a.GetBondList()])
1880 nxg = networkx.relabel_nodes(nxg,
1881 {a: b
for a, b
in zip(
1882 [a.name
for a
in residue.atoms],
1883 range(len(residue.atoms)))},
1888 def SCRMSD(model_ligand, target_ligand, transformation=geom.Mat4(),
1889 substructure_match=
False, max_symmetries=1e6):
1890 """Calculate symmetry-corrected RMSD.
1892 Binding site superposition must be computed separately and passed as
1895 :param model_ligand: The model ligand
1896 :type model_ligand: :class:`ost.mol.ResidueHandle` or
1897 :class:`ost.mol.ResidueView`
1898 :param target_ligand: The target ligand
1899 :type target_ligand: :class:`ost.mol.ResidueHandle` or
1900 :class:`ost.mol.ResidueView`
1901 :param transformation: Optional transformation to apply on each atom
1902 position of model_ligand.
1903 :type transformation: :class:`ost.geom.Mat4`
1904 :param substructure_match: Set this to True to allow partial target
1906 :type substructure_match: :class:`bool`
1907 :param max_symmetries: If more than that many isomorphisms exist, raise
1908 a :class:`TooManySymmetriesError`. This can only be assessed by
1909 generating at least that many isomorphisms and can take some time.
1910 :type max_symmetries: :class:`int`
1911 :rtype: :class:`float`
1912 :raises: :class:`NoSymmetryError` when no symmetry can be found,
1913 :class:`DisconnectedGraphError` when ligand graph is disconnected,
1914 :class:`TooManySymmetriesError` when more than `max_symmetries`
1915 isomorphisms are found.
1918 symmetries = _ComputeSymmetries(model_ligand, target_ligand,
1919 substructure_match=substructure_match,
1921 max_symmetries=max_symmetries)
1922 return _SCRMSD_symmetries(symmetries, model_ligand, target_ligand,
1926 def _SCRMSD_symmetries(symmetries, model_ligand, target_ligand,
1928 """Compute SCRMSD with pre-computed symmetries. Internal. """
1931 for i, (trg_sym, mdl_sym)
in enumerate(symmetries):
1933 trg_lig_rmsd_ent = mol.CreateEntity()
1934 trg_lig_rmsd_editor = trg_lig_rmsd_ent.EditXCS()
1935 trg_lig_rmsd_chain = trg_lig_rmsd_editor.InsertChain(
"_")
1936 trg_lig_rmsd_res = trg_lig_rmsd_editor.AppendResidue(trg_lig_rmsd_chain,
"LIG")
1938 mdl_lig_rmsd_ent = mol.CreateEntity()
1939 mdl_lig_rmsd_editor = mdl_lig_rmsd_ent.EditXCS()
1940 mdl_lig_rmsd_chain = mdl_lig_rmsd_editor.InsertChain(
"_")
1941 mdl_lig_rmsd_res = mdl_lig_rmsd_editor.AppendResidue(mdl_lig_rmsd_chain,
"LIG")
1943 for mdl_anum, trg_anum
in zip(mdl_sym, trg_sym):
1945 trg_atom = target_ligand.atoms[trg_anum]
1946 mdl_atom = model_ligand.atoms[mdl_anum]
1948 trg_lig_rmsd_editor.InsertAtom(trg_lig_rmsd_res, trg_atom.name, trg_atom.pos)
1949 mdl_lig_rmsd_editor.InsertAtom(mdl_lig_rmsd_res, mdl_atom.name, mdl_atom.pos)
1951 trg_lig_rmsd_editor.UpdateICS()
1952 mdl_lig_rmsd_editor.UpdateICS()
1954 rmsd = mol.alg.CalculateRMSD(mdl_lig_rmsd_ent.CreateFullView(),
1955 trg_lig_rmsd_ent.CreateFullView(),
1957 if rmsd < best_rmsd:
1963 def _ComputeSymmetries(model_ligand, target_ligand, substructure_match=False,
1964 by_atom_index=False, return_symmetries=True,
1965 max_symmetries=1e6):
1966 """Return a list of symmetries (isomorphisms) of the model onto the target
1969 :param model_ligand: The model ligand
1970 :type model_ligand: :class:`ost.mol.ResidueHandle` or
1971 :class:`ost.mol.ResidueView`
1972 :param target_ligand: The target ligand
1973 :type target_ligand: :class:`ost.mol.ResidueHandle` or
1974 :class:`ost.mol.ResidueView`
1975 :param substructure_match: Set this to True to allow partial ligands
1977 :type substructure_match: :class:`bool`
1978 :param by_atom_index: Set this parameter to True if you need the symmetries
1979 to refer to atom index (within the residue).
1980 Otherwise, if False, the symmetries refer to atom
1982 :type by_atom_index: :class:`bool`
1983 :type return_symmetries: If Truthy, return the mappings, otherwise simply
1984 return True if a mapping is found (and raise if
1985 no mapping is found). This is useful to quickly
1986 find out if a mapping exist without the expensive
1987 step to find all the mappings.
1988 :type return_symmetries: :class:`bool`
1989 :param max_symmetries: If more than that many isomorphisms exist, raise
1990 a :class:`TooManySymmetriesError`. This can only be assessed by
1991 generating at least that many isomorphisms and can take some time.
1992 :type max_symmetries: :class:`int`
1993 :raises: :class:`NoSymmetryError` when no symmetry can be found;
1994 :class:`TooManySymmetriesError` when more than `max_symmetries`
1995 isomorphisms are found.
2000 model_graph = _ResidueToGraph(model_ligand, by_atom_index=by_atom_index)
2001 target_graph = _ResidueToGraph(target_ligand, by_atom_index=by_atom_index)
2003 if not networkx.is_connected(model_graph):
2005 if not networkx.is_connected(target_graph):
2012 gm = networkx.algorithms.isomorphism.GraphMatcher(
2013 model_graph, target_graph, node_match=
lambda x, y:
2014 x[
"element"] == y[
"element"])
2015 if gm.is_isomorphic():
2016 if not return_symmetries:
2019 for i, isomorphism
in enumerate(gm.isomorphisms_iter()):
2020 if i >= max_symmetries:
2022 "Too many symmetries between %s and %s" % (
2023 str(model_ligand), str(target_ligand)))
2024 symmetries.append((list(isomorphism.values()), list(isomorphism.keys())))
2025 assert len(symmetries) > 0
2026 LogDebug(
"Found %s isomorphic mappings (symmetries)" % len(symmetries))
2027 elif gm.subgraph_is_isomorphic()
and substructure_match:
2028 if not return_symmetries:
2031 for i, isomorphism
in enumerate(gm.subgraph_isomorphisms_iter()):
2032 if i >= max_symmetries:
2034 "Too many symmetries between %s and %s" % (
2035 str(model_ligand), str(target_ligand)))
2036 symmetries.append((list(isomorphism.values()), list(isomorphism.keys())))
2037 assert len(symmetries) > 0
2039 assert len(symmetries[0][0]) == len(target_ligand.atoms)
2040 LogDebug(
"Found %s subgraph isomorphisms (symmetries)" % len(symmetries))
2041 elif gm.subgraph_is_isomorphic():
2042 LogDebug(
"Found subgraph isomorphisms (symmetries), but"
2043 " ignoring because substructure_match=False")
2045 str(model_ligand), str(target_ligand)))
2047 LogDebug(
"Found no isomorphic mappings (symmetries)")
2049 str(model_ligand), str(target_ligand)))
2054 """Exception raised when no symmetry can be found.
2059 class TooManySymmetriesError(ValueError):
2060 """Exception raised when too many symmetries are found.
2065 """Exception raised when the ligand graph is disconnected.
2070 __all__ = [
"LigandScorer",
"SCRMSD",
"NoSymmetryError",
2071 "TooManySymmetriesError",
"DisconnectedGraphError"]
def get_get_repr_input(self, mdl_ligand)
_unassigned_model_ligands
def _find_unassigned_target_ligand_reason(self, ligand, assignment="lddt_pli", check=True)
def _build_binding_site_entity(ligand, residues, extra_residues=[])
def _assign_matrices(self, mat1, mat2, data, main_key)
def lddt_pli_matrix(self)
def _compute_scores(self)
def coverage_matrix(self)
def _find_unassigned_model_ligand_reason(self, ligand, assignment="lddt_pli", check=True)
def unassigned_model_ligand_descriptions(self)
def unassigned_target_ligands(self)
_unassigned_model_ligand_short
def _prepare_ligands(new_entity, old_entity, ligands, rename_chain)
_assignment_match_coverage
def get_target_binding_site(self, target_ligand)
def unassigned_model_ligands(self)
def _compute_lddtpli(self, symmetries, target_ligand, model_ligand)
_unassigned_target_ligand_short
def _extract_ligands(entity)
def unassigned_target_ligand_descriptions(self)
def _find_ligand_assignment(mat1, mat2=None, coverage=None, coverage_delta=None)
def _assign_matrix(self, mat, data1, main_key1, data2, main_key2)
def chain_mapping_mdl(self)
_unassigned_target_ligand_descriptions
_unassigned_model_ligand_descriptions
_unassigned_target_ligands
def _assign_ligands_rmsd(self)
_unassigned_target_ligands_reason
def _assign_ligands_rmsd_only(self)
def _assign_ligands_lddt_pli(self)
def get_repr(self, target_ligand, model_ligand)
def lddt_pli_details(self)
def chem_group_alns(self)
def _set_custom_mapping(self, mapping)
def _compute_rmsd(self, symmetries, target_ligand, model_ligand)
def _assign_unassigned(self, assigned_trg, assigned_mdl, out_main, out_details, main_key)
def __init__(self, model, target, model_ligands=None, target_ligands=None, resnum_alignments=False, check_resnames=True, rename_ligand_chain=False, chain_mapper=None, substructure_match=False, coverage_delta=0.2, radius=4.0, lddt_pli_radius=6.0, lddt_lp_radius=10.0, model_bs_radius=20, binding_sites_topn=100000, global_chain_mapping=False, rmsd_assignment=False, n_max_naive=12, max_symmetries=1e5, custom_mapping=None, unassigned=False, full_bs_search=False)
def SCRMSD(model_ligand, target_ligand, transformation=geom.Mat4(), substructure_match=False, max_symmetries=1e6)