OpenStructure
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
qsscoring.py
Go to the documentation of this file.
1 """
2 Scoring of quaternary structures (QS). The QS scoring is according to the paper
3 by `Bertoni et al. <https://dx.doi.org/10.1038/s41598-017-09654-8>`_.
4 
5 .. note ::
6 
7  Requirements for use:
8 
9  - A default :class:`compound library <ost.conop.CompoundLib>` must be defined
10  and accessible via :func:`~ost.conop.GetDefaultLib`. This is set by default
11  when executing scripts with ``ost``. Otherwise, you must set this with
12  :func:`~ost.conop.SetDefaultLib`.
13  - ClustalW must be installed (unless you provide chain mappings)
14  - Python modules `numpy` and `scipy` must be installed and available
15  (e.g. use ``pip install scipy numpy``)
16 
17 Authors: Gerardo Tauriello, Martino Bertoni
18 """
19 
20 from ost import mol, geom, conop, seq, settings, PushVerbosityLevel
21 from ost import LogError, LogWarning, LogScript, LogInfo, LogVerbose, LogDebug
22 from ost.bindings.clustalw import ClustalW
23 from ost.mol.alg import lDDTScorer
24 from ost.seq.alg.renumber import Renumber
25 import numpy as np
26 from scipy.misc import factorial
27 from scipy.special import binom
28 from scipy.cluster.hierarchy import fclusterdata
29 import itertools
30 
31 ###############################################################################
32 # QS scoring
33 ###############################################################################
34 
35 class QSscoreError(Exception):
36  """Exception to be raised for "acceptable" exceptions in QS scoring.
37 
38  Those are cases we might want to capture for default behavior.
39  """
40  pass
41 
42 class QSscorer:
43  """Object to compute QS scores.
44 
45  Simple usage without any precomputed contacts, symmetries and mappings:
46 
47  .. code-block:: python
48 
49  import ost
50  from ost.mol.alg import qsscoring
51 
52  # load two biounits to compare
53  ent_full = ost.io.LoadPDB('3ia3', remote=True)
54  ent_1 = ent_full.Select('cname=A,D')
55  ent_2 = ent_full.Select('cname=B,C')
56  # get score
57  ost.PushVerbosityLevel(3)
58  try:
59  qs_scorer = qsscoring.QSscorer(ent_1, ent_2)
60  ost.LogScript('QSscore:', str(qs_scorer.global_score))
61  ost.LogScript('Chain mapping used:', str(qs_scorer.chain_mapping))
62  # commonly you want the QS global score as output
63  qs_score = qs_scorer.global_score
64  except qsscoring.QSscoreError as ex:
65  # default handling: report failure and set score to 0
66  ost.LogError('QSscore failed:', str(ex))
67  qs_score = 0
68 
69  For maximal performance when computing QS scores of the same entity with many
70  others, it is advisable to construct and reuse :class:`QSscoreEntity` objects.
71 
72  Any known / precomputed information can be filled into the appropriate
73  attribute here (no checks done!). Otherwise most quantities are computed on
74  first access and cached (lazy evaluation). Setters are provided to set values
75  with extra checks (e.g. :func:`SetSymmetries`).
76 
77  All necessary seq. alignments are done by global BLOSUM62-based alignment. A
78  multiple sequence alignment is performed with ClustalW unless
79  :attr:`chain_mapping` is provided manually. You will need to have an
80  executable ``clustalw`` or ``clustalw2`` in your ``PATH`` or you must set
81  :attr:`clustalw_bin` accordingly. Otherwise an exception
82  (:class:`ost.settings.FileNotFound`) is thrown.
83 
84  Formulas for QS scores:
85 
86  ::
87 
88  - QS_best = weighted_scores / (weight_sum + weight_extra_mapped)
89  - QS_global = weighted_scores / (weight_sum + weight_extra_all)
90  -> weighted_scores = sum(w(min(d1,d2)) * (1 - abs(d1-d2)/12)) for shared
91  -> weight_sum = sum(w(min(d1,d2))) for shared
92  -> weight_extra_mapped = sum(w(d)) for all mapped but non-shared
93  -> weight_extra_all = sum(w(d)) for all non-shared
94  -> w(d) = 1 if d <= 5, exp(-2 * ((d-5.0)/4.28)^2) else
95 
96  In the formulas above:
97 
98  * "d": CA/CB-CA/CB distance of an "inter-chain contact" ("d1", "d2" for
99  "shared" contacts).
100  * "mapped": we could map chains of two structures and align residues in
101  :attr:`alignments`.
102  * "shared": pairs of residues which are "mapped" and have
103  "inter-chain contact" in both structures.
104  * "inter-chain contact": CB-CB pairs (CA for GLY) with distance <= 12 A
105  (fallback to CA-CA if :attr:`calpha_only` is True).
106  * "w(d)": weighting function (prob. of 2 res. to interact given CB distance)
107  from `Xu et al. 2009 <https://dx.doi.org/10.1016%2Fj.jmb.2008.06.002>`_.
108 
109  :param ent_1: First structure to be scored.
110  :type ent_1: :class:`QSscoreEntity`, :class:`~ost.mol.EntityHandle` or
111  :class:`~ost.mol.EntityView`
112  :param ent_2: Second structure to be scored.
113  :type ent_2: :class:`QSscoreEntity`, :class:`~ost.mol.EntityHandle` or
114  :class:`~ost.mol.EntityView`
115  :param res_num_alignment: Sets :attr:`res_num_alignment`
116 
117  :raises: :class:`QSscoreError` if input structures are invalid or are monomers
118  or have issues that make it impossible for a QS score to be computed.
119 
120  .. attribute:: qs_ent_1
121 
122  :class:`QSscoreEntity` object for *ent_1* given at construction.
123  If entity names (:attr:`~QSscoreEntity.original_name`) are not unique, we
124  set it to 'pdb_1' using :func:`~QSscoreEntity.SetName`.
125 
126  .. attribute:: qs_ent_2
127 
128  :class:`QSscoreEntity` object for *ent_2* given at construction.
129  If entity names (:attr:`~QSscoreEntity.original_name`) are not unique, we
130  set it to 'pdb_2' using :func:`~QSscoreEntity.SetName`.
131 
132  .. attribute:: calpha_only
133 
134  True if any of the two structures is CA-only (after cleanup).
135 
136  :type: :class:`bool`
137 
138  .. attribute:: max_ca_per_chain_for_cm
139 
140  Maximal number of CA atoms to use in each chain to determine chain mappings.
141  Setting this to -1 disables the limit. Limiting it speeds up determination
142  of symmetries and chain mappings. By default it is set to 100.
143 
144  :type: :class:`int`
145 
146  .. attribute:: res_num_alignment
147 
148  Forces each alignment in :attr:`alignments` to be based on residue numbers
149  instead of using a global BLOSUM62-based alignment.
150 
151  :type: :class:`bool`
152  """
153  def __init__(self, ent_1, ent_2, res_num_alignment=False):
154  # generate QSscoreEntity objects?
155  if isinstance(ent_1, QSscoreEntity):
156  self.qs_ent_1 = ent_1
157  else:
158  self.qs_ent_1 = QSscoreEntity(ent_1)
159  if isinstance(ent_2, QSscoreEntity):
160  self.qs_ent_2 = ent_2
161  else:
162  self.qs_ent_2 = QSscoreEntity(ent_2)
163  # check validity of inputs
164  if not self.qs_ent_1.is_valid or not self.qs_ent_2.is_valid:
165  raise QSscoreError("Invalid input in QSscorer!")
166  # set names for structures
167  if self.qs_ent_1.original_name == self.qs_ent_2.original_name:
168  self.qs_ent_1.SetName('pdb_1')
169  self.qs_ent_2.SetName('pdb_2')
170  else:
171  self.qs_ent_1.SetName(self.qs_ent_1.original_name)
172  self.qs_ent_2.SetName(self.qs_ent_2.original_name)
173  # set other public attributes
174  self.res_num_alignment = res_num_alignment
175  self.calpha_only = self.qs_ent_1.calpha_only or self.qs_ent_2.calpha_only
177  # init cached stuff
178  self._chem_mapping = None
179  self._ent_to_cm_1 = None
180  self._ent_to_cm_2 = None
181  self._symm_1 = None
182  self._symm_2 = None
183  self._chain_mapping = None
184  self._chain_mapping_scheme = None
185  self._alignments = None
186  self._mapped_residues = None
187  self._global_score = None
188  self._best_score = None
189  self._superposition = None
190  self._clustalw_bin = None
191 
192  @property
193  def chem_mapping(self):
194  """Inter-complex mapping of chemical groups.
195 
196  Each group (see :attr:`QSscoreEntity.chem_groups`) is mapped according to
197  highest sequence identity. Alignment is between longest sequences in groups.
198 
199  Limitations:
200 
201  - If different numbers of groups, we map only the groups for the complex
202  with less groups (rest considered unmapped and shown as warning)
203  - The mapping is forced: the "best" mapping will be chosen independently of
204  how low the seq. identity may be
205 
206  :getter: Computed on first use (cached)
207  :type: :class:`dict` with key = :class:`tuple` of chain names in
208  :attr:`qs_ent_1` and value = :class:`tuple` of chain names in
209  :attr:`qs_ent_2`.
210 
211  :raises: :class:`QSscoreError` if we end up having no chains for either
212  entity in the mapping (can happen if chains do not have CA atoms).
213  """
214  if self._chem_mapping is None:
215  self._chem_mapping = _GetChemGroupsMapping(self.qs_ent_1, self.qs_ent_2)
216  return self._chem_mapping
217 
218  @property
219  def ent_to_cm_1(self):
220  """Subset of :attr:`qs_ent_1` used to compute chain mapping and symmetries.
221 
222  Properties:
223 
224  - Includes only residues aligned according to :attr:`chem_mapping`
225  - Includes only 1 CA atom per residue
226  - Has at least 5 and at most :attr:`max_ca_per_chain_for_cm` atoms per chain
227  - All chains of the same chemical group have the same number of atoms
228  (also in :attr:`ent_to_cm_2` according to :attr:`chem_mapping`)
229  - All chains appearing in :attr:`chem_mapping` appear in this entity
230  (so the two can be safely used together)
231 
232  This entity might be transformed (i.e. all positions rotated/translated by
233  same transformation matrix) if this can speed up computations. So do not
234  assume fixed global positions (but relative distances will remain fixed).
235 
236  :getter: Computed on first use (cached)
237  :type: :class:`~ost.mol.EntityHandle`
238 
239  :raises: :class:`QSscoreError` if any chain ends up having less than 5 res.
240  """
241  if self._ent_to_cm_1 is None:
243  return self._ent_to_cm_1
244 
245  @property
246  def ent_to_cm_2(self):
247  """Subset of :attr:`qs_ent_1` used to compute chain mapping and symmetries
248  (see :attr:`ent_to_cm_1` for details).
249  """
250  if self._ent_to_cm_2 is None:
252  return self._ent_to_cm_2
253 
254  @property
255  def symm_1(self):
256  """Symmetry groups for :attr:`qs_ent_1` used to speed up chain mapping.
257 
258  This is a list of chain-lists where each chain-list can be used reconstruct
259  the others via cyclic C or dihedral D symmetry. The first chain-list is used
260  as a representative symmetry group. For heteromers, the group-members must
261  contain all different seqres in oligomer.
262 
263  Example: symm. groups [(A,B,C), (D,E,F), (G,H,I)] means that there are
264  symmetry transformations to get (D,E,F) and (G,H,I) from (A,B,C).
265 
266  Properties:
267 
268  - All symmetry group tuples have the same length (num. of chains)
269  - All chains in :attr:`ent_to_cm_1` appear (w/o duplicates)
270  - For heteros: symmetry group tuples have all different chem. groups
271  - Trivial symmetry group = one tuple with all chains (used if inconsistent
272  data provided or if no symmetry is found)
273  - Either compatible to :attr:`symm_2` or trivial symmetry groups used.
274  Compatibility requires same lengths of symmetry group tuples and it must
275  be possible to get an overlap (80% of residues covered within 6 A of a
276  (chem. mapped) chain) of all chains in representative symmetry groups by
277  superposing one pair of chains.
278 
279  :getter: Computed on first use (cached)
280  :type: :class:`list` of :class:`tuple` of :class:`str` (chain names)
281  """
282  if self._symm_1 is None:
283  self._ComputeSymmetry()
284  return self._symm_1
285 
286  @property
287  def symm_2(self):
288  """Symmetry groups for :attr:`qs_ent_2` (see :attr:`symm_1` for details)."""
289  if self._symm_2 is None:
290  self._ComputeSymmetry()
291  return self._symm_2
292 
293  def SetSymmetries(self, symm_1, symm_2):
294  """Set user-provided symmetry groups.
295 
296  These groups are restricted to chain names appearing in :attr:`ent_to_cm_1`
297  and :attr:`ent_to_cm_2` respectively. They are only valid if they cover all
298  chains and both *symm_1* and *symm_2* have same lengths of symmetry group
299  tuples. Otherwise trivial symmetry group used (see :attr:`symm_1`).
300 
301  :param symm_1: Value to set for :attr:`symm_1`.
302  :param symm_2: Value to set for :attr:`symm_2`.
303  """
304  # restrict chain names
305  self._symm_1 = _CleanUserSymmetry(symm_1, self.ent_to_cm_1)
306  self._symm_2 = _CleanUserSymmetry(symm_2, self.ent_to_cm_2)
307  # check that we have reasonable symmetries set (fallback: all chains)
308  if not _AreValidSymmetries(self._symm_1, self._symm_2):
309  self._symm_1 = [tuple(ch.name for ch in self.ent_to_cm_1.chains)]
310  self._symm_2 = [tuple(ch.name for ch in self.ent_to_cm_2.chains)]
311 
312  @property
313  def chain_mapping(self):
314  """Mapping from :attr:`ent_to_cm_1` to :attr:`ent_to_cm_2`.
315 
316  Properties:
317 
318  - Mapping is between chains of same chem. group (see :attr:`chem_mapping`)
319  - Each chain can appear only once in mapping
320  - All chains of complex with less chains are mapped
321  - Symmetry (:attr:`symm_1`, :attr:`symm_2`) is taken into account
322 
323  Details on algorithms used to find mapping:
324 
325  - We try all pairs of chem. mapped chains within symmetry group and get
326  superpose-transformation for them
327  - First option: check for "sufficient overlap" of other chain-pairs
328 
329  - For each chain-pair defined above: apply superposition to full oligomer
330  and map chains based on structural overlap
331  - Structural overlap = X% of residues in second oligomer covered within Y
332  Angstrom of a (chem. mapped) chain in first oligomer. We successively
333  try (X,Y) = (80,4), (40,6) and (20,8) to be less and less strict in
334  mapping (warning shown for most permissive one).
335  - If multiple possible mappings are found, we choose the one which leads
336  to the lowest multi-chain-RMSD given the superposition
337 
338  - Fallback option: try all mappings to find minimal multi-chain-RMSD
339  (warning shown)
340 
341  - For each chain-pair defined above: apply superposition, try all (!)
342  possible chain mappings (within symmetry group) and keep mapping with
343  lowest multi-chain-RMSD
344  - Repeat procedure above to resolve symmetry. Within the symmetry group we
345  can use the chain mapping computed before and we just need to find which
346  symmetry group in first oligomer maps to which in the second one. We
347  again try all possible combinations...
348  - Limitations:
349 
350  - Trying all possible mappings is a combinatorial nightmare (factorial).
351  We throw an exception if too many combinations (e.g. octomer vs
352  octomer with no usable symmetry)
353  - The mapping is forced: the "best" mapping will be chosen independently
354  of how badly they fit in terms of multi-chain-RMSD
355  - As a result, such a forced mapping can lead to a large range of
356  resulting QS scores. An extreme example was observed between 1on3.1
357  and 3u9r.1, where :attr:`global_score` can range from 0.12 to 0.43
358  for mappings with very similar multi-chain-RMSD.
359 
360  :getter: Computed on first use (cached)
361  :type: :class:`dict` with key / value = :class:`str` (chain names, key
362  for :attr:`ent_to_cm_1`, value for :attr:`ent_to_cm_2`)
363  :raises: :class:`QSscoreError` if there are too many combinations to check
364  to find a chain mapping.
365  """
366  if self._chain_mapping is None:
368  _GetChainMapping(self.ent_to_cm_1, self.ent_to_cm_2, self.symm_1,
369  self.symm_2, self.chem_mapping)
370  LogInfo('Mapping found: %s' % str(self._chain_mapping))
371  return self._chain_mapping
372 
373  @property
375  """Mapping scheme used to get :attr:`chain_mapping`.
376 
377  Possible values:
378 
379  - 'strict': 80% overlap needed within 4 Angstrom (overlap based mapping).
380  - 'tolerant': 40% overlap needed within 6 Angstrom (overlap based mapping).
381  - 'permissive': 20% overlap needed within 8 Angstrom (overlap based
382  mapping). It's best if you check mapping manually!
383  - 'extensive': Extensive search used for mapping detection (fallback). This
384  approach has known limitations and may be removed in future versions.
385  Mapping should be checked manually!
386  - 'user': :attr:`chain_mapping` was set by user before first use of this
387  attribute.
388 
389  :getter: Computed with :attr:`chain_mapping` on first use (cached)
390  :type: :class:`str`
391  :raises: :class:`QSscoreError` as in :attr:`chain_mapping`.
392  """
393  if self._chain_mapping_scheme is None:
394  # default: user provided
395  self._chain_mapping_scheme = 'user'
396  # get chain mapping and make sure internal variable is set
397  # -> will not compute and only update _chain_mapping if user provided
398  # -> will compute and overwrite _chain_mapping_scheme else
399  self._chain_mapping = self.chain_mapping
400  return self._chain_mapping_scheme
401 
402  @property
403  def alignments(self):
404  """List of successful sequence alignments using :attr:`chain_mapping`.
405 
406  There will be one alignment for each mapped chain and they are ordered by
407  their chain names in :attr:`qs_ent_1`.
408 
409  The first sequence of each alignment belongs to :attr:`qs_ent_1` and the
410  second one to :attr:`qs_ent_2`. The sequences are named according to the
411  mapped chain names and have views attached into :attr:`QSscoreEntity.ent`
412  of :attr:`qs_ent_1` and :attr:`qs_ent_2`.
413 
414  If :attr:`res_num_alignment` is False, each alignment is performed using a
415  global BLOSUM62-based alignment. Otherwise, the positions in the alignment
416  sequences are simply given by the residue number so that residues with
417  matching numbers are aligned.
418 
419  :getter: Computed on first use (cached)
420  :type: :class:`list` of :class:`~ost.seq.AlignmentHandle`
421  """
422  if self._alignments is None:
423  self._alignments = _GetMappedAlignments(self.qs_ent_1.ent,
424  self.qs_ent_2.ent,
425  self.chain_mapping,
426  self.res_num_alignment)
427  return self._alignments
428 
429  @property
430  def mapped_residues(self):
431  """Mapping of shared residues in :attr:`alignments`.
432 
433  :getter: Computed on first use (cached)
434  :type: :class:`dict` *mapped_residues[c1][r1] = r2* with:
435  *c1* = Chain name in first entity (= first sequence in aln),
436  *r1* = Residue number in first entity,
437  *r2* = Residue number in second entity
438  """
439  if self._mapped_residues is None:
440  self._mapped_residues = _GetMappedResidues(self.alignments)
441  return self._mapped_residues
442 
443  @property
444  def global_score(self):
445  """QS-score with penalties.
446 
447  The range of the score is between 0 (i.e. no interface residues are shared
448  between biounits) and 1 (i.e. the interfaces are identical).
449 
450  The global QS-score is computed applying penalties when interface residues
451  or entire chains are missing (i.e. anything that is not mapped in
452  :attr:`mapped_residues` / :attr:`chain_mapping`) in one of the biounits.
453 
454  :getter: Computed on first use (cached)
455  :type: :class:`float`
456  :raises: :class:`QSscoreError` if only one chain is mapped
457  """
458  if self._global_score is None:
459  self._ComputeScores()
460  return self._global_score
461 
462  @property
463  def best_score(self):
464  """QS-score without penalties.
465 
466  Like :attr:`global_score`, but neglecting additional residues or chains in
467  one of the biounits (i.e. the score is calculated considering only mapped
468  chains and residues).
469 
470  :getter: Computed on first use (cached)
471  :type: :class:`float`
472  :raises: :class:`QSscoreError` if only one chain is mapped
473  """
474  if self._best_score is None:
475  self._ComputeScores()
476  return self._best_score
477 
478  @property
479  def superposition(self):
480  """Superposition result based on shared CA atoms in :attr:`alignments`.
481 
482  The superposition can be used to map :attr:`QSscoreEntity.ent` of
483  :attr:`qs_ent_1` onto the one of :attr:`qs_ent_2`. Use
484  :func:`ost.geom.Invert` if you need the opposite transformation.
485 
486  :getter: Computed on first use (cached)
487  :type: :class:`ost.mol.alg.SuperpositionResult`
488  """
489  if self._superposition is None:
490  self._superposition = _GetQsSuperposition(self.alignments)
491  # report it
492  sup_rmsd = self._superposition.rmsd
493  cmp_view = self._superposition.view1
494  LogInfo('CA RMSD for %s aligned residues on %s chains: %.2f' \
495  % (cmp_view.residue_count, cmp_view.chain_count, sup_rmsd))
496  return self._superposition
497 
498  @property
499  def clustalw_bin(self):
500  """
501  Full path to ``clustalw`` or ``clustalw2`` executable to use for multiple
502  sequence alignments (unless :attr:`chain_mapping` is provided manually).
503 
504  :getter: Located in path on first use (cached)
505  :type: :class:`str`
506  """
507  if self._clustalw_bin is None:
508  self._clustalw_bin = settings.Locate(('clustalw', 'clustalw2'))
509  return self._clustalw_bin
510 
511  def GetOligoLDDTScorer(self, settings, penalize_extra_chains=True):
512  """
513  :return: :class:`OligoLDDTScorer` object, setup for this QS scoring problem.
514  :param settings: Passed to :class:`OligoLDDTScorer` constructor.
515  :param penalize_extra_chains: Passed to :class:`OligoLDDTScorer` constructor.
516  """
517  if penalize_extra_chains:
518  return OligoLDDTScorer(self.qs_ent_1.ent, self.qs_ent_2.ent,
519  self.alignments, self.calpha_only, settings,
520  True, self.chem_mapping)
521  else:
522  return OligoLDDTScorer(self.qs_ent_1.ent, self.qs_ent_2.ent,
523  self.alignments, self.calpha_only, settings, False)
524 
525 
526  ##############################################################################
527  # Class internal helpers (anything that doesnt easily work without this class)
528  ##############################################################################
529 
530  def _ComputeAlignedEntities(self):
531  """Fills cached ent_to_cm_1 and ent_to_cm_2."""
532  # get aligned residues via MSA
533  ev1, ev2 = _GetAlignedResidues(self.qs_ent_1, self.qs_ent_2,
534  self.chem_mapping,
536  self.clustalw_bin)
537  # extract new entities
538  self._ent_to_cm_1 = mol.CreateEntityFromView(ev1, True)
539  self._ent_to_cm_2 = mol.CreateEntityFromView(ev2, True)
540  # name them
541  self._ent_to_cm_1.SetName(self.qs_ent_1.GetName())
542  self._ent_to_cm_2.SetName(self.qs_ent_2.GetName())
543 
544  def _ComputeSymmetry(self):
545  """Fills cached symm_1 and symm_2."""
546  # get them
547  self._symm_1, self._symm_2 = \
548  _FindSymmetry(self.qs_ent_1, self.qs_ent_2, self.ent_to_cm_1,
549  self.ent_to_cm_2, self.chem_mapping)
550  # check that we have reasonable symmetries set (fallback: all chains)
551  if not _AreValidSymmetries(self._symm_1, self._symm_2):
552  self._symm_1 = [tuple(ch.name for ch in self.ent_to_cm_1.chains)]
553  self._symm_2 = [tuple(ch.name for ch in self.ent_to_cm_2.chains)]
554 
555  def _ComputeScores(self):
556  """Fills cached global_score and best_score."""
557  if len(self.chain_mapping) < 2:
558  raise QSscoreError("QS-score is not defined for monomers")
559  # get contacts
560  if self.calpha_only:
561  contacts_1 = self.qs_ent_1.contacts_ca
562  contacts_2 = self.qs_ent_2.contacts_ca
563  else:
564  contacts_1 = self.qs_ent_1.contacts
565  contacts_2 = self.qs_ent_2.contacts
566  # get scores
567  scores = _GetScores(contacts_1, contacts_2, self.mapped_residues,
568  self.chain_mapping)
569  self._best_score = scores[0]
570  self._global_score = scores[1]
571  # report scores
572  LogInfo('QSscore %s, %s: best: %.2f, global: %.2f' \
573  % (self.qs_ent_1.GetName(), self.qs_ent_2.GetName(),
574  self._best_score, self._global_score))
575 
576 
577 ###############################################################################
578 # Entity with cached entries for QS scoring
579 ###############################################################################
580 
581 class QSscoreEntity(object):
582  """Entity with cached entries for QS scoring.
583 
584  Any known / precomputed information can be filled into the appropriate
585  attribute here as long as they are labelled as read/write. Otherwise the
586  quantities are computed on first access and cached (lazy evaluation). The
587  heaviest load is expected when computing :attr:`contacts` and
588  :attr:`contacts_ca`.
589 
590  :param ent: Entity to be used for QS scoring. A copy of it will be processed.
591  :type ent: :class:`~ost.mol.EntityHandle` or :class:`~ost.mol.EntityView`
592 
593  .. attribute:: is_valid
594 
595  True, if successfully initialized. False, if input structure is monomer or
596  has less than 2 protein chains with >= 20 residues.
597 
598  :type: :class:`bool`
599 
600  .. attribute:: original_name
601 
602  Name set for *ent* when object was created.
603 
604  :type: :class:`str`
605 
606  .. attribute:: ent
607 
608  Cleaned version of *ent* passed at construction. Hydrogens are removed, the
609  entity is processed with a :class:`~ost.conop.RuleBasedProcessor` and chains
610  listed in :attr:`removed_chains` have been removed. The name of this entity
611  might change during scoring (see :func:`GetName`). Otherwise, this will be
612  fixed.
613 
614  :type: :class:`~ost.mol.EntityHandle`
615 
616  .. attribute:: removed_chains
617 
618  Chains removed from *ent* passed at construction. These are ligand and water
619  chains as well as small (< 20 res.) peptides or chains with no amino acids
620  (determined by chem. type, which is set by rule based processor).
621 
622  :type: :class:`list` of :class:`str`
623 
624  .. attribute:: calpha_only
625 
626  Whether entity is CA-only (i.e. it has 0 CB atoms)
627 
628  :type: :class:`bool`
629  """
630  def __init__(self, ent):
631  # copy entity and process/clean it
632  self.original_name = ent.GetName()
633  ent = mol.CreateEntityFromView(ent.Select('ele!=H and aname!=HN'), True)
634  if not conop.GetDefaultLib():
635  raise RuntimeError("QSscore computation requires a compound library!")
636  pr = conop.RuleBasedProcessor(conop.GetDefaultLib())
637  pr.Process(ent)
638  self.ent, self.removed_chains, self.calpha_only = _CleanInputEntity(ent)
639  # check if it's suitable for QS scoring
640  if self.ent.chain_count == 0:
641  LogError('Bad input file: ' + ent.GetName() + '. No chains left after '
642  'removing water, ligands and small peptides.')
643  self.is_valid = False
644  elif self.ent.chain_count == 1:
645  LogWarning('Structure ' + ent.GetName() + ' is a monomer.')
646  self.is_valid = True
647  else:
648  self.is_valid = True
649  # init cached stuff
650  self._ca_entity = None
651  self._ca_chains = None
652  self._alignments = {}
653  self._chem_groups = None
654  self._angles = {}
655  self._axis = {}
656  self._contacts = None
657  self._contacts_ca = None
658 
659  def GetName(self):
660  """Wrapper to :func:`~ost.mol.EntityHandle.GetName` of :attr:`ent`.
661  This is used to uniquely identify the entity while scoring. The name may
662  therefore change while :attr:`original_name` remains fixed.
663  """
664  # for duck-typing and convenience
665  return self.ent.GetName()
666 
667  def SetName(self, new_name):
668  """Wrapper to :func:`~ost.mol.EntityHandle.SetName` of :attr:`ent`.
669  Use this to change unique identifier while scoring (see :func:`GetName`).
670  """
671  # for duck-typing and convenience
672  self.ent.SetName(new_name)
673 
674  @property
675  def ca_entity(self):
676  """
677  Reduced representation of :attr:`ent` with only CA atoms.
678  This guarantees that each included residue has exactly one atom.
679 
680  :getter: Computed on first use (cached)
681  :type: :class:`~ost.mol.EntityHandle`
682  """
683  if self._ca_entity is None:
684  self._ca_entity = _GetCAOnlyEntity(self.ent)
685  return self._ca_entity
686 
687  @property
688  def ca_chains(self):
689  """
690  Map of chain names in :attr:`ent` to sequences with attached view to CA-only
691  chains (into :attr:`ca_entity`). Useful for alignments and superpositions.
692 
693  :getter: Computed on first use (cached)
694  :type: :class:`dict` (key = :class:`str`,
695  value = :class:`~ost.seq.SequenceHandle`)
696  """
697  if self._ca_chains is None:
698  self._ca_chains = dict()
699  ca_entity = self.ca_entity
700  for ch in ca_entity.chains:
701  self._ca_chains[ch.name] = seq.SequenceFromChain(ch.name, ch)
702  return self._ca_chains
703 
704  def GetAlignment(self, c1, c2):
705  """Get sequence alignment of chain *c1* with chain *c2*.
706  Computed on first use based on :attr:`ca_chains` (cached).
707 
708  :param c1: Chain name for first chain to align.
709  :type c1: :class:`str`
710  :param c2: Chain name for second chain to align.
711  :type c2: :class:`str`
712  :rtype: :class:`~ost.seq.AlignmentHandle` or None if it failed.
713  """
714  if (c1,c2) not in self._alignments:
715  ca_chains = self.ca_chains
716  self._alignments[(c1,c2)] = _AlignAtomSeqs(ca_chains[c1], ca_chains[c2])
717  return self._alignments[(c1,c2)]
718 
719  @property
720  def chem_groups(self):
721  """
722  Intra-complex group of chemically identical (seq. id. > 95%) polypeptide
723  chains as extracted from :attr:`ca_chains`. First chain in group is the one
724  with the longest sequence.
725 
726  :getter: Computed on first use (cached)
727  :type: :class:`list` of :class:`list` of :class:`str` (chain names)
728  """
729  if self._chem_groups is None:
730  self._chem_groups = _GetChemGroups(self, 95)
731  LogInfo('Chemically equivalent chain-groups in %s: %s' \
732  % (self.GetName(), str(self._chem_groups)))
733  return self._chem_groups
734 
735  def GetAngles(self, c1, c2):
736  """Get Euler angles from superposition of chain *c1* with chain *c2*.
737  Computed on first use based on :attr:`ca_chains` (cached).
738 
739  :param c1: Chain name for first chain to superpose.
740  :type c1: :class:`str`
741  :param c2: Chain name for second chain to superpose.
742  :type c2: :class:`str`
743  :return: 3 Euler angles (may contain nan if something fails).
744  :rtype: :class:`numpy.array`
745  """
746  if (c1,c2) not in self._angles:
747  self._GetSuperposeData(c1, c2)
748  return self._angles[(c1,c2)]
749 
750  def GetAxis(self, c1, c2):
751  """Get axis of symmetry from superposition of chain *c1* with chain *c2*.
752  Computed on first use based on :attr:`ca_chains` (cached).
753 
754  :param c1: Chain name for first chain to superpose.
755  :type c1: :class:`str`
756  :param c2: Chain name for second chain to superpose.
757  :type c2: :class:`str`
758  :return: Rotational axis (may contain nan if something fails).
759  :rtype: :class:`numpy.array`
760  """
761  if (c1,c2) not in self._axis:
762  self._GetSuperposeData(c1, c2)
763  return self._axis[(c1,c2)]
764 
765  @property
766  def contacts(self):
767  """
768  Connectivity dictionary (**read/write**).
769  As given by :func:`GetContacts` with *calpha_only* = False on :attr:`ent`.
770 
771  :getter: Computed on first use (cached)
772  :setter: Uses :func:`FilterContacts` to ensure that we only keep contacts
773  for chains in the cleaned entity.
774  :type: See return type of :func:`GetContacts`
775  """
776  if self._contacts is None:
777  self._contacts = GetContacts(self.ent, False)
778  return self._contacts
779 
780  @contacts.setter
781  def contacts(self, new_contacts):
782  chain_names = set([ch.name for ch in self.ent.chains])
783  self._contacts = FilterContacts(new_contacts, chain_names)
784 
785  @property
786  def contacts_ca(self):
787  """
788  CA-only connectivity dictionary (**read/write**).
789  Like :attr:`contacts` but with *calpha_only* = True in :func:`GetContacts`.
790  """
791  if self._contacts_ca is None:
792  self._contacts_ca = GetContacts(self.ent, True)
793  return self._contacts_ca
794 
795  @contacts_ca.setter
796  def contacts_ca(self, new_contacts):
797  chain_names = set([ch.name for ch in self.ent.chains])
798  self._contacts_ca = FilterContacts(new_contacts, chain_names)
799 
800  ##############################################################################
801  # Class internal helpers (anything that doesnt easily work without this class)
802  ##############################################################################
803 
804  def _GetSuperposeData(self, c1, c2):
805  """Fill _angles and _axis from superposition of CA chains of c1 and c2."""
806  # get aligned views (must contain identical numbers of atoms!)
807  aln = self.GetAlignment(c1, c2)
808  if not aln:
809  # fallback for non-aligned stuff (nan)
810  self._angles[(c1,c2)] = np.empty(3) * np.nan
811  self._axis[(c1,c2)] = np.empty(3) * np.nan
812  return
813  v1, v2 = seq.ViewsFromAlignment(aln)
814  if v1.atom_count < 3:
815  # fallback for non-aligned stuff (nan)
816  self._angles[(c1,c2)] = np.empty(3) * np.nan
817  self._axis[(c1,c2)] = np.empty(3) * np.nan
818  return
819  # get transformation
820  sup_res = mol.alg.SuperposeSVD(v1, v2, apply_transform=False)
821  Rt = sup_res.transformation
822  # extract angles
823  a,b,c = _GetAngles(Rt)
824  self._angles[(c1,c2)] = np.asarray([a,b,c])
825  # extract axis of symmetry
826  R3 = geom.Rotation3(Rt.ExtractRotation())
827  self._axis[(c1,c2)] = np.asarray(R3.GetRotationAxis().data)
828 
829 ###############################################################################
830 # Contacts computations
831 ###############################################################################
832 
833 def FilterContacts(contacts, chain_names):
834  """Filter contacts to contain only contacts for chains in *chain_names*.
835 
836  :param contacts: Connectivity dictionary as produced by :func:`GetContacts`.
837  :type contacts: :class:`dict`
838  :param chain_names: Chain names to keep.
839  :type chain_names: :class:`list` or (better) :class:`set`
840  :return: New connectivity dictionary (format as in :func:`GetContacts`)
841  :rtype: :class:`dict`
842  """
843  # create new dict with subset
844  filtered_contacts = dict()
845  for c1 in contacts:
846  if c1 in chain_names:
847  new_contacts = dict()
848  for c2 in contacts[c1]:
849  if c2 in chain_names:
850  new_contacts[c2] = contacts[c1][c2]
851  # avoid adding empty dicts
852  if new_contacts:
853  filtered_contacts[c1] = new_contacts
854  return filtered_contacts
855 
856 def GetContacts(entity, calpha_only, dist_thr=12.0):
857  """Get inter-chain contacts of a macromolecular entity.
858 
859  Contacts are pairs of residues within a given distance belonging to different
860  chains. They are stored once per pair and include the CA/CB-CA/CB distance.
861 
862  :param entity: An entity to check connectivity for.
863  :type entity: :class:`~ost.mol.EntityHandle` or :class:`~ost.mol.EntityView`
864  :param calpha_only: If True, we only consider CA-CA distances. Else, we use CB
865  unless the residue is a GLY.
866  :type calpha_only: :class:`bool`
867  :param dist_thr: Maximal CA/CB-CA/CB distance to be considered in contact.
868  :type dist_thr: :class:`float`
869  :return: A connectivity dictionary. A pair of residues with chain names
870  *ch_name1* & *ch_name2* (*ch_name1* < *ch_name2*), residue numbers
871  *res_num1* & *res_num2* and distance *dist* (<= *dist_thr*) are
872  stored as *result[ch_name1][ch_name2][res_num1][res_num2]* = *dist*.
873  :rtype: :class:`dict`
874  """
875  # get ent copy to search on
876  if calpha_only:
877  ev = entity.Select("aname=CA")
878  else:
879  ev = entity.Select("(rname=GLY and aname=CA) or aname=CB")
880  ent = mol.CreateEntityFromView(ev, True)
881  # search all vs all
882  contacts = dict()
883  for atom in ent.atoms:
884  ch_name1 = atom.chain.name
885  res_num1 = atom.residue.number.num
886  close_atoms = ent.FindWithin(atom.pos, dist_thr)
887  for close_atom in close_atoms:
888  ch_name2 = close_atom.chain.name
889  if ch_name2 > ch_name1:
890  res_num2 = close_atom.residue.number.num
891  dist = geom.Distance(atom.pos, close_atom.pos)
892  # add to contacts
893  if ch_name1 not in contacts:
894  contacts[ch_name1] = dict()
895  if ch_name2 not in contacts[ch_name1]:
896  contacts[ch_name1][ch_name2] = dict()
897  if res_num1 not in contacts[ch_name1][ch_name2]:
898  contacts[ch_name1][ch_name2][res_num1] = dict()
899  contacts[ch_name1][ch_name2][res_num1][res_num2] = round(dist, 3)
900  # DONE
901  return contacts
902 
903 ###############################################################################
904 # Oligo-lDDT scores
905 ###############################################################################
906 
907 class OligoLDDTScorer(object):
908  """Helper class to calculate oligomeric lDDT scores.
909 
910  This class can be used independently, but commonly it will be created by
911  calling :func:`QSscorer.GetOligoLDDTScorer`.
912 
913  .. note::
914 
915  By construction, lDDT scores are not symmetric and hence it matters which
916  structure is the reference (:attr:`ref`) and which one is the model
917  (:attr:`mdl`). Extra residues in the model are generally not considered.
918  Extra chains in both model and reference can be considered by setting the
919  :attr:`penalize_extra_chains` flag to True.
920 
921  :param ref: Sets :attr:`ref`
922  :param mdl: Sets :attr:`mdl`
923  :param alignments: Sets :attr:`alignments`
924  :param calpha_only: Sets :attr:`calpha_only`
925  :param settings: Sets :attr:`settings`
926  :param penalize_extra_chains: Sets :attr:`penalize_extra_chains`
927  :param chem_mapping: Sets :attr:`chem_mapping`. Must be given if
928  *penalize_extra_chains* is True.
929 
930  .. attribute:: ref
931  mdl
932 
933  Full reference/model entity to be scored. The entity must contain all chains
934  mapped in :attr:`alignments` and may also contain additional ones which are
935  considered if :attr:`penalize_extra_chains` is True.
936 
937  :type: :class:`~ost.mol.EntityHandle`
938 
939  .. attribute:: alignments
940 
941  One alignment for each mapped chain of :attr:`ref`/:attr:`mdl` as defined in
942  :attr:`QSscorer.alignments`. The first sequence of each alignment belongs to
943  :attr:`ref` and the second one to :attr:`mdl`. Sequences must have sequence
944  naming and attached views as defined in :attr:`QSscorer.alignments`.
945 
946  :type: :class:`list` of :class:`~ost.seq.AlignmentHandle`
947 
948  .. attribute:: calpha_only
949 
950  If True, restricts lDDT score to CA only.
951 
952  :type: :class:`bool`
953 
954  .. attribute:: settings
955 
956  Settings to use for lDDT scoring.
957 
958  :type: :class:`~ost.mol.alg.lDDTSettings`
959 
960  .. attribute:: penalize_extra_chains
961 
962  If True, extra chains in both :attr:`ref` and :attr:`mdl` will penalize the
963  lDDT scores.
964 
965  :type: :class:`bool`
966 
967  .. attribute:: chem_mapping
968 
969  Inter-complex mapping of chemical groups as defined in
970  :attr:`QSscorer.chem_mapping`. Used to find "chem-mapped" chains in
971  :attr:`ref` for unmapped chains in :attr:`mdl` when penalizing scores.
972  Each unmapped model chain can add extra reference-contacts according to the
973  average total contacts of each single "chem-mapped" reference chain. If
974  there is no "chem-mapped" reference chain, a warning is shown and the model
975  chain is ignored.
976 
977 
978  Only relevant if :attr:`penalize_extra_chains` is True.
979 
980  :type: :class:`dict` with key = :class:`tuple` of chain names in
981  :attr:`ref` and value = :class:`tuple` of chain names in
982  :attr:`mdl`.
983  """
984 
985  # NOTE: one could also allow computation of both penalized and unpenalized
986  # in same object -> must regenerate lddt_ref / lddt_mdl though
987 
988  def __init__(self, ref, mdl, alignments, calpha_only, settings,
989  penalize_extra_chains=False, chem_mapping=None):
990  # sanity checks
991  if chem_mapping is None and penalize_extra_chains:
992  raise RuntimeError("Must provide chem_mapping when requesting penalty "
993  "for extra chains!")
994  if not penalize_extra_chains:
995  # warn for unmapped model chains
996  unmapped_mdl_chains = self._GetUnmappedMdlChains(mdl, alignments)
997  if unmapped_mdl_chains:
998  LogWarning('MODEL contains chains unmapped to REFERENCE, '
999  'lDDT is not considering MODEL chains %s' \
1000  % str(list(unmapped_mdl_chains)))
1001  # warn for unmapped reference chains
1002  ref_chains = set(ch.name for ch in ref.chains)
1003  mapped_ref_chains = set(aln.GetSequence(0).GetName() for aln in alignments)
1004  unmapped_ref_chains = (ref_chains - mapped_ref_chains)
1005  if unmapped_ref_chains:
1006  LogWarning('REFERENCE contains chains unmapped to MODEL, '
1007  'lDDT is not considering REFERENCE chains %s' \
1008  % str(list(unmapped_ref_chains)))
1009  # prepare fields
1010  self.ref = ref
1011  self.mdl = mdl
1012  self.alignments = alignments
1013  self.calpha_only = calpha_only
1014  self.settings = settings
1015  self.penalize_extra_chains = penalize_extra_chains
1016  self.chem_mapping = chem_mapping
1017  self._sc_lddt = None
1018  self._oligo_lddt = None
1019  self._weighted_lddt = None
1020  self._lddt_ref = None
1021  self._lddt_mdl = None
1022  self._oligo_lddt_scorer = None
1023  self._mapped_lddt_scorers = None
1024  self._ref_scorers = None
1025  self._model_penalty = None
1026 
1027  @property
1028  def oligo_lddt(self):
1029  """Oligomeric lDDT score.
1030 
1031  The score is computed as conserved contacts divided by the total contacts
1032  in the reference using the :attr:`oligo_lddt_scorer`, which uses the full
1033  complex as reference/model structure. If :attr:`penalize_extra_chains` is
1034  True, the reference/model complexes contain all chains (otherwise only the
1035  mapped ones) and additional contacts are added to the reference's total
1036  contacts for unmapped model chains according to the :attr:`chem_mapping`.
1037 
1038  The main difference with :attr:`weighted_lddt` is that the lDDT scorer
1039  "sees" the full complex here (incl. inter-chain contacts), while the
1040  weighted single chain score looks at each chain separately.
1041 
1042  :getter: Computed on first use (cached)
1043  :type: :class:`float`
1044  """
1045  if self._oligo_lddt is None:
1046  LogInfo('Reference %s has: %s chains' \
1047  % (self.ref.GetName(), self.ref.chain_count))
1048  LogInfo('Model %s has: %s chains' \
1049  % (self.mdl.GetName(), self.mdl.chain_count))
1050 
1051  # score with or w/o extra-chain penalty
1052  if self.penalize_extra_chains:
1053  denominator = self.oligo_lddt_scorer.total_contacts
1054  denominator += self._GetModelPenalty()
1055  if denominator > 0:
1056  oligo_lddt = self.oligo_lddt_scorer.conserved_contacts \
1057  / float(denominator)
1058  else:
1059  oligo_lddt = 0.0
1060  else:
1061  oligo_lddt = self.oligo_lddt_scorer.global_score
1062  self._oligo_lddt = oligo_lddt
1063  return self._oligo_lddt
1064 
1065  @property
1066  def weighted_lddt(self):
1067  """Weighted average of single chain lDDT scores.
1068 
1069  The score is computed as a weighted average of single chain lDDT scores
1070  (see :attr:`sc_lddt_scorers`) using the total contacts of each single
1071  reference chain as weights. If :attr:`penalize_extra_chains` is True,
1072  unmapped chains are added with a 0 score and total contacts taken from
1073  the actual reference chains or (for unmapped model chains) using the
1074  :attr:`chem_mapping`.
1075 
1076  See :attr:`oligo_lddt` for a comparison of the two scores.
1077 
1078  :getter: Computed on first use (cached)
1079  :type: :class:`float`
1080  """
1081  if self._weighted_lddt is None:
1082  scores = [s.global_score for s in self.sc_lddt_scorers]
1083  weights = [s.total_contacts for s in self.sc_lddt_scorers]
1084  nominator = sum([s * w for s, w in zip(scores, weights)])
1085  if self.penalize_extra_chains:
1086  ref_scorers = self._GetRefScorers()
1087  denominator = sum(s.total_contacts for s in ref_scorers.values())
1088  denominator += self._GetModelPenalty()
1089  else:
1090  denominator = sum(weights)
1091  if denominator > 0:
1092  self._weighted_lddt = nominator / float(denominator)
1093  else:
1094  self._weighted_lddt = 0.0
1095  return self._weighted_lddt
1096 
1097  @property
1098  def lddt_ref(self):
1099  """The reference entity used for oligomeric lDDT scoring
1100  (:attr:`oligo_lddt` / :attr:`oligo_lddt_scorer`).
1101 
1102  Since the lDDT computation requires a single chain with mapped residue
1103  numbering, all chains of :attr:`ref` are appended into a single chain X with
1104  unique residue numbers according to the column-index in the alignment. The
1105  alignments are in the same order as they appear in :attr:`alignments`.
1106  Additional residues are appended at the end of the chain with unique residue
1107  numbers. Unmapped chains are only added if :attr:`penalize_extra_chains` is
1108  True. Only CA atoms are considered if :attr:`calpha_only` is True.
1109 
1110  :getter: Computed on first use (cached)
1111  :type: :class:`~ost.mol.EntityHandle`
1112  """
1113  if self._lddt_ref is None:
1114  self._PrepareOligoEntities()
1115  return self._lddt_ref
1116 
1117  @property
1118  def lddt_mdl(self):
1119  """The model entity used for oligomeric lDDT scoring
1120  (:attr:`oligo_lddt` / :attr:`oligo_lddt_scorer`).
1121 
1122  Like :attr:`lddt_ref`, this is a single chain X containing all chains of
1123  :attr:`mdl`. The residue numbers match the ones in :attr:`lddt_ref` where
1124  aligned and have unique numbers for additional residues.
1125 
1126  :getter: Computed on first use (cached)
1127  :type: :class:`~ost.mol.EntityHandle`
1128  """
1129  if self._lddt_mdl is None:
1130  self._PrepareOligoEntities()
1131  return self._lddt_mdl
1132 
1133  @property
1135  """lDDT Scorer object for :attr:`lddt_ref` and :attr:`lddt_mdl`.
1136 
1137  :getter: Computed on first use (cached)
1138  :type: :class:`~ost.mol.alg.lDDTScorer`
1139  """
1140  if self._oligo_lddt_scorer is None:
1142  references=[self.lddt_ref.Select("")],
1143  model=self.lddt_mdl.Select(""),
1144  settings=self.settings)
1145  return self._oligo_lddt_scorer
1146 
1147  @property
1149  """List of scorer objects for each chain mapped in :attr:`alignments`.
1150 
1151  :getter: Computed on first use (cached)
1152  :type: :class:`list` of :class:`MappedLDDTScorer`
1153  """
1154  if self._mapped_lddt_scorers is None:
1155  self._mapped_lddt_scorers = list()
1156  for aln in self.alignments:
1157  mapped_lddt_scorer = MappedLDDTScorer(aln, self.calpha_only,
1158  self.settings)
1159  self._mapped_lddt_scorers.append(mapped_lddt_scorer)
1160  return self._mapped_lddt_scorers
1161 
1162  @property
1163  def sc_lddt_scorers(self):
1164  """List of lDDT scorer objects extracted from :attr:`mapped_lddt_scorers`.
1165 
1166  :type: :class:`list` of :class:`~ost.mol.alg.lDDTScorer`
1167  """
1168  return [mls.lddt_scorer for mls in self.mapped_lddt_scorers]
1169 
1170  @property
1171  def sc_lddt(self):
1172  """List of global scores extracted from :attr:`sc_lddt_scorers`.
1173 
1174  If scoring for a mapped chain fails, an error is displayed and a score of 0
1175  is assigned.
1176 
1177  :getter: Computed on first use (cached)
1178  :type: :class:`list` of :class:`float`
1179  """
1180  if self._sc_lddt is None:
1181  self._sc_lddt = list()
1182  for lddt_scorer in self.sc_lddt_scorers:
1183  try:
1184  self._sc_lddt.append(lddt_scorer.global_score)
1185  except Exception as ex:
1186  LogError('Single chain lDDT failed:', str(ex))
1187  self._sc_lddt.append(0.0)
1188  return self._sc_lddt
1189 
1190  ##############################################################################
1191  # Class internal helpers
1192  ##############################################################################
1193 
1194  def _PrepareOligoEntities(self):
1195  # simple wrapper to avoid code duplication
1196  self._lddt_ref, self._lddt_mdl = _MergeAlignedChains(
1197  self.alignments, self.ref, self.mdl, self.calpha_only,
1198  self.penalize_extra_chains)
1199 
1200  @staticmethod
1201  def _GetUnmappedMdlChains(mdl, alignments):
1202  # assume model is second sequence in alignment and is named by chain
1203  mdl_chains = set(ch.name for ch in mdl.chains)
1204  mapped_mdl_chains = set(aln.GetSequence(1).GetName() for aln in alignments)
1205  return (mdl_chains - mapped_mdl_chains)
1206 
1207  def _GetRefScorers(self):
1208  # single chain lddt scorers for each reference chain (key = chain name)
1209  if self._ref_scorers is None:
1210  # collect from mapped_lddt_scorers
1211  ref_scorers = dict()
1212  for mapped_lddt_scorer in self.mapped_lddt_scorers:
1213  ref_ch_name = mapped_lddt_scorer.reference_chain_name
1214  ref_scorers[ref_ch_name] = mapped_lddt_scorer.lddt_scorer
1215  # add new ones where needed
1216  for ch in self.ref.chains:
1217  if ch.name not in ref_scorers:
1218  if self.calpha_only:
1219  ref_chain = ch.Select('aname=CA')
1220  else:
1221  ref_chain = ch.Select('')
1222  ref_scorers[ch.name] = lDDTScorer(
1223  references=[ref_chain],
1224  model=ref_chain,
1225  settings=self.settings)
1226  # store in cache
1227  self._ref_scorers = ref_scorers
1228  # fetch from cache
1229  return self._ref_scorers
1230 
1231  def _GetModelPenalty(self):
1232  # extra value to add to total number of distances for extra model chains
1233  # -> estimated from chem-mapped reference chains
1234  if self._model_penalty is None:
1235  # sanity check
1236  if self.chem_mapping is None:
1237  raise RuntimeError("Must provide chem_mapping when requesting penalty "
1238  "for extra model chains!")
1239  # get cached ref_scorers
1240  ref_scorers = self._GetRefScorers()
1241  # get unmapped model chains
1242  unmapped_mdl_chains = self._GetUnmappedMdlChains(self.mdl, self.alignments)
1243  # map extra chains to ref. chains
1244  model_penalty = 0
1245  for ch_name_mdl in sorted(unmapped_mdl_chains):
1246  # get penalty for chain
1247  cur_penalty = None
1248  for cm_ref, cm_mdl in self.chem_mapping.iteritems():
1249  if ch_name_mdl in cm_mdl:
1250  # penalize by an average of the chem. mapped ref. chains
1251  cur_penalty = 0
1252  for ch_name_ref in cm_ref:
1253  # assumes that total_contacts is cached (for speed)
1254  cur_penalty += ref_scorers[ch_name_ref].total_contacts
1255  cur_penalty /= float(len(cm_ref))
1256  break
1257  # report penalty
1258  if cur_penalty is None:
1259  LogWarning('Extra MODEL chain %s could not be chemically mapped to '
1260  'any chain in REFERENCE, lDDT cannot consider it!' \
1261  % ch_name_mdl)
1262  else:
1263  LogScript('Extra MODEL chain %s added to lDDT score by considering '
1264  'chemically mapped chains in REFERENCE.' % ch_name_mdl)
1265  model_penalty += cur_penalty
1266  # store in cache
1267  self._model_penalty = model_penalty
1268  # fetch from cache
1269  return self._model_penalty
1270 
1271 
1272 class MappedLDDTScorer(object):
1273  """A simple class to calculate a single-chain lDDT score on a given chain to
1274  chain mapping as extracted from :class:`OligoLDDTScorer`.
1275 
1276  :param alignment: Sets :attr:`alignment`
1277  :param calpha_only: Sets :attr:`calpha_only`
1278  :param settings: Sets :attr:`settings`
1279 
1280  .. attribute:: alignment
1281 
1282  Alignment with two sequences named according to the mapped chains and with
1283  views attached to both sequences (e.g. one of the items of
1284  :attr:`QSscorer.alignments`).
1285 
1286  The first sequence is assumed to be the reference and the second one the
1287  model. Since the lDDT score is not symmetric (extra residues in model are
1288  ignored), the order is important.
1289 
1290  :type: :class:`~ost.seq.AlignmentHandle`
1291 
1292  .. attribute:: calpha_only
1293 
1294  If True, restricts lDDT score to CA only.
1295 
1296  :type: :class:`bool`
1297 
1298  .. attribute:: settings
1299 
1300  Settings to use for lDDT scoring.
1301 
1302  :type: :class:`~ost.mol.alg.lDDTSettings`
1303 
1304  .. attribute:: lddt_scorer
1305 
1306  lDDT Scorer object for the given chains.
1307 
1308  :type: :class:`~ost.mol.alg.lDDTScorer`
1309 
1310  .. attribute:: reference_chain_name
1311 
1312  Chain name of the reference.
1313 
1314  :type: :class:`str`
1315 
1316  .. attribute:: model_chain_name
1317 
1318  Chain name of the model.
1319 
1320  :type: :class:`str`
1321  """
1322  def __init__(self, alignment, calpha_only, settings):
1323  # prepare fields
1324  self.alignment = alignment
1325  self.calpha_only = calpha_only
1326  self.settings = settings
1327  self.lddt_scorer = None # set in _InitScorer
1328  self.reference_chain_name = alignment.sequences[0].name
1329  self.model_chain_name = alignment.sequences[1].name
1330  self._old_number_label = "old_num"
1331  self._extended_alignment = None # set in _InitScorer
1332  # initialize lDDT scorer
1333  self._InitScorer()
1334 
1336  """
1337  :return: Scores for each residue
1338  :rtype: :class:`list` of :class:`dict` with one item for each residue
1339  existing in model and reference:
1340 
1341  - "residue_number": Residue number in reference chain
1342  - "residue_name": Residue name in reference chain
1343  - "lddt": local lDDT
1344  - "conserved_contacts": number of conserved contacts
1345  - "total_contacts": total number of contacts
1346  """
1347  scores = list()
1348  assigned_residues = list()
1349  # Make sure the score is calculated
1350  self.lddt_scorer.global_score
1351  for col in self._extended_alignment:
1352  if col[0] != "-" and col.GetResidue(3).IsValid():
1353  ref_res = col.GetResidue(0)
1354  mdl_res = col.GetResidue(1)
1355  ref_res_renum = col.GetResidue(2)
1356  mdl_res_renum = col.GetResidue(3)
1357  if ref_res.one_letter_code != ref_res_renum.one_letter_code:
1358  raise RuntimeError("Reference residue name mapping inconsistent: %s != %s" %
1359  (ref_res.one_letter_code,
1360  ref_res_renum.one_letter_code))
1361  if mdl_res.one_letter_code != mdl_res_renum.one_letter_code:
1362  raise RuntimeError("Model residue name mapping inconsistent: %s != %s" %
1363  (mdl_res.one_letter_code,
1364  mdl_res_renum.one_letter_code))
1365  if ref_res.GetNumber().num != ref_res_renum.GetIntProp(self._old_number_label):
1366  raise RuntimeError("Reference residue number mapping inconsistent: %s != %s" %
1367  (ref_res.GetNumber().num,
1368  ref_res_renum.GetIntProp(self._old_number_label)))
1369  if mdl_res.GetNumber().num != mdl_res_renum.GetIntProp(self._old_number_label):
1370  raise RuntimeError("Model residue number mapping inconsistent: %s != %s" %
1371  (mdl_res.GetNumber().num,
1372  mdl_res_renum.GetIntProp(self._old_number_label)))
1373  if ref_res.qualified_name in assigned_residues:
1374  raise RuntimeError("Duplicated residue in reference: " %
1375  (ref_res.qualified_name))
1376  else:
1377  assigned_residues.append(ref_res.qualified_name)
1378  # check if property there (may be missing for CA-only)
1379  if mdl_res_renum.HasProp(self.settings.label):
1380  scores.append({
1381  "residue_number": ref_res.GetNumber().num,
1382  "residue_name": ref_res.name,
1383  "lddt": mdl_res_renum.GetFloatProp(self.settings.label),
1384  "conserved_contacts": mdl_res_renum.GetFloatProp(self.settings.label + "_conserved"),
1385  "total_contacts": mdl_res_renum.GetFloatProp(self.settings.label + "_total")})
1386  return scores
1387 
1388  ##############################################################################
1389  # Class internal helpers (anything that doesnt easily work without this class)
1390  ##############################################################################
1391 
1392  def _InitScorer(self):
1393  # Use copy of alignment (extended by 2 extra sequences for renumbering)
1394  aln = self.alignment.Copy()
1395  # Get chains and renumber according to alignment (for lDDT)
1396  reference = Renumber(
1397  aln.GetSequence(0),
1398  old_number_label=self._old_number_label).CreateFullView()
1399  refseq = seq.CreateSequence(
1400  "reference_renumbered",
1401  aln.GetSequence(0).GetString())
1402  refseq.AttachView(reference)
1403  aln.AddSequence(refseq)
1404  model = Renumber(
1405  aln.GetSequence(1),
1406  old_number_label=self._old_number_label).CreateFullView()
1407  modelseq = seq.CreateSequence(
1408  "model_renumbered",
1409  aln.GetSequence(1).GetString())
1410  modelseq.AttachView(model)
1411  aln.AddSequence(modelseq)
1412  # Filter to CA-only if desired (done after AttachView to not mess it up)
1413  if self.calpha_only:
1414  self.lddt_scorer = lDDTScorer(
1415  references=[reference.Select('aname=CA')],
1416  model=model.Select('aname=CA'),
1417  settings=self.settings)
1418  else:
1419  self.lddt_scorer = lDDTScorer(
1420  references=[reference],
1421  model=model,
1422  settings=self.settings)
1423  # Store alignment for later
1424  self._extended_alignment = aln
1425 
1426 ###############################################################################
1427 # HELPERS
1428 ###############################################################################
1429 
1430 # general
1431 
1432 def _AlignAtomSeqs(seq_1, seq_2):
1433  """
1434  :type seq_1: :class:`ost.seq.SequenceHandle`
1435  :type seq_2: :class:`ost.seq.SequenceHandle`
1436  :return: Alignment of two sequences using a global alignment. Views attached
1437  to the input sequences will remain attached in the aln.
1438  :rtype: :class:`~ost.seq.AlignmentHandle` or None if it failed.
1439  """
1440  # NOTE: If the two sequence have a greatly different length
1441  # a local alignment could be a better choice...
1442  aln = None
1443  alns = seq.alg.GlobalAlign(seq_1, seq_2, seq.alg.BLOSUM62)
1444  if alns: aln = alns[0]
1445  if not aln:
1446  LogWarning('Failed to align %s to %s' % (seq_1.name, seq_2.name))
1447  LogWarning('%s: %s' % (seq_1.name, seq_1.string))
1448  LogWarning('%s: %s' % (seq_2.name, seq_2.string))
1449  return aln
1450 
1451 def _FixSelectChainName(ch_name):
1452  """
1453  :return: String to be used with Select(cname=<RETURN>). Takes care of putting
1454  quotation marks where needed.
1455  :rtype: :class:`str`
1456  :param ch_name: Single chain name (:class:`str`).
1457  """
1458  if ch_name in ['-', '_', ' ']:
1459  return '"%c"' % ch_name
1460  else:
1461  return ch_name
1462 
1463 def _FixSelectChainNames(ch_names):
1464  """
1465  :return: String to be used with Select(cname=<RETURN>). Takes care of joining
1466  and putting quotation marks where needed.
1467  :rtype: :class:`str`
1468  :param ch_names: Some iterable list of chain names (:class:`str` items).
1469  """
1470  chain_set = set([_FixSelectChainName(ch_name) for ch_name in ch_names])
1471  return ','.join(chain_set)
1472 
1473 # QS entity
1474 
1475 def _CleanInputEntity(ent):
1476  """
1477  :param ent: The OST entity to be cleaned.
1478  :type ent: :class:`EntityHandle` or :class:`EntityView`
1479  :return: A tuple of 3 items: :attr:`QSscoreEntity.ent`,
1480  :attr:`QSscoreEntity.removed_chains`,
1481  :attr:`QSscoreEntity.calpha_only`
1482  """
1483  # find chains to remove
1484  removed_chains = []
1485  for ch in ent.chains:
1486  # we remove chains if they are small-peptides or if the contain no aa
1487  # or if they contain only unknown or modified residues
1488  if ch.name in ['-', '_'] \
1489  or ch.residue_count < 20 \
1490  or not any(r.chem_type.IsAminoAcid() for r in ch.residues) \
1491  or not (set(r.one_letter_code for r in ch.residues) - {'?', 'X'}):
1492  removed_chains.append(ch.name)
1493 
1494  # remove them from *ent*
1495  if removed_chains:
1496  view = ent.Select('cname!=%s' % _FixSelectChainNames(removed_chains))
1497  ent_new = mol.CreateEntityFromView(view, True)
1498  ent_new.SetName(ent.GetName())
1499  else:
1500  ent_new = ent
1501 
1502  # check if CA only
1503  calpha_only = False
1504  if ent_new.atom_count > 0 and ent_new.Select('aname=CB').atom_count == 0:
1505  LogInfo('Structure %s is a CA only structure!' % ent_new.GetName())
1506  calpha_only = True
1507 
1508  # report and return
1509  if removed_chains:
1510  LogInfo('Chains removed from %s: %s' \
1511  % (ent_new.GetName(), ''.join(removed_chains)))
1512  LogInfo('Chains in %s: %s' \
1513  % (ent_new.GetName(), ''.join([c.name for c in ent_new.chains])))
1514  return ent_new, removed_chains, calpha_only
1515 
1516 def _GetCAOnlyEntity(ent):
1517  """
1518  :param ent: Entity to process
1519  :type ent: :class:`EntityHandle` or :class:`EntityView`
1520  :return: New entity with only CA and only one atom per residue
1521  (see :attr:`QSscoreEntity.ca_entity`)
1522  """
1523  # cook up CA only view (diff from Select = guaranteed 1 atom per residue)
1524  ca_view = ent.CreateEmptyView()
1525  # add chain by chain
1526  for res in ent.residues:
1527  ca_atom = res.FindAtom("CA")
1528  if ca_atom.IsValid():
1529  ca_view.AddAtom(ca_atom)
1530  # finalize
1531  return mol.CreateEntityFromView(ca_view, False)
1532 
1533 def _GetChemGroups(qs_ent, seqid_thr=95.):
1534  """
1535  :return: Intra-complex group of chemically identical polypeptide chains
1536  (see :attr:`QSscoreEntity.chem_groups`)
1537 
1538  :param qs_ent: Entity to process
1539  :type qs_ent: :class:`QSscoreEntity`
1540  :param seqid_thr: Threshold used to decide when two chains are identical.
1541  95 percent tolerates the few mutations crystallographers
1542  like to do.
1543  :type seqid_thr: :class:`float`
1544  """
1545  # get data from qs_ent
1546  ca_chains = qs_ent.ca_chains
1547  chain_names = sorted(ca_chains.keys())
1548  # get pairs of identical chains
1549  # NOTE: this scales quadratically with number of chains and may be optimized
1550  # -> one could merge it with "merge transitive pairs" below...
1551  id_seqs = []
1552  for ch_1, ch_2 in itertools.combinations(chain_names, 2):
1553  aln = qs_ent.GetAlignment(ch_1, ch_2)
1554  if aln and seq.alg.SequenceIdentity(aln) > seqid_thr:
1555  id_seqs.append((ch_1, ch_2))
1556  # trivial case: no matching pairs
1557  if not id_seqs:
1558  return [[name] for name in chain_names]
1559 
1560  # merge transitive pairs
1561  groups = []
1562  for ch_1, ch_2 in id_seqs:
1563  found = False
1564  for g in groups:
1565  if ch_1 in g or ch_2 in g:
1566  found = True
1567  g.add(ch_1)
1568  g.add(ch_2)
1569  if not found:
1570  groups.append(set([ch_1, ch_2]))
1571  # sort internally based on sequence length
1572  chem_groups = []
1573  for g in groups:
1574  ranked_g = sorted([(-ca_chains[ch].length, ch) for ch in g])
1575  chem_groups.append([ch for _,ch in ranked_g])
1576  # add other dissimilar chains
1577  for ch in chain_names:
1578  if not any(ch in g for g in chem_groups):
1579  chem_groups.append([ch])
1580 
1581  return chem_groups
1582 
1583 def _GetAngles(Rt):
1584  """Computes the Euler angles given a transformation matrix.
1585 
1586  :param Rt: Rt operator.
1587  :type Rt: :class:`ost.geom.Mat4`
1588  :return: A :class:`tuple` of angles for each axis (x,y,z)
1589  """
1590  rot = np.asmatrix(Rt.ExtractRotation().data).reshape(3,3)
1591  tx = np.arctan2(rot[2,1], rot[2,2])
1592  if tx < 0:
1593  tx += 2*np.pi
1594  ty = np.arctan2(rot[2,0], np.sqrt(rot[2,1]**2 + rot[2,2]**2))
1595  if ty < 0:
1596  ty += 2*np.pi
1597  tz = np.arctan2(rot[1,0], rot[0,0])
1598  if tz < 0:
1599  tz += 2*np.pi
1600  return tx,ty,tz
1601 
1602 # QS scorer
1603 
1604 def _GetChemGroupsMapping(qs_ent_1, qs_ent_2):
1605  """
1606  :return: Inter-complex mapping of chemical groups
1607  (see :attr:`QSscorer.chem_mapping`)
1608 
1609  :param qs_ent_1: See :attr:`QSscorer.qs_ent_1`
1610  :param qs_ent_2: See :attr:`QSscorer.qs_ent_2`
1611  """
1612  # get chem. groups and unique representative
1613  chem_groups_1 = qs_ent_1.chem_groups
1614  chem_groups_2 = qs_ent_2.chem_groups
1615  repr_chains_1 = {x[0]: tuple(x) for x in chem_groups_1}
1616  repr_chains_2 = {x[0]: tuple(x) for x in chem_groups_2}
1617 
1618  # if entities do not have different number of unique chains, we get the
1619  # mapping for the smaller set
1620  swapped = False
1621  if len(repr_chains_2) < len(repr_chains_1):
1622  repr_chains_1, repr_chains_2 = repr_chains_2, repr_chains_1
1623  qs_ent_1, qs_ent_2 = qs_ent_2, qs_ent_1
1624  swapped = True
1625 
1626  # find the closest to each chain between the two entities
1627  # NOTE: this may still be sensible to orthology problem
1628  # -> currently we use a global alignment and seq. id. to rank pairs
1629  # -> we also tried local alignments and weighting the seq. id. by the
1630  # coverages of the alignments (gapless string in aln. / seq. length)
1631  # but global aln performed better...
1632  chain_pairs = []
1633  ca_chains_1 = qs_ent_1.ca_chains
1634  ca_chains_2 = qs_ent_2.ca_chains
1635  for ch_1 in repr_chains_1.keys():
1636  for ch_2 in repr_chains_2.keys():
1637  aln = _AlignAtomSeqs(ca_chains_1[ch_1], ca_chains_2[ch_2])
1638  if aln:
1639  chains_seqid = seq.alg.SequenceIdentity(aln)
1640  LogVerbose('Sequence identity', ch_1, ch_2, 'seqid=%.2f' % chains_seqid)
1641  chain_pairs.append((chains_seqid, ch_1, ch_2))
1642 
1643  # get top matching groups first
1644  chain_pairs = sorted(chain_pairs, reverse=True)
1645  chem_mapping = {}
1646  for _, c1, c2 in chain_pairs:
1647  skip = False
1648  for a,b in chem_mapping.iteritems():
1649  if repr_chains_1[c1] == a or repr_chains_2[c2] == b:
1650  skip = True
1651  break
1652  if not skip:
1653  chem_mapping[repr_chains_1[c1]] = repr_chains_2[c2]
1654  if swapped:
1655  chem_mapping = {y: x for x, y in chem_mapping.iteritems()}
1656  qs_ent_1, qs_ent_2 = qs_ent_2, qs_ent_1
1657 
1658  # notify chains without partner
1659  mapped_1 = set([i for s in chem_mapping.keys() for i in s])
1660  chains_1 = set([c.name for c in qs_ent_1.ent.chains])
1661  if chains_1 - mapped_1:
1662  LogWarning('Unmapped Chains in %s: %s'
1663  % (qs_ent_1.GetName(), ','.join(list(chains_1 - mapped_1))))
1664 
1665  mapped_2 = set([i for s in chem_mapping.values() for i in s])
1666  chains_2 = set([c.name for c in qs_ent_2.ent.chains])
1667  if chains_2 - mapped_2:
1668  LogWarning('Unmapped Chains in %s: %s'
1669  % (qs_ent_2.GetName(), ','.join(list(chains_2 - mapped_2))))
1670 
1671  # check if we have any chains left
1672  LogInfo('Chemical chain-groups mapping: ' + str(chem_mapping))
1673  if len(mapped_1) < 1 or len(mapped_2) < 1:
1674  raise QSscoreError('Less than 1 chains left in chem_mapping.')
1675  return chem_mapping
1676 
1677 def _SelectFew(l, max_elements):
1678  """Return l or copy of l with at most *max_elements* entries."""
1679  n_el = len(l)
1680  if n_el <= max_elements:
1681  return l
1682  else:
1683  # cheap integer ceiling (-1 to ensure that x*max_elements gets d_el = x)
1684  d_el = ((n_el - 1) // max_elements) + 1
1685  new_l = list()
1686  for i in range(0, n_el, d_el):
1687  new_l.append(l[i])
1688  return new_l
1689 
1690 def _GetAlignedResidues(qs_ent_1, qs_ent_2, chem_mapping, max_ca_per_chain,
1691  clustalw_bin):
1692  """
1693  :return: Tuple of two :class:`~ost.mol.EntityView` objects containing subsets
1694  of *qs_ent_1* and *qs_ent_2*. Two entities are later created from
1695  those views (see :attr:`QSscorer.ent_to_cm_1` and
1696  :attr:`QSscorer.ent_to_cm_2`)
1697 
1698  :param qs_ent_1: See :attr:`QSscorer.qs_ent_1`
1699  :param qs_ent_2: See :attr:`QSscorer.qs_ent_2`
1700  :param chem_mapping: See :attr:`QSscorer.chem_mapping`
1701  :param max_ca_per_chain: See :attr:`QSscorer.max_ca_per_chain_for_cm`
1702  """
1703  # make sure name doesn't contain spaces and is unique
1704  def _FixName(seq_name, seq_names):
1705  # get rid of spaces and make it unique
1706  seq_name = seq_name.replace(' ', '-')
1707  while seq_name in seq_names:
1708  seq_name += '-'
1709  return seq_name
1710  # resulting views into CA entities using CA chain sequences
1711  ent_view_1 = qs_ent_1.ca_entity.CreateEmptyView()
1712  ent_view_2 = qs_ent_2.ca_entity.CreateEmptyView()
1713  ca_chains_1 = qs_ent_1.ca_chains
1714  ca_chains_2 = qs_ent_2.ca_chains
1715  # go through all mapped chemical groups
1716  for group_1, group_2 in chem_mapping.iteritems():
1717  # MSA with ClustalW
1718  seq_list = seq.CreateSequenceList()
1719  # keep sequence-name (must be unique) to view mapping
1720  seq_to_empty_view = dict()
1721  for ch in group_1:
1722  sequence = ca_chains_1[ch].Copy()
1723  sequence.name = _FixName(qs_ent_1.GetName() + '.' + ch, seq_to_empty_view)
1724  seq_to_empty_view[sequence.name] = ent_view_1
1725  seq_list.AddSequence(sequence)
1726  for ch in group_2:
1727  sequence = ca_chains_2[ch].Copy()
1728  sequence.name = _FixName(qs_ent_2.GetName() + '.' + ch, seq_to_empty_view)
1729  seq_to_empty_view[sequence.name] = ent_view_2
1730  seq_list.AddSequence(sequence)
1731  alnc = ClustalW(seq_list, clustalw=clustalw_bin)
1732 
1733  # collect aligned residues (list of lists of sequence_count valid residues)
1734  residue_lists = list()
1735  sequence_count = alnc.sequence_count
1736  for col in alnc:
1737  residues = [col.GetResidue(ir) for ir in range(sequence_count)]
1738  if all([r.IsValid() for r in residues]):
1739  residue_lists.append(residues)
1740  # check number of aligned residues
1741  if len(residue_lists) < 5:
1742  raise QSscoreError('Not enough aligned residues.')
1743  elif max_ca_per_chain > 0:
1744  residue_lists = _SelectFew(residue_lists, max_ca_per_chain)
1745  # check what view is for which residue
1746  res_views = [seq_to_empty_view[alnc.sequences[ir].name] \
1747  for ir in range(sequence_count)]
1748  # add to view (note: only 1 CA atom per residue in here)
1749  for res_list in residue_lists:
1750  for res, view in zip(res_list, res_views):
1751  view.AddResidue(res, mol.ViewAddFlag.INCLUDE_ATOMS)
1752  # done
1753  return ent_view_1, ent_view_2
1754 
1755 
1756 def _FindSymmetry(qs_ent_1, qs_ent_2, ent_to_cm_1, ent_to_cm_2, chem_mapping):
1757  """
1758  :return: A pair of comparable symmetry groups (for :attr:`QSscorer.symm_1`
1759  and :attr:`QSscorer.symm_2`) between the two structures.
1760  Empty lists if no symmetry identified.
1761 
1762  :param qs_ent_1: See :attr:`QSscorer.qs_ent_1`
1763  :param qs_ent_2: See :attr:`QSscorer.qs_ent_2`
1764  :param ent_to_cm_1: See :attr:`QSscorer.ent_to_cm_1`
1765  :param ent_to_cm_2: See :attr:`QSscorer.ent_to_cm_2`
1766  :param chem_mapping: See :attr:`QSscorer.chem_mapping`
1767  """
1768  LogInfo('Identifying Symmetry Groups...')
1769 
1770  # get possible symmetry groups
1771  symm_subg_1 = _GetSymmetrySubgroups(qs_ent_1, ent_to_cm_1,
1772  chem_mapping.keys())
1773  symm_subg_2 = _GetSymmetrySubgroups(qs_ent_2, ent_to_cm_2,
1774  chem_mapping.values())
1775 
1776  # check combination of groups
1777  LogInfo('Selecting Symmetry Groups...')
1778  # check how many mappings are to be checked for each pair of symmetry groups
1779  # -> lower number here will speed up worst-case runtime of _GetChainMapping
1780  # NOTE: this is tailored to speed up brute force all vs all mapping test
1781  # for preferred _CheckClosedSymmetry this is suboptimal!
1782  best_symm = []
1783  for symm_1, symm_2 in itertools.product(symm_subg_1, symm_subg_2):
1784  s1 = symm_1[0]
1785  s2 = symm_2[0]
1786  if len(s1) != len(s2):
1787  LogDebug('Discarded', str(symm_1), str(symm_2),
1788  ': different size of symmetry groups')
1789  continue
1790  count = _CountSuperpositionsAndMappings(symm_1, symm_2, chem_mapping)
1791  nr_mapp = count['intra']['mappings'] + count['inter']['mappings']
1792  LogDebug('OK', str(symm_1), str(symm_2), ': %s mappings' % nr_mapp)
1793  best_symm.append((nr_mapp, symm_1, symm_2))
1794 
1795  for _, symm_1, symm_2 in sorted(best_symm):
1796  s1 = symm_1[0]
1797  s2 = symm_2[0]
1798  group_1 = ent_to_cm_1.Select('cname=%s' % _FixSelectChainNames(s1))
1799  group_2 = ent_to_cm_2.Select('cname=%s' % _FixSelectChainNames(s2))
1800  # check if by superposing a pair of chains within the symmetry group to
1801  # superpose all chains within the symmetry group
1802  # -> if successful, the symmetry groups are compatible
1803  closed_symm = _CheckClosedSymmetry(group_1, group_2, symm_1, symm_2,
1804  chem_mapping, 6, 0.8, find_best=False)
1805 
1806  if closed_symm:
1807  return symm_1, symm_2
1808 
1809  # nothing found
1810  LogInfo('No coherent symmetry identified between structures')
1811  return [], []
1812 
1813 
1814 def _GetChainMapping(ent_1, ent_2, symm_1, symm_2, chem_mapping):
1815  """
1816  :return: Tuple with mapping from *ent_1* to *ent_2* (see
1817  :attr:`QSscorer.chain_mapping`) and scheme used (see
1818  :attr:`QSscorer.chain_mapping_scheme`)
1819 
1820  :param ent_1: See :attr:`QSscorer.ent_to_cm_1`
1821  :param ent_2: See :attr:`QSscorer.ent_to_cm_2`
1822  :param symm_1: See :attr:`QSscorer.symm_1`
1823  :param symm_2: See :attr:`QSscorer.symm_2`
1824  :param chem_mapping: See :attr:`QSscorer.chem_mapping`
1825  """
1826  LogInfo('Symmetry-groups used in %s: %s' % (ent_1.GetName(), str(symm_1)))
1827  LogInfo('Symmetry-groups used in %s: %s' % (ent_2.GetName(), str(symm_2)))
1828 
1829  # quick check for closed symmetries
1830  thresholds = [( 'strict', 4, 0.8),
1831  ( 'tolerant', 6, 0.4),
1832  ('permissive', 8, 0.2)]
1833  for scheme, sup_thr, sup_fract in thresholds:
1834  # check if by superposing a pair of chains within the symmetry group to
1835  # superpose all chains of the two oligomers
1836  # -> this also returns the resulting mapping of chains
1837  mapping = _CheckClosedSymmetry(ent_1, ent_2, symm_1, symm_2,
1838  chem_mapping, sup_thr, sup_fract)
1839  if mapping:
1840  LogInfo('Closed Symmetry with %s parameters' % scheme)
1841  if scheme == 'permissive':
1842  LogWarning('Permissive thresholds used for overlap based mapping ' + \
1843  'detection: check mapping manually: %s' % mapping)
1844  return mapping, scheme
1845 
1846  # NOTE that what follows below is sub-optimal:
1847  # - if the two structures don't fit at all, we may map chains rather randomly
1848  # - we may need a lot of operations to find globally "best" mapping
1849  # -> COST = O(N^2) * O(N!)
1850  # (first = all pairwise chain-superpose, latter = all chain mappings)
1851  # -> a greedy chain mapping choice algo (pick best RMSD for each one-by-one)
1852  # could reduce this to O(N^2) * O(N^2) but it would need to be validated
1853  # - one could also try some other heuristic based on center of atoms of chains
1854  # -> at this point we get a bad mapping anyways...
1855 
1856  # if we get here, we will need to check many combinations of mappings
1857  # -> check how many mappings must be checked and limit
1858  count = _CountSuperpositionsAndMappings(symm_1, symm_2, chem_mapping)
1859  LogInfo('Intra Symmetry-group mappings to check: %s' \
1860  % count['intra']['mappings'])
1861  LogInfo('Inter Symmetry-group mappings to check: %s' \
1862  % count['inter']['mappings'])
1863  nr_mapp = count['intra']['mappings'] + count['inter']['mappings']
1864  if nr_mapp > 100000: # 322560 is octamer vs octamer
1865  raise QSscoreError('Too many possible mappings: %s' % nr_mapp)
1866 
1867  # to speed up the computations we cache chain views and RMSDs
1868  cached_rmsd = _CachedRMSD(ent_1, ent_2)
1869 
1870  # get INTRA symmetry group chain mapping
1871  check = 0
1872  intra_mappings = [] # list of (RMSD, c1, c2, mapping) tuples (best superpose)
1873  # limit chem mapping to reference symmetry group
1874  ref_symm_1 = symm_1[0]
1875  ref_symm_2 = symm_2[0]
1876  asu_chem_mapping = _LimitChemMapping(chem_mapping, ref_symm_1, ref_symm_2)
1877  # for each chemically identical group
1878  for g1, g2 in asu_chem_mapping.iteritems():
1879  # try to superpose all possible pairs
1880  for c1, c2 in itertools.product(g1, g2):
1881  # get superposition transformation
1882  LogVerbose('Superposing chains: %s, %s' % (c1, c2))
1883  res = cached_rmsd.GetSuperposition(c1, c2)
1884  # compute RMSD of possible mappings
1885  cur_mappings = [] # list of (RMSD, mapping) tuples
1886  for mapping in _ListPossibleMappings(c1, c2, asu_chem_mapping):
1887  check += 1
1888  chains_rmsd = cached_rmsd.GetMappedRMSD(mapping, res.transformation)
1889  cur_mappings.append((chains_rmsd, mapping))
1890  LogVerbose(str(mapping), str(chains_rmsd))
1891  best_rmsd, best_mapp = min(cur_mappings)
1892  intra_mappings.append((best_rmsd, c1, c2, best_mapp))
1893  # best chain-chain superposition defines the intra asu mapping
1894  _, intra_asu_c1, intra_asu_c2, intra_asu_mapp = min(intra_mappings)
1895 
1896  # if only one asu is present in any of the complexes, we're done
1897  if len(symm_1) == 1 or len(symm_2) == 1:
1898  mapping = intra_asu_mapp
1899  else:
1900  # the mapping is the element position within the asu chain list
1901  # -> this speed up a lot, assuming that the order of chain in asu groups
1902  # is following the order of symmetry operations
1903  index_asu_c1 = ref_symm_1.index(intra_asu_c1)
1904  index_asu_c2 = ref_symm_2.index(intra_asu_c2)
1905  index_mapp = {ref_symm_1.index(s1): ref_symm_2.index(s2) \
1906  for s1, s2 in intra_asu_mapp.iteritems()}
1907  LogInfo('Intra symmetry-group mapping: %s' % str(intra_asu_mapp))
1908 
1909  # get INTER symmetry group chain mapping
1910  # we take the first symmetry group from the complex having the most
1911  if len(symm_1) < len(symm_2):
1912  symm_combinations = list(itertools.product(symm_1, [symm_2[0]]))
1913  else:
1914  symm_combinations = list(itertools.product([symm_1[0]], symm_2))
1915 
1916  full_asu_mapp = {tuple(symm_1): tuple(symm_2)}
1917  full_mappings = [] # list of (RMSD, mapping) tuples
1918  for s1, s2 in symm_combinations:
1919  # get superposition transformation (we take best chain-pair in asu)
1920  LogVerbose('Superposing symmetry-group: %s, %s in %s, %s' \
1921  % (s1[index_asu_c1], s2[index_asu_c2], s1, s2))
1922  res = cached_rmsd.GetSuperposition(s1[index_asu_c1], s2[index_asu_c2])
1923  # compute RMSD of possible mappings
1924  for asu_mapp in _ListPossibleMappings(s1, s2, full_asu_mapp):
1925  check += 1
1926  # need to extract full chain mapping (use indexing)
1927  mapping = {}
1928  for a, b in asu_mapp.iteritems():
1929  for id_a, id_b in index_mapp.iteritems():
1930  mapping[a[id_a]] = b[id_b]
1931  chains_rmsd = cached_rmsd.GetMappedRMSD(mapping, res.transformation)
1932  full_mappings.append((chains_rmsd, mapping))
1933  LogVerbose(str(mapping), str(chains_rmsd))
1934 
1935  if check != nr_mapp:
1936  LogWarning('Mapping number estimation was wrong') # sanity check
1937 
1938  # return best (lowest RMSD) mapping
1939  mapping = min(full_mappings)[1]
1940 
1941  LogWarning('Extensive search used for mapping detection (fallback). This ' + \
1942  'approach has known limitations. Check mapping manually: %s' \
1943  % mapping)
1944  return mapping, 'extensive'
1945 
1946 
1947 def _GetSymmetrySubgroups(qs_ent, ent, chem_groups):
1948  """
1949  Identify the symmetry (either cyclic C or dihedral D) of the protein and find
1950  all possible symmetry subgroups. This is done testing all combination of chain
1951  superposition and clustering them by the angles (D) or the axis (C) of the Rt
1952  operator.
1953 
1954  Groups of superposition which can fully reconstruct the structure are possible
1955  symmetry subgroups.
1956 
1957  :param qs_ent: Entity with cached angles and axis.
1958  :type qs_ent: :class:`QSscoreEntity`
1959  :param ent: Entity from which to extract chains (perfect alignment assumed
1960  for superposition as in :attr:`QSscorer.ent_to_cm_1`).
1961  :type ent: :class:`EntityHandle` or :class:`EntityView`
1962  :param chem_groups: List of tuples/lists of chain names in *ent*. Each list
1963  contains all chains belonging to a chem. group (could be
1964  from :attr:`QSscoreEntity.chem_groups` or from "keys()"
1965  or "values()" of :attr:`QSscorer.chem_mapping`)
1966 
1967  :return: A list of possible symmetry subgroups (each in same format as
1968  :attr:`QSscorer.symm_1`). If no symmetry is found, we return a list
1969  with a single symmetry subgroup with a single group with all chains.
1970  """
1971  # get angles / axis for pairwise transformations within same chem. group
1972  angles = {}
1973  axis = {}
1974  for cnames in chem_groups:
1975  for c1, c2 in itertools.combinations(cnames, 2):
1976  cur_angles = qs_ent.GetAngles(c1, c2)
1977  if not np.any(np.isnan(cur_angles)):
1978  angles[(c1,c2)] = cur_angles
1979  cur_axis = qs_ent.GetAxis(c1, c2)
1980  if not np.any(np.isnan(cur_axis)):
1981  axis[(c1,c2)] = cur_axis
1982 
1983  # cluster superpositions angles at different thresholds
1984  symm_groups = []
1985  LogVerbose('Possible symmetry-groups for: %s' % ent.GetName())
1986  for angle_thr in np.arange(0.1, 1, 0.1):
1987  d_symm_groups = _GetDihedralSubgroups(ent, chem_groups, angles, angle_thr)
1988  if d_symm_groups:
1989  for group in d_symm_groups:
1990  if group not in symm_groups:
1991  symm_groups.append(group)
1992  LogVerbose('Dihedral: %s' % group)
1993  LogInfo('Symmetry threshold %.1f used for angles of %s' \
1994  % (angle_thr, ent.GetName()))
1995  break
1996 
1997  # cluster superpositions axis at different thresholds
1998  for axis_thr in np.arange(0.1, 1, 0.1):
1999  c_symm_groups = _GetCyclicSubgroups(ent, chem_groups, axis, axis_thr)
2000  if c_symm_groups:
2001  for group in c_symm_groups:
2002  if group not in symm_groups:
2003  symm_groups.append(group)
2004  LogVerbose('Cyclic: %s' % group)
2005  LogInfo('Symmetry threshold %.1f used for axis of %s' \
2006  % (axis_thr, ent.GetName()))
2007  break
2008 
2009  # fall back to single "group" containing all chains if none found
2010  if not symm_groups:
2011  LogInfo('No symmetry-group detected in %s' % ent.GetName())
2012  symm_groups = [[tuple([c for g in chem_groups for c in g])]]
2013 
2014  return symm_groups
2015 
2016 def _GetDihedralSubgroups(ent, chem_groups, angles, angle_thr):
2017  """
2018  :return: Dihedral subgroups for :func:`_GetSymmetrySubgroups`
2019  (same return type as there). Empty list if fail.
2020 
2021  :param ent: See :func:`_GetSymmetrySubgroups`
2022  :param chem_groups: See :func:`_GetSymmetrySubgroups`
2023  :param angles: :class:`dict` (key = chain-pair-tuple, value = Euler angles)
2024  :param angle_thr: Euler angles distance threshold for clustering (float).
2025  """
2026  # cluster superpositions angles
2027  if len(angles) == 0:
2028  # nothing to be done here
2029  return []
2030  else:
2031  same_angles = _ClusterData(angles, angle_thr, _AngleArrayDistance)
2032 
2033  # get those which are non redundant and covering all chains
2034  symm_groups = []
2035  for clst in same_angles.values():
2036  group = clst.keys()
2037  if _ValidChainGroup(group, ent):
2038  if len(chem_groups) > 1:
2039  # if hetero, we want to group toghether different chains only
2040  symm_groups.append(zip(*group))
2041  else:
2042  # if homo, we also want pairs
2043  symm_groups.append(group)
2044  symm_groups.append(zip(*group))
2045  return symm_groups
2046 
2047 def _GetCyclicSubgroups(ent, chem_groups, axis, axis_thr):
2048  """
2049  :return: Cyclic subgroups for :func:`_GetSymmetrySubgroups`
2050  (same return type as there). Empty list if fail.
2051 
2052  :param ent: See :func:`_GetSymmetrySubgroups`
2053  :param chem_groups: See :func:`_GetSymmetrySubgroups`
2054  :param angles: :class:`dict` (key = chain-pair-tuple, value = rotation axis)
2055  :param angle_thr: Axis distance threshold for clustering (float).
2056  """
2057  # cluster superpositions axis
2058  if len(axis) == 0:
2059  # nothing to be done here
2060  return []
2061  else:
2062  same_axis = _ClusterData(axis, axis_thr, _AxisDistance)
2063 
2064  # use to get grouping
2065  symm_groups = []
2066  for clst in same_axis.values():
2067  all_chain = [item for sublist in clst.keys() for item in sublist]
2068  if len(set(all_chain)) == ent.chain_count:
2069  # if we have an hetero we can exploit cyclic symmetry for grouping
2070  if len(chem_groups) > 1:
2071  ref_chem_group = chem_groups[0]
2072  # try two criteria for grouping
2073  cyclic_group_closest = []
2074  cyclic_group_iface = []
2075  for c1 in ref_chem_group:
2076  group_closest = [c1]
2077  group_iface = [c1]
2078  for chains in chem_groups[1:]:
2079  # center of atoms distance
2080  closest = _GetClosestChain(ent, c1, chains)
2081  # chain with bigger interface
2082  closest_iface = _GetClosestChainInterface(ent, c1, chains)
2083  group_closest.append(closest)
2084  group_iface.append(closest_iface)
2085  cyclic_group_closest.append(tuple(group_closest))
2086  cyclic_group_iface.append(tuple(group_iface))
2087  if _ValidChainGroup(cyclic_group_closest, ent):
2088  symm_groups.append(cyclic_group_closest)
2089  if _ValidChainGroup(cyclic_group_iface, ent):
2090  symm_groups.append(cyclic_group_iface)
2091  # if it is a homo, there's not much we can group
2092  else:
2093  symm_groups.append(chem_groups)
2094  return symm_groups
2095 
2096 def _ClusterData(data, thr, metric):
2097  """Wrapper for fclusterdata to get dict of clusters.
2098 
2099  :param data: :class:`dict` (keys for ID, values used for clustering)
2100  :return: :class:`dict` {cluster_idx: {data-key: data-value}}
2101  """
2102  # special case length 1
2103  if len(data) == 1:
2104  return {0: {data.keys()[0]: data.values()[0]} }
2105  # do the clustering
2106  cluster_indices = fclusterdata(np.asarray(data.values()), thr,
2107  method='complete', criterion='distance',
2108  metric=metric)
2109  # fclusterdata output is cluster ids -> put into dict of clusters
2110  res = {}
2111  for cluster_idx, data_key in zip(cluster_indices, data.keys()):
2112  if not cluster_idx in res:
2113  res[cluster_idx] = {}
2114  res[cluster_idx][data_key] = data[data_key]
2115  return res
2116 
2117 def _AngleArrayDistance(u, v):
2118  """
2119  :return: Average angular distance of two arrays of angles.
2120  :param u: Euler angles.
2121  :param v: Euler angles.
2122  """
2123  dist = []
2124  for a,b in zip(u,v):
2125  delta = abs(a-b)
2126  if delta > np.pi:
2127  delta = abs(2*np.pi - delta)
2128  dist.append(delta)
2129  return np.mean(dist)
2130 
2131 def _AxisDistance(u, v):
2132  """
2133  :return: Euclidean distance between two rotation axes. Axes can point in
2134  either direction so we ensure to use the closer one.
2135  :param u: Rotation axis.
2136  :param v: Rotation axis.
2137  """
2138  # get axes as arrays (for convenience)
2139  u = np.asarray(u)
2140  v = np.asarray(v)
2141  # get shorter of two possible distances (using v or -v)
2142  dv1 = u - v
2143  dv2 = u + v
2144  d1 = np.dot(dv1, dv1)
2145  d2 = np.dot(dv2, dv2)
2146  return np.sqrt(min(d1, d2))
2147 
2148 def _GetClosestChain(ent, ref_chain, chains):
2149  """
2150  :return: Chain closest to *ref_chain* based on center of atoms distance.
2151  :rtype: :class:`str`
2152  :param ent: See :func:`_GetSymmetrySubgroups`
2153  :param ref_chain: We look for chains closest to this one
2154  :type ref_chain: :class:`str`
2155  :param chains: We only consider these chains
2156  :type chains: :class:`list` of :class:`str`
2157  """
2158  contacts = []
2159  ref_center = ent.FindChain(ref_chain).center_of_atoms
2160  for ch in chains:
2161  center = ent.FindChain(ch).center_of_atoms
2162  contacts.append((geom.Distance(ref_center, center), ch))
2163  closest_chain = min(contacts)[1]
2164  return closest_chain
2165 
2166 def _GetClosestChainInterface(ent, ref_chain, chains):
2167  """
2168  :return: Chain with biggest interface (within 10 A) to *ref_chain*.
2169  :rtype: :class:`str`
2170  :param ent: See :func:`_GetSymmetrySubgroups`
2171  :param ref_chain: We look for chains closest to this one
2172  :type ref_chain: :class:`str`
2173  :param chains: We only consider these chains
2174  :type chains: :class:`list` of :class:`str`
2175  """
2176  # NOTE: this is computed on a subset of the full biounit and might be
2177  # inaccurate. Also it could be extracted from QSscoreEntity.contacts.
2178  closest = []
2179  for ch in chains:
2180  iface_view = ent.Select('cname="%s" and 10 <> [cname="%s"]' \
2181  % (ref_chain, ch))
2182  nr_res = iface_view.residue_count
2183  closest.append((nr_res, ch))
2184  closest_chain = max(closest)[1]
2185  return closest_chain
2186 
2187 def _ValidChainGroup(group, ent):
2188  """
2189  :return: True, if *group* has unique chain names and as many chains as *ent*
2190  :rtype: :class:`bool`
2191  :param group: Symmetry groups to check
2192  :type group: Same as :attr:`QSscorer.symm_1`
2193  :param ent: See :func:`_GetSymmetrySubgroups`
2194  """
2195  all_chain = [item for sublist in group for item in sublist]
2196  if len(all_chain) == len(set(all_chain)) and\
2197  len(all_chain) == ent.chain_count:
2198  return True
2199  return False
2200 
2201 def _LimitChemMapping(chem_mapping, limit_1, limit_2):
2202  """
2203  :return: Chem. mapping containing only chains in *limit_1* and *limit_2*
2204  :rtype: Same as :attr:`QSscorer.chem_mapping`
2205  :param chem_mapping: See :attr:`QSscorer.chem_mapping`
2206  :param limit_1: Limits chain names in chem_mapping.keys()
2207  :type limit_1: List/tuple of strings
2208  :param limit_2: Limits chain names in chem_mapping.values()
2209  :type limit_2: List/tuple of strings
2210  """
2211  # use set intersection to keep only mapping for chains in limit_X
2212  limit_1_set = set(limit_1)
2213  limit_2_set = set(limit_2)
2214  asu_chem_mapping = dict()
2215  for key, value in chem_mapping.iteritems():
2216  new_key = tuple(set(key) & limit_1_set)
2217  if new_key:
2218  asu_chem_mapping[new_key] = tuple(set(value) & limit_2_set)
2219  return asu_chem_mapping
2220 
2221 
2222 def _CountSuperpositionsAndMappings(symm_1, symm_2, chem_mapping):
2223  """
2224  :return: Dictionary of number of mappings and superpositions to be performed.
2225  Returned as *result[X][Y] = number* with X = "intra" or "inter" and
2226  Y = "mappings" or "superpositions". The idea is that for each
2227  pairwise superposition we check all possible mappings.
2228  We can check the combinations within (intra) a symmetry group and
2229  once established, we check the combinations between different (inter)
2230  symmetry groups.
2231  :param symm_1: See :attr:`QSscorer.symm_1`
2232  :param symm_2: See :attr:`QSscorer.symm_2`
2233  :param chem_mapping: See :attr:`QSscorer.chem_mapping`
2234  """
2235  # setup
2236  c = {}
2237  c['intra'] = {}
2238  c['inter'] = {}
2239  c['intra']['mappings'] = 0
2240  c['intra']['superpositions'] = 0
2241  c['inter']['mappings'] = 0
2242  c['inter']['superpositions'] = 0
2243  # intra ASU mappings within reference symmetry groups
2244  ref_symm_1 = symm_1[0]
2245  ref_symm_2 = symm_2[0]
2246  asu_chem_mapping = _LimitChemMapping(chem_mapping, ref_symm_1, ref_symm_2)
2247  for g1, g2 in asu_chem_mapping.iteritems():
2248  chain_superpositions = len(g1) * len(g2)
2249  c['intra']['superpositions'] += chain_superpositions
2250  map_per_sup = _PermutationOrCombinations(g1[0], g2[0], asu_chem_mapping)
2251  c['intra']['mappings'] += chain_superpositions * map_per_sup
2252  if len(symm_1) == 1 or len(symm_2) == 1:
2253  return c
2254  # inter ASU mappings
2255  asu_superposition = min(len(symm_1), len(symm_2))
2256  c['inter']['superpositions'] = asu_superposition
2257  asu = {tuple(symm_1): tuple(symm_2)}
2258  map_per_sup = _PermutationOrCombinations(ref_symm_1, ref_symm_2, asu)
2259  c['inter']['mappings'] = asu_superposition * map_per_sup
2260  return c
2261 
2262 def _PermutationOrCombinations(sup1, sup2, chem_mapping):
2263  """Should match len(_ListPossibleMappings(sup1, sup2, chem_mapping))."""
2264  # remove superposed elements, put smallest complex as key
2265  chem_map = {}
2266  for a,b in chem_mapping.iteritems():
2267  new_a = tuple([x for x in a if x != sup1])
2268  new_b = tuple([x for x in b if x != sup2])
2269  chem_map[new_a] = new_b
2270  mapp_nr = 1.0
2271  for a, b in chem_map.iteritems():
2272  if len(a) == len(b):
2273  mapp_nr *= factorial(len(a))
2274  elif len(a) < len(b):
2275  mapp_nr *= binom(len(b), len(a))
2276  elif len(a) > len(b):
2277  mapp_nr *= binom(len(a), len(b))
2278  return int(mapp_nr)
2279 
2280 def _ListPossibleMappings(sup1, sup2, chem_mapping):
2281  """
2282  Return a flat list of all possible mappings given *chem_mapping* and keeping
2283  mapping of *sup1* and *sup2* fixed. For instance if elements are chain names
2284  this is all the mappings to check for a given superposition.
2285 
2286  Elements in first complex are defined by *chem_mapping.keys()* (list of list
2287  of elements) and elements in second complex by *chem_mapping.values()*. If
2288  complexes don't have same number of elements, we map only elements for the one
2289  with less. Also mapping is only between elements of mapped groups according to
2290  *chem_mapping*.
2291 
2292  :rtype: :class:`list` of :class:`dict` (key = element in chem_mapping-key,
2293  value = element in chem_mapping-value)
2294  :param sup1: Element for which mapping is fixed.
2295  :type sup1: Like element in chem_mapping-key
2296  :param sup2: Element for which mapping is fixed.
2297  :type sup2: Like element in chem_mapping-value
2298  :param chem_mapping: Defines mapping between groups of elements (e.g. result
2299  of :func:`_LimitChemMapping`).
2300  :type chem_mapping: :class:`dict` with key / value = :class:`tuple`
2301 
2302  :raises: :class:`QSscoreError` if reference complex (first one or one with
2303  less elements) has more elements for any given mapped group.
2304  """
2305  # find smallest complex
2306  chain1 = [i for s in chem_mapping.keys() for i in s]
2307  chain2 = [i for s in chem_mapping.values() for i in s]
2308  swap = False
2309  if len(chain1) > len(chain2):
2310  swap = True
2311  # remove superposed elements, put smallest complex as key
2312  chem_map = {}
2313  for a, b in chem_mapping.iteritems():
2314  new_a = tuple([x for x in a if x != sup1])
2315  new_b = tuple([x for x in b if x != sup2])
2316  if swap:
2317  chem_map[new_b] = new_a
2318  else:
2319  chem_map[new_a] = new_b
2320  # generate permutations or combinations of chemically
2321  # equivalent chains
2322  chem_perm = []
2323  chem_ref = []
2324  for a, b in chem_map.iteritems():
2325  if len(a) == len(b):
2326  chem_perm.append(list(itertools.permutations(b)))
2327  chem_ref.append(a)
2328  elif len(a) < len(b):
2329  chem_perm.append(list(itertools.combinations(b, len(a))))
2330  chem_ref.append(a)
2331  else:
2332  raise QSscoreError('Impossible to define reference group: %s' \
2333  % str(chem_map))
2334  # save the list of possible mappings
2335  mappings = []
2336  flat_ref = [i for s in chem_ref for i in s]
2337  for perm in itertools.product(*chem_perm):
2338  flat_perm = [i for s in perm for i in s]
2339  d = {c1: c2 for c1, c2 in zip(flat_ref, flat_perm)}
2340  if swap:
2341  d = {v: k for k, v in d.items()}
2342  d.update({sup1: sup2})
2343  mappings.append(d)
2344  return mappings
2345 
2346 
2347 def _CheckClosedSymmetry(ent_1, ent_2, symm_1, symm_2, chem_mapping,
2348  sup_thr=4, sup_fract=0.8, find_best=True):
2349  """
2350  Quick check if we can superpose two chains and get a mapping for all other
2351  chains using the same transformation. The mapping is defined by sufficient
2352  overlap of the transformed chain of *ent_1* onto another chain in *ent_2*.
2353 
2354  :param ent_1: Entity to map to *ent_2* (perfect alignment assumed between
2355  chains of same chem. group as in :attr:`QSscorer.ent_to_cm_1`).
2356  Views are ok but only to select full chains!
2357  :param ent_2: Entity to map to (perfect alignment assumed between
2358  chains of same chem. group as in :attr:`QSscorer.ent_to_cm_2`).
2359  Views are ok but only to select full chains!
2360  :param symm_1: Symmetry groups to use. We only superpose chains within
2361  reference symmetry group of *symm_1* and *symm_2*.
2362  See :attr:`QSscorer.symm_1`
2363  :param symm_2: See :attr:`QSscorer.symm_2`
2364  :param chem_mapping: See :attr:`QSscorer.chem_mapping`.
2365  All chains in *ent_1* / *ent_2* must be listed here!
2366  :param sup_thr: Distance around transformed chain in *ent_1* to check for
2367  overlap.
2368  :type sup_thr: :class:`float`
2369  :param sup_fract: Fraction of atoms in chain of *ent_2* that must be
2370  overlapped for overlap to be sufficient.
2371  :type sup_fract: :class:`float`
2372  :param find_best: If True, we look for best mapping according to
2373  :func:`_GetMappedRMSD`. Otherwise, we return first suitable
2374  mapping.
2375  :type find_best: :class:`bool`
2376 
2377  :return: Mapping from *ent_1* to *ent_2* or None if none found. Mapping, if
2378  found, is as in :attr:`QSscorer.chain_mapping`.
2379  :rtype: :class:`dict` (:class:`str` / :class:`str`)
2380  """
2381  # limit chem mapping to reference symmetry group
2382  ref_symm_1 = symm_1[0]
2383  ref_symm_2 = symm_2[0]
2384  asu_chem_mapping = _LimitChemMapping(chem_mapping, ref_symm_1, ref_symm_2)
2385  # for each chemically identical group
2386  rmsd_mappings = []
2387  for g1, g2 in asu_chem_mapping.iteritems():
2388  # try to superpose all possible pairs
2389  # -> note that some chain-chain combinations may work better than others
2390  # to superpose the full oligomer (e.g. if some chains are open/closed)
2391  for c1, c2 in itertools.product(g1, g2):
2392  # get superposition transformation
2393  chain_1 = ent_1.Select('cname="%s"' % c1)
2394  chain_2 = ent_2.Select('cname="%s"' % c2)
2395  res = mol.alg.SuperposeSVD(chain_1, chain_2, apply_transform=False)
2396  # look for overlaps
2397  mapping = _GetSuperpositionMapping(ent_1, ent_2, chem_mapping,
2398  res.transformation, sup_thr,
2399  sup_fract)
2400  if not mapping:
2401  continue
2402  # early abort if we only want the first one
2403  if not find_best:
2404  return mapping
2405  else:
2406  # get RMSD for sorting
2407  rmsd = _GetMappedRMSD(ent_1, ent_2, mapping, res.transformation)
2408  rmsd_mappings.append((rmsd, mapping))
2409  # return best mapping
2410  if rmsd_mappings:
2411  return min(rmsd_mappings)[1]
2412  else:
2413  return None
2414 
2415 def _GetSuperpositionMapping(ent_1, ent_2, chem_mapping, transformation,
2416  sup_thr, sup_fract):
2417  """
2418  :return: Dict with chain mapping from *ent_1* to *ent_2* or None if failed
2419  (see :func:`_CheckClosedSymmetry`).
2420  :param ent_1: See :func:`_CheckClosedSymmetry`
2421  :param ent_2: See :func:`_CheckClosedSymmetry`
2422  :param chem_mapping: See :func:`_CheckClosedSymmetry`
2423  :param transformation: Superposition transformation to be applied to *ent_1*.
2424  :param sup_thr: See :func:`_CheckClosedSymmetry`
2425  :param sup_fract: See :func:`_CheckClosedSymmetry`
2426  """
2427  # NOTE: this seems to be the comput. most expensive part in QS scoring
2428  # -> it could be moved to C++ for speed-up
2429  # -> the next expensive bits are ClustalW and GetContacts btw...
2430 
2431  # swap if needed (ent_1 must be shorter or same)
2432  if ent_1.chain_count > ent_2.chain_count:
2433  swap = True
2434  ent_1, ent_2 = ent_2, ent_1
2435  transformation = geom.Invert(transformation)
2436  chem_pairs = zip(chem_mapping.values(), chem_mapping.keys())
2437  else:
2438  swap = False
2439  chem_pairs = chem_mapping.iteritems()
2440  # sanity check
2441  if ent_1.chain_count == 0:
2442  return None
2443  # extract valid partners for each chain in ent_1
2444  chem_partners = dict()
2445  for cg1, cg2 in chem_pairs:
2446  for ch in cg1:
2447  chem_partners[ch] = set(cg2)
2448  # get mapping for each chain in ent_1
2449  mapping = dict()
2450  mapped_chains = set()
2451  for ch_1 in ent_1.chains:
2452  # get all neighbors split by chain (NOTE: this muight be moved to C++)
2453  ch_atoms = {ch_2.name: set() for ch_2 in ent_2.chains}
2454  for a_1 in ch_1.handle.atoms:
2455  transformed_pos = geom.Vec3(transformation * geom.Vec4(a_1.pos))
2456  a_within = ent_2.FindWithin(transformed_pos, sup_thr)
2457  for a_2 in a_within:
2458  ch_atoms[a_2.chain.name].add(a_2.hash_code)
2459  # get one with most atoms in overlap
2460  max_num, max_name = max((len(atoms), name)
2461  for name, atoms in ch_atoms.iteritems())
2462  # early abort if overlap fraction not good enough
2463  ch_2 = ent_2.FindChain(max_name)
2464  if max_num == 0 or max_num / float(ch_2.handle.atom_count) < sup_fract:
2465  return None
2466  # early abort if mapped to chain of different chem. group
2467  ch_1_name = ch_1.name
2468  if ch_1_name not in chem_partners:
2469  raise RuntimeError("Chem. mapping doesn't contain all chains!")
2470  if max_name not in chem_partners[ch_1_name]:
2471  return None
2472  # early abort if multiple ones mapped to same chain
2473  if max_name in mapped_chains:
2474  return None
2475  # set mapping
2476  mapping[ch_1_name] = max_name
2477  mapped_chains.add(max_name)
2478  # unswap if needed and return
2479  if swap:
2480  mapping = {v: k for k, v in mapping.iteritems()}
2481  return mapping
2482 
2483 def _GetMappedRMSD(ent_1, ent_2, chain_mapping, transformation):
2484  """
2485  :return: RMSD between complexes considering chain mapping.
2486  :param ent_1: Entity mapped to *ent_2* (perfect alignment assumed between
2487  mapped chains as in :attr:`QSscorer.ent_to_cm_1`).
2488  :param ent_2: Entity which was mapped to (perfect alignment assumed between
2489  mapped chains as in :attr:`QSscorer.ent_to_cm_2`).
2490  :param chain_mapping: See :attr:`QSscorer.chain_mapping`
2491  :param transformation: Superposition transformation to be applied to *ent_1*.
2492  """
2493  # collect RMSDs and atom counts chain by chain and combine afterwards
2494  rmsds = []
2495  atoms = []
2496  for c1, c2 in chain_mapping.iteritems():
2497  # get views and atom counts
2498  chain_1 = ent_1.Select('cname="%s"' % c1)
2499  chain_2 = ent_2.Select('cname="%s"' % c2)
2500  atom_count = chain_1.atom_count
2501  if atom_count != chain_2.atom_count:
2502  raise RuntimeError('Chains in _GetMappedRMSD must be perfectly aligned!')
2503  # get RMSD
2504  rmsd = mol.alg.CalculateRMSD(chain_1, chain_2, transformation)
2505  # update lists
2506  rmsds.append(rmsd)
2507  atoms.append(float(atom_count))
2508  # combine (reminder: RMSD = sqrt(sum(atom_distance^2)/atom_count))
2509  rmsd = np.sqrt( sum([a * r**2 for a, r in zip(atoms, rmsds)]) / sum(atoms) )
2510  return rmsd
2511 
2513  """Helper object for repetitive RMSD calculations.
2514  Meant to speed up :func:`_GetChainMapping` but could also be used to replace
2515  :func:`_GetMappedRMSD` in :func:`_CheckClosedSymmetry`.
2516 
2517  :param ent_1: See :attr:`QSscorer.ent_to_cm_1`
2518  :param ent_2: See :attr:`QSscorer.ent_to_cm_2`
2519  """
2520  def __init__(self, ent_1, ent_2):
2521  # initialize caches and keep entities
2522  self.ent_1 = ent_1
2523  self.ent_2 = ent_2
2524  self._chain_views_1 = dict()
2525  self._chain_views_2 = dict()
2526  self._pair_rmsd = dict() # key = (c1,c2), value = (atom_count,rmsd)
2527 
2528  def GetChainView1(self, cname):
2529  """Get cached view on chain *cname* for :attr:`ent_1`."""
2530  if cname not in self._chain_views_1:
2531  self._chain_views_1[cname] = self.ent_1.Select('cname="%s"' % cname)
2532  return self._chain_views_1[cname]
2533 
2534  def GetChainView2(self, cname):
2535  """Get cached view on chain *cname* for :attr:`ent_2`."""
2536  if cname not in self._chain_views_2:
2537  self._chain_views_2[cname] = self.ent_2.Select('cname="%s"' % cname)
2538  return self._chain_views_2[cname]
2539 
2540  def GetSuperposition(self, c1, c2):
2541  """Get superposition result (no change in entities!) for *c1* to *c2*.
2542  This invalidates cached RMSD results used in :func:`GetMappedRMSD`.
2543 
2544  :param c1: chain name for :attr:`ent_1`.
2545  :param c2: chain name for :attr:`ent_2`.
2546  """
2547  # invalidate _pair_rmsd
2548  self._pair_rmsd = dict()
2549  # superpose
2550  chain_1 = self.GetChainView1(c1)
2551  chain_2 = self.GetChainView2(c2)
2552  if chain_1.atom_count != chain_2.atom_count:
2553  raise RuntimeError('Chains in GetSuperposition not perfectly aligned!')
2554  return mol.alg.SuperposeSVD(chain_1, chain_2, apply_transform=False)
2555 
2556  def GetMappedRMSD(self, chain_mapping, transformation):
2557  """
2558  :return: RMSD between complexes considering chain mapping.
2559  :param chain_mapping: See :attr:`QSscorer.chain_mapping`.
2560  :param transformation: Superposition transformation (e.g. res.transformation
2561  for res = :func:`GetSuperposition`).
2562  """
2563  # collect RMSDs and atom counts chain by chain and combine afterwards
2564  rmsds = []
2565  atoms = []
2566  for c1, c2 in chain_mapping.iteritems():
2567  # cached?
2568  if (c1, c2) in self._pair_rmsd:
2569  atom_count, rmsd = self._pair_rmsd[(c1, c2)]
2570  else:
2571  # get views and atom counts
2572  chain_1 = self.GetChainView1(c1)
2573  chain_2 = self.GetChainView2(c2)
2574  atom_count = chain_1.atom_count
2575  if atom_count != chain_2.atom_count:
2576  raise RuntimeError('Chains in GetMappedRMSD not perfectly aligned!')
2577  # get RMSD
2578  rmsd = mol.alg.CalculateRMSD(chain_1, chain_2, transformation)
2579  self._pair_rmsd[(c1, c2)] = (atom_count, rmsd)
2580  # update lists
2581  rmsds.append(rmsd)
2582  atoms.append(float(atom_count))
2583  # combine (reminder: RMSD = sqrt(sum(atom_distance^2)/atom_count))
2584  rmsd = np.sqrt( sum([a * r**2 for a, r in zip(atoms, rmsds)]) / sum(atoms) )
2585  return rmsd
2586 
2587 
2588 def _CleanUserSymmetry(symm, ent):
2589  """Clean-up user provided symmetry.
2590 
2591  :param symm: Symmetry group as in :attr:`QSscorer.symm_1`
2592  :param ent: Entity from which to extract chain names
2593  :type ent: :class:`~ost.mol.EntityHandle` or :class:`~ost.mol.EntityView`
2594  :return: New symmetry group limited to chains in sub-structure *ent*
2595  (see :attr:`QSscorer.symm_1`). Empty list if invalid symmetry.
2596  """
2597  # restrict symm to only contain chains in ent
2598  chain_names = set([ch.name for ch in ent.chains])
2599  new_symm = list()
2600  for symm_group in symm:
2601  new_group = tuple(ch for ch in symm_group if ch in chain_names)
2602  if new_group:
2603  new_symm.append(new_group)
2604  # report it
2605  if new_symm != symm:
2606  LogInfo("Cleaned user symmetry for %s: %s" % (ent.GetName(), new_symm))
2607  # do all groups have same length?
2608  lengths = [len(symm_group) for symm_group in new_symm]
2609  if lengths[1:] != lengths[:-1]:
2610  LogWarning('User symmetry group of different sizes for %s. Ignoring it!' \
2611  % (ent.GetName()))
2612  return []
2613  # do we cover all chains?
2614  s_chain_names = set([ch for symm_group in new_symm for ch in symm_group])
2615  if len(s_chain_names) != sum(lengths):
2616  LogWarning('User symmetry group for %s has duplicate chains. Ignoring it!' \
2617  % (ent.GetName()))
2618  return []
2619  if s_chain_names != chain_names:
2620  LogWarning('User symmetry group for %s is missing chains. Ignoring it!' \
2621  % (ent.GetName()))
2622  return []
2623  # ok all good
2624  return new_symm
2625 
2626 def _AreValidSymmetries(symm_1, symm_2):
2627  """Check symmetry pair for major problems.
2628 
2629  :return: False if any of the two is empty or if they're compatible in size.
2630  :rtype: :class:`bool`
2631  """
2632  if not symm_1 or not symm_2:
2633  return False
2634  if len(symm_1) != 1 or len(symm_2) != 1:
2635  if not len(symm_1[0]) == len(symm_2[0]):
2636  LogWarning('Symmetry groups of different sizes. Ignoring them!')
2637  return False
2638  return True
2639 
2640 def _GetMappedAlignments(ent_1, ent_2, chain_mapping, res_num_alignment):
2641  """
2642  :return: Alignments of 2 structures given chain mapping
2643  (see :attr:`QSscorer.alignments`).
2644  :param ent_1: Entity containing all chains in *chain_mapping.keys()*.
2645  Views to this entity attached to first sequence of each aln.
2646  :param ent_2: Entity containing all chains in *chain_mapping.values()*.
2647  Views to this entity attached to second sequence of each aln.
2648  :param chain_mapping: See :attr:`QSscorer.chain_mapping`
2649  :param res_num_alignment: See :attr:`QSscorer.res_num_alignment`
2650  """
2651  alns = list()
2652  for ch_1_name in sorted(chain_mapping):
2653  # get both sequences incl. attached view
2654  ch_1 = ent_1.FindChain(ch_1_name)
2655  ch_2 = ent_2.FindChain(chain_mapping[ch_1_name])
2656  if res_num_alignment:
2657  max_res_num = max([r.number.GetNum() for r in ch_1.residues] +
2658  [r.number.GetNum() for r in ch_2.residues])
2659  ch1_aln = ["-"] * max_res_num
2660  ch2_aln = ["-"] * max_res_num
2661  for res in ch_1.residues:
2662  ch1_aln[res.number.GetNum() - 1] = res.GetOneLetterCode()
2663  ch1_aln = "".join(ch1_aln)
2664  seq_1 = seq.CreateSequence(ch_1.name, str(ch1_aln))
2665  seq_1.AttachView(ch_1.Select(""))
2666  for res in ch_2.residues:
2667  ch2_aln[res.number.GetNum() - 1] = res.GetOneLetterCode()
2668  ch2_aln = "".join(ch2_aln)
2669  seq_2 = seq.CreateSequence(ch_2.name, str(ch2_aln))
2670  seq_2.AttachView(ch_2.Select(""))
2671  # Create alignment
2672  aln = seq.CreateAlignment()
2673  aln.AddSequence(seq_1)
2674  aln.AddSequence(seq_2)
2675  else:
2676  seq_1 = seq.SequenceFromChain(ch_1.name, ch_1)
2677  seq_2 = seq.SequenceFromChain(ch_2.name, ch_2)
2678  # align them
2679  aln = _AlignAtomSeqs(seq_1, seq_2)
2680  if aln:
2681  alns.append(aln)
2682  return alns
2683 
2684 def _GetMappedResidues(alns):
2685  """
2686  :return: Mapping of shared residues in *alns* (with views attached)
2687  (see :attr:`QSscorer.mapped_residues`).
2688  :param alns: See :attr:`QSscorer.alignments`
2689  """
2690  mapped_residues = dict()
2691  for aln in alns:
2692  # prepare dict for c1
2693  c1 = aln.GetSequence(0).name
2694  mapped_residues[c1] = dict()
2695  # get shared residues
2696  v1, v2 = seq.ViewsFromAlignment(aln)
2697  for res_1, res_2 in zip(v1.residues, v2.residues):
2698  r1 = res_1.number.num
2699  r2 = res_2.number.num
2700  mapped_residues[c1][r1] = r2
2701 
2702  return mapped_residues
2703 
2704 def _GetExtraWeights(contacts, done, mapped_residues):
2705  """Return sum of extra weights for contacts of chains in set and not in done.
2706  :return: Tuple (weight_extra_mapped, weight_extra_all).
2707  weight_extra_mapped only sums if both cX,rX in mapped_residues
2708  weight_extra_all sums all
2709  :param contacts: See :func:`GetContacts` for first entity
2710  :param done: List of (c1, c2, r1, r2) tuples to ignore
2711  :param mapped_residues: See :func:`_GetMappedResidues`
2712  """
2713  weight_extra_mapped = 0
2714  weight_extra_non_mapped = 0
2715  for c1 in contacts:
2716  for c2 in contacts[c1]:
2717  for r1 in contacts[c1][c2]:
2718  for r2 in contacts[c1][c2][r1]:
2719  if (c1, c2, r1, r2) not in done:
2720  weight = _weight(contacts[c1][c2][r1][r2])
2721  if c1 in mapped_residues and r1 in mapped_residues[c1] \
2722  and c2 in mapped_residues and r2 in mapped_residues[c2]:
2723  weight_extra_mapped += weight
2724  else:
2725  weight_extra_non_mapped += weight
2726  return weight_extra_mapped, weight_extra_mapped + weight_extra_non_mapped
2727 
2728 def _GetScores(contacts_1, contacts_2, mapped_residues, chain_mapping):
2729  """Get QS scores (see :class:`QSscorer`).
2730 
2731  Note that if some chains are not to be considered at all, they must be removed
2732  from *contacts_1* / *contacts_2* prior to calling this.
2733 
2734  :param contacts_1: See :func:`GetContacts` for first entity
2735  :param contacts_2: See :func:`GetContacts` for second entity
2736  :param mapped_residues: See :func:`_GetMappedResidues`
2737  :param chain_mapping: Maps any chain name in *mapped_residues* to chain name
2738  for *contacts_2*.
2739  :type chain_mapping: :class:`dict` (:class:`str` / :class:`str`)
2740  :return: Tuple (QS_best, QS_global) (see :attr:`QSscorer.best_score` and
2741  see :attr:`QSscorer.global_score`)
2742  """
2743  # keep track of considered contacts as set of (c1,c2,r1,r2) tuples
2744  done_1 = set()
2745  done_2 = set()
2746  weighted_scores = 0
2747  weight_sum = 0
2748  # naming cXY, rXY: X = 1/2 for contact, Y = 1/2/T for mapping (T = tmp)
2749  # -> d1 = contacts_1[c11][c21][r11][r21], d2 = contacts_2[c12][c22][r12][r22]
2750  for c11 in contacts_1:
2751  # map to other one
2752  if c11 not in mapped_residues: continue
2753  c1T = chain_mapping[c11]
2754  # second chain
2755  for c21 in contacts_1[c11]:
2756  # map to other one
2757  if c21 not in mapped_residues: continue
2758  c2T = chain_mapping[c21]
2759  # flip if needed
2760  flipped_chains = (c1T > c2T)
2761  if flipped_chains:
2762  c12, c22 = c2T, c1T
2763  else:
2764  c12, c22 = c1T, c2T
2765  # skip early if no contacts there
2766  if c12 not in contacts_2 or c22 not in contacts_2[c12]: continue
2767  # loop over res.num. in c11
2768  for r11 in contacts_1[c11][c21]:
2769  # map to other one and skip if no contacts there
2770  if r11 not in mapped_residues[c11]: continue
2771  r1T = mapped_residues[c11][r11]
2772  # loop over res.num. in c21
2773  for r21 in contacts_1[c11][c21][r11]:
2774  # map to other one and skip if no contacts there
2775  if r21 not in mapped_residues[c21]: continue
2776  r2T = mapped_residues[c21][r21]
2777  # flip if needed
2778  if flipped_chains:
2779  r12, r22 = r2T, r1T
2780  else:
2781  r12, r22 = r1T, r2T
2782  # skip early if no contacts there
2783  if r12 not in contacts_2[c12][c22]: continue
2784  if r22 not in contacts_2[c12][c22][r12]: continue
2785  # ok now we can finally do the scoring
2786  d1 = contacts_1[c11][c21][r11][r21]
2787  d2 = contacts_2[c12][c22][r12][r22]
2788  weight = _weight(min(d1, d2))
2789  weighted_scores += weight * (1 - abs(d1 - d2) / 12)
2790  weight_sum += weight
2791  # keep track of done ones
2792  done_1.add((c11, c21, r11, r21))
2793  done_2.add((c12, c22, r12, r22))
2794 
2795  LogVerbose("Shared contacts: %d" % len(done_1))
2796 
2797  # add the extra weights
2798  weights_extra_1 = _GetExtraWeights(contacts_1, done_1, mapped_residues)
2799  mapped_residues_2 = dict()
2800  for c1 in mapped_residues:
2801  c2 = chain_mapping[c1]
2802  mapped_residues_2[c2] = set()
2803  for r1 in mapped_residues[c1]:
2804  mapped_residues_2[c2].add(mapped_residues[c1][r1])
2805  weights_extra_2 = _GetExtraWeights(contacts_2, done_2, mapped_residues_2)
2806  weight_extra_mapped = weights_extra_1[0] + weights_extra_2[0]
2807  weight_extra_all = weights_extra_1[1] + weights_extra_2[1]
2808 
2809  # get scores
2810  denom_best = weight_sum + weight_extra_mapped
2811  denom_all = weight_sum + weight_extra_all
2812  if denom_best == 0:
2813  QS_best = 0
2814  else:
2815  QS_best = weighted_scores / denom_best
2816  if denom_all == 0:
2817  QS_global = 0
2818  else:
2819  QS_global = weighted_scores / denom_all
2820  return QS_best, QS_global
2821 
2822 def _weight(dist):
2823  """
2824  This weight expresses the probability of two residues to interact given the CB
2825  distance (from Xu et al. http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2573399/)
2826  """
2827  if dist <= 5.0:
2828  return 1
2829  x = (dist-5.0)/4.28;
2830  return np.exp(-2*x*x)
2831 
2832 
2833 def _GetQsSuperposition(alns):
2834  """
2835  :return: Superposition result based on shared CA atoms in *alns*
2836  (with views attached) (see :attr:`QSscorer.superposition`).
2837  :param alns: See :attr:`QSscorer.alignments`
2838  """
2839  # check input
2840  if not alns:
2841  raise QSscoreError('No successful alignments for superposition!')
2842  # prepare views
2843  view_1 = alns[0].GetSequence(0).attached_view.CreateEmptyView()
2844  view_2 = alns[0].GetSequence(1).attached_view.CreateEmptyView()
2845  # collect CA from alignments in proper order
2846  for aln in alns:
2847  for col in aln:
2848  res_1 = col.GetResidue(0)
2849  res_2 = col.GetResidue(1)
2850  if res_1.IsValid() and res_2.IsValid():
2851  ca_1 = res_1.FindAtom('CA')
2852  ca_2 = res_2.FindAtom('CA')
2853  if ca_1.IsValid() and ca_2.IsValid():
2854  view_1.AddAtom(ca_1)
2855  view_2.AddAtom(ca_2)
2856  # superpose them without chainging entities
2857  res = mol.alg.SuperposeSVD(view_1, view_2, apply_transform=False)
2858  return res
2859 
2860 
2861 def _AddResidue(edi, res, rnum, chain, calpha_only):
2862  """
2863  Add residue *res* with res. num. *run* to given *chain* using editor *edi*.
2864  Either all atoms added or (if *calpha_only*) only CA.
2865  """
2866  if calpha_only:
2867  ca_atom = res.FindAtom('CA')
2868  if ca_atom.IsValid():
2869  new_res = edi.AppendResidue(chain, res.name, rnum)
2870  edi.InsertAtom(new_res, ca_atom.name, ca_atom.pos)
2871  else:
2872  new_res = edi.AppendResidue(chain, res.name, rnum)
2873  for atom in res.atoms:
2874  edi.InsertAtom(new_res, atom.name, atom.pos)
2875 
2876 def _MergeAlignedChains(alns, ent_1, ent_2, calpha_only, penalize_extra_chains):
2877  """
2878  Create two new entities (based on the alignments attached views) where all
2879  residues have same numbering (when they're aligned) and they are all pushed to
2880  a single chain X. Also append extra chains contained in *ent_1* and *ent_2*
2881  but not contained in *alns*.
2882 
2883  Used for :attr:`QSscorer.lddt_ref` and :attr:`QSscorer.lddt_mdl`
2884 
2885  :param alns: List of alignments with attached views (first sequence: *ent_1*,
2886  second: *ent_2*). Residue number in single chain is column index
2887  of current alignment + sum of lengths of all previous alignments
2888  (order of alignments as in input list).
2889  :type alns: See :attr:`QSscorer.alignments`
2890  :param ent_1: First entity to process.
2891  :type ent_1: :class:`~ost.mol.EntityHandle`
2892  :param ent_2: Second entity to process.
2893  :type ent_2: :class:`~ost.mol.EntityHandle`
2894  :param calpha_only: If True, we only include CA atoms instead of all.
2895  :type calpha_only: :class:`bool`
2896  :param penalize_extra_chains: If True, extra chains are added to model and
2897  reference. Otherwise, only mapped ones.
2898  :type penalize_extra_chains: :class:`bool`
2899 
2900  :return: Tuple of two single chain entities (from *ent_1* and from *ent_2*)
2901  :rtype: :class:`tuple` of :class:`~ost.mol.EntityHandle`
2902  """
2903  # first new entity
2904  ent_ren_1 = mol.CreateEntity()
2905  ed_1 = ent_ren_1.EditXCS()
2906  new_chain_1 = ed_1.InsertChain('X')
2907  # second one
2908  ent_ren_2 = mol.CreateEntity()
2909  ed_2 = ent_ren_2.EditXCS()
2910  new_chain_2 = ed_2.InsertChain('X')
2911  # the alignment already contains sorted chains
2912  rnum = 0
2913  chain_done_1 = set()
2914  chain_done_2 = set()
2915  for aln in alns:
2916  chain_done_1.add(aln.GetSequence(0).name)
2917  chain_done_2.add(aln.GetSequence(1).name)
2918  for col in aln:
2919  rnum += 1
2920  # add valid residues to single chain entities
2921  res_1 = col.GetResidue(0)
2922  if res_1.IsValid():
2923  _AddResidue(ed_1, res_1, rnum, new_chain_1, calpha_only)
2924  res_2 = col.GetResidue(1)
2925  if res_2.IsValid():
2926  _AddResidue(ed_2, res_2, rnum, new_chain_2, calpha_only)
2927  # extra chains?
2928  if penalize_extra_chains:
2929  for chain in ent_1.chains:
2930  if chain.name in chain_done_1:
2931  continue
2932  for res in chain.residues:
2933  rnum += 1
2934  _AddResidue(ed_1, res, rnum, new_chain_1, calpha_only)
2935  for chain in ent_2.chains:
2936  if chain.name in chain_done_2:
2937  continue
2938  for res in chain.residues:
2939  rnum += 1
2940  _AddResidue(ed_2, res, rnum, new_chain_2, calpha_only)
2941  # get entity names
2942  ent_ren_1.SetName(aln.GetSequence(0).GetAttachedView().GetName())
2943  ent_ren_2.SetName(aln.GetSequence(1).GetAttachedView().GetName())
2944  # connect atoms
2945  if not conop.GetDefaultLib():
2946  raise RuntimeError("QSscore computation requires a compound library!")
2947  pr = conop.RuleBasedProcessor(conop.GetDefaultLib())
2948  pr.Process(ent_ren_1)
2949  ed_1.UpdateICS()
2950  pr.Process(ent_ren_2)
2951  ed_2.UpdateICS()
2952  return ent_ren_1, ent_ren_2
2953 
2954 
2955 # specify public interface
2956 __all__ = ('QSscoreError', 'QSscorer', 'QSscoreEntity', 'FilterContacts',
2957  'GetContacts', 'OligoLDDTScorer', 'MappedLDDTScorer')
def _ComputeAlignedEntities
Class internal helpers (anything that doesnt easily work without this class)
Definition: qsscoring.py:530
def _GetSuperposeData
Class internal helpers (anything that doesnt easily work without this class)
Definition: qsscoring.py:804
Mat4 DLLEXPORT_OST_GEOM Invert(const Mat4 &m)
def _InitScorer
Class internal helpers (anything that doesnt easily work without this class)
Definition: qsscoring.py:1392
Real Distance(const Vec3 &p1, const Vec3 &p2)
returns the distance between two points
Definition: vecmat3_op.hh:199
Three dimensional vector class, using Real precision.
Definition: vec3.hh:42
def _PrepareOligoEntities
Class internal helpers.
Definition: qsscoring.py:1194