OpenStructure
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
superpose.py
Go to the documentation of this file.
1 """
2 Superposition of structures made simple.
3 
4 Authors: Stefan Bienert
5 """
6 
7 import math
8 import ost.mol.alg
9 import ost.seq.alg
10 
11 def ParseAtomNames(atoms):
12  """
13  Parses different representations of a list of atom names and returns a
14  :class:`set`, understandable by :func:`~ost.mol.alg.MatchResidueByNum`. In
15  essence, this function translates
16 
17  * None to ``None``
18 
19  * 'all' to ``None``
20 
21  * 'backbone' to ``set(['N', 'CA', 'C', 'O'])``
22 
23  * 'aname1, aname2' to ``set(['aname1', 'aname2'])``
24 
25  * ``['aname1', 'aname2']`` to ``set(['aname1', 'aname2'])``
26 
27  :param atoms: Identifier or list of atoms
28  :type atoms: :class:`str`, :class:`list`, :class:`set`
29  :returns: A :class:`set` of atoms.
30  """
31  ## get a set of atoms or None
32  if atoms==None:
33  return None
34  if isinstance(atoms, str):
35  if atoms.upper()=='ALL':
36  return None
37  if atoms.upper()=='BACKBONE':
38  return set(['N', 'CA', 'C', 'O'])
39  ## if no recognised expression, split at ','
40  return set([a.strip() for a in atoms.split(',')])
41  return set(atoms)
42 
43 
44 def _EmptyView(view):
45  """
46  for internal use, only
47  """
48  if isinstance(view, ost.mol.EntityHandle):
49  return view.CreateEmptyView()
50  return view.handle.CreateEmptyView()
51 
52 
53 def _fetch_atoms(r_a, r_b, result_a, result_b, atmset):
54  """
55  for internal use, only
56  """
57  ## compare atoms of residues
58  for a_a in r_a.GetAtomList():
59  if atmset==None or a_a.name in atmset:
60  a_b = r_b.FindAtom(a_a.name)
61  if a_b.IsValid():
62  result_a.AddAtom(a_a)
63  result_b.AddAtom(a_b)
64  return result_a, result_b
65 
66 
67 def _no_of_chains(ent_a, ent_b):
68  """
69  for internal use, only
70  """
71  ## get lower no. of chains
72  if ent_a.chain_count < ent_b.chain_count:
73  return ent_a.chain_count
74  return ent_b.chain_count
75 
76 
77 def MatchResidueByNum(ent_a, ent_b, atoms='all'):
78  """
79  Returns a tuple of views containing exactly the same number of atoms.
80  Residues are matched by residue number. A subset of atoms to be included in
81  the views can be specified in the **atoms** argument. Regardless of what the
82  list of **atoms** says, only those present in two matched residues will be
83  included in the views. Chains are processed in the order they occur in the
84  entities. If **ent_a** and **ent_b** contain a different number of chains,
85  processing stops with the lower count.
86 
87  :param ent_a: The first entity
88  :type ent_a: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
89  :param ent_b: The second entity
90  :type ent_b: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
91  :param atoms: The subset of atoms to be included in the two views.
92  :type atoms: :class:`str`, :class:`list`, :class:`set`
93  :returns: Two :class:`~ost.mol.EntityView` instances with the same amount of
94  residues matched by number. Each residue will have the same number
95  & type of atoms.
96  """
97  ## init. final views
98  result_a=_EmptyView(ent_a)
99  result_b=_EmptyView(ent_b)
100  n_chains=_no_of_chains(ent_a, ent_b)
101  atmset=ParseAtomNames(atoms)
102  ## iterate chains
103  for i in range(0, n_chains):
104  chain_a=ent_a.chains[i]
105  chain_b=ent_b.chains[i]
106  residues_a=iter(chain_a.residues)
107  ## decide on order of residues
108  if chain_a.InSequence() and chain_b.InSequence():
109  residues_b=iter(chain_b.residues)
110  ## check residues & copy to views
111  try:
112  while True:
113  r_a=residues_a.next()
114  r_b=residues_b.next()
115  while r_a.number!=r_b.number:
116  while r_a.number<r_b.number:
117  r_a=residues_a.next()
118  while r_b.number<r_a.number:
119  r_b=residues_b.next()
120  assert r_a.number==r_b.number
121  result_a,result_b=_fetch_atoms(r_a, r_b, result_a, result_b, atmset)
122  except StopIteration:
123  pass
124  else:
125  ## iterate one list of residues, search in other list
126  try:
127  while True:
128  r_a=residues_a.next()
129  r_b=chain_b.FindResidue(r_a.number)
130  if r_b.IsValid():
131  result_a,result_b=_fetch_atoms(r_a, r_b, result_a, result_b, atmset)
132  except StopIteration:
133  pass
134  result_a.AddAllInclusiveBonds()
135  result_b.AddAllInclusiveBonds()
136  return result_a, result_b
137 
138 
139 def MatchResidueByIdx(ent_a, ent_b, atoms='all'):
140  """
141  Returns a tuple of views containing exactly the same number of atoms.
142  Residues are matched by position in the chains of an entity. A subset of
143  atoms to be included in the views can be specified in the **atoms** argument.
144  Regardless of what the list of **atoms** says, only those present in two
145  matched residues will be included in the views. Chains are processed in order
146  of appearance. If **ent_a** and **ent_b** contain a different number of
147  chains, processing stops with the lower count. The number of residues per
148  chain is supposed to be the same.
149 
150  :param ent_a: The first entity
151  :type ent_a: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
152  :param ent_b: The second entity
153  :type ent_b: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
154  :param atoms: The subset of atoms to be included in the two views.
155  :type atoms: :class:`str`, :class:`list`, :class:`set`
156  :returns: Two :class:`~ost.mol.EntityView` instances with the same amount of
157  residues matched by position. Each residue will have the same number
158  & type of atoms.
159  """
160  not_supported="MatchResidueByIdx has no support for chains of different "\
161  +"lengths"
162  ## init. final views
163  result_a=_EmptyView(ent_a)
164  result_b=_EmptyView(ent_b)
165  n_chains=_no_of_chains(ent_a, ent_b)
166  atmset=ParseAtomNames(atoms)
167  ## iterate chains
168  for i in range(0, n_chains):
169  chain_a=ent_a.chains[i]
170  chain_b=ent_b.chains[i]
171  ## check equal no. of residues
172  if chain_a.residue_count!=chain_b.residue_count:
173  raise RuntimeError(not_supported)
174  ## iterate residues & copy to views
175  for r_a, r_b in zip(chain_a.residues, chain_b.residues):
176  result_a,result_b=_fetch_atoms(r_a, r_b, result_a, result_b, atmset)
177  result_a.AddAllInclusiveBonds()
178  result_b.AddAllInclusiveBonds()
179  return result_a, result_b
180 
181 
182 def _MatchResidueByAln(ent_a, ent_b, atoms, alnmethod):
183  """
184  For internal use, only
185  """
186  ## init. final views
187  result_a = _EmptyView(ent_a)
188  result_b = _EmptyView(ent_b)
189  n_chains = _no_of_chains(ent_a, ent_b)
190  atmset = ParseAtomNames(atoms)
191  ## iterate chains
192  for i in range(0, n_chains):
193  chain_a = ent_a.chains[i]
194  chain_b = ent_b.chains[i]
195  ## fetch residues
196  s_a = ''.join([r.one_letter_code
197  for r in chain_a.Select('protein=True').residues])
198  s_b = ''.join([r.one_letter_code
199  for r in chain_b.Select('protein=True').residues])
200  ## create sequence from residue lists & alignment
201  seq_a = ost.seq.CreateSequence(chain_a.name, s_a)
202  seq_b = ost.seq.CreateSequence(chain_b.name, s_b)
203  aln_a_b = alnmethod(seq_a, seq_b, ost.seq.alg.BLOSUM62)
204  ## evaluate alignment
205  for aln in aln_a_b:
206  ## bind chain to alignment
207  aln.AttachView(0, chain_a.Select('protein=True'))
208  aln.AttachView(1, chain_b.Select('protein=True'))
209  ## select residues (only replacement edges)
210  for i in range(0, aln.GetLength()):
211  if aln.sequences[0][i]!='-' and aln.sequences[1][i]!='-':
212  r_a = aln.GetResidue(0,i)
213  r_b = aln.GetResidue(1,i)
214  result_a,result_b=_fetch_atoms(r_a, r_b, result_a, result_b, atmset)
215  result_a.AddAllInclusiveBonds()
216  result_b.AddAllInclusiveBonds()
217  return result_a, result_b
218 
219 def MatchResidueByLocalAln(ent_a, ent_b, atoms='all'):
220  """
221  Match residues by local alignment. Takes **ent_a** and **ent_b**, extracts
222  the sequences chain-wise and aligns them in Smith/Waterman manner using the
223  BLOSUM62 matrix for scoring. The residues of the entities are then matched
224  based on this alignment. Only atoms present in both residues are included in
225  the views. Chains are processed in order of appearance. If **ent_a** and
226  **ent_b** contain a different number of chains, processing stops with
227  the lower count.
228 
229  :param ent_a: The first entity
230  :type ent_a: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
231  :param ent_b: The second entity
232  :type ent_b: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
233  :param atoms: The subset of atoms to be included in the two views.
234  :type atoms: :class:`str`, :class:`list`, :class:`set`
235  :returns: Two :class:`~ost.mol.EntityView` instances with the same number of
236  residues. Each residue will have the same number & type of atoms.
237  """
238  return _MatchResidueByAln(ent_a, ent_b, atoms, ost.seq.alg.LocalAlign)
239 
240 def MatchResidueByGlobalAln(ent_a, ent_b, atoms='all'):
241  """
242  Match residues by global alignment. Takes **ent_a** and **ent_b**, extracts
243  the sequences chain-wise and aligns them in Needleman/Wunsch manner using the
244  BLOSUM62 matrix for scoring. The residues of the entities are then matched
245  based on this alignment. Only atoms present in both residues are included in
246  the views. Chains are processed in order of appearance. If **ent_a** and
247  **ent_b** contain a different number of chains, processing stops with
248  the lower count.
249 
250  :param ent_a: The first entity
251  :type ent_a: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
252  :param ent_b: The second entity
253  :type ent_b: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
254  :param atoms: The subset of atoms to be included in the two views.
255  :type atoms: :class:`str`, :class:`list`, :class:`set`
256  :returns: Two :class:`~ost.mol.EntityView` instances with the same number of
257  residues. Each residue will have the same number & type of atoms.
258  """
259  return _MatchResidueByAln(ent_a, ent_b, atoms, ost.seq.alg.GlobalAlign)
260 
261 
262 def Superpose(ent_a, ent_b, match='number', atoms='all'):
263  """
264  Superposes the model entity onto the reference. To do so, two views are
265  created, returned with the result. **atoms** describes what goes in to these
266  views and **match** the selection method. For superposition,
267  :func:`~ost.mol.alg.SuperposeSVD` is called. For matching, following methods
268  are recognised:
269 
270  * ``number`` - select residues by residue number, includes **atoms**, calls
271  :func:`~ost.mol.alg.MatchResidueByNum`
272 
273  * ``index`` - select residues by index in chain, includes **atoms**, calls
274  :func:`~ost.mol.alg.MatchResidueByIdx`
275 
276  * ``local-aln`` - select residues from a Smith/Waterman alignment, includes
277  **atoms**, calls :func:`~ost.mol.alg.MatchResidueByLocalAln`
278 
279  * ``global-aln`` - select residues from a Needleman/Wunsch alignment, includes
280  **atoms**, calls :func:`~ost.mol.alg.MatchResidueByGlobalAln`
281 
282  :param ent_a: The model entity
283  :type ent_a: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
284  :param ent_b: The reference entity
285  :type ent_b: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
286  :param match: Method to gather residues/ atoms
287  :type match: :class:`str`
288  :param atoms: The subset of atoms to be used in the superposition.
289  :type atoms: :class:`str`, :class:`list`, :class:`set`
290  :returns: An instance of :class:`SuperpositionResult`, containing members
291 
292  * ``rmsd`` - RMSD of the superposed entities
293 
294  * ``view1`` - First :class:`~ost.mol.EntityView` used
295 
296  * ``view2`` - Second :class:`~ost.mol.EntityView` used
297  """
298  not_supported="Superpose called with unsupported matching request."
299  ## create views to superpose
300  if match.upper() == 'NUMBER':
301  view_a, view_b = MatchResidueByNum(ent_a, ent_b, atoms)
302  elif match.upper() == 'INDEX':
303  view_a, view_b=MatchResidueByIdx(ent_a, ent_b, atoms)
304  elif match.upper() == 'LOCAL-ALN':
305  view_a, view_b=_MatchResidueByAln(ent_a, ent_b, atoms,
306  ost.seq.alg.LocalAlign)
307  elif match.upper() == 'GLOBAL-ALN':
308  view_a, view_b=_MatchResidueByAln(ent_a, ent_b, atoms,
309  ost.seq.alg.GlobalAlign)
310  else:
311  raise ValueError(not_supported)
312  ## action
313  res=ost.mol.alg.SuperposeSVD(view_a, view_b)
314  return res