OpenStructure
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
superpose.py
Go to the documentation of this file.
1 """
2 Superposition of structures made simple.
3 
4 Authors: Stefan Bienert
5 """
6 
7 import math
8 import ost.mol.alg
9 import ost.seq.alg
10 
11 def ParseAtomNames(atoms):
12  """
13  Parses different representations of a list of atom names and returns a
14  :class:`set`, understandable by :func:`~ost.mol.alg.MatchResidueByNum`. In
15  essence, this function translates
16 
17  * None to ``None``
18 
19  * 'all' to ``None``
20 
21  * 'backbone' to ``set(['N', 'CA', 'C', 'O'])``
22 
23  * 'aname1, aname2' to ``set(['aname1', 'aname2'])``
24 
25  * ``['aname1', 'aname2']`` to ``set(['aname1', 'aname2'])``
26 
27  :param atoms: Identifier or list of atoms
28  :type atoms: :class:`str`, :class:`list`, :class:`set`
29  :returns: A :class:`set` of atoms.
30  """
31  ## get a set of atoms or None
32  if atoms==None:
33  return None
34  if isinstance(atoms, str):
35  if atoms.upper()=='ALL':
36  return None
37  if atoms.upper()=='BACKBONE':
38  return set(['N', 'CA', 'C', 'O'])
39  ## if no recognised expression, split at ','
40  return set([a.strip() for a in atoms.split(',')])
41  return set(atoms)
42 
43 
44 def _EmptyView(view):
45  """
46  for internal use, only
47  """
48  if isinstance(view, ost.mol.EntityHandle):
49  return view.CreateEmptyView()
50  return view.handle.CreateEmptyView()
51 
52 
53 def _fetch_atoms(r_a, r_b, result_a, result_b, atmset):
54  """
55  for internal use, only
56  """
57  ## compare atoms of residues
58  for a_a in r_a.GetAtomList():
59  if atmset==None or a_a.name in atmset:
60  a_b = r_b.FindAtom(a_a.name)
61  if a_b.IsValid():
62  result_a.AddAtom(a_a)
63  result_b.AddAtom(a_b)
64  return result_a, result_b
65 
66 
67 def _no_of_chains(ent_a, ent_b):
68  """
69  for internal use, only
70  """
71  ## get lower no. of chains
72  if ent_a.chain_count < ent_b.chain_count:
73  return ent_a.chain_count
74  return ent_b.chain_count
75 
76 
77 def MatchResidueByNum(ent_a, ent_b, atoms='all'):
78  """
79  Returns a tuple of views containing exactly the same number of atoms.
80  Residues are matched by residue number. A subset of atoms to be included in
81  the views can be specified in the **atoms** argument. Regardless of what the
82  list of **atoms** says, only those present in two matched residues will be
83  included in the views. Chains are processed in the order they occur in the
84  entities. If **ent_a** and **ent_b** contain a different number of chains,
85  processing stops with the lower count.
86 
87  :param ent_a: The first entity
88  :type ent_a: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
89  :param ent_b: The second entity
90  :type ent_b: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
91  :param atoms: The subset of atoms to be included in the two views.
92  :type atoms: :class:`str`, :class:`list`, :class:`set`
93  :returns: Two :class:`~ost.mol.EntityView` instances with the same amount of
94  residues matched by number. Each residue will have the same number
95  & type of atoms.
96  """
97  ## init. final views
98  result_a=_EmptyView(ent_a)
99  result_b=_EmptyView(ent_b)
100  n_chains=_no_of_chains(ent_a, ent_b)
101  atmset=ParseAtomNames(atoms)
102  ## iterate chains
103  for i in range(0, n_chains):
104  chain_a=ent_a.chains[i]
105  chain_b=ent_b.chains[i]
106  residues_a=iter(chain_a.residues)
107  ## decide on order of residues
108  if chain_a.InSequence() and chain_b.InSequence():
109  residues_b=iter(chain_b.residues)
110  ## check residues & copy to views
111  try:
112  while True:
113  r_a=residues_a.next()
114  r_b=residues_b.next()
115  while r_a.number!=r_b.number:
116  while r_a.number<r_b.number:
117  r_a=residues_a.next()
118  while r_b.number<r_a.number:
119  r_b=residues_b.next()
120  assert r_a.number==r_b.number
121  result_a,result_b=_fetch_atoms(r_a, r_b, result_a, result_b, atmset)
122  except StopIteration:
123  pass
124  else:
125  ## iterate one list of residues, search in other list
126  try:
127  while True:
128  r_a=residues_a.next()
129  r_b=chain_b.FindResidue(r_a.number)
130  if r_b.IsValid():
131  result_a,result_b=_fetch_atoms(r_a, r_b, result_a, result_b, atmset)
132  except StopIteration:
133  pass
134  result_a.AddAllInclusiveBonds()
135  result_b.AddAllInclusiveBonds()
136  return result_a, result_b
137 
138 
139 def MatchResidueByIdx(ent_a, ent_b, atoms='all'):
140  """
141  Returns a tuple of views containing exactly the same number of atoms.
142  Residues are matched by position in the chains of an entity. A subset of
143  atoms to be included in the views can be specified in the **atoms** argument.
144  Regardless of what the list of **atoms** says, only those present in two
145  matched residues will be included in the views. Chains are processed in order
146  of appearance. If **ent_a** and **ent_b** contain a different number of
147  chains, processing stops with the lower count. The number of residues per
148  chain is supposed to be the same.
149 
150  :param ent_a: The first entity
151  :type ent_a: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
152  :param ent_b: The second entity
153  :type ent_b: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
154  :param atoms: The subset of atoms to be included in the two views.
155  :type atoms: :class:`str`, :class:`list`, :class:`set`
156  :returns: Two :class:`~ost.mol.EntityView` instances with the same amount of
157  residues matched by position. Each residue will have the same number
158  & type of atoms.
159  """
160  not_supported="MatchResidueByIdx has no support for chains of different "\
161  +"lengths"
162  ## init. final views
163  result_a=_EmptyView(ent_a)
164  result_b=_EmptyView(ent_b)
165  n_chains=_no_of_chains(ent_a, ent_b)
166  atmset=ParseAtomNames(atoms)
167  ## iterate chains
168  for i in range(0, n_chains):
169  chain_a=ent_a.chains[i]
170  chain_b=ent_b.chains[i]
171  ## check equal no. of residues
172  if chain_a.residue_count!=chain_b.residue_count:
173  raise RuntimeError(not_supported)
174  ## iterate residues & copy to views
175  for r_a, r_b in zip(chain_a.residues, chain_b.residues):
176  result_a,result_b=_fetch_atoms(r_a, r_b, result_a, result_b, atmset)
177  result_a.AddAllInclusiveBonds()
178  result_b.AddAllInclusiveBonds()
179  return result_a, result_b
180 
181 
182 def _MatchResidueByAln(ent_a, ent_b, atoms, alnmethod):
183  """
184  For internal use, only
185  """
186  ## init. final views
187  result_a = _EmptyView(ent_a)
188  result_b = _EmptyView(ent_b)
189  n_chains = _no_of_chains(ent_a, ent_b)
190  atmset = ParseAtomNames(atoms)
191  ## iterate chains
192  for i in range(0, n_chains):
193  chain_a = ent_a.chains[i]
194  chain_b = ent_b.chains[i]
195  ## fetch residues
196  s_a = ''.join([r.one_letter_code
197  for r in chain_a.Select('protein=True').residues])
198  s_b = ''.join([r.one_letter_code
199  for r in chain_b.Select('protein=True').residues])
200  ## create sequence from residue lists & alignment
201  seq_a = ost.seq.CreateSequence(chain_a.name, s_a)
202  seq_b = ost.seq.CreateSequence(chain_b.name, s_b)
203  aln_a_b = alnmethod(seq_a, seq_b, ost.seq.alg.BLOSUM62)
204  ## evaluate alignment
205  max_aln_res = 0
206  for a in range(0, len(aln_a_b)):
207  aln = aln_a_b[a]
208  aln_res_len = 0
209  match_list = list()
210  for i in range(0, aln.GetLength()):
211  if aln.sequences[0][i]!='-' and aln.sequences[1][i]!='-':
212  aln_res_len += 1
213  match_list.append(i)
214  if aln_res_len > max_aln_res:
215  max_aln_res = aln_res_len
216  max_aln_idx = a
217  max_matches = match_list
218 
219  aln = aln_a_b[max_aln_idx]
220  ## bind chain to alignment
221  aln.AttachView(0, chain_a.Select('protein=True'))
222  aln.AttachView(1, chain_b.Select('protein=True'))
223  ## select residues (only replacement edges)
224  for i in max_matches:
225  r_a = aln.GetResidue(0,i)
226  r_b = aln.GetResidue(1,i)
227  result_a,result_b=_fetch_atoms(r_a, r_b, result_a, result_b, atmset)
228  result_a.AddAllInclusiveBonds()
229  result_b.AddAllInclusiveBonds()
230  return result_a, result_b
231 
232 def MatchResidueByLocalAln(ent_a, ent_b, atoms='all'):
233  """
234  Match residues by local alignment. Takes **ent_a** and **ent_b**, extracts
235  the sequences chain-wise and aligns them in Smith/Waterman manner using the
236  BLOSUM62 matrix for scoring. The residues of the entities are then matched
237  based on this alignment. Only atoms present in both residues are included in
238  the views. Chains are processed in order of appearance. If **ent_a** and
239  **ent_b** contain a different number of chains, processing stops with
240  the lower count.
241 
242  :param ent_a: The first entity
243  :type ent_a: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
244  :param ent_b: The second entity
245  :type ent_b: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
246  :param atoms: The subset of atoms to be included in the two views.
247  :type atoms: :class:`str`, :class:`list`, :class:`set`
248  :returns: Two :class:`~ost.mol.EntityView` instances with the same number of
249  residues. Each residue will have the same number & type of atoms.
250  """
251  return _MatchResidueByAln(ent_a, ent_b, atoms, ost.seq.alg.LocalAlign)
252 
253 def MatchResidueByGlobalAln(ent_a, ent_b, atoms='all'):
254  """
255  Match residues by global alignment. Takes **ent_a** and **ent_b**, extracts
256  the sequences chain-wise and aligns them in Needleman/Wunsch manner using the
257  BLOSUM62 matrix for scoring. The residues of the entities are then matched
258  based on this alignment. Only atoms present in both residues are included in
259  the views. Chains are processed in order of appearance. If **ent_a** and
260  **ent_b** contain a different number of chains, processing stops with
261  the lower count.
262 
263  :param ent_a: The first entity
264  :type ent_a: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
265  :param ent_b: The second entity
266  :type ent_b: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
267  :param atoms: The subset of atoms to be included in the two views.
268  :type atoms: :class:`str`, :class:`list`, :class:`set`
269  :returns: Two :class:`~ost.mol.EntityView` instances with the same number of
270  residues. Each residue will have the same number & type of atoms.
271  """
272  return _MatchResidueByAln(ent_a, ent_b, atoms, ost.seq.alg.GlobalAlign)
273 
274 
275 def Superpose(ent_a, ent_b, match='number', atoms='all', iterative=False, max_iterations=5, distance_threshold=3.0):
276  """
277  Superposes the model entity onto the reference. To do so, two views are
278  created, returned with the result. **atoms** describes what goes into these
279  views and **match** the selection method. For superposition,
280  :func:`~ost.mol.alg.SuperposeSVD` is called. For matching, the following methods
281  are recognised:
282 
283  * ``number`` - select residues by residue number, includes **atoms**, calls
284  :func:`~ost.mol.alg.MatchResidueByNum`
285 
286  * ``index`` - select residues by index in chain, includes **atoms**, calls
287  :func:`~ost.mol.alg.MatchResidueByIdx`
288 
289  * ``local-aln`` - select residues from a Smith/Waterman alignment, includes
290  **atoms**, calls :func:`~ost.mol.alg.MatchResidueByLocalAln`
291 
292  * ``global-aln`` - select residues from a Needleman/Wunsch alignment, includes
293  **atoms**, calls :func:`~ost.mol.alg.MatchResidueByGlobalAln`
294 
295  There is also an option to use **iterative** matching which allows for an
296  iterative approach to superposing two structures. **iterative** takes two
297  additional parameters, **max_iteration** and **distance_threshold**.
298 
299  :param ent_a: The model entity
300  :type ent_a: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
301 
302  :param ent_b: The reference entity
303  :type ent_b: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
304 
305  :param match: Method to gather residues/ atoms
306  :type match: :class:`str`
307 
308  :param atoms: The subset of atoms to be used in the superposition
309  :type atoms: :class:`str`, :class:`list`, :class:`set`
310 
311  :param max_iterations: They number of iterations that will be run during
312  iterative superposition
313  :type max_iterations: :class:`int`
314 
315  :param distance_threshold: The distance threshold between which two atoms
316  that will be used in the next superposition
317  iteration
318  :type distance_threshold: :class:`float`
319 
320  :returns: An instance of :class:`SuperpositionResult`, containing members
321 
322  * ``rmsd`` - RMSD of the superposed entities
323 
324  * ``view1`` - First :class:`~ost.mol.EntityView` used
325 
326  * ``view2`` - Second :class:`~ost.mol.EntityView` used
327  """
328  not_supported="Superpose called with unsupported matching request."
329  ## create views to superpose
330  if match.upper() == 'NUMBER':
331  view_a, view_b = MatchResidueByNum(ent_a, ent_b, atoms)
332  elif match.upper() == 'INDEX':
333  view_a, view_b=MatchResidueByIdx(ent_a, ent_b, atoms)
334  elif match.upper() == 'LOCAL-ALN':
335  view_a, view_b=_MatchResidueByAln(ent_a, ent_b, atoms,
336  ost.seq.alg.LocalAlign)
337  elif match.upper() == 'GLOBAL-ALN':
338  view_a, view_b=_MatchResidueByAln(ent_a, ent_b, atoms,
339  ost.seq.alg.GlobalAlign)
340  else:
341  raise ValueError(not_supported)
342  ## action
343  if iterative:
344  res=ost.mol.alg.IterativeSuperposeSVD(view_a, view_b, max_iterations, distance_threshold)
345  else:
346  res=ost.mol.alg.SuperposeSVD(view_a, view_b)
347  return res