OpenStructure
trajectory_analysis.py
Go to the documentation of this file.
1 """
2 **This Module requires numpy**
3 
4 This module contains functions to analyze trajectories, mainly
5 similiraty measures baed on RMSDS and pairwise distances.
6 
7 Author: Niklaus Johner (niklaus.johner@unibas.ch)
8 """
9 
10 import ost.mol.alg
11 import ost.geom
12 from ost import LogError
13 import os
14 
15 def smooth(vec,n):
16 #Function to smooth a vector or a list of floats
17 #for each element it takes the average over itself and the
18 #n elements on each side, so over (2n+1) elements
19  try:
20  vec2=vec.copy()
21  except:
22  vec2=vec[:]
23  for i in range(n):
24  v=0.0
25  count=1.0
26  v+=vec[i]
27  for j in range(n):
28  count+=1
29  v+=vec[i+j+1]
30  for j in range(i):
31  count+=1
32  v+=vec[i-(j+1)]
33  vec2[i]=v/float(count)
34  for i in range(1,n+1):
35  v=0.0
36  count=1.0
37  v+=vec[-i]
38  for j in range(n):
39  count+=1
40  v+=vec[-(i+j+1)]
41  for j in range(i-1):
42  count+=1
43  v+=vec[-i+j+1]
44  vec2[-i]=v/float(count)
45  for i in range(n,len(vec2)-n):
46  v=vec[i]
47  for j in range(n):
48  v+=vec[i+j+1]
49  v+=vec[i-j-1]
50  vec2[i]=v/float(2.*n+1.)
51  return vec2
52 
53 
54 """
55 From here on the module needs numpy
56 """
57 
58 def RMSD_Matrix_From_Traj(t,sele,first=0,last=-1,align=True,align_sele=None):
59  """
60  This function calculates a matrix M such that M[i,j] is the
61  RMSD (calculated on **sele**) between frames i and j of the trajectory **t**
62  aligned on sele.
63 
64  :param t: the trajectory
65  :param sele: the selection used for alignment and RMSD calculation
66  :param first: the first frame of t to be used
67  :param last: the last frame of t to be used
68  :type t: :class:`~ost.mol.CoordGroupHandle`
69  :type sele: :class:`~ost.mol.EntityView`
70  :type first: :class:`int`
71  :type last: :class:`int`
72 
73  :return: Returns a numpy N\\ :subscript:`frames`\\ xN\\ :subscript:`frames` matrix,
74  where N\\ :subscript:`frames` is the number of frames.
75  """
76  if not align_sele:align_sele=sele
77  try:
78  import numpy as npy
79  if last==-1:last=t.GetFrameCount()
80  n_frames=last-first
81  rmsd_matrix=npy.identity(n_frames)
82  for i in range(n_frames):
83  if align:
84  t=ost.mol.alg.SuperposeFrames(t,align_sele,begin=first,end=last,ref=i)
85  eh=t.GetEntity()
86  t.CopyFrame(i)
87  rmsd_matrix[i,:]=ost.mol.alg.AnalyzeRMSD(t,sele,sele)
88  if i==0:
89  last=last-first
90  first=0
91  return rmsd_matrix
92  except ImportError:
93  LogError("Function needs numpy, but I could not import it.")
94  raise
95 
96 
97 def PairwiseDistancesFromTraj(t,sele,first=0,last=-1,seq_sep=1):
98  """
99  This function calculates the distances between any pair of atoms in **sele**
100  with sequence separation larger than **seq_sep** from a trajectory **t**.
101  It return a matrix containing one line for each atom pair and N\\ :subscript:`frames` columns, where
102  N\\ :subscript:`frames` is the number of frames in the trajectory.
103 
104  :param t: the trajectory
105  :param sele: the selection used to determine the atom pairs
106  :param first: the first frame of t to be used
107  :param last: the last frame of t to be used
108  :param seq_sep: The minimal sequence separation between atom pairs
109  :type t: :class:`~ost.mol.CoordGroupHandle`
110  :type sele: :class:`~ost.mol.EntityView`
111  :type first: :class:`int`
112  :type last: :class:`int`
113  :type seq_sep: :class:`int`
114 
115  :return: a numpy N\\ :subscript:`pairs`\\ xN\\ :subscript:`frames` matrix.
116  """
117  try:
118  import numpy as npy
119  if last==-1:last=t.GetFrameCount()
120  n_frames=last-first
121  n_var=0
122  for i,a1 in enumerate(sele.atoms):
123  for j,a2 in enumerate(sele.atoms):
124  if not j-i<seq_sep:n_var+=1
125  #n_var=sele.GetAtomCount()
126  #n_var=(n_var-1)*(n_var)/2.
127  dist_matrix=npy.zeros(n_frames*n_var)
128  dist_matrix=dist_matrix.reshape(n_var,n_frames)
129  k=0
130  for i,a1 in enumerate(sele.atoms):
131  for j,a2 in enumerate(sele.atoms):
132  if j-i<seq_sep:continue
133  dist_matrix[k]=ost.mol.alg.AnalyzeDistanceBetwAtoms(t,a1.GetHandle(),a2.GetHandle())[first:last]
134  k+=1
135  return dist_matrix
136  except ImportError:
137  LogError("Function needs numpy, but I could not import it.")
138  raise
139 
141  """
142  This function calculates an distance matrix M(N\\ :subscript:`frames`\\ xN\\ :subscript:`frames`\\ ) from
143  the pairwise distances matrix D(N\\ :subscript:`pairs`\\ xN\\ :subscript:`frames`\\ ), where
144  N\\ :subscript:`frames` is the number of frames in the trajectory
145  and N\\ :subscript:`pairs` the number of atom pairs.
146  M[i,j] is the distance between frame i and frame j
147  calculated as a p-norm of the differences in distances
148  from the two frames (distance-RMSD for p=2).
149 
150  :param distances: a pairwise distance matrix as obtained from
151  :py:func:`~mol.alg.trajectory_analysis.PairwiseDistancesFromTraj`
152  :param p: exponent used for the p-norm.
153 
154  :return: a numpy N\\ :subscript:`frames`\\ xN\\ :subscript:`frames` matrix, where N\\ :subscript:`frames`
155  is the number of frames.
156  """
157  try:
158  import numpy as npy
159  n1=distances.shape[0]
160  n2=distances.shape[1]
161  dist_mat=npy.identity(n2)
162  for i in range(n2):
163  for j in range(n2):
164  if j<=i:continue
165  d=(((abs(distances[:,i]-distances[:,j])**p).sum())/float(n1))**(1./p)
166  dist_mat[i,j]=d
167  dist_mat[j,i]=d
168  return dist_mat
169  except ImportError:
170  LogError("Function needs numpy, but I could not import it.")
171  raise
172 
173 def DistRMSDFromTraj(t,sele,ref_sele,radius=7.0,average=False,seq_sep=4,first=0,last=-1):
174  """
175  This function calculates the distance RMSD from a trajectory.
176  The distances selected for the calculation are all the distances
177  between pair of atoms from residues that are at least **seq_sep** apart
178  in the sequence and that are smaller than **radius** in **ref_sel**.
179  The number and order of atoms in **ref_sele** and **sele** should be the same.
180 
181  :param t: the trajectory
182  :param sele: the selection used to calculate the distance RMSD
183  :param ref_sele: the reference selection used to determine the atom pairs and reference distances
184  :param radius: the upper limit of distances in ref_sele considered for the calculation
185  :param seq_sep: the minimal sequence separation between atom pairs considered for the calculation
186  :param average: use the average distance in the trajectory as reference instead of the distance obtained from ref_sele
187  :param first: the first frame of t to be used
188  :param last: the last frame of t to be used
189 
190  :type t: :class:`~ost.mol.CoordGroupHandle`
191  :type sele: :class:`~ost.mol.EntityView`
192  :type ref_sele: :class:`~ost.mol.EntityView`
193  :type radius: :class:`float`
194  :type average: :class:`bool`
195  :type first: :class:`int`
196  :type last: :class:`int`
197  :type seq_sep: :class:`int`
198 
199  :return: a numpy vecor dist_rmsd(N\\ :subscript:`frames`).
200  """
201  if not sele.GetAtomCount()==ref_sele.GetAtomCount():
202  print('Not same number of atoms in the two views')
203  return
204  try:
205  import numpy as npy
206  if last==-1:last=t.GetFrameCount()
207  n_frames=last-first
208  dist_rmsd=npy.zeros(n_frames)
209  pair_count=0.0
210  for i,a1 in enumerate(ref_sele.atoms):
211  for j,a2 in enumerate(ref_sele.atoms):
212  if j<=i:continue
213  r1=a1.GetResidue()
214  c1=r1.GetChain()
215  r2=a2.GetResidue()
216  c2=r2.GetChain()
217  if c1==c2 and abs(r2.GetNumber().num-r1.GetNumber().num)<seq_sep:continue
218  d=ost.geom.Distance(a1.pos,a2.pos)
219  if d<radius:
220  a3=sele.atoms[i]
221  a4=sele.atoms[j]
222  d_traj=ost.mol.alg.AnalyzeDistanceBetwAtoms(t,a3.GetHandle(),a4.GetHandle())[first:last]
223  if average:d=npy.mean(d_traj)
224  for k,el in enumerate(d_traj):
225  dist_rmsd[k]+=(el-d)**2.0
226  pair_count+=1.0
227  return (dist_rmsd/float(pair_count))**0.5
228  except ImportError:
229  LogError("Function needs numpy, but I could not import it.")
230  raise
231 
232 def AverageDistanceMatrixFromTraj(t,sele,first=0,last=-1):
233  """
234  This function calcultes the distance between each pair of atoms
235  in **sele**, averaged over the trajectory **t**.
236 
237  :param t: the trajectory
238  :param sele: the selection used to determine the atom pairs
239  :param first: the first frame of t to be used
240  :param last: the last frame of t to be used
241  :type t: :class:`~ost.mol.CoordGroupHandle`
242  :type sele: :class:`~ost.mol.EntityView`
243  :type first: :class:`int`
244  :type last: :class:`int`
245 
246  :return: a numpy N\\ :subscript:`pairs`\\ xN\\ :subscript:`pairs` matrix, where N\\ :subscript:`pairs`
247  is the number of atom pairs in **sele**.
248  """
249  try:
250  import numpy as npy
251  except ImportError:
252  LogError("Function needs numpy, but I could not import it.")
253  raise
254  n_atoms=sele.GetAtomCount()
255  M=npy.zeros([n_atoms,n_atoms])
256  for i,a1 in enumerate(sele.atoms):
257  for j,a2 in enumerate(sele.atoms):
258  if j>i:continue
259  d=ost.mol.alg.AnalyzeDistanceBetwAtoms(t,a1.GetHandle(),a2.GetHandle())[first:last]
260  M[i,j]=npy.mean(d)
261  M[j,i]=npy.mean(d)
262  return M
263 
264 def AnalyzeDistanceFluctuationMatrix(t,sele,first=0,last=-1):
265  try:
266  import numpy as npy
267  except ImportError:
268  LogError("Function needs numpy, but I could not import it.")
269  raise
270  n_atoms=sele.GetAtomCount()
271  M=npy.zeros([n_atoms,n_atoms])
272  for i,a1 in enumerate(sele.atoms):
273  for j,a2 in enumerate(sele.atoms):
274  if i>j:continue
275  d=ost.mol.alg.AnalyzeDistanceBetwAtoms(t,a1.GetHandle(),a2.GetHandle())[first:last]
276  M[j,i]=npy.std(d)
277  M[i,j]=npy.std(d)
278  return M
279 
280 def IterativeSuperposition(t,sele,threshold=1.0,initial_sele=None,iterations=5,ref_frame=0):
281  if initial_sele:current_sele=initial_sele
282  else: current_sele=sele
283  for i in range(iterations):
284  t=ost.mol.alg.SuperposeFrames(t,current_sele,ref=ref_frame)
285  al=[a for a in sele.atoms if ost.mol.alg.AnalyzeRMSF(t,ost.mol.CreateViewFromAtoms([a]))<threshold]
286  if len(al)==0:return
287  current_sele=ost.mol.CreateViewFromAtoms(al)
288  return current_sele
289 
def DistanceMatrixFromPairwiseDistances(distances, p=2)
def DistRMSDFromTraj(t, sele, ref_sele, radius=7.0, average=False, seq_sep=4, first=0, last=-1)
def IterativeSuperposition(t, sele, threshold=1.0, initial_sele=None, iterations=5, ref_frame=0)
def PairwiseDistancesFromTraj(t, sele, first=0, last=-1, seq_sep=1)
def AverageDistanceMatrixFromTraj(t, sele, first=0, last=-1)
def AnalyzeDistanceFluctuationMatrix(t, sele, first=0, last=-1)
def RMSD_Matrix_From_Traj(t, sele, first=0, last=-1, align=True, align_sele=None)
Real DLLEXPORT_OST_MOL_ALG AnalyzeRMSF(const CoordGroupHandle &traj, const EntityView &selection, int from=0, int to=-1, unsigned int stride=1)
std::vector< Real > DLLEXPORT_OST_MOL_ALG AnalyzeDistanceBetwAtoms(const CoordGroupHandle &traj, const AtomHandle &a1, const AtomHandle &a2, unsigned int stride=1)
std::vector< Real > DLLEXPORT_OST_MOL_ALG AnalyzeRMSD(const CoordGroupHandle &traj, const EntityView &reference_view, const EntityView &sele, unsigned int stride=1)
CoordGroupHandle DLLEXPORT_OST_MOL_ALG SuperposeFrames(CoordGroupHandle &cg, EntityView &sel, int begin=0, int end=-1, int ref=-1)
returns a superposed version of coord group, superposed on a reference frame