OpenStructure
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
trajectory_analysis.py
Go to the documentation of this file.
1 """
2 **This Module requires numpy**
3 
4 This module contains functions to analyze trajectories, mainly
5 similarity measures based on RMSDs and pairwise distances.
6 
7 Author: Niklaus Johner (niklaus.johner@unibas.ch)
8 """
9 
10 import ost.mol.alg
11 import ost.geom
12 from ost import LogError
13 import os
14 
def smooth(vec, n):
    """
    Smooth a vector or a list of floats with a centered moving average.

    Each element is replaced by the average over itself and the **n**
    elements on each side, so over (2n+1) elements. Close to either end,
    where fewer than **n** neighbours exist on one side, the window is
    truncated to the elements that are available. Inputs shorter than
    2n+1 elements are handled the same way instead of failing.

    :param vec: the values to smooth; must support ``len`` and slicing
                (e.g. a list of floats or a 1D numpy array)
    :param n: the number of neighbours used on each side of an element
    :type n: :class:`int`

    :return: a smoothed copy of **vec**; **vec** itself is not modified.
    :raises ValueError: if **n** is negative.
    """
    if n < 0:
        raise ValueError("n must be non-negative")
    try:
        smoothed = vec.copy()
    except AttributeError:
        # Python 2 lists have no copy() method, fall back to a slice copy.
        smoothed = vec[:]
    size = len(vec)
    for i in range(size):
        # Clamp the window to the valid index range at both ends.
        lo = max(0, i - n)
        hi = min(size, i + n + 1)
        # The 0.0 start value forces float arithmetic for the average.
        smoothed[i] = sum(vec[lo:hi], 0.0) / (hi - lo)
    return smoothed
52 
53 
54 """
55 From here on the module needs numpy
56 """
57 
def RMSD_Matrix_From_Traj(t, sele, first=0, last=-1):
    r"""
    This function calculates a matrix M such that M[i,j] is the
    RMSD (calculated on **sele**) between frames i and j of the trajectory **t**
    aligned on sele.

    :param t: the trajectory
    :param sele: the selection used for alignment and RMSD calculation
    :param first: the first frame of t to be used
    :param last: the last frame of t to be used (exclusive); -1 stands for
                 the number of frames in **t**
    :type t: :class:`~ost.mol.CoordGroupHandle`
    :type sele: :class:`~ost.mol.EntityView`
    :type first: :class:`int`
    :type last: :class:`int`

    :return: Returns a numpy N\ :subscript:`frames`\ xN\ :subscript:`frames` matrix,
     where N\ :subscript:`frames` is the number of frames.
    :raises ImportError: if numpy cannot be imported (the error is logged first).
    """
    # Only the import is guarded, so that an unrelated ImportError raised
    # further down is not reported as a missing numpy.
    try:
        import numpy as npy
    except ImportError:
        LogError("Function needs numpy, but I could not import it.")
        raise
    if last == -1:
        last = t.GetFrameCount()
    n_frames = last - first
    rmsd_matrix = npy.identity(n_frames)
    for i in range(n_frames):
        # Superpose the frames onto frame i; t is rebound to the result.
        t = ost.mol.alg.SuperposeFrames(t, sele, begin=first, end=last, ref=i)
        t.CopyFrame(i)
        # Row i holds the RMSD of every frame with respect to frame i.
        rmsd_matrix[i, :] = ost.mol.alg.AnalyzeRMSD(t, sele, sele)
        if i == 0:
            # NOTE(review): presumably the trajectory returned by
            # SuperposeFrames only contains frames first..last, hence the
            # frame range is re-based to start at 0 after the first pass.
            # Confirm against the SuperposeFrames documentation.
            last = last - first
            first = 0
    return rmsd_matrix
93 
94 
def PairwiseDistancesFromTraj(t, sele, first=0, last=-1, seq_sep=1):
    r"""
    This function calculates the distances between pairs of atoms in **sele**
    from a trajectory **t**. A pair (a\ :subscript:`i`, a\ :subscript:`j`) is
    used when the positions i and j of the two atoms in ``sele.atoms``
    satisfy j-i >= **seq_sep**.
    It returns a matrix containing one line for each atom pair and N\ :subscript:`frames` columns, where
    N\ :subscript:`frames` is the number of frames in the trajectory.

    :param t: the trajectory
    :param sele: the selection used to determine the atom pairs
    :param first: the first frame of t to be used
    :param last: the last frame of t to be used (exclusive); -1 stands for
                 the number of frames in **t**
    :param seq_sep: The minimal separation (in position within ``sele.atoms``)
                    between the two atoms of a pair
    :type t: :class:`~ost.mol.CoordGroupHandle`
    :type sele: :class:`~ost.mol.EntityView`
    :type first: :class:`int`
    :type last: :class:`int`
    :type seq_sep: :class:`int`

    :return: a numpy N\ :subscript:`pairs`\ xN\ :subscript:`frames` matrix.
    :raises ImportError: if numpy cannot be imported (the error is logged first).
    """
    # Only the import is guarded, so that an unrelated ImportError raised
    # further down is not reported as a missing numpy.
    try:
        import numpy as npy
    except ImportError:
        LogError("Function needs numpy, but I could not import it.")
        raise
    if last == -1:
        last = t.GetFrameCount()
    n_frames = last - first
    # Fetch the atom list once instead of on every loop.
    atoms = list(sele.atoms)
    # Collect the pairs in a single pass; the order (i outer, j inner)
    # defines the row order of the returned matrix.
    pairs = [(a1, a2)
             for i, a1 in enumerate(atoms)
             for j, a2 in enumerate(atoms)
             if j - i >= seq_sep]
    dist_matrix = npy.zeros((len(pairs), n_frames))
    for k, (a1, a2) in enumerate(pairs):
        dist_matrix[k] = ost.mol.alg.AnalyzeDistanceBetwAtoms(
            t, a1.GetHandle(), a2.GetHandle())[first:last]
    return dist_matrix
137 
def DistanceMatrixFromPairwiseDistances(distances, p=2):
    r"""
    This function calculates a distance matrix M(N\ :subscript:`frames`\ xN\ :subscript:`frames`\ ) from
    the pairwise distances matrix D(N\ :subscript:`pairs`\ xN\ :subscript:`frames`\ ), where
    N\ :subscript:`frames` is the number of frames in the trajectory
    and N\ :subscript:`pairs` the number of atom pairs.
    M[i,j] is the distance between frame i and frame j
    calculated as a p-norm of the differences in distances
    from the two frames (distance-RMSD for p=2):
    M[i,j] = (sum(\|D[:,i]-D[:,j]\|^p) / N\ :subscript:`pairs`)^(1/p).
    The diagonal of M is zero.

    :param distances: a pairwise distance matrix as obtained from
     :py:func:`~mol.alg.trajectory_analysis.PairwiseDistancesFromTraj`
    :param p: exponent used for the p-norm.

    :return: a numpy N\ :subscript:`frames`\ xN\ :subscript:`frames` matrix, where N\ :subscript:`frames`
     is the number of frames.
    :raises ImportError: if numpy cannot be imported (the error is logged first).
    """
    # Only the import is guarded, so that an unrelated ImportError raised
    # further down is not reported as a missing numpy.
    try:
        import numpy as npy
    except ImportError:
        LogError("Function needs numpy, but I could not import it.")
        raise
    distances = npy.asarray(distances)
    n_pairs = distances.shape[0]
    n_frames = distances.shape[1]
    # Start from zeros: the distance of a frame to itself is 0.
    dist_mat = npy.zeros((n_frames, n_frames))
    for i in range(n_frames):
        # Differences between frame i and all later frames at once;
        # the column slice i:i+1 keeps a 2D shape for broadcasting.
        diffs = abs(distances[:, i + 1:] - distances[:, i:i + 1])
        d = ((diffs ** p).sum(axis=0) / float(n_pairs)) ** (1. / p)
        # The matrix is symmetric, fill both triangles.
        dist_mat[i, i + 1:] = d
        dist_mat[i + 1:, i] = d
    return dist_mat
170 
def DistRMSDFromTraj(t, sele, ref_sele, radius=7.0, average=False, seq_sep=4, first=0, last=-1):
    r"""
    This function calculates the distance RMSD from a trajectory.
    The distances selected for the calculation are all the distances
    between pair of atoms from residues that are at least **seq_sep** apart
    in the sequence (atoms from different chains are always considered)
    and that are smaller than **radius** in **ref_sele**.
    The number and order of atoms in **ref_sele** and **sele** should be the same.

    :param t: the trajectory
    :param sele: the selection used to calculate the distance RMSD
    :param ref_sele: the reference selection used to determine the atom pairs and reference distances
    :param radius: the upper limit of distances in ref_sele considered for the calculation
    :param seq_sep: the minimal sequence separation between atom pairs considered for the calculation
    :param average: use the average distance in the trajectory as reference instead of the distance obtained from ref_sele
    :param first: the first frame of t to be used
    :param last: the last frame of t to be used (exclusive); -1 stands for
                 the number of frames in **t**

    :type t: :class:`~ost.mol.CoordGroupHandle`
    :type sele: :class:`~ost.mol.EntityView`
    :type ref_sele: :class:`~ost.mol.EntityView`
    :type radius: :class:`float`
    :type average: :class:`bool`
    :type first: :class:`int`
    :type last: :class:`int`
    :type seq_sep: :class:`int`

    :return: a numpy vector dist_rmsd(N\ :subscript:`frames`), or None (after
             printing a message) if **sele** and **ref_sele** do not have
             the same number of atoms. If no atom pair fulfills the
             criteria, the division by a pair count of zero yields nan values.
    :raises ImportError: if numpy cannot be imported (the error is logged first).
    """
    if not sele.GetAtomCount() == ref_sele.GetAtomCount():
        # Parenthesised form prints the same text on Python 2 and Python 3.
        print('Not same number of atoms in the two views')
        return
    # Only the import is guarded, so that an unrelated ImportError raised
    # further down is not reported as a missing numpy.
    try:
        import numpy as npy
    except ImportError:
        LogError("Function needs numpy, but I could not import it.")
        raise
    if last == -1:
        last = t.GetFrameCount()
    n_frames = last - first
    dist_rmsd = npy.zeros(n_frames)
    pair_count = 0.0
    # Fetch the atom lists once instead of on every loop.
    ref_atoms = list(ref_sele.atoms)
    atoms = list(sele.atoms)
    # Chain and residue number of every reference atom, looked up once
    # instead of once per pair.
    locations = []
    for atom in ref_atoms:
        residue = atom.GetResidue()
        locations.append((residue.GetChain(), residue.GetNumber().num))
    for i, a1 in enumerate(ref_atoms):
        c1, num1 = locations[i]
        for j in range(i + 1, len(ref_atoms)):
            c2, num2 = locations[j]
            # Skip pairs that are too close in sequence within one chain.
            if c1 == c2 and abs(num2 - num1) < seq_sep:
                continue
            d = ost.geom.Distance(a1.pos, ref_atoms[j].pos)
            if d < radius:
                # Same pair (by position) in sele, followed along the trajectory.
                d_traj = npy.asarray(ost.mol.alg.AnalyzeDistanceBetwAtoms(
                    t, atoms[i].GetHandle(), atoms[j].GetHandle())[first:last])
                if average:
                    d = npy.mean(d_traj)
                # Accumulate the squared deviations for all frames at once.
                dist_rmsd[:len(d_traj)] += (d_traj - d) ** 2.0
                pair_count += 1.0
    return (dist_rmsd / float(pair_count)) ** 0.5
229 
def AverageDistanceMatrixFromTraj(t, sele, first=0, last=-1):
    r"""
    This function calculates the distance between each pair of atoms
    in **sele**, averaged over the trajectory **t**.

    :param t: the trajectory
    :param sele: the selection used to determine the atom pairs
    :param first: the first frame of t to be used
    :param last: the last frame of t to be used (exclusive); -1 stands for
                 the number of frames in **t**, as in the other functions
                 of this module
    :type t: :class:`~ost.mol.CoordGroupHandle`
    :type sele: :class:`~ost.mol.EntityView`
    :type first: :class:`int`
    :type last: :class:`int`

    :return: a symmetric numpy N\ :subscript:`atoms`\ xN\ :subscript:`atoms` matrix, where N\ :subscript:`atoms`
     is the number of atoms in **sele**.
    :raises ImportError: if numpy cannot be imported (the error is logged first).
    """
    try:
        import numpy as npy
    except ImportError:
        LogError("Function needs numpy, but I could not import it.")
        raise
    # Without this the default last=-1 would slice [first:-1] and silently
    # drop the last frame from the average.
    if last == -1:
        last = t.GetFrameCount()
    n_atoms = sele.GetAtomCount()
    M = npy.zeros([n_atoms, n_atoms])
    # Fetch the atom list once instead of on every loop.
    atoms = list(sele.atoms)
    for i, a1 in enumerate(atoms):
        h1 = a1.GetHandle()
        # The matrix is symmetric: compute each pair once (j >= i) and
        # fill both triangles.
        for j in range(i, len(atoms)):
            d = ost.mol.alg.AnalyzeDistanceBetwAtoms(t, h1, atoms[j].GetHandle())[first:last]
            M[i, j] = M[j, i] = npy.mean(d)
    return M
260 
261 
262