OpenStructure
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
tmtools.py
Go to the documentation of this file.
1 #------------------------------------------------------------------------------
2 # This file is part of the OpenStructure project <www.openstructure.org>
3 #
4 # Copyright (C) 2008-2009 by the OpenStructure authors
5 #
6 # This library is free software; you can redistribute it and/or modify it under
7 # the terms of the GNU Lesser General Public License as published by the Free
8 # Software Foundation; either version 3.0 of the License, or (at your option)
9 # any later version.
10 # This library is distributed in the hope that it will be useful, but WITHOUT
11 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 # FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
13 # details.
14 #
15 # You should have received a copy of the GNU Lesser General Public License
16 # along with this library; if not, write to the Free Software Foundation, Inc.,
17 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 #------------------------------------------------------------------------------
19 """
20 Wrappers for the tmalign and tmscore utilities.
21 
22 References:
23 
24 tmscore: Yang Zhang and Jeffrey Skolnick, Proteins 2004 57: 702-710
25 tmalign: Y. Zhang and J. Skolnick, Nucl. Acids Res. 2005 33, 2302-9
26 
27 
28 Authors: Pascal Benkert, Marco Biasini
29 """
30 
31 import subprocess, os, tempfile, platform
32 from ost import settings, io, geom, seq
33 
34 def _SetupFiles(models):
35  # create temporary directory
36  tmp_dir_name=tempfile.mkdtemp()
37  dia = 'PDB'
38  for index, model in enumerate(models):
39  for chain in model.chains:
40  if len(chain.name) > 1:
41  dia = 'CHARMM'
42  break;
43  for res in chain.residues:
44  if len(res.name) > 3:
45  dia = 'CHARMM'
46  break;
47  io.SavePDB(model, os.path.join(tmp_dir_name, 'model%02d.pdb' % (index+1)), dialect=dia)
48  return tmp_dir_name
49 
50 def _CleanupFiles(dir_name):
51  import shutil
52  shutil.rmtree(dir_name)
53 
55  """
56  Holds the result of running TMalign
57 
58  .. attribute:: rmsd
59 
60  The RMSD of the common Calpha atoms of both structures
61 
62  .. attribute:: transform
63 
64  The transform that superposes the model onto the reference structure.
65 
66  :type: :class:`~ost.geom.Mat4`
67 
68  .. attribute:: alignment
69 
70  The alignment of the structures, that is the pairing of Calphas of both
71  structures. Since the programs only read ATOM records, residues consisting
72  of HETATMs (MSE) are not included in the alignment.
73 
74  :type: :class:`~ost.seq.AlignmentHandle`
75 
76  .. attribute:: tm_score
77 
78  The TM-score of the structural superposition
79 
80  """
81  def __init__(self, rmsd, tm_score, aligned_length, transform,
82  ref_sequence, alignment):
83 
84  self.rmsd=rmsd
85  self.tm_score=tm_score
86  self.aligned_length=aligned_length
87  self.transform=transform
88  self.ref_sequence =ref_sequence
89  self.alignment=alignment
90 
91 def _ParseTmAlign(lines,lines_matrix):
92  info_line=lines[12].split(',')
93  aln_length=float(info_line[0].split('=')[1].strip())
94  rmsd=float(info_line[1].split('=')[1].strip())
95  tm_score=float(lines[14].split('=')[1].split('(')[0].strip())
96  tf1=[float(i.strip()) for i in lines_matrix[2].split()]
97  tf2=[float(i.strip()) for i in lines_matrix[3].split()]
98  tf3=[float(i.strip()) for i in lines_matrix[4].split()]
99  rot=geom.Mat3(tf1[2], tf1[3], tf1[4], tf2[2], tf2[3],
100  tf2[4], tf3[2], tf3[3], tf3[4])
101  tf=geom.Mat4(rot)
102  tf.PasteTranslation(geom.Vec3(tf1[1], tf2[1], tf3[1]))
103  seq1 = seq.CreateSequence("1",lines[18].strip())
104  seq2 = seq.CreateSequence("2",lines[20].strip())
105  alignment = seq.CreateAlignment()
106  alignment.AddSequence(seq2)
107  alignment.AddSequence(seq1)
108  return TMAlignResult(rmsd, tm_score, aln_length, tf, seq2, alignment)
109 
110 def _RunTmAlign(tmalign, tmp_dir):
111  model1_filename=os.path.join(tmp_dir, 'model01.pdb')
112  model2_filename=os.path.join(tmp_dir, 'model02.pdb')
113  if platform.system() == "Windows":
114  tmalign_path=settings.Locate('tmalign.exe', explicit_file_name=tmalign)
115  command="\"%s\" %s %s -m %s" %(os.path.normpath(tmalign_path), model1_filename, model2_filename, os.path.join(tmp_dir,'matrix.txt'))
116  else:
117  tmalign_path=settings.Locate('tmalign', explicit_file_name=tmalign)
118  command="\"%s\" \"%s\" \"%s\" -m \"%s\"" %(tmalign_path, model1_filename, model2_filename, os.path.join(tmp_dir,'matrix.txt'))
119  ps=subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
120  ps.wait()
121  lines=ps.stdout.readlines()
122  if (len(lines))<22:
123  _CleanupFiles(tmp_dir)
124  raise RuntimeError("tmalign superposition failed")
125  matrix_file=open(os.path.join(tmp_dir,'matrix.txt'))
126  lines_matrix=matrix_file.readlines()
127  matrix_file.close()
128  return _ParseTmAlign(lines,lines_matrix)
129 
131  def __init__(self, rmsd, aligned_length, tm_score, transform, alignment):
132  self.rmsd=rmsd
133  self.tm_score=tm_score
134  self.aligned_length=aligned_length
135  self.transform=transform
136  self.alignment=alignment
137 
138 def _ParseMmAlign(lines):
139  info_line=lines[10].split(',')
140  aln_length=float(info_line[0].split('=')[1].strip())
141  rmsd=float(info_line[1].split('=')[1].strip())
142  tm_score=float(info_line[2].split('=')[1].strip())
143  tf1=[float(i.strip()) for i in lines[14].split()]
144  tf2=[float(i.strip()) for i in lines[15].split()]
145  tf3=[float(i.strip()) for i in lines[16].split()]
146  rot=geom.Mat3(tf1[2], tf1[3], tf1[4], tf2[2], tf2[3],
147  tf2[4], tf3[2], tf3[3], tf3[4])
148  tf=geom.Mat4(rot)
149  tf.PasteTranslation(geom.Vec3(tf1[1], tf2[1], tf3[1]))
150  seq1 = seq.CreateSequence("1",lines[19].strip())
151  seq2 = seq.CreateSequence("2",lines[21].strip())
152  alignment = seq.CreateAlignment()
153  alignment.AddSequence(seq2)
154  alignment.AddSequence(seq1)
155  return MMAlignResult(rmsd, aln_length, tm_score, tf, alignment)
156 
157 def _RunMmAlign(mmalign, tmp_dir):
158  model1_filename=os.path.join(tmp_dir, 'model01.pdb')
159  model2_filename=os.path.join(tmp_dir, 'model02.pdb')
160  if platform.system() == "Windows":
161  mmalign_path=settings.Locate('mmalign.exe', explicit_file_name=mmalign)
162  command="\"%s\" %s %s" %(os.path.normpath(mmalign_path), model1_filename, model2_filename)
163  else:
164  mmalign_path=settings.Locate('MMalign', explicit_file_name=mmalign)
165  command="\"%s\" \"%s\" \"%s\"" %(mmalign_path, model1_filename, model2_filename)
166  ps=subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
167  ps.wait()
168  lines=ps.stdout.readlines()
169  if (len(lines))<22:
170  _CleanupFiles(tmp_dir)
171  raise RuntimeError("mmalign superposition failed")
172  return _ParseMmAlign(lines)
173 
175  """
176  Holds the result of running TMscore
177 
178  .. attribute:: rmsd_common
179 
180  The RMSD of the common Calpha atoms of both structures
181 
182  .. attribute:: rmsd_below_five
183 
184  The RMSD of all Calpha atoms that can be superposed below five Angstroem
185 
186  .. attribute:: tm_score
187 
188  The TM-score of the structural superposition
189 
190  .. attribute:: transform
191 
192  The transform that superposes the model onto the reference structure.
193 
194  :type: :class:`~ost.geom.Mat4`
195 
196  .. attribute:: gdt_ha
197 
198  The GDT_HA of the model to the reference structure.
199 
200  .. attribute:: gdt_ts
201 
202  The GDT_TS of the model to the reference structure.
203 
204  """
205  def __init__(self, rmsd_common, tm_score, max_sub,
206  gdt_ts, gdt_ha, rmsd_below_five, transform):
207  self.rmsd_common=rmsd_common
208  self.tm_score=tm_score
209  self.max_sub=max_sub
210  self.gdt_ts=gdt_ts
211  self.gdt_ha=gdt_ha
212  self.rmsd_below_five=rmsd_below_five
213  self.transform=transform
214 
215 def _ParseTmScore(lines):
216  tf1=[float(i.strip()) for i in lines[23].split()]
217  tf2=[float(i.strip()) for i in lines[24].split()]
218  tf3=[float(i.strip()) for i in lines[25].split()]
219  rot=geom.Mat3(tf1[2], tf1[3], tf1[4], tf2[2], tf2[3],
220  tf2[4], tf3[2], tf3[3], tf3[4])
221  tf=geom.Mat4(rot)
222  tf.PasteTranslation(geom.Vec3(tf1[1], tf2[1], tf3[1]))
223  result=TMScoreResult(float(lines[14].split()[-1].strip()),
224  float(lines[16].split()[2].strip()),
225  float(lines[17].split()[1].strip()),
226  float(lines[18].split()[1].strip()),
227  float(lines[19].split()[1].strip()),
228  float(lines[27].split()[-1].strip()),
229  tf)
230  return result
231 
232 def _RunTmScore(tmscore, tmp_dir):
233  model1_filename=os.path.join(tmp_dir, 'model01.pdb')
234  model2_filename=os.path.join(tmp_dir, 'model02.pdb')
235  if platform.system() == "Windows":
236  tmscore_path=settings.Locate('tmscore.exe', explicit_file_name=tmscore)
237  command="\"%s\" %s %s" %(os.path.normpath(tmscore_path), model1_filename,
238  model2_filename)
239  else:
240  tmscore_path=settings.Locate('tmscore', explicit_file_name=tmscore)
241  command="\"%s\" \"%s\" \"%s\"" % (tmscore_path, model1_filename,
242  model2_filename)
243  ps=subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
244  ps.wait()
245  lines=ps.stdout.readlines()
246  if (len(lines))<22:
247  _CleanupFiles(tmp_dir)
248  raise RuntimeError("tmscore superposition failed")
249  return _ParseTmScore(lines)
250 
251 
252 def TMAlign(model1, model2, tmalign=None):
253  """
254  Performs a sequence independent superposition of model1 onto model2, the
255  reference.
256 
257 
258  :param model1: The model structure. If the superposition is successful, will
259  be superposed onto the reference structure
260  :type model1: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
261  :param model2: The reference structure
262  :type model2: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
263  :param tmalign: If not None, the path to the tmalign executable.
264  :returns: The result of the tmscore superposition
265  :rtype: :class:`TMAlignResult`
266 
267  :raises: :class:`~ost.settings.FileNotFound` if tmalign could not be located.
268  :raises: :class:`RuntimeError` if the superposition failed
269  """
270  tmp_dir_name=_SetupFiles((model1, model2))
271  result=_RunTmAlign(tmalign, tmp_dir_name)
272  model1.handle.EditXCS().ApplyTransform(result.transform)
273  _CleanupFiles(tmp_dir_name)
274  return result
275 
276 def MMAlign(model1, model2, mmalign=None):
277  """
278  Run tmalign on two protein structures
279  """
280  tmp_dir_name=_SetupFiles((model1, model2))
281  result=_RunMmAlign(mmalign, tmp_dir_name)
282  model1.handle.EditXCS().ApplyTransform(result.transform)
283  _CleanupFiles(tmp_dir_name)
284  return result
285 
286 def TMScore(model1, model2, tmscore=None):
287  """
288  Performs a sequence dependent superposition of model1 onto model2,
289  the reference.
290 
291  :param model1: The model structure. If the superposition is successful, will
292  be superposed onto the reference structure
293  :type model1: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
294  :param model2: The reference structure
295  :type model2: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
296  :param tmscore: If not None, the path to the tmscore executable.
297  :returns: The result of the tmscore superposition
298  :rtype: :class:`TMScoreResult`
299 
300  :raises: :class:`~ost.settings.FileNotFound` if tmalign could not be located.
301  :raises: :class:`RuntimeError` if the superposition failed
302  """
303  tmp_dir_name=_SetupFiles((model1, model2))
304  result=_RunTmScore(tmscore, tmp_dir_name)
305  model1.handle.EditXCS().ApplyTransform(result.transform)
306  _CleanupFiles(tmp_dir_name)
307  return result