OpenStructure
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
cadscore.py
Go to the documentation of this file.
1 #------------------------------------------------------------------------------
2 # This file is part of the OpenStructure project <www.openstructure.org>
3 #
4 # Copyright (C) 2008-2009 by the OpenStructure authors
5 #
6 # This library is free software; you can redistribute it and/or modify it under
7 # the terms of the GNU Lesser General Public License as published by the Free
8 # Software Foundation; either version 3.0 of the License, or (at your option)
9 # any later version.
10 # This library is distributed in the hope that it will be useful, but WITHOUT
11 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 # FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
13 # details.
14 #
15 # You should have received a copy of the GNU Lesser General Public License
16 # along with this library; if not, write to the Free Software Foundation, Inc.,
17 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 #------------------------------------------------------------------------------
19 """
20 Wrapper for the CAD score.
21 
22 References:
23 
24 Olechnovic K, Kulberkyte E, Venclovas C., CAD-score: A new contact area
25 difference-based function for evaluation of protein structural models
26 Proteins. 2012 Aug 30. [Epub ahead of print]
27 
28 Authors: Valerio Mariani, Alessandro Barbato
29 """
30 
31 import subprocess, os, tempfile, platform, re
32 from ost import settings, io, mol
33 
34 def _SetupFiles(model,reference):
35  # create temporary directory
36  tmp_dir_name=tempfile.mkdtemp()
37  dia = 'PDB'
38  for chain in model.chains:
39  if chain.name==" ":
40  raise RuntimeError("One of the chains in the model has no name. Cannot "
41  "calculate CAD score")
42  if len(chain.name) > 1:
43  dia = 'CHARMM'
44  break;
45  for res in chain.residues:
46  if len(res.name) > 3:
47  dia = 'CHARMM'
48  break;
49  io.SavePDB(model, os.path.join(tmp_dir_name, 'model.pdb'), dialect=dia)
50  dia = 'PDB'
51  for chain in reference.chains:
52  if chain.name==" ":
53  raise RuntimeError("One of the chains in the reference has no name. "
54  "Cannot calculate CAD score")
55  if len(chain.name) > 1:
56  dia = 'CHARMM'
57  break;
58  for res in chain.residues:
59  if len(res.name) > 3:
60  dia = 'CHARMM'
61  break;
62  io.SavePDB(reference, os.path.join(tmp_dir_name, 'reference.pdb'),dialect=dia)
63  return tmp_dir_name
64 
65 def _CleanupFiles(dir_name):
66  import shutil
67  shutil.rmtree(dir_name)
68 
69 class CADResult:
70  """
71  Holds the result of running CAD
72 
73  .. attribute:: globalAA
74 
75  The global CAD's atom-atom (AA) score
76 
77  .. attribute:: localAA
78 
79  Dictionary containing local CAD's atom-atom (AA) scores.
80 
81  :type: dictionary (key: tuple(chain, resnum) (e.g.:
82  ("A", ost.mol.ResNum(24)), value: CAD local AA score
83  (see CAD Documentation online)
84  """
85  def __init__(self, globalAA, localAA):
86  self.globalAA=globalAA
87  self.localAA=localAA
88 
89 def _ParseCADGlobal(lines):
90  header = lines[0].split()
91  aa_idx = header.index("AA")
92  aa_score=float(lines[1].split()[aa_idx])
93  return aa_score
94 
95 def _ParseCADLocal(lines):
96  local_scores_idx = None
97  for line_idx in range(len(lines)):
98  if "local_scores" in lines[line_idx]:
99  local_scores_idx = line_idx
100  break
101  if local_scores_idx == None:
102  raise RuntimeError("Failed to parse local cadscores")
103  local_aa_dict={}
104  for line_idx in range(local_scores_idx+2, len(lines)):
105  items=lines[line_idx].split()
106  local_aa = float(items[2])
107  if local_aa < 0.0:
108  continue # invalid CAD score
109  key = (items[0], mol.ResNum(int(items[1])))
110  local_aa_dict[key] = local_aa
111 
112  return local_aa_dict
113 
114 def _ParseVoronotaGlobal(lines):
115  return float(lines[0].split()[4])
116 
117 def _ParseVoronotaLocal(lines):
118  local_aa_dict={}
119  chain_name_regex = r'c<\D+>'
120  resnum_regex = r'r<\d+>'
121  insertion_code_regex = r'i<\D>'
122  for line in lines:
123  local_aa = float(line.split()[-1])
124  if local_aa < 0.0:
125  continue # invalid CAD score
126  chain_data = re.findall(chain_name_regex, line)
127  resnum_data = re.findall(resnum_regex, line)
128  insertion_code_data = re.findall(insertion_code_regex, line)
129  resnum = None
130  if len(insertion_code_data) == 0:
131  resnum = mol.ResNum(int(resnum_data[0][1:].strip('><')))
132  else:
133  resnum = mol.ResNum(int(resnum_data[0][1:].strip('><')),
134  insertion_code_data[0][1:].strip('><'))
135  key = (chain_data[0][1:].strip('><'), resnum)
136  local_aa_dict[key] = local_aa
137  return local_aa_dict
138 
139 def _RunCAD(tmp_dir, mode, cad_bin_path, old_regime):
140 
141  model_filename=os.path.join(tmp_dir, 'model.pdb')
142  reference_filename=os.path.join(tmp_dir, 'reference.pdb')
143  globalAA = None
144  localAA = None
145 
146  if platform.system() == "Windows":
147  raise RuntimeError('CAD score not available on Windows')
148 
149  if mode == "classic":
150  cad_calc_path = None
151  cad_read_g_path = None
152  cad_read_l_path = None
153  if cad_bin_path:
154  cad_calc_path = settings.Locate('CADscore_calc.bash',
155  search_paths=[cad_bin_path],
156  search_system_paths=False)
157  cad_read_g_path = settings.Locate('CADscore_read_global_scores.bash',
158  search_paths=[cad_bin_path],
159  search_system_paths=False)
160  cad_read_l_path=settings.Locate('CADscore_read_local_scores.bash',
161  search_paths=[cad_bin_path],
162  search_system_paths=False)
163  # also try to locate the actual executable that is called from the
164  # bash scripts
165  executable_path = settings.Locate('voroprot2',
166  search_paths=[cad_bin_path],
167  search_system_paths=False)
168  else:
169  cad_calc_path = settings.Locate('CADscore_calc.bash')
170  cad_read_g_path = settings.Locate('CADscore_read_global_scores.bash')
171  cad_read_l_path = settings.Locate('CADscore_read_local_scores.bash')
172  # also try to locate the actual executable that is called from the
173  # bash scripts
174  executable_path = settings.Locate('voroprot2')
175  command1="\"%s\" -m \"%s\" -t \"%s\" -D \"%s\"" %(cad_calc_path,
176  model_filename,
177  reference_filename,
178  os.path.join(tmp_dir,
179  "cadtemp"))
180  command2="\"%s\" -D \"%s\"" %(cad_read_g_path, os.path.join(tmp_dir,
181  "cadtemp"))
182  command3="\"%s\" -m \"%s\" -t \"%s\" -D \"%s\" -c AA" %(cad_read_l_path,
183  model_filename,
184  reference_filename,
185  os.path.join(tmp_dir,
186  "cadtemp"))
187 
188  ps1=subprocess.Popen(command1, shell=True, stdout=subprocess.PIPE)
189  ps1.wait()
190  ps2=subprocess.Popen(command2, shell=True, stdout=subprocess.PIPE)
191  ps2.wait()
192  lines=ps2.stdout.readlines()
193  try:
194  globalAA=_ParseCADGlobal(lines)
195  except:
196  raise RuntimeError("CAD calculation failed")
197  ps3=subprocess.Popen(command3, shell=True, stdout=subprocess.PIPE)
198  ps3.wait()
199  lines=ps3.stdout.readlines()
200  try:
201  localAA=_ParseCADLocal(lines)
202  except:
203  raise RuntimeError("CAD calculation failed")
204 
205  elif mode == "voronota":
206  local_score_filename = os.path.join(tmp_dir, "local_scores.txt")
207  voronota_cadscore_path = None
208  if cad_bin_path:
209  voronota_cadscore_path = settings.Locate("voronota-cadscore",
210  search_paths=[cad_bin_path],
211  search_system_paths=False)
212  # also try to locate the actual executable that is called from the
213  # bash script
214  executable_path = settings.Locate("voronota",
215  search_paths=[cad_bin_path],
216  search_system_paths=False)
217  else:
218  voronota_cadscore_path = settings.Locate("voronota-cadscore")
219  # also try to locate the actual executable that is called from the
220  # bash script
221  executable_path = settings.Locate("voronota")
222  cmd = [voronota_cadscore_path, '-m', model_filename, '-t',
223  reference_filename, '--contacts-query-by-code AA',
224  '--output-residue-scores', local_score_filename]
225  if old_regime:
226  cmd.append("--old-regime")
227  cmd = ' '.join(cmd)
228  ps = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
229  ps.wait()
230  try:
231  globalAA = _ParseVoronotaGlobal(ps.stdout.readlines())
232  except:
233  raise RuntimeError("CAD calculation failed")
234  try:
235  localAA = _ParseVoronotaLocal(open(local_score_filename).readlines())
236  except:
237  raise RuntimeError("CAD calculation failed")
238 
239  else:
240  raise RuntimeError("Invalid CAD mode! Allowed are: "
241  "[\"classic\", \"voronota\"]")
242 
243 
244  return CADResult(globalAA,localAA)
245 
246 def _HasInsertionCodes(model, reference):
247  for r in model.residues:
248  if r.GetNumber().GetInsCode() != "\0":
249  print r
250  return True
251  for r in reference.residues:
252  if r.GetNumber().GetInsCode() != "\0":
253  print r
254  return True
255  return False
256 
257 def _MapLabels(model, cad_results, label):
258  for k,v in cad_results.localAA.iteritems():
259  r = model.FindResidue(k[0], k[1])
260  if not r.IsValid():
261  raise RuntimeError("Failed to map cadscore on residues: " +
262  "CAD score estimated for residue in chain \"" +
263  k[0] + "\" with ResNum " + str(k[1]) + ". Residue " +
264  "could not be found in model.")
265  r.SetFloatProp(label, v)
266 
267 def CADScore(model, reference, mode = "classic", label = "localcad",
268  old_regime = False, cad_bin_path = None):
269  """
270  Calculates global and local atom-atom (AA) CAD Scores.
271 
272  You can either access the original implementation available from
273  https://bitbucket.org/kliment/cadscore/downloads/
274  or the new implementation which is part of the Voronota package
275  available from https://bitbucket.org/kliment/voronota/downloads/.
276 
277  The scores of the two implementations differ but strongly correlate
278  as the contacts between atoms are estimated differently. When using
279  the "voronota" *mode* you can minimize those discrepancies by
280  setting the *old_regime* flag to True.
281 
282  Furthermore, the "voronota" *mode* generates per-residue scores that
283  are inverted when compared to the classical implementation
284  (0.0: bad, 1.0 good).
285 
286  :param model: The model structure.
287  :type model: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
288  :param reference: The reference structure
289  :type reference: :class:`~ost.mol.EntityView` or
290  :class:`~ost.mol.EntityHandle`
291  :param mode: What CAD score implementation to use, must be one in
292  ["classic", "voronota"]
293  :param label: Local CAD scores will be mapped on residues of model as
294  float property with this name
295  :type label: :class:`str`
296  :param old_regime: Only has an effect if *mode* is "voronota". If set to true,
297  the discrepancies between the two modes is minimized but
298  the behaviour of inverted scores persists.
299  :type old_regime: :class:`bool`
300  :param cad_bin_path: Path to search for the required executables
301  (["CADscore_calc.bash",
302  "CADscore_read_global_scores.bash",
303  "CADscore_read_local_scores.bash"] for "classic" *mode*
304  or ["voronota-cadscore"] for "voronota" *mode*). If not
305  set, the env path is searched.
306  :type cad_bin_path: :class:`str`
307  :returns: The result of the CAD score calculation
308  :rtype: :class:`CADResult`
309 
310  :raises: :class:`~ost.settings.FileNotFound` if any of the CAD score
311  executables could not be located.
312  :raises: :class:`RuntimeError` if the calculation failed
313  """
314  if mode == "classic" and _HasInsertionCodes(model, reference):
315  raise RuntimeError("The classic CAD score implementation does not support "
316  "insertion codes in residues")
317  tmp_dir_name=_SetupFiles(model, reference)
318  result=_RunCAD(tmp_dir_name, mode, cad_bin_path, old_regime)
319  _CleanupFiles(tmp_dir_name)
320  _MapLabels(model, result, label)
321  return result