OpenStructure
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
naccess.py
Go to the documentation of this file.
1 '''
2 Naccess module
3 
4 Author: Florian Kiefer, Gerardo Tauriello (cleanup/speedup)
5 
6 This module is for calculating surface areas
7 from OpenStructure using the external program naccess
8 
9 How To Use This Module:
10  1. Import it (e.g. as "from ost.bindings import naccess")
11  2. Use it (e.g. as "sasa = naccess.CalculateSurfaceArea(entity)")
12 
13 Requirement:
14  - naccess installed
15 '''
16 
17 import tempfile
18 import subprocess
19 import os
20 import re
21 from ost import io, mol, settings, LogWarning
22 
23 def _GetExecutable(naccess_exe):
24  """
25  Method to check if naccess executable is present
26 
27  :param naccess: Explicit path to naccess executable
28  :returns: Path to the executable
29  :exception: FileNotFound if executable is not found
30  """
31  return settings.Locate('naccess', explicit_file_name=naccess_exe)
32 
33 def _CheckNaccessRoot(naccess_root):
34  """
35  :return: True, if given directory contains "accall" binary and files
36  "vdw.radii" and "standard.data".
37  :param naccess_root: Path to naccess folder to check.
38  """
39  accall_exe = os.path.join(naccess_root, "accall")
40  check = (os.path.exists(accall_exe) and os.access(accall_exe, os.X_OK) \
41  and os.path.exists(os.path.join(naccess_root, "vdw.radii")) \
42  and os.path.exists(os.path.join(naccess_root, "standard.data")))
43  if not check:
44  LogWarning("NACCESS: Could not find required files to launch accall " \
45  "directly in %s." % naccess_root)
46  return check
47 
48 def _SetupFiles(entity, selection, scratch_dir, max_number_of_atoms):
49  """
50  Setup files for naccess calculation in temporary directory
51 
52  :param entity: OST entity to calculate surface
53  :param selection: Calculate surface for subset of entity
54  :param scratch_dir: Scratch directory. A subfolder for temporary files
55  is created in there. If not specified, a default
56  directory is used (see :func:`tempfile.mkdtemp`).
57  :param max_number_of_atoms: Max Number of atoms in the entity (i.e. is limited
58  in the default NACCESS version to 50 000)
59  :returns: Tuple containing temporary directory, input filename for naccess and
60  directory of the input file
61  :exception: RuntimeError if selection is not valid or too many atoms
62  """
63 
64  # create temporary directory
65  tmp_dir_name = ""
66  if scratch_dir != None:
67  tmp_dir_name = tempfile.mkdtemp(dir=scratch_dir)
68  else:
69  tmp_dir_name = tempfile.mkdtemp()
70 
71  # select as specified by user
72  if selection != "":
73  entity_view = entity.Select(selection)
74  else:
75  entity_view = entity
76  if len(entity_view.atoms) > max_number_of_atoms:
77  raise RuntimeError, "Too much atoms for NACCESS (> %s)" % max_number_of_atoms
78  if not entity_view.IsValid():
79  raise RuntimeError, "Could not create view for selection (%s)"%(selection)
80 
81  # write entity to tmp file
82  tmp_file_name = "entity.pdb"
83  tmp_file_base = os.path.join(tmp_dir_name, "entity")
84  io.SavePDB(entity_view, os.path.join(tmp_dir_name, tmp_file_name))
85  return (tmp_dir_name, tmp_file_name, tmp_file_base)
86 
87 
88 def _ParseAsaFile(entity, file, asa_atom):
89  """
90  Reads Area file (.asa) and attach asa per atom to an entitiy
91 
92  :param entity: EntityHandle or EntityView for attaching sasa on atom level
93  :param file: Filename of area file
94  :param asa_atom: Name of the float property for SASA
95  """
96 
97  asa_fh = open(file)
98  asa_lines = asa_fh.readlines()
99  asa_fh.close()
100 
101  for l in asa_lines:
102  if l.startswith("ATOM"):
103  # get res_number, chain_id and atom name
104  atom_name = l[12:16]
105  chain_id = l[21]
106  res_number = l[22:27]
107  asa = l[54:63]
108  atom_name = atom_name.strip()
109  chain_id = chain_id
110  res_number = res_number.strip()
111  asa = asa.strip()
112  m = re.match(r'(?P<num>-?\d+)(?P<ins>\w)?', res_number)
113  di = m.groupdict()
114 
115  if di["ins"] == None:
116  resNum = mol.ResNum(int(di["num"]))
117  else:
118  resNum = mol.ResNum(int(di["num"]), di["ins"])
119 
120  a = entity.FindAtom(chain_id, resNum, atom_name)
121  if(a.IsValid()):
122  a.SetFloatProp(asa_atom, float(asa))
123  else:
124  LogWarning("NACCESS: invalid asa entry %s %s %s" \
125  % (chain_id, resNum, atom_name))
126 
127 def _ParseRsaFile(entity, file, asa_abs, asa_rel):
128  """
129  Reads Area file (.rsa) and attach asa (absolute + relative) per residue to an entitiy
130 
131  :param entity: EntityHandle or EntityView for attaching sasa on atom level
132  :param file: Filename of .rsa file
133  :param asa_atom: Name of the float property for absolute SASA
134  :param asa_atom: Name of the float property for relative SASA
135  :exception: RuntimeError if residue names are not the same
136  """
137  area_fh = open(file)
138  area_lines = area_fh.readlines()
139  area_fh.close()
140  # shift first line
141  area_lines = area_lines[4:]
142  # parse lines
143  for l in area_lines:
144  if l.startswith("RES"):
145  # extract data
146  p = re.compile(r'\s+')
147  res_name = l[3:8]
148  res_name = res_name.strip()
149  chain_id = l[8:9]
150  res_number = l[9:14]
151  res_number = res_number.strip()
152  abs_all, rel_all = l[15:28].strip().split()
153  m = re.match(r'(?P<num>-?\d+)(?P<ins>\w)?', res_number)
154  di = m.groupdict()
155  if di["ins"] == None:
156  resNum = mol.ResNum(int(di["num"]))
157  else:
158  resNum = mol.ResNum(int(di["num"]), di["ins"])
159  # set res. props
160  res = entity.FindResidue(chain_id, resNum)
161  if res_name == res.name:
162  res.SetFloatProp(asa_rel, float(rel_all) )
163  res.SetFloatProp(asa_abs, float(abs_all) )
164  else:
165  raise RuntimeError, "Residue Names are not the same for ResNumb: %s (%s vs %s)" % (res_number, res.name, res_name)
166 
167 
168 def __CleanupFiles(dir_name):
169  """
170  Method which recursively deletes a directory
171 
172  :warning: This method removes also non-empty directories without asking, so
173  be careful!
174  """
175  import shutil
176  shutil.rmtree(dir_name)
177 
178 
179 def _RunACCALL(command, temp_dir, query):
180  """
181  Fast method to run the Naccess surface calculation.
182 
183  This method starts the accall binary directly and pipes in the input provided
184  in *query*. This is faster than calling the "naccess" script since the script
185  has a constant overhead of roughly 1.3s in each call.
186 
187  :param command: Command to execute
188  :param temp_dir: Command is executed with this working directory
189  :param query: User input to pipe into *command*
190  :returns: stdout of command
191  :exception: CalledProcessError for non-zero return value
192  """
193  proc = subprocess.Popen(command, stdout=subprocess.PIPE,
194  stderr=subprocess.PIPE, stdin=subprocess.PIPE,
195  cwd=temp_dir)
196  stdout_value, stderr_value = proc.communicate(query)
197 
198  # check for successful completion of naccess
199  if proc.returncode != 0:
200  LogWarning("WARNING: naccess error\n%s\n%s" % (stdout_value, stderr_value))
201  raise subprocess.CalledProcessError(proc.returncode, command)
202 
203  return stdout_value
204 
205 
206 def _RunNACCESS(command, temp_dir):
207  """
208  Method to run the Naccess surface calculation.
209 
210  This method starts the external Naccess executable and returns the stdout.
211 
212  :param command: Command to execute
213  :param temp_dir: Command is executed with this working directory
214  :returns: stdout of command
215  :exception: CalledProcessError for non-zero return value
216  """
217  proc = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE,
218  cwd=temp_dir)
219  stdout_value, stderr_value = proc.communicate()
220 
221  # check for successful completion of naccess
222  if proc.returncode != 0:
223  LogWarning("WARNING: naccess error\n%s" % stdout_value)
224  raise subprocess.CalledProcessError(proc.returncode, command)
225 
226  return stdout_value
227 
228 
229 def CalculateSurfaceArea(entity, radius=1.4,
230  include_hydrogens=False, include_hetatm = False,
231  include_water=False, selection="", naccess_exe=None,
232  naccess_root=None, keep_files=False, asa_abs= "asaAbs",
233  asa_rel="asaRel", asa_atom="asaAtom", scratch_dir=None,
234  max_number_of_atoms=50000):
235  """
236  Calculates analytical the solvent accessible surface area by using the
237  external naccess program
238 
239  This method calculates the molecular surface areas by invoking the external
240  program naccess. First, it is checked if the naccess executable is present, then,
241  the necessary files are prepared in a temporary directory and naccess is
242  executed. The last step is to remove the temporary directory.
243 
244 
245  :param entity: OST entity to calculate surface
246  :param radius: Surface probe radius
247  :param include_hydrogens: Calculate surface including hydrogens
248  :param include_hetatm: Calculate surface including hetatms
249  :param include_water: Calculate surface including water
250  :param selection: Calculate surface for subset of entity
251  :param naccess_exe: naccess executable (full path to executable)
252  :param naccess_root: Path to folder containing "accall" binary and
253  files "vdw.radii" and "standard.data". This is the
254  fastest way to call naccess!
255  :param keep_files: If True, do not delete temporary files
256  :param asa_abs: Attaches per residue absolute SASA to specified
257  FloatProp on residue level
258  :param asa_rel: Attaches per residue relative SASA to specified
259  FloatProp on residue level
260  :param asa_atom: Attaches per atom SASA to specified FloatProp at
261  atom level
262  :param scratch_dir: Scratch directory. A subfolder for temporary files
263  is created in there. If not specified, a default
264  directory is used (see :func:`tempfile.mkdtemp`).
265  :param max_number_of_atoms: Max Number of atoms in the entity (i.e. is limited
266  in the default NACCESS version to 50 000)
267 
268  :returns: absolute SASA calculated using asa_atom
269  """
270 
271  # check if naccess executable is specified
272  if naccess_root and _CheckNaccessRoot(naccess_root):
273  # use faster, direct call to accall binary
274  fast_mode = True
275  else:
276  # get naccess executable
277  naccess_executable = _GetExecutable(naccess_exe)
278  # see if we can extract naccess_root from there (fallback to old mode)
279  naccess_root = os.path.dirname(naccess_executable)
280  fast_mode = _CheckNaccessRoot(naccess_root)
281 
282  # setup files for naccess
283  (naccess_data_dir, naccess_data_file, naccess_data_base) \
284  = _SetupFiles(entity, selection, scratch_dir, max_number_of_atoms)
285 
286  try:
287  # call naccess
288  if fast_mode:
289  # cook up stdin query (same logic as naccess script)
290  query = "PDBFILE %s\n" \
291  "VDWFILE %s\n" \
292  "STDFILE %s\n" \
293  "PROBE %f\n" \
294  "ZSLICE 0.05\n" \
295  % (naccess_data_file, os.path.join(naccess_root, "vdw.radii"),
296  os.path.join(naccess_root, "standard.data"), radius)
297  if include_hydrogens:
298  query += "HYDROGENS\n"
299  if include_water:
300  query += "WATERS\n"
301  if include_hetatm:
302  query += "HETATOMS\n"
303  # call it
304  command = os.path.join(naccess_root, "accall")
305  _RunACCALL(command, naccess_data_dir, query)
306  else:
307  LogWarning("NACCESS: Falling back to slower call to %s." \
308  % naccess_executable)
309  # set command line
310  command = "%s %s -p %f " % \
311  (naccess_executable, naccess_data_file, radius)
312  if include_hydrogens:
313  command = "%s -y" % command
314  if include_water:
315  command = "%s -w" % command
316  if include_hetatm:
317  command = "%s -h" % command
318  # execute naccess
319  _RunNACCESS(command, naccess_data_dir)
320 
321  # parse outout
322  new_asa = os.path.join(naccess_data_dir, "%s.asa" % naccess_data_base)
323  _ParseAsaFile(entity, new_asa, asa_atom)
324 
325  new_rsa = os.path.join(naccess_data_dir, "%s.rsa" % naccess_data_base)
326  _ParseRsaFile(entity, new_rsa, asa_abs, asa_rel)
327 
328  finally:
329  # clean up
330  if not keep_files:
331  __CleanupFiles(naccess_data_dir)
332 
333  # sum up Asa for all atoms
334  sasa = 0.0
335  for a in entity.atoms:
336  sasa += a.GetFloatProp(asa_atom, 0.0)
337 
338  return sasa