OpenStructure
naccess.py
Go to the documentation of this file.
1 '''
2 Naccess module
3 
4 Author: Florian Kiefer, Gerardo Tauriello (cleanup/speedup)
5 
6 This module is for calculating surface areas
7 from OpenStructure using the external program naccess
8 
9 How To Use This Module:
10  1. Import it (e.g. as "from ost.bindings import naccess")
11  2. Use it (e.g. as "sasa = naccess.CalculateSurfaceArea(entity)")
12 
13 Requirement:
14  - naccess installed
15 '''
16 
17 import tempfile
18 import subprocess
19 import os
20 import re
21 from ost import io, mol, settings, LogWarning
22 
23 def _GetExecutable(naccess_exe):
24  """
25  Method to check if naccess executable is present
26 
27  :param naccess: Explicit path to naccess executable
28  :returns: Path to the executable
29  :exception: FileNotFound if executable is not found
30  """
31  return settings.Locate('naccess', explicit_file_name=naccess_exe)
32 
33 def _CheckNaccessRoot(naccess_root):
34  """
35  :return: True, if given directory contains "accall" binary and files
36  "vdw.radii" and "standard.data".
37  :param naccess_root: Path to naccess folder to check.
38  """
39  accall_exe = os.path.join(naccess_root, "accall")
40  check = (os.path.exists(accall_exe) and os.access(accall_exe, os.X_OK) \
41  and os.path.exists(os.path.join(naccess_root, "vdw.radii")) \
42  and os.path.exists(os.path.join(naccess_root, "standard.data")))
43  if not check:
44  LogWarning("NACCESS: Could not find required files to launch accall " \
45  "directly in %s." % naccess_root)
46  return check
47 
48 def _SetupFiles(entity, selection, scratch_dir, max_number_of_atoms):
49  """
50  Setup files for naccess calculation in temporary directory
51 
52  :param entity: OST entity to calculate surface
53  :param selection: Calculate surface for subset of entity
54  :param scratch_dir: Scratch directory. A subfolder for temporary files
55  is created in there. If not specified, a default
56  directory is used (see :func:`tempfile.mkdtemp`).
57  :param max_number_of_atoms: Max Number of atoms in the entity (i.e. is limited
58  in the default NACCESS version to 50 000)
59  :returns: Tuple containing temporary directory, input filename for naccess and
60  directory of the input file
61  :exception: RuntimeError if selection is not valid or too many atoms
62  """
63 
64  # create temporary directory
65  tmp_dir_name = ""
66  if scratch_dir != None:
67  tmp_dir_name = tempfile.mkdtemp(dir=scratch_dir)
68  else:
69  tmp_dir_name = tempfile.mkdtemp()
70 
71  # select as specified by user
72  if selection != "":
73  entity_view = entity.Select(selection)
74  else:
75  entity_view = entity
76  if len(entity_view.atoms) > max_number_of_atoms:
77  raise RuntimeError("Too much atoms for NACCESS (> %s)" % max_number_of_atoms)
78  if not entity_view.IsValid():
79  raise RuntimeError("Could not create view for selection (%s)"%(selection))
80 
81  # write entity to tmp file
82  tmp_file_name = "entity.pdb"
83  tmp_file_base = os.path.join(tmp_dir_name, "entity")
84  io.SavePDB(entity_view, os.path.join(tmp_dir_name, tmp_file_name))
85  return (tmp_dir_name, tmp_file_name, tmp_file_base)
86 
87 
88 def _ParseAsaFile(entity, file, asa_atom):
89  """
90  Reads Area file (.asa) and attach asa per atom to an entitiy
91 
92  :param entity: EntityHandle or EntityView for attaching sasa on atom level
93  :param file: Filename of area file
94  :param asa_atom: Name of the float property for SASA
95  """
96 
97  asa_fh = open(file)
98  asa_lines = asa_fh.readlines()
99  asa_fh.close()
100 
101  for l in asa_lines:
102  if l.startswith("ATOM"):
103  # get res_number, chain_id and atom name
104  atom_name = l[12:16]
105  chain_id = l[21]
106  res_number = l[22:27]
107  asa = l[54:63]
108  atom_name = atom_name.strip()
109  chain_id = chain_id
110  res_number = res_number.strip()
111  asa = asa.strip()
112  m = re.match(r'(?P<num>-?\d+)(?P<ins>\w)?', res_number)
113  di = m.groupdict()
114 
115  if di["ins"] == None:
116  resNum = mol.ResNum(int(di["num"]))
117  else:
118  resNum = mol.ResNum(int(di["num"]), di["ins"])
119 
120  a = entity.FindAtom(chain_id, resNum, atom_name)
121  if(a.IsValid()):
122  a.SetFloatProp(asa_atom, float(asa))
123  else:
124  LogWarning("NACCESS: invalid asa entry %s %s %s" \
125  % (chain_id, resNum, atom_name))
126 
127 def _ParseRsaFile(entity, file, asa_abs, asa_rel):
128  """
129  Reads Area file (.rsa) and attach asa (absolute + relative) per residue to an entitiy
130 
131  :param entity: EntityHandle or EntityView for attaching sasa on atom level
132  :param file: Filename of .rsa file
133  :param asa_atom: Name of the float property for absolute SASA
134  :param asa_atom: Name of the float property for relative SASA
135  :exception: RuntimeError if residue names are not the same
136  """
137  area_fh = open(file)
138  area_lines = area_fh.readlines()
139  area_fh.close()
140  # shift first line
141  area_lines = area_lines[4:]
142  # parse lines
143  for l in area_lines:
144  if l.startswith("RES"):
145  # extract data
146  p = re.compile(r'\s+')
147  res_name = l[3:8]
148  res_name = res_name.strip()
149  chain_id = l[8:9]
150  res_number = l[9:14]
151  res_number = res_number.strip()
152  abs_all, rel_all = l[15:28].strip().split()
153  m = re.match(r'(?P<num>-?\d+)(?P<ins>\w)?', res_number)
154  di = m.groupdict()
155  if di["ins"] == None:
156  resNum = mol.ResNum(int(di["num"]))
157  else:
158  resNum = mol.ResNum(int(di["num"]), di["ins"])
159  # set res. props
160  res = entity.FindResidue(chain_id, resNum)
161  if res_name == res.name:
162  res.SetFloatProp(asa_rel, float(rel_all) )
163  res.SetFloatProp(asa_abs, float(abs_all) )
164  else:
165  raise RuntimeError("Residue Names are not the same for ResNumb: %s (%s vs %s)" % (res_number, res.name, res_name))
166 
167 
168 def __CleanupFiles(dir_name):
169  """
170  Method which recursively deletes a directory
171 
172  :warning: This method removes also non-empty directories without asking, so
173  be careful!
174  """
175  import shutil
176  shutil.rmtree(dir_name)
177 
178 
179 def _RunACCALL(command, temp_dir, query):
180  """
181  Fast method to run the Naccess surface calculation.
182 
183  This method starts the accall binary directly and pipes in the input provided
184  in *query*. This is faster than calling the "naccess" script since the script
185  has a constant overhead of roughly 1.3s in each call.
186 
187  :param command: Command to execute
188  :param temp_dir: Command is executed with this working directory
189  :param query: User input to pipe into *command*
190  :returns: stdout of command
191  :exception: CalledProcessError for non-zero return value
192  """
193 
194  proc = subprocess.Popen(command, cwd=temp_dir, stdout=subprocess.PIPE,
195  stderr=subprocess.PIPE, stdin=subprocess.PIPE)
196  stdout_value, stderr_value = proc.communicate(query.encode())
197 
198  # check for successful completion of naccess
199  if proc.returncode != 0:
200  LogWarning("WARNING: naccess error\n%s\n%s" % (stdout_value.decode(),
201  stderr_value.decode()))
202  raise subprocess.CalledProcessError(proc.returncode, command)
203 
204  return stdout_value.decode()
205 
206 
207 def _RunNACCESS(command, temp_dir):
208  """
209  Method to run the Naccess surface calculation.
210 
211  This method starts the external Naccess executable and returns the stdout.
212 
213  :param command: Command to execute
214  :param temp_dir: Command is executed with this working directory
215  :returns: stdout of command
216  :exception: CalledProcessError for non-zero return value
217  """
218  proc = subprocess.Popen(command, cwd=temp_dir, shell=True,
219  stdout=subprocess.PIPE)
220  stdout_value, stderr_value = proc.communicate()
221 
222  # check for successful completion of naccess
223  if proc.returncode != 0:
224  LogWarning("WARNING: naccess error\n%s" % stdout_value.decode())
225  raise subprocess.CalledProcessError(proc.returncode, command)
226 
227  return stdout_value.decode()
228 
229 
230 def CalculateSurfaceArea(entity, radius=1.4,
231  include_hydrogens=False, include_hetatm = False,
232  include_water=False, selection="", naccess_exe=None,
233  naccess_root=None, keep_files=False, asa_abs= "asaAbs",
234  asa_rel="asaRel", asa_atom="asaAtom", scratch_dir=None,
235  max_number_of_atoms=50000):
236  """
237  Calculates analytical the solvent accessible surface area by using the
238  external naccess program
239 
240  This method calculates the molecular surface areas by invoking the external
241  program naccess. First, it is checked if the naccess executable is present, then,
242  the necessary files are prepared in a temporary directory and naccess is
243  executed. The last step is to remove the temporary directory.
244 
245 
246  :param entity: OST entity to calculate surface
247  :param radius: Surface probe radius
248  :param include_hydrogens: Calculate surface including hydrogens
249  :param include_hetatm: Calculate surface including hetatms
250  :param include_water: Calculate surface including water
251  :param selection: Calculate surface for subset of entity
252  :param naccess_exe: naccess executable (full path to executable)
253  :param naccess_root: Path to folder containing "accall" binary and
254  files "vdw.radii" and "standard.data". This is the
255  fastest way to call naccess!
256  :param keep_files: If True, do not delete temporary files
257  :param asa_abs: Attaches per residue absolute SASA to specified
258  FloatProp on residue level
259  :param asa_rel: Attaches per residue relative SASA to specified
260  FloatProp on residue level
261  :param asa_atom: Attaches per atom SASA to specified FloatProp at
262  atom level
263  :param scratch_dir: Scratch directory. A subfolder for temporary files
264  is created in there. If not specified, a default
265  directory is used (see :func:`tempfile.mkdtemp`).
266  :param max_number_of_atoms: Max Number of atoms in the entity (i.e. is limited
267  in the default NACCESS version to 50 000)
268 
269  :returns: absolute SASA calculated using asa_atom
270  """
271 
272  # check if naccess executable is specified
273  if naccess_root and _CheckNaccessRoot(naccess_root):
274  # use faster, direct call to accall binary
275  fast_mode = True
276  else:
277  # get naccess executable
278  naccess_executable = _GetExecutable(naccess_exe)
279  # see if we can extract naccess_root from there (fallback to old mode)
280  naccess_root = os.path.dirname(naccess_executable)
281  fast_mode = _CheckNaccessRoot(naccess_root)
282 
283  # setup files for naccess
284  (naccess_data_dir, naccess_data_file, naccess_data_base) \
285  = _SetupFiles(entity, selection, scratch_dir, max_number_of_atoms)
286 
287  try:
288  # call naccess
289  if fast_mode:
290  # cook up stdin query (same logic as naccess script)
291  query = "PDBFILE %s\n" \
292  "VDWFILE %s\n" \
293  "STDFILE %s\n" \
294  "PROBE %f\n" \
295  "ZSLICE 0.05\n" \
296  % (naccess_data_file, os.path.join(naccess_root, "vdw.radii"),
297  os.path.join(naccess_root, "standard.data"), radius)
298  if include_hydrogens:
299  query += "HYDROGENS\n"
300  if include_water:
301  query += "WATERS\n"
302  if include_hetatm:
303  query += "HETATOMS\n"
304  # call it
305  command = os.path.join(naccess_root, "accall")
306  _RunACCALL(command, naccess_data_dir, query)
307  else:
308  LogWarning("NACCESS: Falling back to slower call to %s." \
309  % naccess_executable)
310  # set command line
311  command = "%s %s -p %f " % \
312  (naccess_executable, naccess_data_file, radius)
313  if include_hydrogens:
314  command = "%s -y" % command
315  if include_water:
316  command = "%s -w" % command
317  if include_hetatm:
318  command = "%s -h" % command
319  # execute naccess
320  _RunNACCESS(command, naccess_data_dir)
321 
322  # parse outout
323  new_asa = os.path.join(naccess_data_dir, "%s.asa" % naccess_data_base)
324  _ParseAsaFile(entity, new_asa, asa_atom)
325 
326  new_rsa = os.path.join(naccess_data_dir, "%s.rsa" % naccess_data_base)
327  _ParseRsaFile(entity, new_rsa, asa_abs, asa_rel)
328 
329  finally:
330  # clean up
331  if not keep_files:
332  __CleanupFiles(naccess_data_dir)
333 
334  # sum up Asa for all atoms
335  sasa = 0.0
336  for a in entity.atoms:
337  sasa += a.GetFloatProp(asa_atom, 0.0)
338 
339  return sasa
def CalculateSurfaceArea(entity, radius=1.4, include_hydrogens=False, include_hetatm=False, include_water=False, selection="", naccess_exe=None, naccess_root=None, keep_files=False, asa_abs="asaAbs", asa_rel="asaRel", asa_atom="asaAtom", scratch_dir=None, max_number_of_atoms=50000)
Definition: naccess.py:235