OpenStructure
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
naccess.py
Go to the documentation of this file.
1 '''
2 Naccess module
3 
4 Author: Florian Kiefer
5 
6 This module is for calculating surface areas
7 from OpenStructure using the external program naccess
8 
9 How To Use This Module:
10  1. Import it (e.g. as "from ost.bindings import naccess")
11  2. Use it (e.g. as "sasa = naccess.CalculateSurfaceArea(entity)")
12 
13 Requirement:
14  - naccess installed
15 '''
16 
17 import tempfile
18 import subprocess
19 import os
20 import re
21 from ost import io
22 from ost import mol
23 from ost import settings
24 from ost import geom
25 
26 def _GetExecutable(naccess_exe):
27  """
28  Method to check if naccess executable is present
29 
30  :param naccess: Explicit path to msms executable
31  :returns: Path to the executable
32  :exception: FileNotFound if executable is not found
33  """
34  return settings.Locate('naccess', explicit_file_name=naccess_exe)
35 
36 def _SetupFiles(entity, selection, scratch_dir, max_number_of_atoms):
37  """
38  Setup files for naccess calculation in temporary directory
39 
40  :param entity: EntityHandle or EntityView to calculate surface
41  :param selection: Calculate surface for subset of entity
42  :param scratch_dir: Directory for temporary files (NACCESS is sensitive to "." in directory names
43  :param max_number_of_atoms: Max Number of atoms in the entity (i.e. is limited in the default NACCESS version to 50 000)
44  :returns: array containing temporary directory, input filename for naccess and directory of the input file
45  :exception: RuntimeError if selection is not valid
46  """
47 
48  # create temporary directory
49  tmp_dir_name=""
50  if scratch_dir!=None:
51  tmp_dir_name=tempfile.mkdtemp(dir=scratch_dir)
52  else:
53  tmp_dir_name=tempfile.mkdtemp()
54 
55  # select only heavy atoms if no_hydrogens is true
56  entity_view=entity.Select(selection)
57  if len(entity_view.atoms) > max_number_of_atoms:
58  raise RuntimeError, "Too much atoms for NACCESS (> %s)" % max_number_of_atoms
59  if not entity_view.IsValid():
60  raise RuntimeError, "Could not create view for selection (%s)"%(selection)
61 
62  # write entity to tmp file
63  tmp_file_name=os.path.join(tmp_dir_name,"entity.pdb")
64  tmp_file_base = os.path.join(tmp_dir_name,"entity")
65  io.SavePDB(entity_view, tmp_file_name)
66  return (tmp_dir_name, tmp_file_name, tmp_file_base)
67 
68 
69 def _ParseAsaFile(entity, file, asa_atom):
70  """
71  Reads Area file (.asa) and attach asa per atom to an entitiy
72 
73  :param entity: EntityHandle or EntityView for attaching sasa on atom level
74  :param file: Filename of area file
75  :param asa_atom: Name of the float property for SASA
76  """
77 
78  asa_fh = open(file)
79  asa_lines = asa_fh.readlines()
80  asa_fh.close()
81 
82  for l in asa_lines:
83  if l.startswith("ATOM"):
84  # get res_number, chain_id and atom name
85  atom_name = l[12:16]
86  chain_id = l[21]
87  res_number = l[22:27]
88  asa = l[54:63]
89  atom_name = atom_name.strip()
90  chain_id = chain_id
91  res_number = res_number.strip()
92  asa = asa.strip()
93  #print "res_number:", res_number
94  m=re.match(r'(?P<num>-?\d+)(?P<ins>\w)?', res_number)
95  di = m.groupdict()
96 
97  if di["ins"] == None:
98  resNum = mol.ResNum(int(di["num"]))
99  else:
100  resNum = mol.ResNum(int(di["num"]), di["ins"])
101  #print res_number, resNum.num, resNum.ins
102  a = entity.FindAtom(chain_id, resNum, atom_name)
103  if(a.IsValid()):
104  a.SetFloatProp(asa_atom, float(asa))
105  else:
106  print chain_id, resNum, atom_name
107 
108 
109 def _ParseRsaFile(enti,file, asa_abs, asa_rel):
110  """
111  Reads Area file (.rsa) and attach asa (absolute + relative) per residue to an entitiy
112 
113  :param entity: EntityHandle or EntityView for attaching sasa on atom level
114  :param file: Filename of .rsa file
115  :param asa_atom: Name of the float property for absolute SASA
116  :param asa_atom: Name of the float property for relative SASA
117  :exception: RuntimeError if residue names are not the same
118  """
119  area_fh = open(file)
120  area_lines = area_fh.readlines()
121  area_fh.close()
122  # shift first line
123  area_lines = area_lines[4:]
124 
125 
126  for l in area_lines:
127  if l.startswith("RES"):
128  p = re.compile(r'\s+')
129  t = p.split(l)
130  #res_name, chain_id , res_number, abs_all, rel_all = t[1:6]
131  res_name = l[3:8]
132  res_name = res_name.strip()
133  chain_id = l[8:9]
134  res_number = l[9:14]
135  res_number= res_number.strip()
136  #print l[15:30]
137  abs_all, rel_all =l[15:28].strip().split()
138  m=re.match(r'(?P<num>-?\d+)(?P<ins>\w)?', res_number)
139  di = m.groupdict()
140  if di["ins"] == None:
141  resNum = mol.ResNum(int(di["num"]))
142  else:
143  resNum = mol.ResNum(int(di["num"]), di["ins"])
144 
145  res = enti.handle.FindResidue(chain_id, resNum)
146  #res = entity.FindResidue(chain_id, mol.ResNum(int(res_number)))
147  #print res_name, chain_id, res_number
148  if res_name == res.name:
149  res.SetFloatProp(asa_rel, float(rel_all) )
150  res.SetFloatProp(asa_abs, float(abs_all) )
151  else:
152  raise RuntimeError, "Residue Names are not the same for ResNumb: %s (%s vs %s)" % (res_number, res.name, res_name)
153 
154 
155 def __CleanupFiles(dir_name):
156  """
157  Method which recursively deletes a directory
158 
159  :warning: This method removes also non-empty directories without asking, so
160  be careful!
161  """
162  import shutil
163  shutil.rmtree(dir_name)
164 
165 
166 def _RunNACCESS(command, temp_dir):
167  """
168  Method to run the MSMS surface calculation
169 
170  This method starts the external MSMS executable and returns the stdout of MSMS
171 
172  :param command: Command to execute
173  :returns: stdout of MSMS
174  :exception: CalledProcessError for non-zero return value
175  """
176  proc = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, cwd = temp_dir)
177  stdout_value, stderr_value = proc.communicate()
178 
179  #check for successful completion of msms
180  if proc.returncode!=0:
181  print "WARNING: naccess error\n", stdout_value
182  raise subprocess.CalledProcessError(proc.returncode, command)
183 
184  return stdout_value
185 
186 
187 
188 def CalculateSurfaceArea(entity, radius=1.4,
189  include_hydrogens=False, include_hetatm = False,
190  include_water = False, selection="",
191  naccess_exe=None, keep_files=False , asa_abs= "asaAbs", asa_rel="asaRel", asa_atom="asaAtom", scratch_dir = None, max_number_of_atoms=50000):
192  """
193  Calculates analytical the solvent accessible surface area by using the
194  external naccess program
195 
196  This method calculates the molecular surface areas by invoking the external
197  program naccess. First, it is checked if the naccess executable is present, then,
198  the necessary files are prepared in a temporary directory and naccess is
199  executed. The last step is to remove the temporary directory.
200 
201 
202  :param entity: OST entity to calculate surface
203  :param radius: Surface probe radius
204  :param include_hydrogens: Calculate surface including hydrogens
205  :param include_hetatm: Calculate surface including hetatms
206  :param include_water: Calculate surface including water
207  :param selection: Calculate surface for subset of entity
208  :param naccess_exe: naccess executable (full path to executable)
209  :param keep_files: Do not delete temporary files
210  :param asa_abs: Attaches per residue absolute SASA to specified FloatProp on residue level
211  :param asa_rel: Attaches per residue relative SASA to specified FloatProp on residue level
212  :param asa_atom: Attaches per atom SASA to specified FloatProp at atom level
213  :param scratch_dir: Directory for temporary files (NACCESS is sensitive to "." in directory names
214  :param max_number_of_atoms: Max Number of atoms in the entity (i.e. is limited in the default NACCESS version to 50 000)
215 
216  :returns: absolute SASA calculated using asa_atom
217  """
218 
219  import re
220 
221  # check if msms executable is specified
222  naccess_executable=_GetExecutable(naccess_exe)
223  # parse selection
224 
225  # setup files for msms
226  (naccess_data_dir, naccess_data_file,naccess_data_base )=_SetupFiles(entity, selection, scratch_dir, max_number_of_atoms)
227 
228  # set command line
229  command="%s %s -p %f " % \
230  (naccess_executable, naccess_data_file, radius)
231  if include_hydrogens:
232  command = "%s -w" % command
233  if include_water:
234  command = "%s -y" % command
235  if include_hetatm:
236  command = "%s -h" % command
237  #print command
238  stdout_value=_RunNACCESS(command, naccess_data_dir)
239 
240  new_asa= os.path.join(naccess_data_dir, "%s.asa" % naccess_data_base)
241  _ParseAsaFile(entity, new_asa, asa_atom)
242 
243  new_rsa = os.path.join(naccess_data_dir, "%s.rsa" % naccess_data_base)
244  _ParseRsaFile(entity, new_rsa, asa_abs, asa_rel)
245 
246  # Calculate Asa for all atoms:
247  sasa = 0.0
248  for a in entity.atoms:
249  sasa += a.GetFloatProp(asa_atom, 0.0)
250 
251 
252  # first read in result_file
253 
254  # parse MSMS output
255 
256  # clean up
257  if not keep_files:
258  __CleanupFiles(naccess_data_dir)
259 
260  return sasa
261