OpenStructure
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
naccess.py
Go to the documentation of this file.
1 '''
2 Naccess module
3 
4 Author: Florian Kiefer
5 
6 This module is for calculating surface areas
7 from OpenStructure using the external program naccess
8 
9 How To Use This Module:
10  1. Import it (e.g. as "from ost.bindings import naccess")
11  2. Use it (e.g. as "sasa = naccess.CalculateSurfaceArea(entity)")
12 
13 Requirement:
14  - naccess installed
15 '''
16 
17 import tempfile
18 import subprocess
19 import os
20 import re
21 from ost import io
22 from ost import mol
23 from ost import settings
24 from ost import geom
25 
26 ## \brief Method to check if naccess executable is present
27 #
28 # \param naccess Explicit path to msms executable
29 # \return Path to the executable
30 # \exception FileNotFound if executable is not found
31 def _GetExecutable(naccess_exe):
32  return settings.Locate('naccess', explicit_file_name=naccess_exe)
33 
34 ## \brief Setup files for naccess calculation in temporary directory
35 #
36 # \param entity EntityHandle or EntityView to calculate surface
37 # \param selection Calculate surface for subset of entity
38 # \param scratch_dir Directory for temporary files (NACCESS is sensitive to "." in directory names
39 # \param max_number_of_atoms Max Number of atoms in the entity (i.e. is limited in the default NACCESS version to 50 000)
40 # \return array containing temporary directory, input filename for naccess and directory of the input file
41 # \exception RuntimeError if selection is not valid
42 def _SetupFiles(entity, selection, scratch_dir, max_number_of_atoms):
43  # create temporary directory
44  tmp_dir_name=""
45  if scratch_dir!=None:
46  tmp_dir_name=tempfile.mkdtemp(dir=scratch_dir)
47  else:
48  tmp_dir_name=tempfile.mkdtemp()
49 
50  # select only heavy atoms if no_hydrogens is true
51  entity_view=entity.Select(selection)
52  if len(entity_view.atoms) > max_number_of_atoms:
53  raise RuntimeError, "Too much atoms for NACCESS (> %s)" % max_number_of_atoms
54  if not entity_view.IsValid():
55  raise RuntimeError, "Could not create view for selection (%s)"%(selection)
56 
57  # write entity to tmp file
58  tmp_file_name=os.path.join(tmp_dir_name,"entity.pdb")
59  tmp_file_base = os.path.join(tmp_dir_name,"entity")
60  io.SavePDB(entity_view, tmp_file_name)
61  return (tmp_dir_name, tmp_file_name, tmp_file_base)
62 
63 ## \brief Reads Area file (.asa) and attach asa per atom to an entitiy
64 #
65 # \param entity EntityHandle or EntityView for attaching sasa on atom level
66 # \param file Filename of area file
67 # \param asa_atom Name of the float property for SASA
68 
69 
70 def _ParseAsaFile(entity, file, asa_atom):
71 
72  asa_fh = open(file)
73  asa_lines = asa_fh.readlines()
74  asa_fh.close()
75 
76  for l in asa_lines:
77  if l.startswith("ATOM"):
78  # get res_number, chain_id and atom name
79  atom_name = l[12:16]
80  chain_id = l[21]
81  res_number = l[22:27]
82  asa = l[54:63]
83  atom_name = atom_name.strip()
84  chain_id = chain_id
85  res_number = res_number.strip()
86  asa = asa.strip()
87  #print "res_number:", res_number
88  m=re.match(r'(?P<num>-?\d+)(?P<ins>\w)?', res_number)
89  di = m.groupdict()
90 
91  if di["ins"] == None:
92  resNum = mol.ResNum(int(di["num"]))
93  else:
94  resNum = mol.ResNum(int(di["num"]), di["ins"])
95  #print res_number, resNum.num, resNum.ins
96  a = entity.FindAtom(chain_id, resNum, atom_name)
97  if(a.IsValid()):
98  a.SetFloatProp(asa_atom, float(asa))
99  else:
100  print chain_id, resNum, atom_name
101 
102 ## \brief Reads Area file (.rsa) and attach asa (absolute + relative) per residue to an entitiy
103 #
104 # \param entity EntityHandle or EntityView for attaching sasa on atom level
105 # \param file Filename of .rsa file
106 # \param asa_atom Name of the float property for absolute SASA
107 # \param asa_atom Name of the float property for relative SASA
108 # \exception RuntimeError if residue names are not the same
109 
110 def _ParseRsaFile(enti,file, asa_abs, asa_rel):
111  area_fh = open(file)
112  area_lines = area_fh.readlines()
113  area_fh.close()
114  # shift first line
115  area_lines = area_lines[4:]
116 
117 
118  for l in area_lines:
119  if l.startswith("RES"):
120  p = re.compile(r'\s+')
121  t = p.split(l)
122  #res_name, chain_id , res_number, abs_all, rel_all = t[1:6]
123  res_name = l[3:8]
124  res_name = res_name.strip()
125  chain_id = l[8:9]
126  res_number = l[9:14]
127  res_number= res_number.strip()
128  #print l[15:30]
129  abs_all, rel_all =l[15:28].strip().split()
130  m=re.match(r'(?P<num>-?\d+)(?P<ins>\w)?', res_number)
131  di = m.groupdict()
132  if di["ins"] == None:
133  resNum = mol.ResNum(int(di["num"]))
134  else:
135  resNum = mol.ResNum(int(di["num"]), di["ins"])
136 
137  res = enti.handle.FindResidue(chain_id, resNum)
138  #res = entity.FindResidue(chain_id, mol.ResNum(int(res_number)))
139  #print res_name, chain_id, res_number
140  if res_name == res.name:
141  res.SetFloatProp(asa_rel, float(rel_all) )
142  res.SetFloatProp(asa_abs, float(abs_all) )
143  else:
144  raise RuntimeError, "Residue Names are not the same for ResNumb: %s (%s vs %s)" % (res_number, res.name, res_name)
145 
146 ## \brief Method which recursively deletes a directory
147 #
148 # \warning This method removes also non-empty directories without asking, so
149 # be careful!
150 def __CleanupFiles(dir_name):
151  import shutil
152  shutil.rmtree(dir_name)
153 
154 ## \brief Method to run the MSMS surface calculation
155 #
156 # This method starts the external MSMS executable and returns the stdout of MSMS
157 #
158 # \param command Command to execute
159 # \return stdout of MSMS
160 # \exception CalledProcessError for non-zero return value
161 def _RunNACCESS(command, temp_dir):
162  proc = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, cwd = temp_dir)
163  stdout_value, stderr_value = proc.communicate()
164 
165  #check for successful completion of msms
166  if proc.returncode!=0:
167  print "WARNING: naccess error\n", stdout_value
168  raise subprocess.CalledProcessError(proc.returncode, command)
169 
170  return stdout_value
171 
172 
173 ## \brief Calculates analytical the solvent accessible surface
174 # area by using the external naccess program
175 #
176 # This method calculates the molecular surface areas by invoking the external
177 # program naccess. First, it is checked if the naccess executable is present, then,
178 # the necessary files are prepared in a temporary directory and naccess is
179 # executed. The last step is to remove the temporary directory.
180 #
181 #
182 # \param entity OST entity to calculate surface
183 # \param radius Surface probe radius
184 # \param include_hydrogens Calculate surface including hydrogens
185 # \param include_hetatm Calculate surface including hetatms
186 # \param include_water Calculate surface including water
187 # \param selection Calculate surface for subset of entity
188 # \param naccess _exe msms executable (full path to executable)
189 # \param keep_files Do not delete temporary files
190 # \param asa_abs Attaches per residue absolute SASA to specified FloatProp on residue level
191 # \param asa_rel Attaches per residue relative SASA to specified FloatProp on residue level
192 # \param asa_atom Attaches per atom SASA to specified FloatProp at atom level
193 # \param scratch_dir Directory for temporary files (NACCESS is sensitive to "." in directory names
194 # \param max_number_of_atoms Max Number of atoms in the entity (i.e. is limited in the default NACCESS version to 50 000)
195 
196 # \return absolute SASA calculated using asa_atom
197 def CalculateSurfaceArea(entity, radius=1.4,
198  include_hydrogens=False, include_hetatm = False,
199  include_water = False, selection="",
200  naccess_exe=None, keep_files=False , asa_abs= "asaAbs", asa_rel="asaRel", asa_atom="asaAtom", scratch_dir = None, max_number_of_atoms=50000):
201  import re
202 
203  # check if msms executable is specified
204  naccess_executable=_GetExecutable(naccess_exe)
205  # parse selection
206 
207  # setup files for msms
208  (naccess_data_dir, naccess_data_file,naccess_data_base )=_SetupFiles(entity, selection, scratch_dir)
209 
210  # set command line
211  command="%s %s -p %f " % \
212  (naccess_executable, naccess_data_file, radius)
213  if include_hydrogens:
214  command = "%s -w" % command
215  if include_water:
216  command = "%s -y" % command
217  if include_hetatm:
218  command = "%s -h" % command
219  #print command
220  stdout_value=_RunNACCESS(command, naccess_data_dir)
221 
222  new_asa= os.path.join(naccess_data_dir, "%s.asa" % naccess_data_base)
223  _ParseAsaFile(entity, new_asa, asa_atom)
224 
225  new_rsa = os.path.join(naccess_data_dir, "%s.rsa" % naccess_data_base)
226  _ParseRsaFile(entity, new_rsa, asa_abs, asa_rel)
227 
228  # Calculate Asa for all atoms:
229  sasa = 0.0
230  for a in entity.atoms:
231  sasa += a.GetFloatProp(asa_atom, 0.0)
232 
233 
234  # first read in result_file
235 
236  # parse MSMS output
237 
238  # clean up
239  if not keep_files:
240  __CleanupFiles(naccess_data_dir)
241 
242  return sasa
243