OpenStructure
antechamber.py
Go to the documentation of this file.
1 #------------------------------------------------------------------------------
2 # This file is part of the OpenStructure project <www.openstructure.org>
3 #
4 # Copyright (C) 2008-2020 by the OpenStructure authors
5 #
6 # This library is free software; you can redistribute it and/or modify it under
7 # the terms of the GNU Lesser General Public License as published by the Free
8 # Software Foundation; either version 3.0 of the License, or (at your option)
9 # any later version.
10 # This library is distributed in the hope that it will be useful, but WITHOUT
11 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 # FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
13 # details.
14 #
15 # You should have received a copy of the GNU Lesser General Public License
16 # along with this library; if not, write to the Free Software Foundation, Inc.,
17 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 #------------------------------------------------------------------------------
19 
20 # Functions to use Antechamber (from AmberTools) to automatically generate force
21 # field parameters. Allows the execution of Antechamber and the parsing of files
22 # generated by it.
23 
24 from ost.mol import mm
25 import ost
26 from ost import settings, mol, geom
27 import os, subprocess, math
28 
29 
31 def _GetInteraction(functype, atoms, params):
32  """Get an mm.Interaction with the given func-type and params for the given
33  atoms (name and types extracted from there)."""
34  interaction = mm.Interaction(functype)
35  interaction.SetNames([a.name for a in atoms])
36  interaction.SetTypes([a.GetStringProp('type') for a in atoms])
37  interaction.SetParam(params)
38  return interaction
39 
40 def _MakeComponentBuildingBlock(eh, ff_dict):
41  """Take EntityHandle eh (from ParseModifiedPDB) and ff_dict (from
42  ParseAmberForceField) and return BuildingBlock."""
43  # converters: length: A -> nm, angles: deg -> rad, energy: kcal -> kJ
44  dist_scale = 1./10.0
45  angle_scale = math.pi/180.
46  # bond strength in OpenMM needs a factor of 2 compared to Amber
47  bond_k_scale = 418.4*2.
48  angle_k_scale = 4.184
49 
50  # get atom typing (dictionary listing all atoms of a certain type)
51  atype_dict = {}
52  for a in eh.atoms:
53  atype = a.GetStringProp('type')
54  if not atype in atype_dict:
55  atype_dict[atype] = [a.handle]
56  else:
57  atype_dict[atype].append(a.handle)
58 
59  # set masses in entity handle (charges and types set in ParseModifiedPDB)
60  for atype, mass in ff_dict['MASS']:
61  for a in atype_dict[atype]: a.SetMass(mass)
62 
63  # start by adding atoms
64  bb = mm.BuildingBlock()
65  for a in eh.atoms:
66  bb.AddAtom(a.name, a.GetStringProp('type'), a.GetCharge(), a.GetMass())
67 
68  # add bonds: first all bonds from the entity, then force-field params
69  bl = eh.GetBondList()
70  for b in bl:
71  a1 = b.GetFirst()
72  a2 = b.GetSecond()
73  bond = mm.Interaction(mm.FuncType.HarmonicBond)
74  bond.SetNames([a1.name, a2.name])
75  bond.SetTypes([a1.GetStringProp('type'), a2.GetStringProp('type')])
76  bb.AddBond(bond)
77  added_bonds = []
78  for atype1, atype2, d0, k in ff_dict['BOND']:
79  for a1 in atype_dict[atype1]:
80  for a2 in atype_dict[atype2]:
81  # check connectivity and uniqueness of bond
82  if not mol.BondExists(a1, a2): continue
83  if [a1, a2] in added_bonds or [a2, a1] in added_bonds: continue
84  added_bonds.append([a1,a2])
85  params = [d0*dist_scale, k*bond_k_scale]
86  bond = _GetInteraction(mm.FuncType.HarmonicBond, [a1, a2], params)
87  bb.AddBond(bond, replace_existing=True)
88 
89  # add angles
90  added_angles = []
91  for atype1, atype2, atype3, a0, k in ff_dict['ANGL']:
92  # a2 is the central atom
93  for a2 in atype_dict[atype2]:
94  for a1 in atype_dict[atype1]:
95  if not mol.BondExists(a1, a2): continue
96  for a3 in atype_dict[atype3]:
97  # check connectivity and uniqueness of angle
98  if not mol.BondExists(a2, a3): continue
99  if a1 == a3: continue
100  if [a1, a2, a3] in added_angles or [a3, a2, a1] in added_angles:
101  continue
102  added_angles.append([a1, a2, a3])
103  angle = _GetInteraction(mm.FuncType.HarmonicAngle, [a1, a2, a3],
104  [a0*angle_scale, k*angle_k_scale*2])
105  bb.AddAngle(angle)
106 
107  # add dihedrals
108  for atype1, atype2, atype3, atype4, idiv, period, phase, k in ff_dict['DIHE']:
109  # there can be multiple ones for the same set of types!
110  added_dihedrals = []
111  for a1 in atype_dict[atype1]:
112  for a2 in atype_dict[atype2]:
113  if not mol.BondExists(a1, a2): continue
114  for a3 in atype_dict[atype3]:
115  if not mol.BondExists(a2, a3): continue
116  if a1 == a3: continue
117  for a4 in atype_dict[atype4]:
118  # check connectivity and uniqueness of dihedral (can be mirrored)
119  if not mol.BondExists(a3, a4): continue
120  if a2 == a4: continue
121  if [a1, a2, a3, a4] in added_dihedrals or \
122  [a4, a3, a2, a1] in added_dihedrals: continue
123  added_dihedrals.append([a1, a2, a3, a4])
124  dihe = _GetInteraction(mm.FuncType.PeriodicDihedral, [a1, a2, a3, a4],
125  [period, phase*angle_scale, k*angle_k_scale])
126  bb.AddDihedral(dihe)
127 
128  # add impropers
129  added_impropers = []
130  for atype1, atype2, atype3, atype4, period, phase, k in ff_dict['IMPR']:
131  # third atom is the central atom in amber force-field
132  for ac in atype_dict[atype3]:
133  for a1 in atype_dict[atype1]:
134  if not mol.BondExists(ac, a1): continue
135  for a2 in atype_dict[atype2]:
136  if not mol.BondExists(ac, a2): continue
137  if a1 == a2: continue
138  for a4 in atype_dict[atype4]:
139  # check connectivity and uniqueness of impr. (same central)
140  if not mol.BondExists(ac, a4): continue
141  if a2 == a4 or a1 == a4: continue
142  if [ac, a1, a2, a4] in added_impropers or \
143  [ac, a1, a4, a2] in added_impropers or \
144  [ac, a2, a1, a4] in added_impropers or \
145  [ac, a2, a4, a1] in added_impropers or \
146  [ac, a4, a1, a2] in added_impropers or \
147  [ac, a4, a2, a1] in added_impropers: continue
148  added_impropers.append([ac, a1, a2, a4])
149  impr = _GetInteraction(mm.FuncType.PeriodicImproper, [a1, a2, ac, a4],
150  [period, phase*angle_scale, k*angle_k_scale])
151  bb.AddImproper(impr)
152  return bb
153 
154 def _ParseModifiedPDB(filename):
155  """Read mpdb file produced by antechamber and return tuple of:
156  - EntityHandle with connectivity, atom types (property 'type') and charges
157  - Residue name as extracted from the mpdb file
158  A RuntimeError is raised if the file can contains multiple residues.
159  """
160  eh = mol.CreateEntity()
161  rname = ''
162  edi = eh.EditXCS(mol.BUFFERED_EDIT)
163  chain = edi.InsertChain('A')
164  bond_list = []
165  # get all atoms and bonds from file
166  with open(filename, 'r') as in_file:
167  for line in in_file:
168  # atom or connectivity
169  # -> fixed column format assumed for both
170  if line.startswith('ATOM'):
171  aname = line[12:17].strip()
172  # extract res. name and ensure uniqueness
173  if not rname:
174  rname = line[17:20].strip()
175  r = edi.AppendResidue(chain, rname)
176  elif rname != line[17:20].strip():
177  raise RuntimeError("More than one residue in file " + filename +\
178  ". Cannot parse!")
179  # extract and store type and charge
180  charge = float(line[54:66])
181  atype = line[78:80].strip()
182  a = edi.InsertAtom(r, aname, geom.Vec3())
183  a.SetStringProp('type', atype)
184  a.SetCharge(charge)
185  elif line.startswith('CONECT'):
186  ai1 = int(line[6:11])
187  # max. 4 bond partners...
188  for i in range(4):
189  try:
190  j = 11 + 5*i
191  ai2 = int(line[j:j+5])
192  # only unique bonds added to list
193  s = set([ai1, ai2])
194  if not s in bond_list: bond_list.append(s)
195  except:
196  # exception thrown for empty strings or non-integers
197  # -> skip
198  continue
199  # set all bonds in entity
200  for indices in bond_list:
201  indices = list(indices)
202  a1 = r.atoms[indices[0]-1]
203  a2 = r.atoms[indices[1]-1]
204  edi.Connect(a1, a2)
205  # finalize
206  edi.UpdateICS()
207  return eh, rname
208 
209 def _ParseAmberForceField(filename):
210  """Read frcmod file produced by parmchk2 and return dictionary with all
211  entries for masses, bonds, angles, dihedrals, impropers and non-bonded (LJ)
212  interactions. Stored as key/list-of-value pairs:
213  - 'MASS': [atype, mass]
214  - 'BOND': [atype1, atype2, d0, k]
215  - 'ANGL': [atype1, atype2, atype3, a0, k]
216  - 'DIHE': [atype1, atype2, atype3, atype4, idiv, period, phase, k/idiv]
217  - 'IMPR': [atype1, atype2, atype3, atype4, period, phase, k]
218  - 'NONB': [Rvdw, epsilon]
219  """
220  keywords = ['MASS', 'BOND', 'ANGL', 'DIHE', 'IMPR', 'NONB']
221  with open(filename, 'r') as in_file:
222  ff_dict = {}
223  for line in in_file:
224  # look for keywords
225  keyword = line[:4]
226  if not keyword in keywords: continue
227  # loop until empty line found
228  ff_dict[keyword] = []
229  line = next(in_file)
230  while len(line.strip()) > 0:
231  # check for warnings
232  if 'ATTN' in line:
233  ost.LogWarning('The following line in ' + filename + ' (' + keyword +\
234  ' section) needs revision:\n' + line.strip())
235  # fixed column format -> extract entries dep. on current keyword
236  if keyword == 'MASS':
237  atype = line[0:2].strip()
238  s = line[2:].split()
239  mass = float(s[0])
240  ff_dict[keyword].append([atype, mass])
241  elif keyword == 'BOND':
242  atype1 = line[:2].strip()
243  atype2 = line[3:5].strip()
244  s = line[5:].split()
245  k = float(s[0])
246  d0 = float(s[1])
247  ff_dict[keyword].append([atype1, atype2, d0, k])
248  elif keyword == 'ANGL':
249  atype1 = line[:2].strip()
250  atype2 = line[3:5].strip()
251  atype3 = line[6:8].strip()
252  s = line[8:].split()
253  k = float(s[0])
254  a0 = float(s[1])
255  ff_dict[keyword].append([atype1, atype2, atype3, a0, k])
256  elif keyword == 'DIHE':
257  atype1 = line[:2].strip()
258  atype2 = line[3:5].strip()
259  atype3 = line[6:8].strip()
260  atype4 = line[9:11].strip()
261  s = line[11:].split()
262  idiv = float(s[0])
263  k = float(s[1])
264  phase = float(s[2])
265  # negative periods = there is more than one term for that dihedral
266  # -> no need to do anything special about that here...
267  period = abs(float(s[3]))
268  ff_dict[keyword].append([atype1, atype2, atype3, atype4, idiv,
269  period, phase, k/float(idiv)])
270  elif keyword == 'IMPR':
271  atype1 = line[:2].strip()
272  atype2 = line[3:5].strip()
273  atype3 = line[6:8].strip()
274  atype4 = line[9:11].strip()
275  s = line[11:].split()
276  k = float(s[0])
277  phase = float(s[1])
278  period = float(s[2])
279  ff_dict[keyword].append([atype1, atype2, atype3, atype4, period,
280  phase, k])
281  elif keyword == 'NONB':
282  line = line.strip()
283  atype = line[0:2].strip()
284  s = line[2:].split()
285  Rvdw = float(s[0])
286  epsilon = float(s[1])
287  ff_dict[keyword].append([atype, Rvdw, epsilon])
288  # next...
289  line = next(in_file)
290  return ff_dict
291 
292 
293 def RunAntechamber(res_name, filename, format='ccif', amberhome=None,
294  base_out_dir=None):
295  """Run Antechamber to guess force field parameters for a given residue name.
296 
297  This requires an installation of AmberTools (tested with AmberTools15) with
298  binaries ``antechamber`` and ``parmchk2``.
299 
300  This has the same restrictions as Antechamber itself and we assume the input
301  to be uncharged. Note that Antechamber cannot deal with metal ions and other
302  non-organic elements.
303 
304  The results are stored in a separate folder named `res_name` within
305  `base_out_dir` (if given, otherwise the current working directory). The main
306  output files are ``frcmod`` and ``out.mpdb``. The former contains force field
307  parameters and masses. The latter maps atom names to atom types and defines
308  the partial charges. The same output could be obtained as follows:
309 
310  .. code-block:: console
311 
312  $ antechamber -i <FILENAME> -fi <FORMAT> -bk '<RES_NAME>' -o out.mol2 -fo mol2 -c bcc -pf yes
313  $ parmchk2 -i out.mol2 -f mol2 -o frcmod -a Y
314  $ antechamber -i out.mol2 -fi mol2 -o out.mpdb -fo mpdb -pf yes
315 
316  The force field parameters can be manually modified if needed. It can for
317  instance happen that some parameters cannot be identified. Those lines will
318  be marked with a comment "ATTN, need revision".
319 
320  :param res_name: Residue name for which we desire force field parameters.
321  :type res_name: :class:`str`
322  :param filename: Path to a file which contains the necessary information for
323  `res_name`. It must include all hydrogens.
324  :type filename: :class:`str`
325  :param format: Format of file given with `filename`. Common formats are 'ccif'
326  for PDB's component dictionary or 'pdb' for a PDB file
327  containing the desired residue with all hydrogens.
328  :type format: :class:`str`
329  :param amberhome: Base path of your AmberTools installation. If not None,
330  we look for ``antechamber`` and ``parmchk2`` within
331  ``AMBERHOME/bin`` additionally to the system's ``PATH``.
332  :type amberhome: :class:`str`
333  :param base_out_dir: Path to a base path, where the output will be stored.
334  If None, the current working directory is used.
335  :type base_out_dir: :class:`str`
336  """
337  # find antechamber binaries
338  if amberhome is None:
339  search_paths = []
340  else:
341  search_paths = [os.path.join(amberhome, 'bin')]
342  try:
343  antechamber = settings.Locate('antechamber', search_paths=search_paths)
344  parmchk2 = settings.Locate('parmchk2', search_paths=search_paths)
345  except settings.FileNotFound as ex:
346  ost.LogError("Failed to find Antechamber binaries. Make sure you have "
347  "AmberTools installed!")
348  raise ex
349 
350  # prepare path
351  cwd = os.getcwd()
352  if base_out_dir is None:
353  base_out_dir = cwd
354  out_dir = os.path.abspath(os.path.join(base_out_dir, res_name))
355  if not os.path.exists(out_dir):
356  # note: this creates intermediate folders too
357  try:
358  os.makedirs(out_dir)
359  except Exception as ex:
360  ost.LogError("Failed to create output directory " + out_dir + "!")
361  raise ex
362 
363  # execute it
364  os.chdir(out_dir)
365  try:
366  cmds = [antechamber + " -i " + filename + " -fi " + format + " -bk " \
367  + res_name + " -o out.mol2 -fo mol2 -c bcc -pf yes",
368  parmchk2 + " -i out.mol2 -f mol2 -o frcmod -a Y",
369  antechamber + " -i out.mol2 -fi mol2 -o out.mpdb -fo mpdb -pf yes"]
370  all_sout = "Generating force field parameters for " + res_name + "\n"
371  all_serr = ""
372  for cmd in cmds:
373  all_sout += "-"*70 + "\n" + "Stdout of: " + cmd + "\n" + "-"*70 + "\n"
374  all_serr += "-"*70 + "\n" + "Stderr of: " + cmd + "\n" + "-"*70 + "\n"
375  job = subprocess.Popen(cmd.split(" "), stdout=subprocess.PIPE,
376  stderr=subprocess.PIPE)
377  sout, serr = job.communicate()
378  all_sout += sout
379  all_serr += serr
380  if job.returncode != 0:
381  ost.LogError("Unsuccessful execution of " + cmd + ". Return code: "\
382  + str(job.returncode))
383  # write command line outputs
384  with open("console.stdout", "w") as txt_file:
385  txt_file.write(all_sout)
386  with open("console.stderr", "w") as txt_file:
387  txt_file.write(all_serr)
388  except Exception as ex:
389  ost.LogError("Failed to excecute antechamber binaries!")
390  raise ex
391 
392  # get back to original path
393  os.chdir(cwd)
394 
395  # check result
396  frcmod_filename = os.path.join(out_dir, 'frcmod')
397  mpdb_filename = os.path.join(out_dir, 'out.mpdb')
398  if not os.path.exists(frcmod_filename):
399  raise RuntimeError("Failed to generate frcmod file with Antechamber!")
400  if not os.path.exists(mpdb_filename):
401  raise RuntimeError("Failed to generate out.mpdb file with Antechamber!")
402 
403 def AddFromFiles(force_field, frcmod_filename, mpdb_filename):
404  """Add data from a frcmod and an mpdb file to a force field.
405 
406  This will add a new :class:`~ost.mol.mm.BuildingBlock` to `force_field` for
407  the residue defined in those files (residue name is extracted from the mpdb
408  file which can only contain a single residue). Charges for each atom are
409  extracted from the mpdb file. According to the frcmod file, an
410  :class:`~ost.mol.mm.Interaction` is added for each bond, angle, dihedral and
411  improper. Atom types with masses and non-bonded interactions are added to
412  `force_field` as needed.
413 
414  :param force_field: A force field object to which the new parameters are
415  added.
416  :type force_field: :class:`~ost.mol.mm.Forcefield`
417  :param frcmod_filename: Path to ``frcmod`` file as generated by ``parmchk2``.
418  :type frcmod_filename: :class:`str`
419  :param mpdb_filename: Path to mpdb file as generated by ``antechamber``.
420  :type mpdb_filename: :class:`str`
421  :return: The updated force field (same as `force_field`).
422  :rtype: :class:`~ost.mol.mm.Forcefield`
423  """
424  # check files
425  if not os.path.exists(frcmod_filename):
426  raise RuntimeError("Could not find frcmod file: " + frcmod_filename)
427  if not os.path.exists(mpdb_filename):
428  raise RuntimeError("Could not find mpdb file: " + mpdb_filename)
429  # read in files
430  try:
431  eh, res_name = _ParseModifiedPDB(mpdb_filename)
432  except Exception as ex:
433  ost.LogError("Failed to parse mpdb file: " + mpdb_filename)
434  raise ex
435  try:
436  ff_dict = _ParseAmberForceField(frcmod_filename)
437  except Exception as ex:
438  ost.LogError("Failed to parse frcmod file: " + frcmod_filename)
439  raise ex
440  ost.LogInfo("Adding force field for " + res_name)
441  # add atoms to force field
442  for aname, mass in ff_dict['MASS']:
443  force_field.AddMass(aname, mass)
444  # add LJs
445  lj_sigma_scale = 2./10./2**(1./6.) # Rvdw to sigma in nm
446  lj_epsilon_scale = 4.184 # kcal to kJ
447  for aname, Rvdw, epsilon in ff_dict['NONB']:
448  # fix 0,0 (from OpenMM's processAmberForceField.py)
449  if Rvdw == 0 or epsilon == 0:
450  Rvdw, epsilon = 1, 0
451  lj = mm.Interaction(mm.FuncType.LJ)
452  lj.SetTypes([aname])
453  lj.SetParam([Rvdw*lj_sigma_scale, epsilon*lj_epsilon_scale])
454  force_field.AddLJ(lj)
455  # add building block
456  bb = _MakeComponentBuildingBlock(eh, ff_dict)
457  force_field.AddBuildingBlock(res_name, bb)
458 
459  return force_field
460 
461 def AddFromPath(force_field, out_dir):
462  """Add data from a directory created with :meth:`Run` to a force field.
463  See :meth:`AddFromFiles` for details.
464 
465  :param force_field: A force field object to which the new parameters are
466  added.
467  :type force_field: :class:`~ost.mol.mm.Forcefield`
468  :param out_dir: Output directory as created with :meth:`Run`. Must contain
469  files ``frcmod`` and ``out.mpdb``.
470  :type out_dir: :class:`str`
471  :return: The updated force field (same as `force_field`).
472  :rtype: :class:`~ost.mol.mm.Forcefield`
473  """
474  frcmod_filename = os.path.join(out_dir, 'frcmod')
475  mpdb_filename = os.path.join(out_dir, 'out.mpdb')
476  return AddFromFiles(force_field, frcmod_filename, mpdb_filename)
477 
478 def AddIon(force_field, res_name, atom_name, atom_mass, atom_charge, lj_sigma,
479  lj_epsilon):
480  """Add a single atom as an ion to a force field.
481 
482  Since Antechamber cannot deal with ions, you can add simple ones easily with
483  this function. This adds a :class:`~ost.mol.mm.BuildingBlock` to `force_field`
484  for the given residue name containing a single atom. The atom will have a type
485  with the same name as the atom name and the given mass, charge and non-bonded
486  (LJ) interaction parameters.
487 
488  :param force_field: A force field object to which the ion is added.
489  :type force_field: :class:`~ost.mol.mm.Forcefield`
490  :param res_name: Residue name for the ion to be added.
491  :type res_name: :class:`str`
492  :param atom_name: Atom name which is also used as atom type name.
493  :type atom_name: :class:`str`
494  :param atom_mass: Mass of the atom.
495  :type atom_mass: :class:`float`
496  :param atom_charge: Charge of the atom.
497  :type atom_charge: :class:`float`
498  :param lj_sigma: The sigma parameter for the non-bonded LJ interaction.
499  :type lj_sigma: :class:`float` in nm
500  :param lj_epsilon: The sigma parameter for the non-bonded LJ interaction.
501  :type lj_epsilon: :class:`float` in kJ/mol
502  """
503  # add mass (atom_type = atom_name)
504  force_field.AddMass(atom_name, atom_mass)
505  # add building block
506  bb = mm.BuildingBlock()
507  bb.AddAtom(atom_name, atom_name, atom_charge, atom_mass)
508  force_field.AddBuildingBlock(res_name, bb)
509  # add dummy LJ
510  lj = mm.Interaction(mm.FuncType.LJ)
511  lj.SetTypes([atom_name])
512  lj.SetParam([lj_sigma, lj_epsilon])
513  force_field.AddLJ(lj)
514 
515 __all__ = ('RunAntechamber', 'AddFromFiles', 'AddFromPath', 'AddIon',)
Three dimensional vector class, using Real precision.
Definition: vec3.hh:48
def AddFromFiles(force_field, frcmod_filename, mpdb_filename)
Definition: antechamber.py:403
def AddIon(force_field, res_name, atom_name, atom_mass, atom_charge, lj_sigma, lj_epsilon)
Definition: antechamber.py:479
def RunAntechamber(res_name, filename, format='ccif', amberhome=None, base_out_dir=None)
Definition: antechamber.py:294
def AddFromPath(force_field, out_dir)
Definition: antechamber.py:461