OpenStructure
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
__init__.py
Go to the documentation of this file.
1 #------------------------------------------------------------------------------
2 # This file is part of the OpenStructure project <www.openstructure.org>
3 #
4 # Copyright (C) 2008-2011 by the OpenStructure authors
5 #
6 # This library is free software; you can redistribute it and/or modify it under
7 # the terms of the GNU Lesser General Public License as published by the Free
8 # Software Foundation; either version 3.0 of the License, or (at your option)
9 # any later version.
10 # This library is distributed in the hope that it will be useful, but WITHOUT
11 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 # FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
13 # details.
14 #
15 # You should have received a copy of the GNU Lesser General Public License
16 # along with this library; if not, write to the Free Software Foundation, Inc.,
17 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 #------------------------------------------------------------------------------
19 import os, tempfile, ftplib, httplib
20 
21 from _ost_io import *
22 from ost import mol, geom, conop
23 
24 profiles=None
25 
26 class IOProfiles:
27  def __init__(self):
28  self._dict={}
29 
30  def __getitem__(self, key):
31  return IOProfileRegistry.Instance().Get(key)
32 
33  def __setitem__(self, key, value):
34  if isinstance(value, str):
35  value=self[value]
36  IOProfileRegistry.Instance().Set(key, value)
37  self._dict[key]=value
38 
39  def __len__(self):
40  return len(self._dict)
41 
42  def __iter__(self):
43  return self._dict.__iter__()
44 
45 if not profiles:
46  profiles=IOProfiles()
47  profiles['STRICT']=IOProfile(dialect='PDB', fault_tolerant=False,
48  strict_hydrogens=False, quack_mode=False)
49  profiles['SLOPPY']=IOProfile(dialect='PDB', fault_tolerant=True,
50  strict_hydrogens=False, quack_mode=True)
51  profiles['CHARMM']=IOProfile(dialect='CHARMM', fault_tolerant=True,
52  strict_hydrogens=False, quack_mode=False)
53  profiles['DEFAULT']='STRICT'
54 
55 def _override(val1, val2):
56  if val2!=None:
57  return val2
58  else:
59  return val1
60 
61 def LoadPDB(filename, restrict_chains="", no_hetatms=None,
62  fault_tolerant=None, load_multi=False, quack_mode=None,
63  join_spread_atom_records=None, calpha_only=None,
64  profile='DEFAULT', remote=False, dialect=None,
65  strict_hydrogens=None, seqres=False, bond_feasibility_check=None):
66  """
67  Load PDB file from disk and return one or more entities. Several options
68  allow to customize the exact behaviour of the PDB import. For more information
69  on these options, see :doc:`profile`.
70 
71  Residues are flagged as ligand if they are mentioned in a HET record.
72 
73  :param restrict_chains: If not an empty string, only chains listed in the
74  string will be imported.
75 
76  :param fault_tolerant: Enable/disable fault-tolerant import. If set, overrides
77  the value of :attr:`IOProfile.fault_tolerant`.
78 
79  :param no_hetatms: If set to True, HETATM records will be ignored. Overrides
80  the value of :attr:`IOProfile.no_hetatms`
81 
82  :param load_multi: If set to True, a list of entities will be returned instead
83  of only the first. This is useful when dealing with multi-PDB files.
84 
85  :param join_spread_atom_records: If set, overrides the value of
86  :attr:`IOProfile.join_spread_atom_records`.
87 
88  :param remote: If set to True, the method tries to load the pdb from the
89  remote pdb repository www.pdb.org. The filename is then interpreted as the
90  pdb id.
91 
92  :rtype: :class:`~ost.mol.EntityHandle` or a list thereof if `load_multi` is
93  True.
94 
95  :param dialect: Specifies the particular dialect to use. If set, overrides
96  the value of :attr:`IOProfile.dialect`
97 
98  :param seqres: Whether to read SEQRES records. If set to True, the loaded
99  entity and seqres entry will be returned as a tuple.
100 
101  :type dialect: :class:`str`
102 
103  :param strict_hydrogens: If set, overrides the value of
104  :attr:`IOProfile.strict_hydrogens`.
105 
106  :raises: :exc:`~ost.io.IOException` if the import fails due to an erroneous or
107  inexistent file
108  """
109  def _override(val1, val2):
110  if val2!=None:
111  return val2
112  else:
113  return val1
114  if isinstance(profile, str):
115  prof=profiles[profile].Copy()
116  elif isinstance(profile, IOProfile):
117  prof=profile.Copy()
118  else:
119  raise TypeError('profile must be of type string or IOProfile, '+\
120  'instead of %s'%type(profile))
121  if dialect not in (None, 'PDB', 'CHARMM',):
122  raise ValueError('dialect must be PDB or CHARMM')
123  prof.calpha_only=_override(prof.calpha_only, calpha_only)
124  prof.no_hetatms=_override(prof.no_hetatms, no_hetatms)
125  prof.dialect=_override(prof.dialect, dialect)
126  prof.quack_mode=_override(prof.quack_mode, quack_mode)
127  prof.strict_hydrogens=_override(prof.strict_hydrogens, strict_hydrogens)
128  prof.fault_tolerant=_override(prof.fault_tolerant, fault_tolerant)
129  prof.bond_feasibility_check=_override(prof.bond_feasibility_check, bond_feasibility_check)
130  prof.join_spread_atom_records=_override(prof.join_spread_atom_records,
131  join_spread_atom_records)
132 
133  tmp_file = None # avoid getting out of scope
134  if remote:
135  from ost.io.remote import RemoteGet
136  tmp_file =RemoteGet(filename)
137  filename = tmp_file.name
138 
139  conop_inst=conop.Conopology.Instance()
140  builder=conop_inst.GetBuilder("DEFAULT")
141  if prof.dialect=='PDB':
142  builder.dialect=conop.PDB_DIALECT
143  elif prof.dialect=='CHARMM':
144  builder.dialect=conop.CHARMM_DIALECT
145  builder.strict_hydrogens=prof.strict_hydrogens
146  builder.bond_feasibility_check=prof.bond_feasibility_check
147  reader=PDBReader(filename, prof)
148  reader.read_seqres=seqres
149  try:
150  if load_multi:
151  ent_list=[]
152  while reader.HasNext():
153  ent=mol.CreateEntity()
154  reader.Import(ent, restrict_chains)
155  conop_inst.ConnectAll(builder, ent, 0)
156  ent_list.append(ent)
157  if len(ent_list)==0:
158  raise IOError("File '%s' doesn't contain any entities" % filename)
159  return ent_list
160  else:
161  ent=mol.CreateEntity()
162  if reader.HasNext():
163  reader.Import(ent, restrict_chains)
164  conop_inst.ConnectAll(builder, ent, 0)
165  else:
166  raise IOError("File '%s' doesn't contain any entities" % filename)
167  if seqres:
168  return ent, reader.seqres
169  return ent
170  except:
171  raise
172 
173 def SavePDB(models, filename, dialect=None, pqr=False, profile='DEFAULT'):
174  """
175  Save entity or list of entities to disk. If a list of entities is supplied
176  the PDB file will be saved as a multi PDB file. Each of the entities is
177  wrapped into a MODEL/ENDMDL pair.
178 
179  If the atom number exceeds 99999, '*****' is used.
180 
181  :param models: The entity or list of entities (handles or views) to be saved
182  :param filename: The filename
183  :type filename: string
184  """
185  if not getattr(models, '__len__', None):
186  models=[models]
187  if isinstance(profile, str):
188  profile=profiles[profile].Copy()
189  elif isinstance(profile, IOProfile):
190  profile.Copy()
191  else:
192  raise TypeError('profile must be of type string or IOProfile, '+\
193  'instead of %s'%type(profile))
194  profile.dialect=_override(profile.dialect, dialect)
195  writer=PDBWriter(filename, profile)
196  writer.SetIsPQR(pqr)
197  if len(models)>1:
198  writer.write_multi_model=True
199  for model in models:
200  writer.Write(model)
201 
202 try:
203  from ost import img
204  LoadMap = LoadImage
205  SaveMap = SaveImage
206 except ImportError:
207  pass
208 
209  ## loads several images and puts them in an ImageList
210  # \sa \ref fft_li.py "View Fourier Transform Example"
211 def LoadImageList (files):
212  image_list=img.ImageList()
213  for file in files:
214  image=LoadImage(file)
215  image_list.append(image)
216  return image_list
217 
218 LoadMapList=LoadImageList
219 
220 def LoadCHARMMTraj(crd, dcd_file=None, profile='CHARMM',
221  lazy_load=False, stride=1,
222  dialect=None, detect_swap=True,swap_bytes=False):
223  """
224  Load CHARMM trajectory file.
225 
226  :param crd: EntityHandle or filename of the (PDB) file containing the
227  structure. The structure must have the same number of atoms as the
228  trajectory
229  :param dcd_file: The filename of the DCD file. If not set, and crd is a
230  string, the filename is set to the <crd>.dcd
231  :param layz_load: Whether the trajectory should be loaded on demand. Instead
232  of loading the complete trajectory into memory, the trajectory frames are
233  loaded from disk when requested.
234  :param stride: The spacing of the frames to load. When set to 2, for example,
235  every second frame is loaded from the trajectory. By default, every frame
236  is loaded.
237  :param dialect: The dialect for the PDB file to use. See :func:`LoadPDB`. If
238  set, overrides the value of the profile
239  :param profile: The IO profile to use for loading the PDB file. See
240  :doc:`profile`.
241  :param detect_swap: if True (the default), then automatic detection of endianess
242  is attempted, otherwise the swap_bytes parameter is used
243  :param swap_bytes: is detect_swap is False, this flag determines whether bytes
244  are swapped upon loading or not
245  """
246  if not isinstance(crd, mol.EntityHandle):
247  if dcd_file==None:
248  dcd_file='%s.dcd' % os.path.splitext(crd)[0]
249  crd=LoadPDB(crd, profile=profile, dialect=dialect)
250 
251  else:
252  if not dcd_file:
253  raise ValueError("No DCD filename given")
254  return LoadCHARMMTraj_(crd, dcd_file, stride, lazy_load, detect_swap, swap_bytes)
255 
256 def LoadMMCIF(filename, restrict_chains="", fault_tolerant=None, calpha_only=None, profile='DEFAULT', remote=False, strict_hydrogens=None, seqres=False, info=False):
257  """
258  Load MMCIF file from disk and return one or more entities. Several options
259  allow to customize the exact behaviour of the MMCIF import. For more
260  information on these options, see :doc:`profile`.
261 
262  Residues are flagged as ligand if they are mentioned in a HET record.
263 
264  :param restrict_chains: If not an empty string, only chains listed in the
265  string will be imported.
266 
267  :param fault_tolerant: Enable/disable fault-tolerant import. If set, overrides
268  the value of :attr:`IOProfile.fault_tolerant`.
269 
270  :param remote: If set to True, the method tries to load the pdb from the
271  remote pdb repository www.pdb.org. The filename is then interpreted as the
272  pdb id.
273 
274  :rtype: :class:`~ost.mol.EntityHandle`.
275 
276  :param strict_hydrogens: If set, overrides the value of
277  :attr:`IOProfile.strict_hydrogens`.
278 
279  :param seqres: Whether to read SEQRES records. If set to True, the loaded
280  entity and seqres entry will be returned as second item.
281 
282  :param info: Whether to return an info container with the other output.
283  Returns a :class:`MMCifInfo` object as last item.
284 
285  :raises: :exc:`~ost.io.IOException` if the import fails due to an erroneous
286  or inexistent file
287  """
288  def _override(val1, val2):
289  if val2!=None:
290  return val2
291  else:
292  return val1
293  if isinstance(profile, str):
294  prof = profiles[profile].Copy()
295  else:
296  prof = profile.Copy()
297 
298  prof.calpha_only=_override(prof.calpha_only, calpha_only)
299  prof.strict_hydrogens=_override(prof.strict_hydrogens, strict_hydrogens)
300  prof.fault_tolerant=_override(prof.fault_tolerant, fault_tolerant)
301 
302  if remote:
303  from ost.io.remote import RemoteGet
304  tmp_file =RemoteGet(filename, from_repo='cif')
305  filename = tmp_file.name
306 
307  conop_inst = conop.Conopology.Instance()
308  builder = conop_inst.GetBuilder("DEFAULT")
309 
310  builder.strict_hydrogens = prof.strict_hydrogens
311 
312  try:
313  ent = mol.CreateEntity()
314  reader = MMCifReader(filename, ent, prof)
315  reader.read_seqres = seqres
316  #if reader.HasNext():
317  reader.Parse()
318  conop_inst.ConnectAll(builder, ent, 0)
319  #else:
320  # raise IOError("File doesn't contain any entities")
321  if seqres and info:
322  return ent, reader.seqres, reader.info
323  if seqres:
324  return ent, reader.seqres
325  if info:
326  return ent, reader.info
327  return ent
328  except:
329  raise
330 
331 # this function uses a dirty trick: should be a member of MMCifInfoBioUnit
332 # which is totally C++, but we want the method in Python... so we define it
333 # here (__init__) and add it as a member to the class. With this, the first
334 # arguement is the usual 'self'.
335 # documentation for this function was moved to mmcif.rst,
336 # MMCifInfoBioUnit.PDBize, since this function is not included in SPHINX.
337 def _PDBize(biounit, asu, seqres=None, min_polymer_size=10,
338  transformation=False):
339  def _CopyAtoms(src_res, dst_res, edi, trans=geom.Mat4()):
340  atom_pos_wrong = False
341  for atom in src_res.atoms:
342  tmp_pos = geom.Vec4(atom.pos)
343  new_atom=edi.InsertAtom(dst_res, atom.name, geom.Vec3(trans*tmp_pos),
344  element=atom.element,
345  occupancy=atom.occupancy,
346  b_factor=atom.b_factor,
347  is_hetatm=atom.is_hetatom)
348  for p in range(0,3):
349  if new_atom.pos[p] <= -1000:
350  atom_pos_wrong = True
351  elif new_atom.pos[p] >= 10000:
352  atom_pos_wrong = True
353  return atom_pos_wrong
354 
355  chain_names='ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz'
356  cur_chain_name = 0
357  water_chain = mol.ChainHandle()
358  ligand_chain = mol.ChainHandle()
359  a_pos_wrong = False
360  pdb_bu = mol.CreateEntity()
361  edi = pdb_bu.EditXCS(mol.BUFFERED_EDIT)
362  chains = biounit.GetChainList()
363  c_intvls = biounit.GetChainIntervalList()
364  o_intvls = biounit.GetOperationsIntervalList()
365  # create list of operations
366  # for cartesian products, operations are stored in a list, multiplied with
367  # the next list of operations and re-stored... until all lists of operations
368  # are multiplied in an all-against-all manner.
369  operations = biounit.GetOperations()
370  for i in range(0,len(c_intvls)):
371  trans_matrices = list()
372  l_operations = operations[o_intvls[i][0]:o_intvls[i][1]]
373  if len(l_operations) > 0:
374  for op in l_operations[0]:
375  rot = geom.Mat4()
376  rot.PasteRotation(op.rotation)
377  trans = geom.Mat4()
378  trans.PasteTranslation(op.translation)
379  tr = geom.Mat4()
380  tr = trans * rot
381  trans_matrices.append(tr)
382  for op_n in range(1, len(l_operations)):
383  tmp_ops = list()
384  for o in l_operations[op_n]:
385  rot = geom.Mat4()
386  rot.PasteRotation(o.rotation)
387  trans = geom.Mat4()
388  trans.PasteTranslation(o.translation)
389  tr = geom.Mat4()
390  tr = trans * rot
391  for t_o in trans_matrices:
392  tp = t_o * tr
393  tmp_ops.append(tp)
394  trans_matrices = tmp_ops
395  # select chains into a view as basis for each transformation
396  assu = asu.Select('cname='+','.join(chains[c_intvls[i][0]:c_intvls[i][1]]))
397  # use each transformation on the view, store as entity and transform, PDBize
398  # the result while adding everything to one large entity
399  for tr in trans_matrices:
400  # do a PDBize, add each new entity to the end product
401  for chain in assu.chains:
402  residue_count = len(chain.residues)
403  if seqres:
404  seqres_chain = seqres.FindSequence(chain.name)
405  if seqres_chain.IsValid():
406  residue_count = len(seqres_chain)
407  if chain.is_polymer and residue_count >= min_polymer_size:
408  if len(chain_names) == cur_chain_name:
409  raise RuntimeError('Running out of chain names')
410  new_chain = edi.InsertChain(chain_names[cur_chain_name])
411  cur_chain_name += 1
412  edi.SetChainDescription(new_chain, chain.description)
413  edi.SetChainType(new_chain, chain.type)
414  new_chain.SetStringProp('original_name', chain.name)
415  if chain.HasProp("pdb_auth_chain_name"):
416  new_chain.SetStringProp("pdb_auth_chain_name",
417  chain.GetStringProp("pdb_auth_chain_name"))
418  for res in chain.residues:
419  new_res = edi.AppendResidue(new_chain, res.name, res.number)
420  a_b = _CopyAtoms(res, new_res, edi, tr)
421  if not a_pos_wrong:
422  a_pos_wrong = a_b
423  elif chain.type == mol.CHAINTYPE_WATER:
424  if not water_chain.IsValid():
425  # water gets '-' as name
426  water_chain = edi.InsertChain('-')
427  edi.SetChainDescription(water_chain, chain.description)
428  edi.SetChainType(water_chain, chain.type)
429  for res in chain.residues:
430  new_res = edi.AppendResidue(water_chain, res.name)
431  new_res.SetStringProp('type', mol.StringFromChainType(chain.type))
432  new_res.SetStringProp('description', chain.description)
433  a_b = _CopyAtoms(res, new_res, edi, tr)
434  if not a_pos_wrong:
435  a_pos_wrong = a_b
436  else:
437  if not ligand_chain.IsValid():
438  # all ligands, put in one chain, are named '_'
439  ligand_chain = edi.InsertChain('_')
440  last_rnum = 0
441  else:
442  last_rnum = ligand_chain.residues[-1].number.num
443  residues=chain.residues
444  ins_code='\0'
445  if len(residues)>1:
446  ins_code='A'
447  for res in chain.residues:
448  new_res = edi.AppendResidue(ligand_chain, res.name,
449  mol.ResNum(last_rnum+1, ins_code))
450  new_res.SetStringProp('description', chain.description)
451  new_res.SetStringProp('type', mol.StringFromChainType(chain.type))
452  new_res.SetStringProp("original_name", chain.name)
453  if chain.HasProp("pdb_auth_chain_name"):
454  new_res.SetStringProp("pdb_auth_chain_name",
455  chain.GetStringProp("pdb_auth_chain_name"))
456  ins_code = chr(ord(ins_code)+1)
457  a_b = _CopyAtoms(res, new_res, edi, tr)
458  if not a_pos_wrong:
459  a_pos_wrong = a_b
460  move_to_origin = None
461  if a_pos_wrong:
462  start = pdb_bu.bounds.min
463  move_to_origin = geom.Mat4(1,0,0,(-999 - start[0]),
464  0,1,0,(-999 - start[1]),
465  0,0,1,(-999 - start[2]),
466  0,0,0,1)
467  edi = pdb_bu.EditXCS(mol.UNBUFFERED_EDIT)
468  edi.ApplyTransform(move_to_origin)
469  conop.ConnectAll(pdb_bu)
470  if transformation:
471  return pdb_bu, move_to_origin
472  return pdb_bu
473 
474 MMCifInfoBioUnit.PDBize = _PDBize
475 
476 ## \example fft_li.py
477 #
478 # This scripts loads one or more images and shows their Fourier Transforms on
479 # the screen. A viewer is opened for each loaded image. The Fourier Transform
480 # honors the origin of the reference system, which is assumed to be at the
481 # center of the image.
482 #
483 # Usage:
484 #
485 # \code giplt view_ft.py <image1> [<image2> <image3> .... ] \endcode
486 #
487 # <BR>
488 # <BR>