OpenStructure
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
__init__.py
Go to the documentation of this file.
1 #------------------------------------------------------------------------------
2 # This file is part of the OpenStructure project <www.openstructure.org>
3 #
4 # Copyright (C) 2008-2011 by the OpenStructure authors
5 #
6 # This library is free software; you can redistribute it and/or modify it under
7 # the terms of the GNU Lesser General Public License as published by the Free
8 # Software Foundation; either version 3.0 of the License, or (at your option)
9 # any later version.
10 # This library is distributed in the hope that it will be useful, but WITHOUT
11 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 # FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
13 # details.
14 #
15 # You should have received a copy of the GNU Lesser General Public License
16 # along with this library; if not, write to the Free Software Foundation, Inc.,
17 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 #------------------------------------------------------------------------------
19 import os, tempfile, ftplib, httplib
20 
21 from _ost_io import *
22 from ost import mol, geom, conop
23 
24 profiles=None
25 
26 class IOProfiles:
27  def __init__(self):
28  self._dict={}
29 
30  def __getitem__(self, key):
31  return IOProfileRegistry.Instance().Get(key)
32 
33  def __setitem__(self, key, value):
34  if isinstance(value, str):
35  value=self[value]
36  IOProfileRegistry.Instance().Set(key, value)
37  self._dict[key]=value
38 
39  def __len__(self):
40  return len(self._dict)
41 
42  def __iter__(self):
43  return self._dict.__iter__()
44 
45 if not profiles:
46  profiles=IOProfiles()
47  profiles['STRICT']=IOProfile(dialect='PDB', fault_tolerant=False,
48  strict_hydrogens=False, quack_mode=False)
49  profiles['SLOPPY']=IOProfile(dialect='PDB', fault_tolerant=True,
50  strict_hydrogens=False, quack_mode=True)
51  profiles['CHARMM']=IOProfile(dialect='CHARMM', fault_tolerant=True,
52  strict_hydrogens=False, quack_mode=False)
53  profiles['DEFAULT']='STRICT'
54 
55 def _override(val1, val2):
56  if val2!=None:
57  return val2
58  else:
59  return val1
60 
61 def __GetModelFromPDB(model_id, output_dir, file_pattern='pdb%s.ent.gz'):
62  file_name = file_pattern % model_id
63  file_path = os.path.join(output_dir,file_name)
64  try:
65  server="ftp.wwpdb.org"
66  ftp=ftplib.FTP(server,"anonymous","user@")
67  ftp.cwd("pub/pdb/data/structures/all/pdb")
68  ftp_retrfile=open(file_path,"wb")
69  ftp.retrbinary("RETR "+file_name,ftp_retrfile.write)
70  ftp_retrfile.close()
71  except:
72  conn=httplib.HTTPConnection('www.pdb.org')
73  conn.request('GET', '/pdb/files/%s.pdb.gz' % model_id )
74  response=conn.getresponse()
75  if response.status==200:
76  data=response.read()
77  f=open(os.path.join(output_dir, file_pattern % model_id), 'w+')
78  f.write(data)
79  f.close()
80  else:
81  conn.close()
82  return False
83  return os.path.getsize(file_path) > 0
84 
85 def LoadPDB(filename, restrict_chains="", no_hetatms=None,
86  fault_tolerant=None, load_multi=False, quack_mode=None,
87  join_spread_atom_records=None, calpha_only=None,
88  profile='DEFAULT', remote=False, dialect=None,
89  strict_hydrogens=None, seqres=False, bond_feasibility_check=None):
90  """
91  Load PDB file from disk and return one or more entities. Several options
92  allow to customize the exact behaviour of the PDB import. For more information
93  on these options, see :doc:`profile`.
94 
95  Residues are flagged as ligand if they are mentioned in a HET record.
96 
97  :param restrict_chains: If not an empty string, only chains listed in the
98  string will be imported.
99 
100  :param fault_tolerant: Enable/disable fault-tolerant import. If set, overrides
101  the value of :attr:`IOProfile.fault_tolerant`.
102 
103  :param no_hetatms: If set to True, HETATM records will be ignored. Overrides
104  the value of :attr:`IOProfile.no_hetatms`
105 
106  :param load_multi: If set to True, a list of entities will be returned instead
107  of only the first. This is useful when dealing with multi-PDB files.
108 
109  :param join_spread_atom_records: If set, overrides the value of
110  :attr:`IOProfile.join_spread_atom_records`.
111 
112  :param remote: If set to True, the method tries to load the pdb from the
113  remote pdb repository www.pdb.org. The filename is then interpreted as the
114  pdb id.
115 
116  :rtype: :class:`~ost.mol.EntityHandle` or a list thereof if `load_multi` is
117  True.
118 
119  :param dialect: Specifies the particular dialect to use. If set, overrides
120  the value of :attr:`IOProfile.dialect`
121 
122  :param seqres: Whether to read SEQRES records. If set to True, the loaded
123  entity and seqres entry will be returned as a tuple.
124 
125  :type dialect: :class:`str`
126 
127  :param strict_hydrogens: If set, overrides the value of
128  :attr:`IOProfile.strict_hydrogens`.
129 
130  :raises: :exc:`~ost.io.IOException` if the import fails due to an erroneous or
131  inexistent file
132  """
133  def _override(val1, val2):
134  if val2!=None:
135  return val2
136  else:
137  return val1
138  if isinstance(profile, str):
139  prof=profiles[profile].Copy()
140  elif isinstance(profile, IOProfile):
141  prof=profile.Copy()
142  else:
143  raise TypeError('profile must be of type string or IOProfile, '+\
144  'instead of %s'%type(profile))
145  if dialect not in (None, 'PDB', 'CHARMM',):
146  raise ValueError('dialect must be PDB or CHARMM')
147  prof.calpha_only=_override(prof.calpha_only, calpha_only)
148  prof.no_hetatms=_override(prof.no_hetatms, no_hetatms)
149  prof.dialect=_override(prof.dialect, dialect)
150  prof.quack_mode=_override(prof.quack_mode, quack_mode)
151  prof.strict_hydrogens=_override(prof.strict_hydrogens, strict_hydrogens)
152  prof.fault_tolerant=_override(prof.fault_tolerant, fault_tolerant)
153  prof.bond_feasibility_check=_override(prof.bond_feasibility_check, bond_feasibility_check)
154  prof.join_spread_atom_records=_override(prof.join_spread_atom_records,
155  join_spread_atom_records)
156 
157  if remote:
158  output_dir = tempfile.gettempdir()
159  if __GetModelFromPDB(filename, output_dir):
160  filename = os.path.join(output_dir, 'pdb%s.ent.gz' % filename)
161  else:
162  raise IOError('Can not load PDB %s from www.pdb.org'%filename)
163 
164  conop_inst=conop.Conopology.Instance()
165  builder=conop_inst.GetBuilder("DEFAULT")
166  if prof.dialect=='PDB':
167  builder.dialect=conop.PDB_DIALECT
168  elif prof.dialect=='CHARMM':
169  builder.dialect=conop.CHARMM_DIALECT
170  builder.strict_hydrogens=prof.strict_hydrogens
171  builder.bond_feasibility_check=prof.bond_feasibility_check
172  reader=PDBReader(filename, prof)
173  reader.read_seqres=seqres
174  try:
175  if load_multi:
176  ent_list=[]
177  while reader.HasNext():
178  ent=mol.CreateEntity()
179  reader.Import(ent, restrict_chains)
180  conop_inst.ConnectAll(builder, ent, 0)
181  ent_list.append(ent)
182  if len(ent_list)==0:
183  raise IOError("File '%s' doesn't contain any entities" % filename)
184  return ent_list
185  else:
186  ent=mol.CreateEntity()
187  if reader.HasNext():
188  reader.Import(ent, restrict_chains)
189  conop_inst.ConnectAll(builder, ent, 0)
190  else:
191  raise IOError("File '%s' doesn't contain any entities" % filename)
192  if seqres:
193  return ent, reader.seqres
194  return ent
195  except:
196  raise
197 
198 def SavePDB(models, filename, dialect=None, pqr=False, profile='DEFAULT'):
199  """
200  Save entity or list of entities to disk. If a list of entities is supplied
201  the PDB file will be saved as a multi PDB file. Each of the entities is
202  wrapped into a MODEL/ENDMDL pair.
203 
204  If the atom number exceeds 99999, '*****' is used.
205 
206  :param models: The entity or list of entities (handles or views) to be saved
207  :param filename: The filename
208  :type filename: string
209  """
210  if not getattr(models, '__len__', None):
211  models=[models]
212  if isinstance(profile, str):
213  profile=profiles[profile].Copy()
214  elif isinstance(profile, IOProfile):
215  profile.Copy()
216  else:
217  raise TypeError('profile must be of type string or IOProfile, '+\
218  'instead of %s'%type(profile))
219  profile.dialect=_override(profile.dialect, dialect)
220  writer=PDBWriter(filename, profile)
221  writer.SetIsPQR(pqr)
222  if len(models)>1:
223  writer.write_multi_model=True
224  for model in models:
225  writer.Write(model)
226 
227 try:
228  from ost import img
229  LoadMap = LoadImage
230  SaveMap = SaveImage
231 except ImportError:
232  pass
233 
234  ## loads several images and puts them in an ImageList
235  # \sa \ref fft_li.py "View Fourier Transform Example"
236 def LoadImageList (files):
237  image_list=img.ImageList()
238  for file in files:
239  image=LoadImage(file)
240  image_list.append(image)
241  return image_list
242 
243 LoadMapList=LoadImageList
244 
245 def LoadCHARMMTraj(crd, dcd_file=None, profile='CHARMM',
246  lazy_load=False, stride=1,
247  dialect=None, detect_swap=True,swap_bytes=False):
248  """
249  Load CHARMM trajectory file.
250 
251  :param crd: EntityHandle or filename of the (PDB) file containing the
252  structure. The structure must have the same number of atoms as the
253  trajectory
254  :param dcd_file: The filename of the DCD file. If not set, and crd is a
255  string, the filename is set to the <crd>.dcd
256  :param layz_load: Whether the trajectory should be loaded on demand. Instead
257  of loading the complete trajectory into memory, the trajectory frames are
258  loaded from disk when requested.
259  :param stride: The spacing of the frames to load. When set to 2, for example,
260  every second frame is loaded from the trajectory. By default, every frame
261  is loaded.
262  :param dialect: The dialect for the PDB file to use. See :func:`LoadPDB`. If
263  set, overrides the value of the profile
264  :param profile: The IO profile to use for loading the PDB file. See
265  :doc:`profile`.
266  :param detect_swap: if True (the default), then automatic detection of endianess
267  is attempted, otherwise the swap_bytes parameter is used
268  :param swap_bytes: is detect_swap is False, this flag determines whether bytes
269  are swapped upon loading or not
270  """
271  if not isinstance(crd, mol.EntityHandle):
272  if dcd_file==None:
273  dcd_file='%s.dcd' % os.path.splitext(crd)[0]
274  crd=LoadPDB(crd, profile=profile, dialect=dialect)
275 
276  else:
277  if not dcd_file:
278  raise ValueError("No DCD filename given")
279  return LoadCHARMMTraj_(crd, dcd_file, stride, lazy_load, detect_swap, swap_bytes)
280 
281 def LoadMMCIF(filename, restrict_chains="", fault_tolerant=None, calpha_only=None, profile='DEFAULT', remote=False, strict_hydrogens=None, seqres=False, info=False):
282  """
283  Load MMCIF file from disk and return one or more entities. Several options
284  allow to customize the exact behaviour of the MMCIF import. For more
285  information on these options, see :doc:`profile`.
286 
287  Residues are flagged as ligand if they are mentioned in a HET record.
288 
289  :param restrict_chains: If not an empty string, only chains listed in the
290  string will be imported.
291 
292  :param fault_tolerant: Enable/disable fault-tolerant import. If set, overrides
293  the value of :attr:`IOProfile.fault_tolerant`.
294 
295  :param remote: If set to True, the method tries to load the pdb from the
296  remote pdb repository www.pdb.org. The filename is then interpreted as the
297  pdb id.
298 
299  :rtype: :class:`~ost.mol.EntityHandle`.
300 
301  :param strict_hydrogens: If set, overrides the value of
302  :attr:`IOProfile.strict_hydrogens`.
303 
304  :param seqres: Whether to read SEQRES records. If set to True, the loaded
305  entity and seqres entry will be returned as second item.
306 
307  :param info: Whether to return an info container with the other output.
308  Returns a :class:`MMCifInfo` object as last item.
309 
310  :raises: :exc:`~ost.io.IOException` if the import fails due to an erroneous
311  or inexistent file
312  """
313  def _override(val1, val2):
314  if val2!=None:
315  return val2
316  else:
317  return val1
318  if isinstance(profile, str):
319  prof = profiles[profile].Copy()
320  else:
321  prof = profile.Copy()
322 
323  prof.calpha_only=_override(prof.calpha_only, calpha_only)
324  prof.strict_hydrogens=_override(prof.strict_hydrogens, strict_hydrogens)
325  prof.fault_tolerant=_override(prof.fault_tolerant, fault_tolerant)
326 
327  if remote:
328  output_dir = tempfile.gettempdir()
329  if __GetModelFromPDB(filename, output_dir):
330  filename = os.path.join(output_dir, 'pdb%s.ent.gz' % filename)
331  else:
332  raise IOError('Can not load PDB %s from www.pdb.org'%filename)
333 
334  conop_inst = conop.Conopology.Instance()
335  builder = conop_inst.GetBuilder("DEFAULT")
336 
337  builder.strict_hydrogens = prof.strict_hydrogens
338 
339  try:
340  ent = mol.CreateEntity()
341  reader = MMCifReader(filename, ent, prof)
342  reader.read_seqres = seqres
343  #if reader.HasNext():
344  reader.Parse()
345  conop_inst.ConnectAll(builder, ent, 0)
346  #else:
347  # raise IOError("File doesn't contain any entities")
348  if seqres and info:
349  return ent, reader.seqres, reader.info
350  if seqres:
351  return ent, reader.seqres
352  if info:
353  return ent, reader.info
354  return ent
355  except:
356  raise
357 
358 # this function uses a dirty trick: should be a member of MMCifInfoBioUnit
359 # which is totally C++, but we want the method in Python... so we define it
360 # here (__init__) and add it as a member to the class. With this, the first
361 # arguement is the usual 'self'.
362 # documentation for this function was moved to mmcif.rst,
363 # MMCifInfoBioUnit.PDBize, since this function is not included in SPHINX.
364 def _PDBize(biounit, asu, seqres=None, min_polymer_size=10):
365  def _CopyAtoms(src_res, dst_res, edi, trans=geom.Mat4()):
366  for atom in src_res.atoms:
367  tmp_pos = geom.Vec4(atom.pos)
368  new_atom=edi.InsertAtom(dst_res, atom.name, geom.Vec3(trans*tmp_pos),
369  element=atom.element,
370  occupancy=atom.occupancy,
371  b_factor=atom.b_factor,
372  is_hetatm=atom.is_hetatom)
373 
374  chain_names='ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz'
375  # create list of operations
376  # for cartesian products, operations are stored in a list, multiplied with
377  # the next list of operations and re-stored... until all lists of operations
378  # are multiplied in an all-against-all manner.
379  operations = biounit.GetOperations()
380  trans_matrices = list()
381  if len(operations) > 0:
382  for op in operations[0]:
383  rot = geom.Mat4()
384  rot.PasteRotation(op.rotation)
385  trans = geom.Mat4()
386  trans.PasteTranslation(op.translation)
387  tr = geom.Mat4()
388  tr = trans * rot
389  trans_matrices.append(tr)
390  for op_n in range(1, len(operations)):
391  tmp_ops = list()
392  for o in operations[op_n]:
393  rot = geom.Mat4()
394  rot.PasteRotation(o.rotation)
395  trans = geom.Mat4()
396  trans.PasteTranslation(o.translation)
397  tr = geom.Mat4()
398  tr = trans * rot
399  for t_o in trans_matrices:
400  tp = t_o * tr
401  tmp_ops.append(tp)
402  trans_matrices = tmp_ops
403  # select chains into a view as basis for each transformation
404  assu = asu.Select('cname=' + ','.join(biounit.GetChainList()))
405  # use each transformation on the view, store as entity and transform, PDBize
406  # the result while adding everything to one large entity
407  pdb_bu = mol.CreateEntity()
408  edi = pdb_bu.EditXCS(mol.BUFFERED_EDIT)
409  cur_chain_name = 0
410  water_chain = mol.ChainHandle()
411  ligand_chain = mol.ChainHandle()
412  for tr in trans_matrices:
413  # do a PDBize, add each new entity to the end product
414  for chain in assu.chains:
415  residue_count = len(chain.residues)
416  if seqres:
417  seqres_chain = seqres.FindSequence(chain.name)
418  if seqres_chain.IsValid():
419  residue_count = len(seqres_chain)
420  if chain.is_polymer and residue_count >= min_polymer_size:
421  if len(chain_names) == cur_chain_name:
422  raise RuntimeError('Running out of chain names')
423  new_chain = edi.InsertChain(chain_names[cur_chain_name])
424  cur_chain_name += 1
425  edi.SetChainDescription(new_chain, chain.description)
426  edi.SetChainType(new_chain, chain.type)
427  new_chain.SetStringProp('original_name', chain.name)
428  if chain.HasProp("pdb_auth_chain_name"):
429  new_chain.SetStringProp("pdb_auth_chain_name",
430  chain.GetStringProp("pdb_auth_chain_name"))
431  for res in chain.residues:
432  new_res = edi.AppendResidue(new_chain, res.name, res.number)
433  _CopyAtoms(res, new_res, edi, tr)
434  elif chain.type == mol.CHAINTYPE_WATER:
435  if not water_chain.IsValid():
436  # water gets '-' as name
437  water_chain = edi.InsertChain('-')
438  edi.SetChainDescription(water_chain, chain.description)
439  edi.SetChainType(water_chain, chain.type)
440  for res in chain.residues:
441  new_res = edi.AppendResidue(water_chain, res.name)
442  new_res.SetStringProp('type', mol.StringFromChainType(chain.type))
443  new_res.SetStringProp('description', chain.description)
444  _CopyAtoms(res, new_res, edi, tr)
445  else:
446  if not ligand_chain.IsValid():
447  # all ligands, put in one chain, are named '_'
448  ligand_chain = edi.InsertChain('_')
449  last_rnum = 0
450  else:
451  last_rnum = ligand_chain.residues[-1].number.num
452  residues=chain.residues
453  ins_code='\0'
454  if len(residues)>1:
455  ins_code='A'
456  for res in chain.residues:
457  new_res = edi.AppendResidue(ligand_chain, res.name,
458  mol.ResNum(last_rnum+1, ins_code))
459  new_res.SetStringProp('description', chain.description)
460  new_res.SetStringProp('type', mol.StringFromChainType(chain.type))
461  new_res.SetStringProp("original_name", chain.name)
462  if chain.HasProp("pdb_auth_chain_name"):
463  new_res.SetStringProp("pdb_auth_chain_name",
464  chain.GetStringProp("pdb_auth_chain_name"))
465  ins_code = chr(ord(ins_code)+1)
466  _CopyAtoms(res, new_res, edi, tr)
467  conop.ConnectAll(pdb_bu)
468  return pdb_bu
469 
470 MMCifInfoBioUnit.PDBize = _PDBize
471 
472 ## \example fft_li.py
473 #
474 # This scripts loads one or more images and shows their Fourier Transforms on
475 # the screen. A viewer is opened for each loaded image. The Fourier Transform
476 # honors the origin of the reference system, which is assumed to be at the
477 # center of the image.
478 #
479 # Usage:
480 #
481 # \code giplt view_ft.py <image1> [<image2> <image3> .... ] \endcode
482 #
483 # <BR>
484 # <BR>