OpenStructure
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
cleanup.py
Go to the documentation of this file.
1 from ost import conop, mol
2 
def Cleanup(entity, strip_water=True, canonicalize=True, remove_ligands=True):
    """
    Return a cleaned-up (simplified) version of a protein structure.

    All edits are applied to the object returned by ``entity.Copy()``; the
    enabled steps run in the fixed order water removal, canonicalisation,
    ligand removal.

    :param entity: The structure to clean up; it has to provide ``Copy()``.
    :param strip_water: Whether to remove water from the structure
    :param canonicalize: Whether to strip off modifications of amino acids and
        map them back to their parent standard amino acid, e.g. selenium
        methionine to methionine. For more complex amino acids, where the
        relation between the modified and the standard parent amino acid is
        not known, sidechain atoms are removed. D-peptide-linking residues
        are completely removed as well.
    :param remove_ligands: Whether to remove ligands from the structure

    :return: a cleaned version of the entity
    :raises RuntimeError: if the default builder has no ``compound_lib``
        attribute
    """
    # The compound library is reached through the default builder; without
    # it canonicalisation cannot look up parent residues.
    default_builder = conop.GetBuilder()
    if not hasattr(default_builder, "compound_lib"):
        raise RuntimeError( "Cannot cleanup structure, since the default builder doesn't use the compound library")
    library = default_builder.compound_lib

    cleaned = entity.Copy()
    editor = cleaned.EditXCS()

    if strip_water:
        _StripWater(cleaned, editor)
    # Modified residues are replaced before ligands are removed, so that MSE
    # and similar residues are not removed as ligands.
    if canonicalize:
        _CanonicalizeResidues(cleaned, editor, library)
    # Removes all hetatoms that are not water.
    if remove_ligands:
        _RemoveLigands(cleaned, editor)
    return cleaned
35 
36 
def _StripWater(clean_entity, ed):
    """
    Remove the water residues from the structure.

    :param clean_entity: Structure whose ``residues`` are scanned.
    :param ed: Editor used to delete the residues; ``UpdateICS()`` is called
        on it once after the scan.
    """
    for residue in clean_entity.residues:
        if not residue.IsValid():
            continue
        if residue.chem_class == mol.WATER:
            ed.DeleteResidue(residue.handle)
    ed.UpdateICS()
47 
def _RemoveLigands(clean_entity, ed):
    """
    Remove ligands from the structure.

    A residue is deleted when it is not peptide-linking, its first atom is
    flagged as hetatom and its chemical class is not water. Residues without
    any atoms are left alone, because there is no atom whose hetatom flag
    could be inspected.

    :param clean_entity: Structure whose ``residues`` are scanned.
    :param ed: Editor used to delete the residues; ``UpdateICS()`` is called
        on it once after the scan.
    """
    for res in clean_entity.residues:
        if not res.IsValid():
            continue
        #WHEN mmCIF WILL BE USED, CHANGE IsPeptideLinking() TO IsProtein()
        if res.IsPeptideLinking():
            continue
        atoms = res.atoms
        # Guard against atom-less residues: indexing atoms[0] on them would
        # raise an IndexError and abort the whole clean-up.
        if len(atoms) == 0:
            continue
        if atoms[0].is_hetatom and res.chem_class != mol.WATER:
            ed.DeleteResidue(res.handle)
    ed.UpdateICS()
59 
def _HeavyAtomNames(atoms, keep_leaving=True):
    """
    Collect the names of all atoms whose element is neither "H" nor "D".

    When keep_leaving is False, entries whose ``is_leaving`` flag is set are
    skipped as well; the flag is only read in that case.
    """
    names = set()
    for atom in atoms:
        if atom.element in ("H", "D"):
            continue
        if not keep_leaving and atom.is_leaving:
            continue
        names.add(atom.name)
    return names


def _ReduceToBackbone(res, ed):
    """
    Delete the sidechain of res and clear the hetatom flag of what is left.
    """
    _DeleteSidechain(res, ed)
    for atom in res.atoms:
        atom.is_hetatom = False


def _CanonicalizeResidues(clean_entity, ed, compound_lib):
    """
    This function strips off modifications of amino acids and maps
    them back to their parent standard amino acid, e.g. selenium methionine to
    methionine. For more complex amino acids, where the relation between the
    modified and the standard parent amino acid is not known, sidechain atoms
    are removed. D-peptide-linking residues are completely removed as well.

    Only valid, peptide-linking residues are touched. ``ed.UpdateICS()`` is
    called once after all residues have been processed.

    :param clean_entity: Structure whose ``residues`` are canonicalised.
    :param ed: Editor used for renaming and deleting.
    :param compound_lib: Library queried with ``FindCompound()`` for the
        parent residue of each modified residue.
    """
    for res in clean_entity.residues:
        if not (res.IsValid() and res.IsPeptideLinking()):
            continue

        parent_olc = res.one_letter_code
        if parent_olc == "X":
            # No parent amino acid is known: keep the backbone only.
            _ReduceToBackbone(res, ed)
            continue

        parent_tlc = conop.OneLetterCodeToResidueName(parent_olc)
        parent_res = compound_lib.FindCompound(parent_tlc)
        if not parent_res:
            _ReduceToBackbone(res, ed)
            # Parenthesised form prints the same text under Python 2 and 3.
            print("Removing sidechain of %s, because it has not been found in the compound library" % parent_tlc)
            continue

        modif_atom_names = _HeavyAtomNames(res.atoms)
        # If the res is the first or last (it carries an OXT atom), take all
        # the atoms from the parent res; otherwise leaving atoms are ignored.
        has_oxt = res.FindAtom("OXT").IsValid()
        parent_atom_names = _HeavyAtomNames(parent_res.atom_specs,
                                            keep_leaving=has_oxt)
        additional_parent_atoms = parent_atom_names - modif_atom_names
        additional_modif_atoms = modif_atom_names - parent_atom_names

        #WHEN mmCIF WILL BE USED, CHANGE IsPeptideLinking() TO IsProtein(), TO EXCLUDE LIGANDS FROM CANONICALISATION
        # An atom-less residue has no first atom to inspect; it is treated
        # like a non-hetatom residue instead of raising an IndexError.
        atoms = res.atoms
        is_hetatom_res = len(atoms) > 0 and atoms[0].is_hetatom

        if not is_hetatom_res:
            # The res is a peptide but not a ligand (is a protein res).
            if additional_parent_atoms:  # the sidechain is incomplete
                _DeleteSidechain(res, ed)
            continue

        old_name = res.name
        ed.RenameResidue(res, parent_tlc)
        if additional_parent_atoms and additional_modif_atoms:
            # replacement
            _Replacement(res, ed, old_name)
        elif additional_parent_atoms:
            # deletion
            _Deletion(res, ed)
        elif additional_modif_atoms:
            # addition
            _Addition(res, ed, additional_modif_atoms)
        else:
            # unchanged, later check stereochemistry or H atoms
            _Unchanged(res, ed)
    ed.UpdateICS()
120 
def _Replacement(res, ed, old_name):
    """
    Handle a modified residue that both lacks parent atoms and carries
    extra atoms.

    Only selenomethionine (old_name "MSE") is converted for now: its SE atom
    is replaced by an "SD" sulphur atom with the same position, occupancy and
    B-factor. In every other case the sidechain is deleted.
    """
    #TEMP ONLY MSE
    if old_name != "MSE":
        _DeleteSidechain(res, ed)
        return

    for atom in res.atoms:
        atom.is_hetatom = False

    selenium = res.FindAtom("SE")
    if not selenium.IsValid():
        _DeleteSidechain(res, ed)
        return

    # S radius=~1; SE=~1.2
    ed.InsertAtom(res, "SD", selenium.pos, "S",
                  selenium.occupancy, selenium.b_factor)
    ed.DeleteAtom(selenium)
135 
def _Deletion(res, ed):
    """
    Handle a modified residue that only lacks atoms of its parent residue:
    delete its sidechain, then clear the hetatom flag of the atoms left.
    """
    _DeleteSidechain(res, ed)
    for remaining in res.atoms:
        remaining.is_hetatom = False
141 
def _Addition(res, ed, additional_modif_atoms):
    """
    Handle a modified residue that only carries extra atoms: delete every
    atom named in additional_modif_atoms that is found in res, then clear
    the hetatom flag of the residue's atoms.
    """
    for extra_name in additional_modif_atoms:
        extra_atom = res.FindAtom(extra_name)
        if not extra_atom.IsValid():
            continue
        ed.DeleteAtom(extra_atom)
    for remaining in res.atoms:
        remaining.is_hetatom = False
150 
def _Unchanged(res, ed):
    """
    Handle a hetatom residue whose heavy-atom names match its parent residue.

    D-peptide-linking residues are deleted entirely. For any other residue
    the sidechain is deleted and the hetatom flag of the remaining atoms is
    cleared.
    """
    if res.chem_class == mol.D_PEPTIDE_LINKING:
        ed.DeleteResidue(res)
        return
    _DeleteSidechain(res, ed)
    for remaining in res.atoms:
        remaining.is_hetatom = False
159 
def _DeleteSidechain(res, ed):
    """
    Delete every atom of res whose name is not one of CA, CB, C, N or O.

    :param res: Residue whose ``atoms`` are scanned.
    :param ed: Editor used to delete the atoms.
    """
    kept_names = ('CA', 'CB', 'C', 'N', 'O')
    for candidate in res.atoms:
        if candidate.name in kept_names:
            continue
        ed.DeleteAtom(candidate)
165 
# visible functions
# Entries have to be the *names* of the public objects as strings: a
# star-import looks each entry up by name and raises a TypeError when it
# finds a non-string item such as the function object itself.
__all__ = ["Cleanup"]