OpenStructure
cleanup.py
Go to the documentation of this file.
1 from ost import conop, mol
2 
def Cleanup(entity, strip_water=True, canonicalize=True, remove_ligands=True):
    """
    Return a cleaned-up (simplified) copy of a protein structure.

    The input is never edited: all changes are applied to a copy obtained
    with ``entity.Copy()``. The enabled steps always run in the order
    water removal, canonicalization, ligand removal.

    :param entity: The structure to clean; only its copy is modified.
    :param strip_water: Whether to remove water from the structure.
    :param canonicalize: Whether to strip off modifications of amino acids and
      map them back to their parent standard amino acid, e.g. selenium
      methionine to methionine. For more complex amino acids, where the
      relation between the modified and the standard parent amino acid is not
      known, sidechain atoms are removed. D-peptide-linking residues are
      completely removed as well.
    :param remove_ligands: Whether to remove ligands from the structure.

    :return: a cleaned version of the entity
    :raises RuntimeError: if ``conop.GetDefaultLib()`` yields no compound
      library.
    """
    compound_lib = conop.GetDefaultLib()
    if not compound_lib:
        raise RuntimeError("Cleanup requires a compound library.")

    cleaned = entity.Copy()
    editor = cleaned.EditXCS()

    # Canonicalization has to precede ligand removal: modified residues such
    # as MSE must be mapped to their parent before ligands are dropped,
    # otherwise they would be removed with them.
    pipeline = (
        (strip_water, _StripWater, (cleaned, editor)),
        (canonicalize, _CanonicalizeResidues, (cleaned, editor, compound_lib)),
        (remove_ligands, _RemoveLigands, (cleaned, editor)),
    )
    for enabled, step, step_args in pipeline:
        if enabled:
            step(*step_args)
    return cleaned
34 
35 
def _StripWater(clean_entity, ed):
    """
    Delete all water residues of *clean_entity* through the editor *ed*.

    A residue counts as water when its ``chem_class`` equals ``mol.WATER``.
    ``ed.UpdateICS()`` is called once after the loop.
    """
    for residue in clean_entity.residues:
        # Skip handles that are reported as no longer valid.
        if not residue.IsValid():
            continue
        if residue.chem_class != mol.WATER:
            continue
        ed.DeleteResidue(residue.handle)
    ed.UpdateICS()
46 
def _RemoveLigands(clean_entity, ed):
    """
    Delete all ligand residues of *clean_entity* through the editor *ed*.

    A residue is treated as a ligand when it is not peptide-linking, its
    first atom is flagged as hetatom and it is not water.
    ``ed.UpdateICS()`` is called once after the loop.
    """
    for residue in clean_entity.residues:
        if not residue.IsValid():
            continue
        # WHEN mmCIF WILL BE USED, CHANGE IsPeptideLinking() TO IsProtein()
        if residue.IsPeptideLinking():
            continue
        # The hetatom flag of the first atom stands in for the whole residue.
        if not residue.atoms[0].is_hetatom:
            continue
        if residue.chem_class == mol.WATER:
            continue
        ed.DeleteResidue(residue.handle)
    ed.UpdateICS()
58 
def _HeavyAtomNames(atoms, skip_leaving=False):
    """
    Return the set of names of all atoms in *atoms* whose element is neither
    "H" nor "D".

    :param atoms: Iterable of objects exposing ``name`` and ``element``.
    :param skip_leaving: If True, atoms whose ``is_leaving`` attribute is
      true are left out as well. ``is_leaving`` is only read when this flag
      is set, so objects without that attribute can be passed otherwise.
    """
    return {atom.name for atom in atoms
            if atom.element not in ("H", "D")
            and not (skip_leaving and atom.is_leaving)}


def _CanonicalizeResidues(clean_entity, ed, compound_lib):
    """
    Strip off modifications of amino acids and map them back to their parent
    standard amino acid, e.g. selenium methionine to methionine. For more
    complex amino acids, where the relation between the modified and the
    standard parent amino acid is not known, sidechain atoms are removed.
    D-peptide-linking residues are completely removed as well.

    Only valid, peptide-linking residues are touched. A message is printed
    to standard output when the parent compound is missing from the library.
    ``ed.UpdateICS()`` is called once after the loop.

    :param clean_entity: Structure whose residues are canonicalized in place.
    :param ed: Editor of *clean_entity* used for all modifications.
    :param compound_lib: Compound library used to look up parent residues.
    """
    for res in clean_entity.residues:
        if not (res.IsValid() and res.IsPeptideLinking()):
            continue

        parent_olc = res.one_letter_code
        if parent_olc == "X":
            # No known parent: keep the backbone only, clear hetatom flags.
            _Deletion(res, ed)
            continue

        parent_tlc = conop.OneLetterCodeToResidueName(parent_olc)
        parent_res = compound_lib.FindCompound(parent_tlc)
        if not parent_res:
            _Deletion(res, ed)
            print("Removing sidechain of %s, because it has not been found "
                  "in the compound library" % parent_tlc)
            continue

        # Compare the heavy atoms of the residue with those of its parent.
        modif_atom_names = _HeavyAtomNames(res.atoms)
        # A residue carrying an OXT atom is compared against all parent
        # atoms; otherwise the parent's leaving atoms are ignored.
        has_oxt = res.FindAtom("OXT").IsValid()
        parent_atom_names = _HeavyAtomNames(parent_res.atom_specs,
                                            skip_leaving=not has_oxt)
        additional_parent_atoms = parent_atom_names - modif_atom_names
        additional_modif_atoms = modif_atom_names - parent_atom_names

        # WHEN mmCIF WILL BE USED, CHANGE IsPeptideLinking() TO IsProtein(),
        # TO EXCLUDE LIGANDS FROM CANONICALISATION
        if not res.atoms[0].is_hetatom:
            # The res is a peptide but not a ligand (is a protein res):
            # only an incomplete sidechain is removed.
            if additional_parent_atoms:
                _DeleteSidechain(res, ed)
            continue

        old_name = res.name
        ed.RenameResidue(res, parent_tlc)
        if additional_parent_atoms and additional_modif_atoms:
            # replacement
            _Replacement(res, ed, old_name)
        elif additional_parent_atoms:
            # deletion
            _Deletion(res, ed)
        elif additional_modif_atoms:
            # addition
            _Addition(res, ed, additional_modif_atoms)
        else:
            # unchanged, later check stereochemistry or H atoms
            _Unchanged(res, ed)
    ed.UpdateICS()
119 
def _Replacement(res, ed, old_name):
    """
    Handle a residue that both lacks parent atoms and has extra atoms.

    Only the residue formerly named "MSE" is converted: the hetatom flag of
    all its atoms is cleared and its "SE" atom is replaced by an "SD" atom
    of element "S" with the same position, occupancy and B-factor. In every
    other case, including an MSE without a valid "SE" atom, the sidechain is
    deleted.
    """
    # TEMP ONLY MSE
    if old_name != "MSE":
        _DeleteSidechain(res, ed)
        return

    for atom in res.atoms:
        atom.is_hetatom = False

    selenium = res.FindAtom("SE")
    if not selenium.IsValid():
        _DeleteSidechain(res, ed)
        return

    # S radius=~1;SE=~1.2
    ed.InsertAtom(res, "SD", selenium.pos, "S",
                  selenium.occupancy, selenium.b_factor)
    ed.DeleteAtom(selenium)
134 
def _Deletion(res, ed):
    """
    Delete the sidechain of *res* and clear the hetatom flag on every atom
    that remains.
    """
    _DeleteSidechain(res, ed)
    for remaining in res.atoms:
        remaining.is_hetatom = False
140 
def _Addition(res, ed, additional_modif_atoms):
    """
    Delete the atoms of *res* named in *additional_modif_atoms*, then clear
    the hetatom flag on every atom that remains.

    Names for which ``res.FindAtom`` returns an invalid handle are skipped.
    """
    for extra_name in additional_modif_atoms:
        extra_atom = res.FindAtom(extra_name)
        if not extra_atom.IsValid():
            continue
        ed.DeleteAtom(extra_atom)

    for remaining in res.atoms:
        remaining.is_hetatom = False
149 
def _Unchanged(res, ed):
    """
    Handle a residue whose heavy-atom names match those of its parent.

    A residue with chem class ``mol.D_PEPTIDE_LINKING`` is deleted entirely.
    Any other residue loses its sidechain and has the hetatom flag cleared
    on every atom that remains.
    """
    if res.chem_class == mol.D_PEPTIDE_LINKING:
        ed.DeleteResidue(res)
        return

    _DeleteSidechain(res, ed)
    for remaining in res.atoms:
        remaining.is_hetatom = False
158 
def _DeleteSidechain(res, ed):
    """
    Delete every atom of *res* whose name is not one of CA, CB, C, N or O.

    Note that CB is kept although it belongs to the sidechain.
    """
    kept_names = ['CA', 'CB', 'C', 'N', 'O']
    for atom in res.atoms:
        if atom.name in kept_names:
            continue
        ed.DeleteAtom(atom)
164 
# Visible functions. __all__ must list the exported names as strings; putting
# the function object itself in the list makes "from <module> import *" fail
# with a TypeError.
__all__ = ["Cleanup"]
def Cleanup(entity, strip_water=True, canonicalize=True, remove_ligands=True)
Definition: cleanup.py:3