OpenStructure
__init__.py
Go to the documentation of this file.
1 from ._ost_seq_alg import *
2 from ost.seq.alg.mat import *
3 
4 def ValidateSEQRESAlignment(aln, chain=None):
5  """
6  Checks if sequence in alignment has same connectivity as residues in chain.
7  This looks for connected stretches in both the sequence and the chain and
8  returns False if they don't match. This uses the connectivity of the protein
9  backbone.
10 
11  :param aln: Alignment of two sequences with second one expected to map to
12  residues in *chain*.
13  :type aln: :class:`~ost.seq.AlignmentHandle`
14  :param chain: Source of the sequence.
15  :type chain: :class:`~ost.mol.ChainHandle`
16 
17  :returns: True if all residues (beside gapped ones) are connected, False
18  otherwise.
19  """
20  from ost import LogWarning
21  from ost import seq
22  from ost import mol
23  if aln.GetCount() != 2:
24  raise ValueError('Alignment contains more than 2 sequences!')
25  sequence = aln.GetSequence(1)
26  if len(sequence) == 0:
27  return True
28  if chain == None:
29  if sequence.HasAttachedView() == False:
30  raise ValueError("Alignment is missing an attached chain view.")
31  chain = sequence.GetAttachedView()
32  residues = chain.residues
33  # eat up all beginning gaps
34  j = 1
35  for s in sequence:
36  if s != '-':
37  break
38  j += 1;
39  l = sequence[j-1]
40  i = 0
41  # run over sequence & alignment
42  for s in sequence[j:]:
43  if s != '-':
44  i += 1
45  r1 = residues[i-1]
46  r2 = residues[i]
47  if r1.one_letter_code=='?' or r2.one_letter_code=='?':
48  continue
49  if l != '-':
50  if not mol.InSequence(r1.handle, r2.handle):
51  LogWarning('%s and %s are not connected by peptide bond' % (str(r1), str(r2)))
52  return False
53  else:
54  if mol.InSequence(r1.handle, r2.handle):
55  LogWarning('%s and %s are connected by peptide bond' % (str(r1), str(r2)))
56  return False
57  l = s
58  return True
59 
60 def AlignToSEQRES(chain, seqres, try_resnum_first=False, validate=True):
61  """
62  Aligns the residues of chain to the SEQRES sequence, inserting gaps where
63  needed. The function uses the connectivity of the protein backbone to find
64  consecutive peptide fragments. These fragments are then aligned to the SEQRES
65  sequence.
66 
67  All the non-ligand, peptide-linking residues of the chain must be listed in
68  SEQRES. If there are any additional residues in the chain, the function
69  raises a ValueError.
70 
71  :param chain: Source of the sequence
72  :type chain: :class:`~ost.mol.ChainHandle`
73  :param seqres: SEQRES sequence
74  :type seqres: :class:`str`
75  :param try_resnum_first: If set to True, this first builds an alignment using
76  residue numbers and checks if the one-letter-codes
77  match. If they all match, this alignment is used
78  (and possibly validated). Otherwise, it displays a
79  warning and falls back to the connectivity-based
80  alignment.
81  :type try_resnum_first: :class:`bool`
82  :param validate: If set to True, the alignment is additionally checked by
83  :func:`~ost.seq.alg.ValidateSEQRESAlignment` and raises
84  a ValueError if the validation failed.
85  :type validate: :class:`bool`
86 
87  :returns: The alignment of the residues in the chain and the SEQRES entries.
88  :rtype: :class:`~ost.seq.AlignmentHandle`
89  """
90 
91  def IsEqual(olc1, olc2):
92  return olc1 in ('X', '?') or olc2 in ('X', '?') or olc1 == olc2
93 
94  from ost import seq
95  from ost import mol
96  from ost import LogWarning
97  view=chain
98  residues=view.residues
99  if len(residues)==0:
100  return seq.CreateAlignment()
101  if try_resnum_first:
102  aln_seq = seq.CreateSequence('atoms', '-'*len(seqres))
103  aligned_resnums = set()
104  for r1 in residues:
105  if r1.number.num in aligned_resnums:
106  LogWarning('Residue numbers must be unique. Already observed %i, ' \
107  'cannot align %s anymore.'%(r1.number.num, r1.qualified_name))
108  try_resnum_first = False
109  break
110  if r1.number.num <= len(seqres) and r1.number.num > 0:
111  if IsEqual(seqres[r1.number.num - 1], r1.one_letter_code):
112  aln_seq[r1.number.num - 1] = r1.one_letter_code
113  aligned_resnums.add(r1.number.num)
114  else:
115  msg = 'Sequence mismatch: chain'
116  if len(view.GetName().strip()):
117  msg += f' ({view.GetName().strip()})'
118  msg += f' has {r1.one_letter_code}, while SEQRES is '
119  msg += f'{seqres[r1.number.num-1]} at the corresponding position.'
120  LogWarning(msg)
121  try_resnum_first = False
122  break
123  else:
124  warning = 'Residue with number %i is outside of the range covered by '\
125  'SEQRES [1, %i]'%(r1.number.num, len(seqres))
126  LogWarning(warning)
127  try_resnum_first = False
128  break
129  if not try_resnum_first:
130  fragments=[residues[0].one_letter_code]
131  for r1, r2 in zip(residues[:-1], residues[1:]):
132  if not mol.InSequence(r1.handle, r2.handle):
133  fragments.append('')
134  fragments[-1]+=r2.one_letter_code
135  ss=str(seqres)
136  pos=0
137  aln_seq=''
138  for frag in fragments:
139  new_pos=ss.find(frag, pos)
140  if new_pos==-1:
141  raise ValueError('"%s" is not a substring of "%s"' % (frag, ss))
142  aln_seq+='-'*(new_pos-pos)+frag
143  pos=new_pos+len(frag)
144  aln_seq = seq.CreateSequence('atoms',
145  aln_seq+('-'*(len(seqres)-len(aln_seq))))
146  alignment = seq.CreateAlignment(seq.CreateSequence('SEQRES', str(seqres)),
147  aln_seq)
148  if validate and not ValidateSEQRESAlignment(alignment, view):
149  raise ValueError("SEQRES cannot be aligned with its corresponding chain.")
150  return alignment
151 
152 
153 def AlignmentFromChainView(chain, handle_seq_name='handle',
154  view_seq_name='view'):
155  """
156  Creates and returns the sequence alignment of the given chain view to the
157  chain handle. The alignment contains two sequences, the first containing all
158  non-ligand peptide-linking residues, the second containing all non-ligand
159  peptide-linking residues that are part of the view.
160 
161  :param chain: A valid chain
162  :type chain: :class:`~ost.mol.ChainView`
163 
164  :param handle_seq_name: Name of the handle sequence in the output alignment
165  :param view_seq_name: Name of the view sequence in the output alignment
166  :returns: The alignment
167  :rtype: :class:`~ost.seq.AlignmentHandle`
168 
169  """
170  from ost import seq
171  v0=chain.handle.Select('ligand=false and peptide=true')
172  v1=chain.Select('ligand=false and peptide=true')
173  s0=seq.CreateSequence(handle_seq_name, '')
174  s1=seq.CreateSequence(view_seq_name, '')
175  s0.AttachView(v0)
176  s1.AttachView(v1)
177  res0=v0.residues
178  res1=v1.residues
179  idx0, idx1=(0, 0)
180  while idx0<len(res0):
181  s0.Append(res0[idx0].one_letter_code)
182  if idx1<len(res1) and res1[idx1].handle==res0[idx0].handle:
183  s1.Append(res1[idx1].one_letter_code)
184  idx1+=1
185  else:
186  s1.Append('-')
187  idx0+=1
188  return seq.CreateAlignment(s0, s1)
189 
191  """
192  Predicts contacts from a multiple sequence alignment using a combination
193  of Mutual Information (*MI*) and the Contact Substitution Score (*CoEvoSc*).
194  MI is calculated with the APC and small number corrections as well as with a
195  transformation into Z-scores. The *CoEvoSc* is calculated using the default
196  PairSubstWeightMatrix (see seq.alg.LoadDefaultPairSubstWeightMatrix).
197  The final score for a pair of columns *(i,j)* of **ali** is obtained from:
198 
199  Sc(i,j)=MI(i,j)exp(CoEvoSc(i,j)) if *(i,j)* >=0
200 
201  Sc(i,j)=MI(i,j)exp(1-CoEvoSc(i,j)) if *(i,j)* <0
202 
203  :param ali: The multiple sequence alignment
204  :type ali: :class:`~ost.seq.AlignmentHandle`
205  """
206  import math
207  from ost import seq
208  if not type(ali)==type(seq.AlignmentHandle()):
209  print("Parameter should be an AlignmentHandle")
210  return
213  ncol=ali.GetLength()
214  for i in range(ncol):
215  for j in range(ncol):
216  if mi.matrix[i][j]>=0:
217  mi.matrix[i][j]=mi.matrix[i][j]*(math.exp(CoEvoSc.matrix[i][j]))
218  else:
219  mi.matrix[i][j]=mi.matrix[i][j]*(math.exp(1.-CoEvoSc.matrix[i][j]))
220  mi.RefreshSortedIndices()
221  return mi
222 
223 def CalculateContactProbability(cpred_res,method):
224  """
225  Calculate the probability of a predicted contact to be correct.
226  This simply transforms the score associated with a prediction into a probability.
227 
228  :param cpred_res: A contact prediction result
229  :param method: The method which was used for contact prediction. Should be one
230  of "MI", "MIp", "MIpz", "cevoMI", "cevo"
231 
232  :type cpred_res: :class:`ost.seq.alg.ContactPredictionScoreResult`
233  :type method: :class:`str`
234  """
235  import math
236  def _growth_function(x,K,x0,B,nu,Q):
237  p=K/(1+Q*math.exp(-B*(x-x0)))**(1/nu)
238  p=min(1,max(0,p))
239  return p
240 
241  def _decay_function(x,A,k):
242  p=A*math.exp(-k*x)
243  p=min(1,max(0,p))
244  return p
245 
246  prob_params={}
247  prob_params["MI"]=[0.05858455345122933, 0.8930350957023122]
248  prob_params["MIp"]=[0.10019621004607637, 0.9065429261332942]
249  prob_params["MIpz"]=[0.7368147563063437, 4.638820470770171, 0.6383539191372934, 1.1627300209863376, 19.63060874042289]
250  prob_params["cevoMI"]=[0.779979757231944, 1.642357937131157, 0.3267847173036033, 0.3742848849873358, 5.1816922372446]
251  prob_params["cevo"]=[0.7564532665623617, 0.5947472274271304, 3.8548775389879166, 0.46203017320927053, 1.6198602780123705]
252 
253  if not method in prob_params:
254  raise ValueError("method should be one of MI, MIp, MIpz, cevoMI, cevo")
255  params=prob_params[method]
256  cpred_res.RefreshSortedIndices()
257  nres=len(cpred_res.matrix)
258  probabilities=[[0 for i in range(nres)] for j in range(nres)]
259 
260  if len(params)==2:func=_decay_function
261  else:func=_growth_function
262 
263  nres=float(nres)
264  if len(params)==2:
265  for k,(i,j) in enumerate(cpred_res.sorted_indices):
266  p=_decay_function(math.log10(0.01+k/nres),*params)
267  probabilities[i][j]=p
268  probabilities[j][i]=p
269  else:
270  for k,(i,j) in enumerate(cpred_res.sorted_indices):
271  p=_growth_function(cpred_res.GetScore(i,j),*params)
272  probabilities[i][j]=p
273  probabilities[j][i]=p
274  cpred_res.probabilities=probabilities
275  return cpred_res
276 
277 
278 
279 
representation of a multiple sequence alignemnt consisting of two or more sequences
ContactPredictionScoreResult DLLEXPORT_OST_SEQ_ALG CalculateContactSubstitutionScore(const AlignmentHandle &aln, int ref_seq_index=0, PairSubstWeightMatrix w=LoadDefaultPairSubstWeightMatrix())
def AlignToSEQRES(chain, seqres, try_resnum_first=False, validate=True)
Definition: __init__.py:60
def ValidateSEQRESAlignment(aln, chain=None)
Definition: __init__.py:4
ContactPredictionScoreResult DLLEXPORT_OST_SEQ_ALG CalculateMutualInformation(const AlignmentHandle &aln, ContactWeightMatrix w=LoadConstantContactWeightMatrix(), bool apc_correction=true, bool zpx_transformation=true, float small_number_correction=0.05)
def CalculateContactProbability(cpred_res, method)
Definition: __init__.py:223
def AlignmentFromChainView(chain, handle_seq_name='handle', view_seq_name='view')
Definition: __init__.py:154
def PredictContacts(ali)
Definition: __init__.py:190