OpenStructure
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
__init__.py
Go to the documentation of this file.
1 from _ost_seq_alg import *
2 from ost.seq.alg.mat import *
3 
4 def ValidateSEQRESAlignment(aln, chain=None):
5  """
6  Checks a sequence aligned to a SEQRES sequence to be free of strand breaks.
7  Residues divided by gaps are not considered as breakage but may also not be
8  connected.
9 
10  :param aln: Alignment
11  :type aln: :class:`~ost.seq.AlignmentHandle`
12  :param chain: Source of the sequence
13  :type chain: :class:`~ost.mol.ChainHandle`
14 
15  :returns: True if all residues (beside gapped ones) are connected, False
16  otherwise.
17  """
18  from ost import LogWarning
19  from ost import seq
20  from ost import mol
21  if aln.GetCount() != 2:
22  raise ValueError('Alignment contains more than 2 sequences!')
23  sequence = aln.GetSequence(1)
24  if len(sequence) == 0:
25  return True
26  if chain == None:
27  if sequence.HasAttachedView() == False:
28  raise ValueError("Alignment is missing an attached chain view.")
29  chain = sequence.GetAttachedView()
30  residues = chain.residues
31  # eat up all beginning gaps
32  j = 1
33  for s in sequence:
34  if s != '-':
35  break
36  j += 1;
37  l = sequence[j-1]
38  i = 0
39  # run over sequence & alignment
40  for s in sequence[j:]:
41  if s != '-':
42  i += 1
43  r1 = residues[i-1]
44  r2 = residues[i]
45  if r1.one_letter_code=='?' or r2.one_letter_code=='?':
46  continue
47  if l != '-':
48  if not mol.InSequence(r1.handle, r2.handle):
49  LogWarning('%s and %s are not connected by peptide bond' % (str(r1), str(r2)))
50  return False
51  else:
52  if mol.InSequence(r1.handle, r2.handle):
53  LogWarning('%s and %s are connected by peptide bond' % (str(r1), str(r2)))
54  return False
55  l = s
56  return True
57 
58 def AlignToSEQRES(chain, seqres, try_resnum_first=False, validate=True):
59  """
60  Aligns the residues of chain to the SEQRES sequence, inserting gaps where
61  needed. The function uses the connectivity of the protein backbone to find
62  consecutive peptide fragments. These fragments are then aligned to the SEQRES
63  sequence.
64 
65  All the non-ligand, peptide-linking residues of the chain must be listed in
66  SEQRES. If there are any additional residues in the chain, the function
67  raises a ValueError.
68 
69  If 'try_resnum_first' is set, building the alignment following residue numbers
70  is tried first.
71 
72  If 'validate' is set (default), the alignment is checked using
73  :func:`~ost.seq.alg.ValidateSEQRESAlignment`.
74 
75  :param chain: Source of the sequence
76  :type chain: :class:`~ost.mol.ChainHandle`
77  :param seqres: SEQRES sequence
78  :type seqres: :class:`str`
79  :param try_resnum_first: Try to align by residue number
80  :type try_resnum_first: :class:`bool`
81  :param validate: Validate alignment by
82  :func:`~ost.seq.alg.ValidateSEQRESAlignment`
83  :type validate: :class:`bool`
84 
85  :returns: The alignment of the residues in the chain and the SEQRES entries.
86  :rtype: :class:`~ost.seq.AlignmentHandle`
87  """
88 
89  def IsEqual(olc1, olc2):
90  return olc1 in ('X', '?') or olc2 in ('X', '?') or olc1 == olc2
91 
92  from ost import seq
93  from ost import mol
94  from ost import LogWarning
95  view=chain
96  residues=view.residues
97  if len(residues)==0:
98  return seq.CreateAlignment()
99  if try_resnum_first:
100  aln_seq = seq.CreateSequence('atoms', '-'*len(seqres))
101  for r1 in residues:
102  if r1.number.num <= len(seqres) and r1.number.num > 0:
103  if IsEqual(seqres[r1.number.num - 1], r1.one_letter_code):
104  aln_seq[r1.number.num - 1] = r1.one_letter_code
105  else:
106  LogWarning('Sequence mismatch: chain has "' + r1.one_letter_code +
107  '", while SEQRES is "' + seqres[r1.number.num - 1] +
108  '" at the corresponding position.')
109  try_resnum_first = False
110  break
111  if not try_resnum_first:
112  fragments=[residues[0].one_letter_code]
113  for r1, r2 in zip(residues[:-1], residues[1:]):
114  if not mol.InSequence(r1.handle, r2.handle):
115  fragments.append('')
116  fragments[-1]+=r2.one_letter_code
117  ss=str(seqres)
118  pos=0
119  aln_seq=''
120  for frag in fragments:
121  new_pos=ss.find(frag, pos)
122  if new_pos==-1:
123  raise ValueError('"%s" is not a substring of "%s"' % (frag, ss))
124  aln_seq+='-'*(new_pos-pos)+frag
125  pos=new_pos+len(frag)
126  aln_seq = seq.CreateSequence('atoms',
127  aln_seq+('-'*(len(seqres)-len(aln_seq))))
128  alignment = seq.CreateAlignment(seq.CreateSequence('SEQRES', str(seqres)),
129  aln_seq)
130  if validate and not ValidateSEQRESAlignment(alignment, view):
131  raise ValueError("SEQRES cannot be aligned with its corresponding chain.")
132  return alignment
133 
134 
135 def AlignmentFromChainView(chain, handle_seq_name='handle',
136  view_seq_name='view'):
137  """
138  Creates and returns the sequence alignment of the given chain view to the
139  chain handle. The alignment contains two sequences, the first containing all
140  non-ligand peptide-linking residues, the second containing all non-ligand
141  peptide-linking residues that are part of the view.
142 
143  :param chain: A valid chain
144  :type chain: :class:`~ost.mol.ChainView`
145 
146  :param handle_seq_name: Name of the handle sequence in the output alignment
147  :param view_seq_name: Name of the view sequence in the output alignment
148  :returns: The alignment
149  :rtype: :class:`~ost.seq.AlignmentHandle`
150 
151  """
152  from ost import seq
153  v0=chain.handle.Select('ligand=false and peptide=true')
154  v1=chain.Select('ligand=false and peptide=true')
155  s0=seq.CreateSequence(handle_seq_name, '')
156  s1=seq.CreateSequence(view_seq_name, '')
157  s0.AttachView(v0)
158  s1.AttachView(v1)
159  res0=v0.residues
160  res1=v1.residues
161  idx0, idx1=(0, 0)
162  while idx0<len(res0):
163  s0.Append(res0[idx0].one_letter_code)
164  if idx1<len(res1) and res1[idx1].handle==res0[idx0].handle:
165  s1.Append(res1[idx1].one_letter_code)
166  idx1+=1
167  else:
168  s1.Append('-')
169  idx0+=1
170  return seq.CreateAlignment(s0, s1)
171 
173  """
174  Predicts contacts from a multiple sequence alignment using a combination
175  of Mutual Information (*MI*) and the Contact Substitution Score (*CoEvoSc*).
176  MI is calculated with the APC and small number corrections as well as with a
177  transformation into Z-scores. The *CoEvoSc* is calculated using the default
178  PairSubstWeightMatrix (see seq.alg.LoadDefaultPairSubstWeightMatrix).
179  The final score for a pair of columns *(i,j)* of **ali** is obtained from:
180 
181  Sc(i,j)=MI(i,j)exp(CoEvoSc(i,j)) if *(i,j)* >=0
182 
183  Sc(i,j)=MI(i,j)exp(1-CoEvoSc(i,j)) if *(i,j)* <0
184 
185  :param ali: The multiple sequence alignment
186  :type ali: :class:`~ost.seq.AlignmentHandle`
187  """
188  import math
189  from ost import seq
190  if not type(ali)==type(seq.AlignmentHandle()):
191  print "Parameter should be an AlignmentHandle"
192  return
195  ncol=ali.GetLength()
196  for i in range(ncol):
197  for j in range(ncol):
198  if mi.matrix[i][j]>=0:
199  mi.matrix[i][j]=mi.matrix[i][j]*(math.exp(CoEvoSc.matrix[i][j]))
200  else:
201  mi.matrix[i][j]=mi.matrix[i][j]*(math.exp(1.-CoEvoSc.matrix[i][j]))
202  mi.RefreshSortedIndices()
203  return mi
204 
205 def CalculateContactProbability(cpred_res,method):
206  """
207  Calculate the probability of a predicted contact to be correct.
208  This simply transforms the score associated with a prediction into a probability.
209 
210  :param cpred_res: A contact prediction result
211  :param method: The method which was used for contact prediction. Should be one
212  of "MI", "MIp", "MIpz", "cevoMI", "cevo"
213 
214  :type cpred_res: :class:`ost.seq.alg.ContactPredictionScoreResult`
215  :type method: :class:`str`
216  """
217  import math
218  def _growth_function(x,K,x0,B,nu,Q):
219  p=K/(1+Q*math.exp(-B*(x-x0)))**(1/nu)
220  p=min(1,max(0,p))
221  return p
222 
223  def _decay_function(x,A,k):
224  p=A*math.exp(-k*x)
225  p=min(1,max(0,p))
226  return p
227 
228  prob_params={}
229  prob_params["MI"]=[0.05858455345122933, 0.8930350957023122]
230  prob_params["MIp"]=[0.10019621004607637, 0.9065429261332942]
231  prob_params["MIpz"]=[0.7368147563063437, 4.638820470770171, 0.6383539191372934, 1.1627300209863376, 19.63060874042289]
232  prob_params["cevoMI"]=[0.779979757231944, 1.642357937131157, 0.3267847173036033, 0.3742848849873358, 5.1816922372446]
233  prob_params["cevo"]=[0.7564532665623617, 0.5947472274271304, 3.8548775389879166, 0.46203017320927053, 1.6198602780123705]
234 
235  if not method in prob_params:
236  raise ValueError("method should be one of MI, MIp, MIpz, cevoMI, cevo")
237  params=prob_params[method]
238  cpred_res.RefreshSortedIndices()
239  nres=len(cpred_res.matrix)
240  probabilities=[[0 for i in range(nres)] for j in range(nres)]
241 
242  if len(params)==2:func=_decay_function
243  else:func=_growth_function
244 
245  nres=float(nres)
246  if len(params)==2:
247  for k,(i,j) in enumerate(cpred_res.sorted_indices):
248  p=_decay_function(math.log10(0.01+k/nres),*params)
249  probabilities[i][j]=p
250  probabilities[j][i]=p
251  else:
252  for k,(i,j) in enumerate(cpred_res.sorted_indices):
253  p=_growth_function(cpred_res.GetScore(i,j),*params)
254  probabilities[i][j]=p
255  probabilities[j][i]=p
256  cpred_res.probabilities=probabilities
257  return cpred_res
258 
259 
260 
261 
def ValidateSEQRESAlignment
Definition: __init__.py:4
ContactPredictionScoreResult DLLEXPORT_OST_SEQ_ALG CalculateMutualInformation(const AlignmentHandle &aln, ContactWeightMatrix w=LoadConstantContactWeightMatrix(), bool apc_correction=true, bool zpx_transformation=true, float small_number_correction=0.05)
def CalculateContactProbability
Definition: __init__.py:205
ContactPredictionScoreResult DLLEXPORT_OST_SEQ_ALG CalculateContactSubstitutionScore(const AlignmentHandle &aln, int ref_seq_index=0, PairSubstWeightMatrix w=LoadDefaultPairSubstWeightMatrix())
def PredictContacts
Definition: __init__.py:172
def AlignToSEQRES
Definition: __init__.py:58
representation of a multiple sequence alignemnt consisting of two or more sequences ...
def AlignmentFromChainView
Definition: __init__.py:136