ost::seq::alg Namespace Reference

Namespaces

namespace  mat
namespace  renumber

Data Structures

struct  AlignmentOpts
 options for local and global sequence alignment algorithms More...
struct  ContactPredictionScoreResult
struct  ContactWeightMatrix
 matrix to weight pairs of amino-acids. More...
struct  Distances
 Container for a pairwise distance for each structure. Each structure is identified by its index in the originally used alignment. More...
class  DistanceMap
class  InsDel
 get insertions and deletions of an alignment More...
struct  PairSubstWeightMatrix
 position-independent pair substitution weight matrix More...
struct  RefMode
class  SubstWeightMatrix
 position-independent substitution weight matrix More...
class  VarianceMap
 Container for variances for each entry in a distance map. Main functionality: Get/Set, Min, Max, ExportXXX. Get/Set and GetSize taken from TriMatrix. More...
class  Dist2Mean
 Container for distances to mean for N structures. Main functionality: Get/Set, ExportXXX. More...

Typedefs

typedef boost::shared_ptr< DistanceMap > DistanceMapPtr
typedef boost::shared_ptr< SubstWeightMatrix > SubstWeightMatrixPtr
typedef boost::shared_ptr< VarianceMap > VarianceMapPtr
typedef boost::shared_ptr< Dist2Mean > Dist2MeanPtr

Functions

int DLLEXPORT_OST_SEQ_ALG ClipAlignment (AlignmentHandle &aln, uint n_seq_thresh=2, bool set_offset=true, bool remove_empty=true)
std::vector< Real >
DLLEXPORT_OST_SEQ_ALG 
Conservation (const AlignmentHandle &aln, bool assign=true, const String &prop_name="cons", bool ignore_gap=false)
ContactPredictionScoreResult
DLLEXPORT_OST_SEQ_ALG 
CalculateContactScore (const AlignmentHandle &aln, ContactWeightMatrix w=LoadDefaultContactWeightMatrix())
ContactPredictionScoreResult
DLLEXPORT_OST_SEQ_ALG 
CalculateContactSubstitutionScore (const AlignmentHandle &aln, int ref_seq_index=0, PairSubstWeightMatrix w=LoadDefaultPairSubstWeightMatrix())
ContactPredictionScoreResult
DLLEXPORT_OST_SEQ_ALG 
CalculateMutualInformation (const AlignmentHandle &aln, ContactWeightMatrix w=LoadConstantContactWeightMatrix(), bool apc_correction=true, bool zpx_transformation=true, float small_number_correction=0.05)
ContactWeightMatrix
DLLEXPORT_OST_SEQ_ALG 
LoadDefaultContactWeightMatrix ()
ContactWeightMatrix
DLLEXPORT_OST_SEQ_ALG 
LoadConstantContactWeightMatrix ()
DistanceMapPtr
DLLEXPORT_OST_SEQ_ALG 
CreateDistanceMap (const seq::AlignmentHandle &alignment)
std::vector< Real >
DLLEXPORT_OST_SEQ_ALG 
ShannonEntropy (const AlignmentHandle &aln, bool ignore_gaps=true)
AlignmentList DLLEXPORT_OST_SEQ_ALG GlobalAlign (const ConstSequenceHandle &s1, const ConstSequenceHandle &s2, SubstWeightMatrixPtr &subst, int gap_open=-5, int gap_ext=-2)
AlignmentList DLLEXPORT_OST_SEQ_ALG LocalAlign (const ConstSequenceHandle &s1, const ConstSequenceHandle &s2, SubstWeightMatrixPtr &subst, int gap_open=-5, int gap_ext=-2)
AlignmentHandle
DLLEXPORT_OST_SEQ_ALG 
MergePairwiseAlignments (const AlignmentList &pairwise_alns, const ConstSequenceHandle &ref_seq)
PairSubstWeightMatrix DLLIMPORT LoadDefaultPairSubstWeightMatrix ()
AlignmentList DLLIMPORT SemiGlobalAlign (const ConstSequenceHandle &s1, const ConstSequenceHandle &s2, SubstWeightMatrixPtr &subst, int gap_open=-5, int gap_ext=-2)
Real DLLIMPORT SequenceIdentity (const AlignmentHandle &aln, RefMode::Type ref_mode=RefMode::LONGER_SEQUENCE, int seq_a=0, int seq_b=1)
Real DLLIMPORT SequenceSimilarity (const AlignmentHandle &aln, SubstWeightMatrixPtr subst, bool normalize=false, int seq_a=0, int seq_b=1)
VarianceMapPtr DLLIMPORT CreateVarianceMap (const DistanceMapPtr dmap, Real sigma=25)
Dist2MeanPtr DLLIMPORT CreateDist2Mean (const DistanceMapPtr dmap)
def ValidateSEQRESAlignment
def AlignToSEQRES
def AlignmentFromChainView
def PredictContacts
def CalculateContactProbability

Variables

char RAW_CONTACT_WEIGHT_MATRIX_RES_LIST [21] = {'D', 'E', 'R', 'K', 'H', 'S', 'T', 'N', 'Q', 'G', 'P', 'Y', 'W', 'V', 'I', 'L', 'M', 'F', 'A', 'C','-'}
Real RAW_CONTACT_WEIGHT_MATRIX [21][21]
char RAW_PAIR_SUBST_WEIGHT_MATRIX_RES_LIST [20] = {'D', 'E', 'R', 'K', 'H', 'S', 'T', 'N', 'Q', 'G', 'P', 'Y', 'W', 'V', 'I', 'L', 'M', 'F', 'A', 'C'}
Real RAW_PAIR_SUBST_WEIGHT_MATRIX [20][20][20][20]

Typedef Documentation

typedef boost::shared_ptr<Dist2Mean> Dist2MeanPtr

Definition at line 40 of file variance_map.hh.

typedef boost::shared_ptr<DistanceMap> DistanceMapPtr

Definition at line 147 of file distance_map.hh.

typedef boost::shared_ptr<SubstWeightMatrix> SubstWeightMatrixPtr

Definition at line 33 of file subst_weight_matrix.hh.

typedef boost::shared_ptr<VarianceMap> VarianceMapPtr

Definition at line 38 of file variance_map.hh.


Function Documentation

def ost::seq::alg::AlignmentFromChainView (   chain,
  handle_seq_name = 'handle',
  view_seq_name = 'view' 
)
Creates and returns the sequence alignment of the given chain view to the 
chain handle. The alignment contains two sequences, the first containing all 
non-ligand peptide-linking residues, the second containing all non-ligand 
peptide-linking residues that are part of the view. 

:param chain: A valid chain
:type chain: :class:`~ost.mol.ChainView`

:param handle_seq_name: Name of the handle sequence in the output alignment
:param view_seq_name: Name of the view sequence in the output alignment
:returns: The alignment
:rtype: :class:`~ost.seq.AlignmentHandle`

Definition at line 135 of file __init__.py.

def ost::seq::alg::AlignToSEQRES (   chain,
  seqres,
  try_resnum_first = False,
  validate = True 
)
Aligns the residues of chain to the SEQRES sequence, inserting gaps where 
needed. The function uses the connectivity of the protein backbone to find 
consecutive peptide fragments. These fragments are then aligned to the SEQRES 
sequence.

All the non-ligand, peptide-linking residues of the chain must be listed in 
SEQRES. If there are any additional residues in the chain, the function
raises a ValueError.

If 'try_resnum_first' is set, building the alignment following residue numbers
is tried first.

If 'validate' is set (default), the alignment is checked using
:func:`~ost.seq.alg.ValidateSEQRESAlignment`.

:param chain: Source of the sequence
:type chain: :class:`~ost.mol.ChainHandle`
:param seqres: SEQRES sequence
:type seqres: :class:`str`
:param try_resnum_first: Try to align by residue number
:type try_resnum_first: :class:`bool`
:param validate: Validate alignment by
                 :func:`~ost.seq.alg.ValidateSEQRESAlignment`
:type validate: :class:`bool`

:returns: The alignment of the residues in the chain and the SEQRES entries.
:rtype: :class:`~ost.seq.AlignmentHandle`

Definition at line 58 of file __init__.py.

def ost::seq::alg::CalculateContactProbability (   cpred_res,
  method 
)
Calculate the probability of a predicted contact to be correct.
This simply transforms the score associated with a prediction into a probability.

:param cpred_res: A contact prediction result
:param method: The method which was used for contact prediction. Should be one
 of "MI", "MIp", "MIpz", "cevoMI", "cevo"

:type cpred_res: :class:`ost.seq.alg.ContactPredictionScoreResult`
:type method: :class:`str`

Definition at line 205 of file __init__.py.

ContactPredictionScoreResult DLLEXPORT_OST_SEQ_ALG ost::seq::alg::CalculateContactScore ( const AlignmentHandle &  aln,
ContactWeightMatrix  w = LoadDefaultContactWeightMatrix() 
)
ContactPredictionScoreResult DLLEXPORT_OST_SEQ_ALG ost::seq::alg::CalculateContactSubstitutionScore ( const AlignmentHandle &  aln,
int  ref_seq_index = 0,
PairSubstWeightMatrix  w = LoadDefaultPairSubstWeightMatrix() 
)
ContactPredictionScoreResult DLLEXPORT_OST_SEQ_ALG ost::seq::alg::CalculateMutualInformation ( const AlignmentHandle &  aln,
ContactWeightMatrix  w = LoadConstantContactWeightMatrix(),
bool  apc_correction = true,
bool  zpx_transformation = true,
float  small_number_correction = 0.05 
)
int DLLEXPORT_OST_SEQ_ALG ost::seq::alg::ClipAlignment ( AlignmentHandle &  aln,
uint  n_seq_thresh = 2,
bool  set_offset = true,
bool  remove_empty = true 
)

Clips alignment so that first and last column have at least the desired number of structures.

Parameters:
aln Multiple sequence alignment. Will be cut.
n_seq_thresh Minimal number of sequences desired.
set_offset Shall we update offsets for attached views?
remove_empty Shall we remove sequences with only gaps in cut aln?
Returns:
Starting column (0-indexed), where cut region starts (w.r.t. original aln). -1, if there is no region in the alignment with at least the desired number of structures.
std::vector<Real> DLLEXPORT_OST_SEQ_ALG ost::seq::alg::Conservation ( const AlignmentHandle &  aln,
bool  assign = true,
const String &  prop_name = "cons",
bool  ignore_gap = false 
)

Calculates conservation scores for each column in the alignment.

The conservation score is a value between 0 and 1. The bigger the number the more conserved the aligned residues are.

Parameters:
assign If true, the conservation scores are assigned to attached residues. The name of the property can be changed with the prop_name parameter. Useful when coloring entities based on sequence conservation.
prop_name The property name for assigning the conservation to attached residues. Defaults to 'cons'.
ignore_gap If true, the dissimilarity between two gaps is increased to 6.0 instead of 0.5 as defined in the original version. Without this, a stretch of the alignment in which a single sequence is aligned only to gaps is considered highly conserved (depending on the number of gap sequences).

Dist2MeanPtr DLLIMPORT ost::seq::alg::CreateDist2Mean ( const DistanceMapPtr  dmap  ) 
Returns:
Distances to mean for each structure in dmap. Structures are in the same order as passed when creating dmap.
Parameters:
dmap Distance map as created with CreateDistanceMap.
DistanceMapPtr DLLEXPORT_OST_SEQ_ALG ost::seq::alg::CreateDistanceMap ( const seq::AlignmentHandle &  alignment  ) 

create distance map from a multiple sequence alignment.

The algorithm requires that the sequence alignment consists of at least two sequences. The sequence at index 0 serves as a frame of reference. All the other sequences must have an attached view and a properly set sequence offset.

For each of the attached views, the C-alpha distance pairs are extracted and mapped onto the corresponding C-alpha distances in the reference sequence.

VarianceMapPtr DLLIMPORT ost::seq::alg::CreateVarianceMap ( const DistanceMapPtr  dmap,
Real  sigma = 25 
)
Returns:
Variance measure for each entry in dmap.
Parameters:
dmap Distance map as created with CreateDistanceMap.
sigma Used for weighting of variance measure (see Distances::GetWeightedStdDev)
AlignmentList DLLEXPORT_OST_SEQ_ALG ost::seq::alg::GlobalAlign ( const ConstSequenceHandle &  s1,
const ConstSequenceHandle &  s2,
SubstWeightMatrixPtr &  subst,
int  gap_open = -5,
int  gap_ext = -2 
)
ContactWeightMatrix DLLEXPORT_OST_SEQ_ALG ost::seq::alg::LoadConstantContactWeightMatrix (  ) 

weight of 1 for all amino-acid pairs and 0 for gaps.

ContactWeightMatrix DLLEXPORT_OST_SEQ_ALG ost::seq::alg::LoadDefaultContactWeightMatrix (  ) 

statistical potential matrix containing interaction pseudo energies

PairSubstWeightMatrix DLLIMPORT ost::seq::alg::LoadDefaultPairSubstWeightMatrix (  ) 
AlignmentList DLLEXPORT_OST_SEQ_ALG ost::seq::alg::LocalAlign ( const ConstSequenceHandle &  s1,
const ConstSequenceHandle &  s2,
SubstWeightMatrixPtr &  subst,
int  gap_open = -5,
int  gap_ext = -2 
)
AlignmentHandle DLLEXPORT_OST_SEQ_ALG ost::seq::alg::MergePairwiseAlignments ( const AlignmentList &  pairwise_alns,
const ConstSequenceHandle &  ref_seq 
)

merge a list of pairwise alignments into one multiple sequence alignment

All sequences in the pairwise sequence alignments are realigned to the reference sequence. This is useful to merge the results of a BLAST or HMM database search into one multiple sequence alignment.

The method does not produce the optimal multiple sequence alignment for all the sequences.

Parameters:
pairwise_alns is a list of AlignmentHandles, each containing two sequences
ref_seq is the reference sequence. The reference sequence must not contain any gaps.
def ost::seq::alg::PredictContacts (   ali  ) 
Predicts contacts from a multiple sequence alignment using a combination
of Mutual Information (*MI*) and the Contact Substitution Score (*CoEvoSc*).
MI is calculated with the APC and small number corrections as well as with a 
transformation into Z-scores. The *CoEvoSc* is calculated using the default 
PairSubstWeightMatrix (see seq.alg.LoadDefaultPairSubstWeightMatrix).
The final score for a pair of columns *(i,j)* of **ali** is obtained from:

Sc(i,j)=MI(i,j)exp(CoEvoSc(i,j))      if *(i,j)* >=0

Sc(i,j)=MI(i,j)exp(1-CoEvoSc(i,j))    if *(i,j)* <0

:param ali: The multiple sequence alignment
:type ali: :class:`~ost.seq.AlignmentHandle`

Definition at line 172 of file __init__.py.

AlignmentList DLLIMPORT ost::seq::alg::SemiGlobalAlign ( const ConstSequenceHandle &  s1,
const ConstSequenceHandle &  s2,
SubstWeightMatrixPtr &  subst,
int  gap_open = -5,
int  gap_ext = -2 
)
Real DLLIMPORT ost::seq::alg::SequenceIdentity ( const AlignmentHandle &  aln,
RefMode::Type  ref_mode = RefMode::LONGER_SEQUENCE,
int  seq_a = 0,
int  seq_b = 1 
)

calculate sequence identity for two sequences in an alignment

Parameters:
ref_mode influences the way the sequence identity is calculated. When set to LONGER_SEQUENCE, the sequence identity is calculated as the number of matches divided by the length of the longer sequence. If set to ALIGNMENT, the sequence identity is calculated as the number of matches divided by the number of aligned residues.
seq_a is the index of the first sequence
seq_b is the index of the second sequence
aln is the sequence alignment
Returns:
sequence identity in the range 0 to 100
Real DLLIMPORT ost::seq::alg::SequenceSimilarity ( const AlignmentHandle &  aln,
SubstWeightMatrixPtr  subst,
bool  normalize = false,
int  seq_a = 0,
int  seq_b = 1 
)

calculate sequence similarity for two sequences in an alignment

Parameters:
seq_a is the index of the first sequence
seq_b is the index of the second sequence
aln is the sequence alignment
Returns:
sequence similarity
std::vector<Real> DLLEXPORT_OST_SEQ_ALG ost::seq::alg::ShannonEntropy ( const AlignmentHandle &  aln,
bool  ignore_gaps = true 
)

calculates the Shannon entropy for each column in the alignment

def ost::seq::alg::ValidateSEQRESAlignment (   aln,
  chain = None 
)
Checks a sequence aligned to a SEQRES sequence to be free of strand breaks.
Residues divided by gaps are not considered as breakage but may also not be
connected.

:param aln: Alignment
:type aln: :class:`~ost.seq.AlignmentHandle`
:param chain: Source of the sequence
:type chain: :class:`~ost.mol.ChainHandle`

:returns: True if all residues (besides gapped ones) are connected, False
          otherwise.

Definition at line 4 of file __init__.py.


Variable Documentation

Real RAW_CONTACT_WEIGHT_MATRIX[21][21]

Initial value:
    {
      { 0.0168, 0.0000, 0.4730, 0.3602, 0.3627, 0.1650, 0.1716, 0.2177, 0.2272, 0.0770, 0.0733, 0.3501, 0.3262, 0.1228, 0.1393, 0.1263, 0.1792, 0.2105, 0.0757, 0.1801, 0.0000},
      { 0.0000, 0.0676, 0.4996, 0.3966, 0.3357, 0.1594, 0.1866, 0.1958, 0.2390, 0.0333, 0.0945, 0.3699, 0.3719, 0.1753, 0.1948, 0.1866, 0.2057, 0.2506, 0.1179, 0.1688, 0.0000},
      { 0.4730, 0.4996, 0.3499, 0.1961, 0.3666, 0.2970, 0.3204, 0.3266, 0.3599, 0.2218, 0.2777, 0.5060, 0.5262, 0.3196, 0.3532, 0.3577, 0.3560, 0.4251, 0.2489, 0.3517, 0.0000},
      { 0.3602, 0.3966, 0.1961, 0.1128, 0.2100, 0.1731, 0.2061, 0.2093, 0.2493, 0.0799, 0.0802, 0.3928, 0.3725, 0.2150, 0.2331, 0.2226, 0.2164, 0.2845, 0.1383, 0.2104, 0.0000},
      { 0.3627, 0.3357, 0.3666, 0.2100, 0.5496, 0.3269, 0.3494, 0.3122, 0.3180, 0.2106, 0.2629, 0.5362, 0.5588, 0.3475, 0.3754, 0.3618, 0.4193, 0.4679, 0.2361, 0.4789, 0.0000},
      { 0.1650, 0.1594, 0.2970, 0.1731, 0.3269, 0.1866, 0.2143, 0.2210, 0.2315, 0.1203, 0.1250, 0.3501, 0.3960, 0.2381, 0.2586, 0.2358, 0.2758, 0.3335, 0.1628, 0.3010, 0.0000},
      { 0.1716, 0.1866, 0.3204, 0.2061, 0.3494, 0.2143, 0.2894, 0.2553, 0.2783, 0.1418, 0.1529, 0.3962, 0.4310, 0.3452, 0.3660, 0.3237, 0.3417, 0.3951, 0.2073, 0.3458, 0.0000},
      { 0.2177, 0.1958, 0.3266, 0.2093, 0.3122, 0.2210, 0.2553, 0.3119, 0.2868, 0.1666, 0.1810, 0.3706, 0.3994, 0.2195, 0.2391, 0.2168, 0.2834, 0.3160, 0.1618, 0.2983, 0.0000},
      { 0.2272, 0.2390, 0.3599, 0.2493, 0.3180, 0.2315, 0.2783, 0.2868, 0.3159, 0.1473, 0.2187, 0.4208, 0.4769, 0.2727, 0.3014, 0.2989, 0.3234, 0.3733, 0.2010, 0.2982, 0.0000},
      { 0.0770, 0.0333, 0.2218, 0.0799, 0.2106, 0.1203, 0.1418, 0.1666, 0.1473, 0.0990, 0.0727, 0.3054, 0.3354, 0.1831, 0.1984, 0.1959, 0.2488, 0.2887, 0.1321, 0.2889, 0.0000},
      { 0.0733, 0.0945, 0.2777, 0.0802, 0.2629, 0.1250, 0.1529, 0.1810, 0.2187, 0.0727, 0.1358, 0.4454, 0.4871, 0.1879, 0.2176, 0.2163, 0.2738, 0.3492, 0.1143, 0.2611, 0.0000},
      { 0.3501, 0.3699, 0.5060, 0.3928, 0.5362, 0.3501, 0.3962, 0.3706, 0.4208, 0.3054, 0.4454, 0.6013, 0.6686, 0.5248, 0.5702, 0.5625, 0.5797, 0.6506, 0.3876, 0.5335, 0.0000},
      { 0.3262, 0.3719, 0.5262, 0.3725, 0.5588, 0.3960, 0.4310, 0.3994, 0.4769, 0.3354, 0.4871, 0.6686, 0.7438, 0.5554, 0.6133, 0.6157, 0.6450, 0.7208, 0.4170, 0.6399, 0.0000},
      { 0.1228, 0.1753, 0.3196, 0.2150, 0.3475, 0.2381, 0.3452, 0.2195, 0.2727, 0.1831, 0.1879, 0.5248, 0.5554, 0.5436, 0.5726, 0.5340, 0.4934, 0.5834, 0.3537, 0.4786, 0.0000},
      { 0.1393, 0.1948, 0.3532, 0.2331, 0.3754, 0.2586, 0.3660, 0.2391, 0.3014, 0.1984, 0.2176, 0.5702, 0.6133, 0.5726, 0.6365, 0.5952, 0.5478, 0.6388, 0.3921, 0.5096, 0.0000},
      { 0.1263, 0.1866, 0.3577, 0.2226, 0.3618, 0.2358, 0.3237, 0.2168, 0.2989, 0.1959, 0.2163, 0.5625, 0.6157, 0.5340, 0.5952, 0.5835, 0.5272, 0.6279, 0.3691, 0.4760, 0.0000},
      { 0.1792, 0.2057, 0.3560, 0.2164, 0.4193, 0.2758, 0.3417, 0.2834, 0.3234, 0.2488, 0.2738, 0.5797, 0.6450, 0.4934, 0.5478, 0.5272, 0.5920, 0.6433, 0.3527, 0.5070, 0.0000},
      { 0.2105, 0.2506, 0.4251, 0.2845, 0.4679, 0.3335, 0.3951, 0.3160, 0.3733, 0.2887, 0.3492, 0.6506, 0.7208, 0.5834, 0.6388, 0.6279, 0.6433, 0.7323, 0.4201, 0.5920, 0.0000},
      { 0.0757, 0.1179, 0.2489, 0.1383, 0.2361, 0.1628, 0.2073, 0.1618, 0.2010, 0.1321, 0.1143, 0.3876, 0.4170, 0.3537, 0.3921, 0.3691, 0.3527, 0.4201, 0.2846, 0.3407, 0.0000},
      { 0.1801, 0.1688, 0.3517, 0.2104, 0.4789, 0.3010, 0.3458, 0.2983, 0.2982, 0.2889, 0.2611, 0.5335, 0.6399, 0.4786, 0.5096, 0.4760, 0.5070, 0.5920, 0.3407, 1.0000, 0.0000},
      { 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000}}

Definition at line 5 of file default_contact_weight_matrix.hh.

char RAW_CONTACT_WEIGHT_MATRIX_RES_LIST[21] = {'D', 'E', 'R', 'K', 'H', 'S', 'T', 'N', 'Q', 'G', 'P', 'Y', 'W', 'V', 'I', 'L', 'M', 'F', 'A', 'C','-'}

Definition at line 3 of file default_contact_weight_matrix.hh.

Real RAW_PAIR_SUBST_WEIGHT_MATRIX[20][20][20][20]

Definition at line 5 of file default_pair_subst_weight_matrix.hh.

char RAW_PAIR_SUBST_WEIGHT_MATRIX_RES_LIST[20] = {'D', 'E', 'R', 'K', 'H', 'S', 'T', 'N', 'Q', 'G', 'P', 'Y', 'W', 'V', 'I', 'L', 'M', 'F', 'A', 'C'}

Definition at line 3 of file default_pair_subst_weight_matrix.hh.

 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Defines

Generated on 1 Mar 2018 for OpenStructure by  doxygen 1.6.1