OpenStructure
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
blast.py
Go to the documentation of this file.
1 from ost import settings
2 import subprocess
3 from xml.dom import minidom
4 from ost import io, seq
5 import ost
6 import re
7 import os
8 
10  """
11  An aligned patch, aka. HSP
12 
13  .. attribute:: aln
14 
15  The local alignment. Sequence offset of both sequences in the alignment are
16  set to the starting position in the query and target sequence, respectively.
17 
18  :type: :class:`~ost.seq.AlignmentHandle`
19 
20  .. attribute:: bit_score
21 
22  The bit score of the HSP
23 
24  .. attribute:: score
25 
26  The score of the HSP
27 
28  .. attribute:: evalue
29 
30  The E-value of the HSP
31  """
32  def __init__(self, aln, bit_score, score, evalue):
33  self.aln=aln
34  self.bit_score=bit_score
35  self.score=score
36  self.evalue=evalue
37 
38 class BlastHit:
39  """
40  A positive match found by BLAST.
41 
42  Each blast hit consists of one or more HSPs, encoded by the
43  :class:`AlignedPatch` class.
44 
45  .. attribute:: identifier
46 
47  The identifier of the matching sequence
48 
49  .. attribute:: aligned_patches
50 
51  list of :class:`AlignedPatch` instances holding the actual HSPs.
52  """
53  def __init__(self, identifier, aligned_patches):
54  self.identifier=identifier
55  self.aligned_patches=aligned_patches
56 
57 class BlastError(RuntimeError):
58  """
59  Raised when something goes wrong during parsing/execution of the blast
60  executable.
61 
62  .. attribute:: brief
63 
64  Short explanation of the problem
65 
66  .. attribute:: details
67 
68  If set, explains in more detail what caused the error. Might be empty.
69  """
70  def __init__(self, brief, details):
71  self.brief=brief
72  self.details=details
73 
74  def __str__(self):
75  if self.details:
76  return '%s\n%s' % (self.brief, self.details)
77  else:
78  return self.brief
79 
80 def ParseBlastOutput(string):
81  """
82  Parses the blast output and returns a list of :class:`BlastHit` instances.
83 
84  :raises: :class:`BlastError` if the output could not be parsed.
85 
86  This functions is only capable of dealing with the BLAST XML output.
87  """
88  def _GetText(node):
89  rc=''
90  for child in node.childNodes:
91  if child.nodeType==child.TEXT_NODE:
92  rc+=child.data
93  return rc
94 
95  def _GetValue(node, tag_name):
96  tag=node.getElementsByTagName(tag_name)
97  assert len(tag)==1
98  return _GetText(tag[0])
99 
100  def _GetInt(node, tag_name):
101  return int(_GetValue(node, tag_name))
102 
103  def _ParseHsp(query_id, hit_id, hsp):
104  bit_score=float(_GetValue(hsp, 'Hsp_bit-score'))
105  score=float(_GetValue(hsp, 'Hsp_score'))
106  evalue=float(_GetValue(hsp, 'Hsp_evalue'))
107  query_offset=_GetInt(hsp, 'Hsp_query-from')-1
108  hit_offset=_GetInt(hsp, 'Hsp_hit-from')-1
109  query_seq=seq.CreateSequence(str(query_id), str(_GetValue(hsp, 'Hsp_qseq')))
110  query_seq.offset=query_offset
111  hit_seq=seq.CreateSequence(str(hit_id), str(_GetValue(hsp, 'Hsp_hseq')))
112  hit_seq.offset=hit_offset
113  aln=seq.CreateAlignment(query_seq, hit_seq)
114  return AlignedPatch(aln, bit_score, score, evalue)
115  try:
116  doc=minidom.parseString(string)
117  except Exception, e:
118  raise BlastError('Error while parsing BLAST output: %s' % str(e), '')
119  hits=[]
120  query_id=_GetValue(doc, 'BlastOutput_query-def')
121  for hit in doc.getElementsByTagName('Hit'):
122  hit_id=_GetValue(hit, 'Hit_def')
123  hsps=hit.getElementsByTagName('Hsp')
124  aligned_patches=[_ParseHsp(query_id, hit_id, hsp) for hsp in hsps]
125  hits.append(BlastHit(hit_id, aligned_patches))
126  return hits
127 
128 def BlastVersion(blast_location=None):
129  """
130  Returns the version of the BLAST executable, e.g. 2.2.24 as a string
131  """
132  blastall_exe=settings.Locate('blastall', explicit_file_name=blast_location)
133  blast_pipe=subprocess.Popen(blastall_exe, stdout=subprocess.PIPE,
134  stderr=subprocess.PIPE)
135  lines=blast_pipe.stdout.readlines()
136  pattern=re.compile(r'blastall (\d+\.\d+\.\d+)\s+arguments:\s*')
137  for line in lines:
138  m=pattern.match(line)
139  if m:
140  return m.group(1)
141  raise IOError("could not determine blast version for '%s'" % blastall_exe)
142 
143 def Blast(query, database, gap_open=11, gap_ext=1, matrix='BLOSUM62',
144  blast_location=None):
145  """
146  Runs a protein vs. protein blast search. The results are returned as a
147  list of :class:`BlastHit` instances.
148 
149  :param query: the query sequence
150  :type query: :class:`seq.ConstSequenceHandle`
151 
152  :param database: The filename of the sequence database. Make sure that
153  formatdb has been run on the database and the <database>.pin or
154  <database>.pal file exists.
155  :param matrix: The substitution matrix to be used. Must be one of 'BLOSUM45',
156  'BLOSUM62', 'BLOSUM80', 'PAM30', 'PAM70'.
157  :param gap_open: Gap opening penalty. Note that only a subset of gap opening
158  penalties is supported for each substitutition matrix. Consult the blast
159  docs for more information.
160  :param gap_ext: Gap extension penalty. Only a subset of gap extension
161  penalties are supported for each of the substitution matrices. Consult the
162  blast docs for more information.
163 
164  :raises: :class:`~ost.settings.FileNotFound` if the BLAST executable could not
165  be found
166  :raises: :class:`BlastError` if there was a problem during execution of BLAST.
167  :raises: :class:`ValueError` if the substitution matrix is invalid
168  :raises: :class:`IOError` if the database does not exist
169  """
170  subst_mats=('BLOSUM45', 'BLOSUM62', 'BLOSUM80', 'PAM30', 'PAM70',)
171  if matrix not in subst_mats:
172  raise ValueError('matrix must be one of %s' % ', '.join(subst_mats))
173  if not os.path.exists('%s.pin' % database) and not os.path.exists('%s.pal' % database):
174  raise IOError("Database %s does not exist" % database)
175  blastall_exe=settings.Locate('blastall', explicit_file_name=blast_location)
176  args=[blastall_exe, '-d', database, '-p', 'blastp',
177  '-m', '7', '-M', matrix, '-G', str(gap_open), '-E', str(gap_ext)]
178  ost.LogInfo('running BLAST (%s)' % ' '.join(args))
179  blast_pipe=subprocess.Popen(args, stderr=subprocess.PIPE,
180  stdout=subprocess.PIPE, stdin=subprocess.PIPE)
181  stdout, stderr=blast_pipe.communicate(io.SequenceToString(query, 'fasta'))
182  if len(stderr)>0:
183  pattern=re.compile(r'^\[.*\]\s+ERROR:\s+(.*)')
184  lines=stderr.split('\n')
185  error_message=pattern.match(lines[0])
186  if error_message:
187  raise BlastError(error_message.group(1), '\n'.join(lines[1:]))
188  return ParseBlastOutput(stdout)