OpenStructure
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
hhsearch.py
Go to the documentation of this file.
1 """
2 Author: Marco Biasini
3 """
4 
5 import re
6 from ost import seq
7 
class HHSearchHit:
    """
    A single HHSearch hit: the parsed summary-table entry together with the
    alignment read from the matching detail section.
    """

    def __init__(self, summary, alignment):
        self.summary, self.alignment = summary, alignment
12 
class HitSummary:
    """
    Values taken from one row of the hit summary table: template identifier
    (PDB id and chain), probability, E-value and the aligned ranges on the
    query and on the template.
    """

    def __init__(self, pdb_id, chain, prob, e_value, query_start, query_end,
                 template_start, template_end):
        # Template identification.
        self.pdb_id, self.chain = pdb_id, chain
        # Scores.
        self.prob, self.e_value = prob, e_value
        # Aligned ranges.
        self.query_start, self.query_end = query_start, query_end
        self.template_start, self.template_end = template_start, template_end
24 
class HHSearchResult:
    """
    Read an HHSearch result file. The result is stored in a list of
    HHSearchHit objects (attribute ``hits``); every key/value line of the file
    header additionally becomes a lower-cased attribute of the instance.

    Usage:

        result=HHSearchResult('output.hhr')
        for hit in result.hits:
            print('%s %s' % (hit.summary.pdb_id, hit.summary.chain))
            print(hit.alignment.ToString(80))
    """

    # Compiled once; used to tokenise header and alignment lines.
    _WHITESPACE = re.compile(r'\s+')
    # Alignment rows that carry annotations instead of sequence residues.
    _ANNOTATION_PREFIXES = ('Q ss_pred', 'Q Consensus',
                            'T ss_pred', 'T Consensus')

    def __init__(self, filename, pipe_separated=False):
        """
        :param filename: path of the result file to parse.
        :param pipe_separated: if True, the hit name is split on '|' and the
            PDB id/chain are taken from its second field; otherwise they are
            read from fixed columns of the summary line.
        """
        self.pipe_separated = pipe_separated
        self._Read(filename)

    def _Read(self, filename):
        """Parse header, summary table and hit details of *filename*."""
        # The three sections are consumed sequentially from the same handle;
        # the context manager closes it even when parsing raises.
        with open(filename) as ifile:
            self._ReadHeader(ifile)
            summaries = self._ReadHitSummaries(ifile)
            self.hits = self._ReadHitDetails(ifile, summaries)

    def _ReadHeader(self, ifile):
        """
        Consume lines up to (and including) the first blank line and store
        each as an attribute: first token, lower-cased, is the attribute
        name, the remainder of the line is its (string) value. A line made of
        a single token is stored with an empty value.
        """
        for line in ifile:
            stripped_line = line.strip()
            if not stripped_line:
                break
            fields = self._WHITESPACE.split(stripped_line, 1)
            value = fields[1] if len(fields) > 1 else ''
            setattr(self, fields[0].lower(), value)

    def _ReadHitSummaries(self, ifile):
        """
        Skip the first line (column titles of the summary table), then parse
        every following line up to the first blank line.

        :returns: list of HitSummary, in file order.
        """
        summaries = []
        skip_header = True
        for line in ifile:
            if skip_header:
                skip_header = False
                continue
            if not line.strip():
                break
            summaries.append(HitSummary(*self._ParseSummaryFields(line)))
        return summaries

    def _ParseSummaryFields(self, summary_line):
        """
        Extract the fields of one summary line using fixed column offsets.

        :returns: tuple (pdb_id, chain, prob, e_value, query_start, query_end,
            template_start, template_end), i.e. the HitSummary constructor
            arguments in order.
        :raises ValueError: if a numeric column cannot be converted.
        :raises IndexError: if the line is too short or a range lacks '-'.
        """
        if self.pipe_separated:
            hit_id = summary_line[4:37].split('|')[1]
            pdb_id, chain = hit_id[:4], hit_id[4]
        else:
            pdb_id, chain = summary_line[4:8], summary_line[9]
        prob = float(summary_line[36:40])
        # NOTE(review): the E-value is never read from the line and is always
        # reported as 0.0; the offsets used here also look like they may cut
        # the first character of wide fields -- confirm against the report
        # format before changing either.
        e_value = 0.0
        query_start, query_end = self._ParseRange(summary_line[76:84])
        template_start, template_end = self._ParseRange(summary_line[86:94])
        return (pdb_id, chain, prob, e_value, query_start, query_end,
                template_start, template_end)

    @staticmethod
    def _ParseRange(text):
        """Convert a 'start-end' column into a pair of ints."""
        bounds = text.split('-')
        return int(bounds[0].strip()), int(bounds[1].strip())

    def _ReadHitDetails(self, ifile, summaries):
        """
        Read one detail section per summary, in order, and pair them up.

        :returns: list of HHSearchHit.
        """
        return [HHSearchHit(summary, self._ReadHitDetail(ifile))
                for summary in summaries]

    def _ReadHitDetail(self, ifile):
        """
        Read the next detail section and build a two-sequence alignment
        ('query' first, 'target' second) from it.
        """
        q_seq, t_seq = self._CollectAlignedSequences(ifile)
        ali = seq.AlignmentHandle()
        ali.AddSequence(seq.Sequence.FromString('query', q_seq))
        ali.AddSequence(seq.Sequence.FromString('target', t_seq))
        return ali

    def _CollectAlignedSequences(self, ifile):
        """
        Concatenate the aligned query and template strings of the next detail
        section.

        Everything up to and including the first line starting with '>' is
        skipped. Reading stops at the next line starting with 'No' (that line
        is consumed) or at end of file.

        :returns: tuple (query_string, template_string).
        """
        q_parts, t_parts = [], []
        in_header = True
        for line in ifile:
            if in_header:
                if line.startswith('>'):
                    in_header = False
                continue
            if line.startswith('No'):
                break
            # Blank lines and lines starting with a space hold no residues.
            if not line.strip() or line.startswith(' '):
                continue
            if line.startswith(self._ANNOTATION_PREFIXES):
                continue
            # The aligned residues are the fourth whitespace-separated token.
            if line.startswith('Q'):
                q_parts.append(self._WHITESPACE.split(line)[3])
            elif line.startswith('T'):
                t_parts.append(self._WHITESPACE.split(line)[3])
        return ''.join(q_parts), ''.join(t_parts)