OpenStructure
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
hhblits.py
Go to the documentation of this file.
1 '''HHblits wrapper classes and functions.
2 '''
3 
4 import subprocess
5 import datetime
6 import os
7 import shutil
8 import tempfile
9 
10 import ost
11 from ost import settings, seq
12 from ost.bindings import utils
13 
14 class HHblitsHit:
15  """
16  A hit found by HHblits
17 
18  .. attribute:: hit_id
19 
20  String identifying the hit
21 
22  :type: :class:`str`
23 
24  .. attribute:: aln
25 
26  Pairwise alignment containing the aligned part between the query and the
27  target. First sequence is the query, the second sequence the target.
28 
29  :type: :class:`~ost.seq.AlignmentHandle`
30 
31  .. attribute:: score
32 
33  The alignment score
34 
35  :type: :class:`float`
36 
37  .. attribute:: ss_score
38 
39  The secondary structure score
40 
41  :type: :class:`float`
42 
43  .. attribute:: evalue
44 
45  The E-value of the alignment
46 
47  :type: :class:`float`
48 
49  .. attribute:: pvalue
50 
51  The P-value of the alignment
52 
53  :type: :class:`float`
54 
55  .. attribute:: prob
56 
57  The probability of the alignment (between 0 and 100)
58 
59  :type: :class:`float`
60  """
61  def __init__(self, hit_id, aln, score, ss_score, evalue, pvalue, prob):
62  self.hit_id = hit_id
63  self.aln = aln
64  self.score = score
65  self.ss_score = ss_score
66  self.evalue = evalue
67  self.prob = prob
68  self.pvalue = pvalue
69 
71  """Stats from the beginning of search output.
72 
73  .. attribute:: query
74 
75  The name of the query sequence
76 
77  :type: :class:`str`
78 
79  .. attribute:: match_columns
80 
81  Total of aligned Match columns
82 
83  :type: :class:`int`
84 
85  .. attribute:: n_eff
86 
87  Value of the ``-neff`` option
88 
89  :type: :class:`float`
90 
91  .. attribute:: searched_hmms
92 
93  Number of profiles searched
94 
95  :type: :class:`int`
96 
97  .. attribute:: date
98 
99  Execution date
100 
101  :type: :class:`datetime.datetime`
102 
103  .. attribute:: command
104 
105  Command used to run
106 
107  :type: :class:`str`
108  """
109  def __init__(self):
110  self.query = ''
111  self.match_columns = 0
112  self.n_eff = 0
113  self.searched_hmms = 0
114  self.date = None
115  self.command = ''
116 
117 def ParseHeaderLine(line):
118  '''Fetch header content.
119 
120  First, we seek the start of the identifier, that is, the first whitespace
121  after the hit number + 1. Since the identifier may contain whitespaces
122  itself, we cannot split the whole line
123 
124  :param line: Line from the output header.
125  :type line: :class:`str`
126 
127  :return: Hit information and query/template offsets
128  :rtype: (:class:`HHblitsHit`, (:class:`int`, :class:`int`))
129  '''
130  for i in range(0, len(line)):
131  if line[i].isdigit():
132  break
133  for i in range(i, len(line)):
134  if line[i] == ' ':
135  break
136  assert len(line)-i >= 31 and line[i+1] != ' '
137  hit_id = line[i+1:i+31].strip()
138  fields = line[i+32:].split()
139  prob = float(fields[0])
140  evalue = float(fields[1])
141  pvalue = float(fields[2])
142  score = float(fields[3])
143  ss_score = float(fields[4])
144  offsets = (int(fields[6].split('-')[0]), int(fields[7].split('-')[0]))
145  return (HHblitsHit(hit_id, None, score, ss_score, evalue, pvalue, prob),
146  offsets)
147 
148 def ParseHHblitsOutput(output):
149  """
150  Parses the HHblits output as produced by :meth:`HHblits.Search` and returns
151  the header of the search results and a list of hits.
152 
153  :param output: Iterable containing the lines of the HHblits output file
154  :type output: iterable (e.g. an open file handle)
155 
156  :return: a tuple of the header of the search results and the hits
157  :rtype: (:class:`HHblitsHeader`, :class:`list` of :class:`HHblitsHit`)
158  """
159  lines = iter(output)
160  def _ParseHeaderSection(lines):
161  value_start_column = 14
162  date_pattern = '%a %b %d %H:%M:%S %Y'
163  header = HHblitsHeader()
164  line = lines.next()
165  assert line.startswith('Query')
166  header.query = line[value_start_column:].strip()
167  line = lines.next()
168  assert line.startswith('Match_columns')
169  header.match_columns = int(line[value_start_column:].strip())
170 
171  line = lines.next()
172  assert line.startswith('No_of_seqs')
173 
174  line = lines.next()
175  assert line.startswith('Neff')
176  header.n_eff = float(line[value_start_column:].strip())
177 
178  line = lines.next()
179  assert line.startswith('Searched_HMMs')
180  header.searched_hmms = int(line[value_start_column:].strip())
181 
182  line = lines.next()
183  assert line.startswith('Date')
184  value = line[value_start_column:].strip()
185  header.date = datetime.datetime.strptime(value, date_pattern)
186 
187  line = lines.next()
188  assert line.startswith('Command')
189  header.command = line[value_start_column:].strip()
190 
191  line = lines.next()
192  assert len(line.strip()) == 0
193  return header
194 
195  def _ParseTableOfContents(lines):
196  assert lines.next().startswith(' No Hit')
197  hits = []
198  while True:
199  line = lines.next()
200  if len(line.strip()) == 0:
201  return hits
202  hits.append(ParseHeaderLine(line))
203  return hits
204 
205  def _ParseResultBody(query_id, hits, lines):
206  entry_index = None
207  query_str = ''
208  templ_str = ''
209  def _MakeAln(query_id, hit_id, query_string, templ_string,
210  q_offset, t_offset):
211  s1 = seq.CreateSequence(query_id, query_string)
212  s1.offset = q_offset-1
213  s2 = seq.CreateSequence(hit_id, templ_string)
214  s2.offset = t_offset-1
215  return seq.CreateAlignment(s1, s2)
216  try:
217  while True:
218  # Lines which we are interested in:
219  # - "Done!" -> end of list
220  # - "No ..." -> next item in list
221  # - "T <hit_id> <start> <data> <end>"
222  # - "Q <query_id> <start> <data> <end>"
223  # -> rest is to be skipped
224  line = lines.next()
225  if len(line.strip()) == 0:
226  continue
227  if line.startswith('Done!'):
228  if len(query_str) > 0:
229  hits[entry_index][0].aln = _MakeAln(\
230  query_id, hits[entry_index][0].hit_id,
231  query_str, templ_str, *hits[entry_index][1])
232  return [h for h, o in hits]
233  if line.startswith('No '):
234  if len(query_str) > 0:
235  hits[entry_index][0].aln = _MakeAln(\
236  query_id, hits[entry_index][0].hit_id,
237  query_str, templ_str, *hits[entry_index][1])
238  entry_index = int(line[3:].strip())-1
239  hits[entry_index][0].hit_id = lines.next()[1:].strip()
240  query_str = ''
241  templ_str = ''
242  # skip the next line. It doesn't contain information we
243  # don't already know
244  lines.next()
245  continue
246  assert entry_index != None
247  # Skip all "T ..." and "Q ..." lines besides the one we want
248  if line[1:].startswith(' Consensus'):
249  continue
250  if line[1:].startswith(' ss_pred'):
251  continue
252  if line[1:].startswith(' ss_conf'):
253  continue
254  if line[1:].startswith(' ss_dssp'):
255  continue
256  if line.startswith('T '):
257  for start_pos in range(22, len(line)):
258  if line[start_pos].isalpha() or line[start_pos] == '-':
259  break
260  end_pos = line.find(' ', start_pos)
261  # this can fail if we didn't skip all other "T ..." lines
262  if end_pos == -1:
263  error_str = "Unparsable line '%s' for entry No %d" \
264  % (line.strip(), entry_index + 1)
265  raise AssertionError(error_str)
266  templ_str += line[start_pos:end_pos]
267  if line.startswith('Q '):
268  for start_pos in range(22, len(line)):
269  if line[start_pos].isalpha() or line[start_pos] == '-':
270  break
271  end_pos = line.find(' ', start_pos)
272  # this can fail if we didn't skip all other "Q ..." lines
273  if end_pos == -1:
274  error_str = "Unparsable line '%s' for entry No %d" \
275  % (line.strip(), entry_index + 1)
276  raise AssertionError(error_str)
277  query_str += line[start_pos:end_pos]
278  except StopIteration:
279  if len(query_str) > 0:
280  hits[entry_index][0].aln = _MakeAln(query_id,
281  hits[entry_index][0].hit_id,
282  query_str, templ_str,
283  *hits[entry_index][1])
284  return [h for h, o in hits]
285  header = _ParseHeaderSection(lines)
286  # parse the table of contents. This is neccessary as some of the properties
287  # (i.e. start of alignment) we need are only given there. From the TOC we
288  # create a list of hits that is then further filled with data when we parse
289  # the actual result body
290  hits = _ParseTableOfContents(lines)
291  return header, _ParseResultBody(header.query, hits, lines)
292 
293 def ParseA3M(a3m_file):
294  '''
295  Parse secondary structure information and the multiple sequence alignment
296  out of an A3M file as produced by :meth:`HHblits.BuildQueryMSA`.
297 
298  :param a3m_file: Iterable containing the lines of the A3M file
299  :type a3m_file: iterable (e.g. an open file handle)
300 
301  :return: Dictionary containing "ss_pred" (:class:`list`), "ss_conf"
302  (:class:`list`) and "msa" (:class:`~ost.seq.AlignmentHandle`).
303  '''
304  profile_dict = dict()
305  state = 'NONE'
306  pred_seq_txt = ''
307  conf_seq_txt = ''
308  msa_seq = list()
309  msa_head = list()
310  for line in a3m_file:
311  if len(line.rstrip()) == 0:
312  continue
313  elif line.startswith('>ss_pred'):
314  state = 'sspred'
315  continue
316  elif line.startswith('>ss_conf'):
317  state = 'ssconf'
318  continue
319  elif line[0] == '>':
320  if state == 'ssconf' or state == 'msa':
321  msa_seq.append('')
322  msa_head.append(line[1:].rstrip())
323  else:
324  raise IOError('The A3M file is missing the "ss_conf" section')
325  state = 'msa'
326  continue
327 
328  if state == 'sspred':
329  pred_seq_txt += line.rstrip()
330  elif state == 'ssconf':
331  conf_seq_txt += line.rstrip()
332  elif state == 'msa':
333  msa_seq[len(msa_seq)-1] += line.rstrip()
334 
335  profile_dict['ss_pred'] = list()
336  profile_dict['ss_conf'] = list()
337  for i in range(0, len(pred_seq_txt)):
338  profile_dict['ss_pred'].append(pred_seq_txt[i])
339  profile_dict['ss_conf'].append(int(conf_seq_txt[i]))
340 
341  # post processing
342  # MSA
343  profile_dict['msa'] = None
344  if len(msa_seq) > 1:
345  t = msa_seq[0]
346  al = seq.AlignmentList()
347  for i in range(1, len(msa_seq)):
348  qs = ''
349  ts = ''
350  k = 0
351  for c in msa_seq[i]:
352  if c.islower():
353  qs += '-'
354  ts += c.upper()
355  else:
356  qs += t[k]
357  ts += c
358  k += 1
359  nl = seq.CreateAlignment(seq.CreateSequence(msa_head[0], qs),
360  seq.CreateSequence(msa_head[i], ts))
361  al.append(nl)
362  profile_dict['msa'] = seq.alg.MergePairwiseAlignments(\
363  al, seq.CreateSequence(msa_head[0], t))
364  return profile_dict
365 
366 
367 def ParseHHM(profile):
368  '''
369  Parse secondary structure information and the MSA out of an HHM profile as
370  produced by :meth:`HHblits.A3MToProfile`.
371 
372  :param profile: Opened file handle holding the profile.
373  :type profile: :class:`file`
374 
375  :return: Dictionary containing "ss_pred" (:class:`list`), "ss_conf"
376  (:class:`list`), "msa" (:class:`~ost.seq.AlignmentHandle`) and
377  "consensus" (:class:`~ost.seq.SequenceHandle`).
378  '''
379  profile_dict = dict()
380  state = 'NONE'
381  pred_seq_txt = ''
382  conf_seq_txt = ''
383  consensus_txt = ''
384  msa_seq = list()
385  msa_head = list()
386  for line in profile:
387  if len(line.rstrip()) == 0:
388  continue
389  if line.rstrip() == '>ss_pred PSIPRED predicted secondary structure':
390  state = 'sspred'
391  continue
392  elif line.rstrip() == '>ss_conf PSIPRED confidence values':
393  state = 'ssconf'
394  continue
395  elif line.rstrip() == '>Consensus':
396  state = 'consensus'
397  continue
398  elif line[0] == '>':
399  if state == 'consensus' or state == 'msa':
400  msa_seq.append('')
401  msa_head.append(line[1:].rstrip())
402  else:
403  raise IOError('Profile file "%s" is missing ' % profile.name+
404  'the "Consensus" section')
405  state = 'msa'
406  continue
407  elif line[0] == '#':
408  state = 'NONE'
409  continue
410 
411  if state == 'sspred':
412  pred_seq_txt += line.rstrip()
413  elif state == 'ssconf':
414  conf_seq_txt += line.rstrip()
415  elif state == 'msa':
416  msa_seq[len(msa_seq)-1] += line.rstrip()
417  elif state == 'consensus':
418  consensus_txt += line.rstrip()
419 
420  profile_dict['ss_pred'] = list()
421  profile_dict['ss_conf'] = list()
422  for i in range(0, len(pred_seq_txt)):
423  profile_dict['ss_pred'].append(pred_seq_txt[i])
424  profile_dict['ss_conf'].append(int(conf_seq_txt[i]))
425 
426  # post processing
427  # MSA
428  profile_dict['msa'] = None
429  if len(msa_seq):
430  t = msa_seq[0]
431  al = seq.AlignmentList()
432  for i in range(1, len(msa_seq)):
433  qs = ''
434  ts = ''
435  k = 0
436  for c in msa_seq[i]:
437  if c.islower():
438  qs += '-'
439  ts += c.upper()
440  else:
441  qs += t[k]
442  ts += c
443  k += 1
444  nl = seq.CreateAlignment(seq.CreateSequence(msa_head[0], qs),
445  seq.CreateSequence(msa_head[i], ts))
446  al.append(nl)
447  profile_dict['msa'] = seq.alg.MergePairwiseAlignments(\
448  al, seq.CreateSequence(msa_head[0], t))
449  #print profile_dict['msa'].ToString(80)
450  # Consensus
451  profile_dict['consensus'] = seq.CreateSequence('Consensus', consensus_txt)
452 
453  return profile_dict
454 
455 
456 class HHblits:
457  """
458  Initialise a new HHblits "search" for the given query. Query may either
459  be a :class:`~ost.seq.SequenceHandle` or a string. In the former case, the
460  query is the actual query sequence, in the latter case, the query is the
461  filename to the file containing the query.
462 
463  :param query: Query sequence as file or sequence.
464  :type query: :class:`~ost.seq.SequenceHandle` or :class:`str`
465  :param hhsuite_root: Path to the top-level directory of your hhsuite
466  installation.
467  :type hhsuite_root: :class:`str`
468  :param hhblits_bin: Name of the hhblits binary. Will only be used if
469  :attr:`hhsuite_root`:file:`/bin/hhblits` does not exist.
470  :type hhblits_bin: :class:`str`
471  :param working_dir: Directory for temporary files. Will be created if not
472  present but **not** automatically deleted.
473  :type working_dir: :class:`str`
474  """
475  OUTPUT_PREFIX = 'query_hhblits'
476  def __init__(self, query, hhsuite_root, hhblits_bin=None, working_dir=None):
477  self.query = query
478  self.hhsuite_root = hhsuite_root
479  if os.path.exists(os.path.join(self.hhsuite_root, 'bin/hhblits')):
480  self.bin_dir = os.path.join(self.hhsuite_root, 'bin')
481  self.hhblits_bin = os.path.join(self.hhsuite_root, 'bin/hhblits')
482  else:
483  self.hhblits_bin = settings.Locate('hhblits',
484  explicit_file_name=hhblits_bin)
485  self.bin_dir = os.path.dirname(self.hhblits_bin)
486  # guess root folder (note: this may fail in future)
487  self.hhsuite_root = os.path.dirname(self.bin_dir)
488  self.hhlib_dir = os.path.join(self.hhsuite_root, 'lib', 'hh')
489  if working_dir:
490  self.needs_cleanup = False
491  self.working_dir = working_dir
492  if not os.path.exists(working_dir):
493  os.mkdir(working_dir)
494  if isinstance(query, str):
495  self.filename = os.path.abspath(os.path.join(
496  self.working_dir, os.path.basename(query)))
497  if self.filename != os.path.abspath(query):
498  shutil.copy(query, self.filename)
499  else:
500  self.filename = os.path.join(self.working_dir,
501  '%s.fasta' % HHblits.OUTPUT_PREFIX)
502  ost.io.SaveSequence(query, self.filename)
503  else:
504  self.needs_cleanup = True
505  if isinstance(query, str):
506  self.working_dir = tempfile.mkdtemp()
507  self.filename = os.path.abspath(os.path.join(
508  self.working_dir, os.path.basename(query)))
509  shutil.copy(query, self.filename)
510  else:
511  tmp_dir = utils.TempDirWithFiles((query,))
512  self.working_dir = tmp_dir.dirname
513  self.filename = tmp_dir.files[0]
514 
515  def BuildQueryMSA(self, nrdb, options={}, a3m_file=None):
516  """Builds the MSA for the query sequence.
517 
518  This function directly uses hhblits of hhtools. While in theory it would
519  be possible to do this by PSI-blasting on our own, hhblits is supposed
520  to be faster. Also it is supposed to prevent alignment corruption. The
521  alignment corruption is caused by low-scoring terminal alignments that
522  draw the sequences found by PSI-blast away from the optimum. By removing
523  these low scoring ends, part of the alignment corruption can be
524  suppressed.
525 
526  hhblits does **not** call PSIPRED on the MSA to predict the secondary
527  structure of the query sequence. This is done by addss.pl of hhtools.
528  The predicted secondary structure is stored together with the sequences
529  identified by hhblits.
530 
531  The produced A3M file can be parsed by :func:`ParseA3M`. If the file was
532  already produced, hhblits is not called again and the existing file path
533  is returned.
534 
535  :param nrdb: Database to be align against; has to be an hhblits database
536  :type nrdb: :class:`str`
537 
538  :param options: Dictionary of options to *hhblits*, one "-" is added in
539  front of every key. Boolean True values add flag without
540  value. Merged with default options {'cpu': 1, 'n': 1},
541  where 'n' defines the number of iterations.
542  :type options: :class:`dict`
543 
544  :param a3m_file: a path of a3m_file to be used, optional
545  :type a3m_file: :class:`str`
546 
547  :return: The path to the A3M file containing the MSA
548  :rtype: :class:`str`
549  """
550  if a3m_file is None:
551  a3m_file = '%s.a3m' % os.path.splitext(self.filename)[0]
552  else:
553  a3m_file = os.path.abspath(a3m_file)
554  if os.path.exists(a3m_file):
555  ost.LogInfo('Reusing already existing query alignment (%s)' % a3m_file)
556  return a3m_file
557  ost.LogInfo('Using hhblits from "%s"' % self.hhsuite_root)
558  full_nrdb = os.path.join(os.path.abspath(os.path.split(nrdb)[0]),
559  os.path.split(nrdb)[1])
560  # create MSA
561  opts = {'cpu' : 1, # no. of cpus used
562  'n' : 1} # no. of iterations
563  opts.update(options)
564  opt_cmd, _ = _ParseOptions(opts)
565  hhblits_cmd = '%s -e 0.001 -i %s -oa3m %s -d %s %s' % \
566  (self.hhblits_bin, self.filename, a3m_file, full_nrdb,
567  opt_cmd)
568  job = subprocess.Popen(hhblits_cmd, shell=True, cwd=self.working_dir,
569  stdout=subprocess.PIPE, stderr=subprocess.PIPE)
570  sout, _ = job.communicate()
571  lines = sout.splitlines()
572  for line in lines:
573  ost.LogVerbose(line.strip())
574  if not os.path.exists(a3m_file):
575  ost.LogWarning('Building query profile failed, no output')
576  return a3m_file
577  # add secondary structure annotation
578  addss_cmd = "%s %s" % (os.path.join(self.hhsuite_root,
579  'lib/hh/scripts/addss.pl'),
580  a3m_file)
581  env = dict(os.environ)
582  env.update({'PERL5LIB' : os.path.join(self.hhsuite_root,
583  'lib/hh/scripts'),
584  'HHLIB' : self.hhlib_dir,
585  'PATH' : '%s:%s' % (os.path.join(self.hhsuite_root, 'bin'),
586  os.environ['PATH'])})
587  job = subprocess.Popen(addss_cmd, shell=True, cwd=self.working_dir,
588  env=env, stdout=subprocess.PIPE,
589  stderr=subprocess.PIPE)
590  sout, serr = job.communicate()
591  lines = sout.splitlines()
592  for line in lines:
593  if 'error' in line.lower():
594  ost.LogWarning('Predicting secondary structure for MSA '+
595  '(%s) failed, on command: %s' % (a3m_file, line))
596  return a3m_file
597  return a3m_file
598 
599  def A3MToProfile(self, a3m_file, hhm_file=None):
600  """
601  Converts the A3M alignment file to a hhm profile. If hhm_file is not
602  given, the output file will be set to <:attr:`a3m_file`-basename>.hhm.
603 
604  The produced HHM file can be parsed by :func:`ParseHHM`.
605 
606  If the file was already produced, the existing file path is returned
607  without recomputing it.
608 
609  :param a3m_file: Path to input MSA as produced by :meth:`BuildQueryMSA`
610  :type a3m_file: :class:`str`
611 
612  :param hhm_file: Desired output file name
613  :type hhm_file: :class:`str`
614 
615  :return: Path to the profile file
616  :rtype: :class:`str`
617  """
618  hhmake = os.path.join(self.bin_dir, 'hhmake')
619  if not hhm_file:
620  hhm_file = '%s.hhm' % os.path.splitext(a3m_file)[0]
621  if os.path.exists(hhm_file):
622  return hhm_file
623  ost.LogVerbose('converting %s to %s' % (a3m_file, hhm_file))
624  os.putenv('HHLIB', self.hhlib_dir)
625  if subprocess.call('%s -i %s -o %s' % (hhmake, a3m_file, hhm_file),
626  shell=True):
627  raise IOError('could not convert a3m to hhm file')
628  return hhm_file
629 
630  def A3MToCS(self, a3m_file, cs_file=None, options={}):
631  """
632  Converts the A3M alignment file to a column state sequence file. If
633  cs_file is not given, the output file will be set to
634  <:attr:`a3m_file`-basename>.seq219.
635 
636  If the file was already produced, the existing file path is returned
637  without recomputing it.
638 
639  :param a3m_file: Path to input MSA as produced by :meth:`BuildQueryMSA`
640  :type a3m_file: :class:`str`
641 
642  :param cs_file: Output file name (may be omitted)
643  :type cs_file: :class:`str`
644 
645  :param options: Dictionary of options to *cstranslate*, one "-" is added
646  in front of every key. Boolean True values add flag
647  without value.
648  :type options: :class:`dict`
649 
650  :return: Path to the column state sequence file
651  :rtype: :class:`str`
652  """
653  cstranslate = os.path.join(self.hhlib_dir, 'bin', 'cstranslate')
654  if not cs_file:
655  cs_file = '%s.seq219' % os.path.splitext(a3m_file)[0]
656  if os.path.exists(cs_file):
657  return cs_file
658  opt_cmd, _ = _ParseOptions(options)
659  cs_cmd = '%s -i %s -o %s %s' % (
660  cstranslate,
661  os.path.abspath(a3m_file),
662  os.path.abspath(cs_file),
663  opt_cmd)
664  ost.LogVerbose('converting %s to %s' % (a3m_file, cs_file))
665  job = subprocess.Popen(cs_cmd, shell=True, cwd=self.working_dir,
666  stdout=subprocess.PIPE, stderr=subprocess.PIPE)
667  sout, _ = job.communicate()
668  lines = sout.splitlines()
669  for line in lines:
670  if 'Wrote abstract state sequence to' in line:
671  return cs_file
672  ost.LogWarning('Creating column state sequence file (%s) failed' % \
673  cs_file)
674 
675  def Cleanup(self):
676  """Delete temporary data.
677 
678  Delete temporary data if no working dir was given. Controlled by
679  :attr:`needs_cleanup`.
680  """
681  if self.needs_cleanup and os.path.exists(self.working_dir):
682  shutil.rmtree(self.working_dir)
683 
684  def CleanupFailed(self):
685  '''In case something went wrong, call to make sure everything is clean.
686 
687  This will delete the working dir independently of :attr:`needs_cleanup`.
688  '''
689  store_needs_cleanup = self.needs_cleanup
690  self.needs_cleanup = True
691  self.Cleanup()
692  self.needs_cleanup = store_needs_cleanup
693 
694  def Search(self, a3m_file, database, options={}, prefix=''):
695  """
696  Searches for templates in the given database. Before running the search,
697  the hhm file is copied. This makes it possible to launch several hhblits
698  instances at once. Upon success, the filename of the result file is
699  returned. This file may be parsed with :func:`ParseHHblitsOutput`.
700 
701  :param a3m_file: Path to input MSA as produced by :meth:`BuildQueryMSA`
702  :type a3m_file: :class:`str`
703 
704  :param database: Search database, needs to be the common prefix of the
705  database files
706  :type database: :class:`str`
707 
708  :param options: Dictionary of options to *hhblits*, one "-" is added in
709  front of every key. Boolean True values add flag without
710  value. Merged with default options {'cpu': 1, 'n': 1},
711  where 'n' defines the number of iterations.
712  :type options: :class:`dict`
713 
714  :param prefix: Prefix to the result file
715  :type prefix: :class:`str`
716 
717  :return: The path to the result file
718  :rtype: :class:`str`
719  """
720  opts = {'cpu' : 1, # no. of cpus used
721  'n' : 1} # no. of iterations
722  opts.update(options)
723  opt_cmd, opt_str = _ParseOptions(opts)
724  base = os.path.basename(os.path.splitext(a3m_file)[0])
725  hhr_file = '%s%s_%s.hhr' % (prefix, base, opt_str)
726  hhr_file = os.path.join(self.working_dir, hhr_file)
727  search_cmd = '%s %s -e 0.001 -Z 10000 -B 10000 -i %s -o %s -d %s' % (
728  self.hhblits_bin,
729  opt_cmd,
730  os.path.abspath(a3m_file),
731  os.path.abspath(hhr_file),
732  os.path.join(os.path.abspath(os.path.split(database)[0]),
733  os.path.split(database)[1]))
734  ost.LogInfo('searching %s' % database)
735  ost.LogVerbose(search_cmd)
736  job = subprocess.Popen(search_cmd, shell=True, cwd=self.working_dir,
737  stdout=subprocess.PIPE, stderr=subprocess.PIPE)
738  sout, serr = job.communicate()
739  if job.returncode != 0:
740  lines = sout.splitlines()
741  for line in lines:
742  ost.LogError(line.strip())
743  lines = serr.splitlines()
744  for line in lines:
745  ost.LogError(line.strip())
746  return None
747  return hhr_file
748 
749 
750 def _ParseOptions(opts):
751  """
752  :return: Tuple of strings (opt_cmd, opt_str), where opt_cmd can be
753  passed to command ("-" added in front of keys, options
754  separated by space) and opt_str (options separated by "_")
755  can be used for filenames.
756  :param opts: Dictionary of options, one "-" is added in front of every
757  key. Boolean True values add flag without value.
758  """
759  opt_cmd = list()
760  opt_str = list()
761  for k, val in opts.iteritems():
762  if type(val) == type(True):
763  if val == True:
764  opt_cmd.append('-%s' % str(k))
765  opt_str.append(str(k))
766  else:
767  opt_cmd.append('-%s %s' % (str(k), str(val)))
768  opt_str.append('%s%s' % (str(k), str(val)))
769  opt_cmd = ' '.join(opt_cmd)
770  opt_str = '_'.join(opt_str)
771  return opt_cmd, opt_str
772 
773 
774 __all__ = ['HHblits', 'HHblitsHit', 'HHblitsHeader',
775  'ParseHHblitsOutput', 'ParseA3M', 'ParseHHM',
776  'ParseHeaderLine']
777 
778 # LocalWords: HHblits MSA hhblits hhtools PSIPRED addss param nrdb str
779 # LocalWords: cpu hhm func ParseHHblitsOutput ss pred conf msa hhsuite dir
780 # LocalWords: attr basename rtype cstranslate tuple HHblitsHeader meth aln
781 # LocalWords: HHblitsHit iterable evalue pvalue neff hmms datetime
782 # LocalWords: whitespace whitespaces
void DLLEXPORT_OST_IO SaveSequence(const seq::ConstSequenceHandle &sequence, const String &filename, const String &format="auto")