OpenStructure
hhblits2.py
Go to the documentation of this file.
1 '''HHblits wrapper classes and functions.
2 '''
3 
4 import subprocess
5 import datetime
6 import os
7 import shutil
8 import tempfile
9 
10 import ost
11 from ost import settings, seq
12 from ost.bindings import utils
13 
14 class HHblitsHit:
15  """
16  A hit found by HHblits
17 
18  .. attribute:: hit_id
19 
20  String identifying the hit
21 
22  :type: :class:`str`
23 
24  .. attribute:: aln
25 
26  Pairwise alignment containing the aligned part between the query and the
27  target. First sequence is the query, the second sequence the target.
28 
29  :type: :class:`~ost.seq.AlignmentHandle`
30 
31  .. attribute:: score
32 
33  The alignment score
34 
35  :type: :class:`float`
36 
37  .. attribute:: ss_score
38 
39  The secondary structure score
40 
41  :type: :class:`float`
42 
43  .. attribute:: evalue
44 
45  The E-value of the alignment
46 
47  :type: :class:`float`
48 
49  .. attribute:: pvalue
50 
51  The P-value of the alignment
52 
53  :type: :class:`float`
54 
55  .. attribute:: prob
56 
57  The probability of the alignment (between 0 and 100)
58 
59  :type: :class:`float`
60  """
61  def __init__(self, hit_id, aln, score, ss_score, evalue, pvalue, prob):
62  self.hit_idhit_id = hit_id
63  self.alnaln = aln
64  self.scorescore = score
65  self.ss_scoress_score = ss_score
66  self.evalueevalue = evalue
67  self.probprob = prob
68  self.pvaluepvalue = pvalue
69 
71  """Stats from the beginning of search output.
72 
73  .. attribute:: query
74 
75  The name of the query sequence
76 
77  :type: :class:`str`
78 
79  .. attribute:: match_columns
80 
81  Total of aligned Match columns
82 
83  :type: :class:`int`
84 
85  .. attribute:: n_eff
86 
87  Value of the ``-neff`` option
88 
89  :type: :class:`float`
90 
91  .. attribute:: searched_hmms
92 
93  Number of profiles searched
94 
95  :type: :class:`int`
96 
97  .. attribute:: date
98 
99  Execution date
100 
101  :type: :class:`datetime.datetime`
102 
103  .. attribute:: command
104 
105  Command used to run
106 
107  :type: :class:`str`
108  """
109  def __init__(self):
110  self.queryquery = ''
111  self.match_columnsmatch_columns = 0
112  self.n_effn_eff = 0
113  self.searched_hmmssearched_hmms = 0
114  self.datedate = None
115  self.commandcommand = ''
116 
117 def ParseHeaderLine(line):
118  '''Fetch header content.
119 
120  First, we seek the start of the identifier, that is, the first whitespace
121  after the hit number + 1. Since the identifier may contain whitespaces
122  itself, we cannot split the whole line
123 
124  :param line: Line from the output header.
125  :type line: :class:`str`
126 
127  :return: Hit information and query/template offsets
128  :rtype: (:class:`HHblitsHit`, (:class:`int`, :class:`int`))
129  '''
130  for i in range(0, len(line)):
131  if line[i].isdigit():
132  break
133  for i in range(i, len(line)):
134  if line[i] == ' ':
135  break
136  assert len(line)-i >= 31 and line[i+1] != ' '
137  hit_id = line[i+1:i+31].strip()
138  fields = line[i+32:].split()
139  prob = float(fields[0])
140  evalue = float(fields[1])
141  pvalue = float(fields[2])
142  score = float(fields[3])
143  ss_score = float(fields[4])
144  offsets = (int(fields[6].split('-')[0]), int(fields[7].split('-')[0]))
145  return (HHblitsHit(hit_id, None, score, ss_score, evalue, pvalue, prob),
146  offsets)
147 
148 def ParseHHblitsOutput(output):
149  """
150  Parses the HHblits output as produced by :meth:`HHblits.Search` and returns
151  the header of the search results and a list of hits.
152 
153  :param output: Iterable containing the lines of the HHblits output file
154  :type output: iterable (e.g. an open file handle)
155 
156  :return: a tuple of the header of the search results and the hits
157  :rtype: (:class:`HHblitsHeader`, :class:`list` of :class:`HHblitsHit`)
158  """
159  lines = iter(output)
160  def _ParseHeaderSection(lines):
161  value_start_column = 14
162  date_pattern = '%a %b %d %H:%M:%S %Y'
163  header = HHblitsHeader()
164  line = next(lines)
165  assert line.startswith('Query')
166  header.query = line[value_start_column:].strip()
167  line = next(lines)
168  assert line.startswith('Match_columns')
169  header.match_columns = int(line[value_start_column:].strip())
170 
171  line = next(lines)
172  assert line.startswith('No_of_seqs')
173 
174  line = next(lines)
175  assert line.startswith('Neff')
176  header.n_eff = float(line[value_start_column:].strip())
177 
178  line = next(lines)
179  assert line.startswith('Searched_HMMs')
180  header.searched_hmms = int(line[value_start_column:].strip())
181 
182  line = next(lines)
183  assert line.startswith('Date')
184  value = line[value_start_column:].strip()
185  header.date = datetime.datetime.strptime(value, date_pattern)
186 
187  line = next(lines)
188  assert line.startswith('Command')
189  header.command = line[value_start_column:].strip()
190 
191  line = next(lines)
192  assert len(line.strip()) == 0
193  return header
194 
195  def _ParseTableOfContents(lines):
196  line = next(lines)
197  assert line.startswith(' No Hit')
198  hits = []
199  while True:
200  line = next(lines)
201  if len(line.strip()) == 0:
202  return hits
203  hits.append(ParseHeaderLine(line))
204  return hits
205 
206  def _ParseResultBody(query_id, hits, lines):
207  entry_index = None
208  query_str = ''
209  templ_str = ''
210  def _MakeAln(query_id, hit_id, query_string, templ_string,
211  q_offset, t_offset):
212  s1 = seq.CreateSequence(query_id, query_string)
213  s1.offset = q_offset-1
214  s2 = seq.CreateSequence(hit_id, templ_string)
215  s2.offset = t_offset-1
216  return seq.CreateAlignment(s1, s2)
217  try:
218  while True:
219  # Lines which we are interested in:
220  # - "Done!" -> end of list
221  # - "No ..." -> next item in list
222  # - "T <hit_id> <start> <data> <end>"
223  # - "Q <query_id> <start> <data> <end>"
224  # -> rest is to be skipped
225  line = next(lines)
226  if len(line.strip()) == 0:
227  continue
228  if line.startswith('Done!'):
229  if len(query_str) > 0:
230  hits[entry_index][0].aln = _MakeAln(\
231  query_id, hits[entry_index][0].hit_id,
232  query_str, templ_str, *hits[entry_index][1])
233  return [h for h, o in hits]
234  if line.startswith('No '):
235  if len(query_str) > 0:
236  hits[entry_index][0].aln = _MakeAln(\
237  query_id, hits[entry_index][0].hit_id,
238  query_str, templ_str, *hits[entry_index][1])
239  entry_index = int(line[3:].strip())-1
240  line = next(lines)
241  hits[entry_index][0].hit_id = line[1:].strip()
242  query_str = ''
243  templ_str = ''
244  # skip the next line. It doesn't contain information we
245  # don't already know
246  next(lines)
247  continue
248  assert entry_index != None
249  # Skip all "T ..." and "Q ..." lines besides the one we want
250  if line[1:].startswith(' Consensus'):
251  continue
252  if line[1:].startswith(' ss_pred'):
253  continue
254  if line[1:].startswith(' ss_conf'):
255  continue
256  if line[1:].startswith(' ss_dssp'):
257  continue
258  if line.startswith('T '):
259  for start_pos in range(22, len(line)):
260  if line[start_pos].isalpha() or line[start_pos] == '-':
261  break
262  end_pos = line.find(' ', start_pos)
263  # this can fail if we didn't skip all other "T ..." lines
264  if end_pos == -1:
265  error_str = "Unparsable line '%s' for entry No %d" \
266  % (line.strip(), entry_index + 1)
267  raise AssertionError(error_str)
268  templ_str += line[start_pos:end_pos]
269  if line.startswith('Q '):
270  for start_pos in range(22, len(line)):
271  if line[start_pos].isalpha() or line[start_pos] == '-':
272  break
273  end_pos = line.find(' ', start_pos)
274  # this can fail if we didn't skip all other "Q ..." lines
275  if end_pos == -1:
276  error_str = "Unparsable line '%s' for entry No %d" \
277  % (line.strip(), entry_index + 1)
278  raise AssertionError(error_str)
279  query_str += line[start_pos:end_pos]
280  except StopIteration:
281  if len(query_str) > 0:
282  hits[entry_index][0].aln = _MakeAln(query_id,
283  hits[entry_index][0].hit_id,
284  query_str, templ_str,
285  *hits[entry_index][1])
286  return [h for h, o in hits]
287  header = _ParseHeaderSection(lines)
288  # parse the table of contents. This is neccessary as some of the properties
289  # (i.e. start of alignment) we need are only given there. From the TOC we
290  # create a list of hits that is then further filled with data when we parse
291  # the actual result body
292  hits = _ParseTableOfContents(lines)
293  return header, _ParseResultBody(header.query, hits, lines)
294 
295 def ParseA3M(a3m_file):
296  '''
297  Parse secondary structure information and the multiple sequence alignment
298  out of an A3M file as produced by :meth:`HHblits.BuildQueryMSA`.
299 
300  :param a3m_file: Iterable containing the lines of the A3M file
301  :type a3m_file: iterable (e.g. an open file handle)
302 
303  :return: Dictionary containing "ss_pred" (:class:`list`), "ss_conf"
304  (:class:`list`) and "msa" (:class:`~ost.seq.AlignmentHandle`).
305  '''
306  profile_dict = dict()
307  state = 'NONE'
308  pred_seq_txt = ''
309  conf_seq_txt = ''
310  msa_seq = list()
311  msa_head = list()
312  for line in a3m_file:
313  if len(line.rstrip()) == 0:
314  continue
315  elif line.startswith('>ss_pred'):
316  state = 'sspred'
317  continue
318  elif line.startswith('>ss_conf'):
319  state = 'ssconf'
320  continue
321  elif line[0] == '>':
322  if state == 'ssconf' or state == 'msa':
323  msa_seq.append('')
324  msa_head.append(line[1:].rstrip())
325  else:
326  raise IOError('The A3M file is missing the "ss_conf" section')
327  state = 'msa'
328  continue
329 
330  if state == 'sspred':
331  pred_seq_txt += line.rstrip()
332  elif state == 'ssconf':
333  conf_seq_txt += line.rstrip()
334  elif state == 'msa':
335  msa_seq[len(msa_seq)-1] += line.rstrip()
336 
337  profile_dict['ss_pred'] = list()
338  profile_dict['ss_conf'] = list()
339  for i in range(0, len(pred_seq_txt)):
340  profile_dict['ss_pred'].append(pred_seq_txt[i])
341  profile_dict['ss_conf'].append(int(conf_seq_txt[i]))
342 
343  # post processing
344  # MSA
345  profile_dict['msa'] = None
346  if len(msa_seq) > 1:
347  t = msa_seq[0]
348  al = seq.AlignmentList()
349  for i in range(1, len(msa_seq)):
350  qs = ''
351  ts = ''
352  k = 0
353  for c in msa_seq[i]:
354  if c.islower():
355  qs += '-'
356  ts += c.upper()
357  else:
358  qs += t[k]
359  ts += c
360  k += 1
361  nl = seq.CreateAlignment(seq.CreateSequence(msa_head[0], qs),
362  seq.CreateSequence(msa_head[i], ts))
363  al.append(nl)
364  profile_dict['msa'] = seq.alg.MergePairwiseAlignments(\
365  al, seq.CreateSequence(msa_head[0], t))
366  return profile_dict
367 
368 
369 def ParseHHM(profile):
370  '''
371  Parse secondary structure information and the MSA out of an HHM profile as
372  produced by :meth:`HHblits.A3MToProfile`.
373 
374  :param profile: Opened file handle holding the profile.
375  :type profile: :class:`file`
376 
377  :return: Dictionary containing "ss_pred" (:class:`list`), "ss_conf"
378  (:class:`list`), "msa" (:class:`~ost.seq.AlignmentHandle`) and
379  "consensus" (:class:`~ost.seq.SequenceHandle`).
380  '''
381  profile_dict = dict()
382  state = 'NONE'
383  pred_seq_txt = ''
384  conf_seq_txt = ''
385  consensus_txt = ''
386  msa_seq = list()
387  msa_head = list()
388  for line in profile:
389  if len(line.rstrip()) == 0:
390  continue
391  if line.rstrip() == '>ss_pred PSIPRED predicted secondary structure':
392  state = 'sspred'
393  continue
394  elif line.rstrip() == '>ss_conf PSIPRED confidence values':
395  state = 'ssconf'
396  continue
397  elif line.rstrip() == '>Consensus':
398  state = 'consensus'
399  continue
400  elif line[0] == '>':
401  if state == 'consensus' or state == 'msa':
402  msa_seq.append('')
403  msa_head.append(line[1:].rstrip())
404  else:
405  raise IOError('Profile file "%s" is missing ' % profile.name+
406  'the "Consensus" section')
407  state = 'msa'
408  continue
409  elif line[0] == '#':
410  state = 'NONE'
411  continue
412 
413  if state == 'sspred':
414  pred_seq_txt += line.rstrip()
415  elif state == 'ssconf':
416  conf_seq_txt += line.rstrip()
417  elif state == 'msa':
418  msa_seq[len(msa_seq)-1] += line.rstrip()
419  elif state == 'consensus':
420  consensus_txt += line.rstrip()
421 
422  profile_dict['ss_pred'] = list()
423  profile_dict['ss_conf'] = list()
424  for i in range(0, len(pred_seq_txt)):
425  profile_dict['ss_pred'].append(pred_seq_txt[i])
426  profile_dict['ss_conf'].append(int(conf_seq_txt[i]))
427 
428  # post processing
429  # MSA
430  profile_dict['msa'] = None
431  if len(msa_seq):
432  t = msa_seq[0]
433  al = seq.AlignmentList()
434  for i in range(1, len(msa_seq)):
435  qs = ''
436  ts = ''
437  k = 0
438  for c in msa_seq[i]:
439  if c.islower():
440  qs += '-'
441  ts += c.upper()
442  else:
443  qs += t[k]
444  ts += c
445  k += 1
446  nl = seq.CreateAlignment(seq.CreateSequence(msa_head[0], qs),
447  seq.CreateSequence(msa_head[i], ts))
448  al.append(nl)
449  profile_dict['msa'] = seq.alg.MergePairwiseAlignments(\
450  al, seq.CreateSequence(msa_head[0], t))
451  #print profile_dict['msa'].ToString(80)
452  # Consensus
453  profile_dict['consensus'] = seq.CreateSequence('Consensus', consensus_txt)
454 
455  return profile_dict
456 
457 
458 class HHblits:
459  """
460  Initialise a new HHblits "search" for the given query. Query may either
461  be a :class:`~ost.seq.SequenceHandle` or a string. In the former case, the
462  query is the actual query sequence, in the latter case, the query is the
463  filename to the file containing the query.
464 
465  :param query: Query sequence as file or sequence.
466  :type query: :class:`~ost.seq.SequenceHandle` or :class:`str`
467  :param hhsuite_root: Path to the top-level directory of your hhsuite
468  installation.
469  :type hhsuite_root: :class:`str`
470  :param hhblits_bin: Name of the hhblits binary. Will only be used if
471  :attr:`hhsuite_root`:file:`/bin/hhblits` does not exist.
472  :type hhblits_bin: :class:`str`
473  :param working_dir: Directory for temporary files. Will be created if not
474  present but **not** automatically deleted.
475  :type working_dir: :class:`str`
476  """
477  OUTPUT_PREFIX = 'query_hhblits'
478  def __init__(self, query, hhsuite_root, hhblits_bin=None, working_dir=None):
479  self.queryquery = query
480  self.hhsuite_roothhsuite_root = hhsuite_root
481  if os.path.exists(os.path.join(self.hhsuite_roothhsuite_root, 'bin/hhblits')):
482  self.bin_dirbin_dir = os.path.join(self.hhsuite_roothhsuite_root, 'bin')
483  self.hhblits_binhhblits_bin = os.path.join(self.hhsuite_roothhsuite_root, 'bin/hhblits')
484  else:
485  self.hhblits_binhhblits_bin = settings.Locate('hhblits',
486  explicit_file_name=hhblits_bin)
487  self.bin_dirbin_dir = os.path.dirname(self.hhblits_binhhblits_bin)
488  # guess root folder (note: this may fail in future)
489  self.hhsuite_roothhsuite_root = os.path.dirname(self.bin_dirbin_dir)
490  self.hhlib_dirhhlib_dir = os.path.join(self.hhsuite_roothhsuite_root, 'lib', 'hh')
491  if working_dir:
492  self.needs_cleanupneeds_cleanup = False
493  self.working_dirworking_dir = working_dir
494  if not os.path.exists(working_dir):
495  os.mkdir(working_dir)
496  if isinstance(query, str):
497  self.filenamefilename = os.path.abspath(os.path.join(
498  self.working_dirworking_dir, os.path.basename(query)))
499  if self.filenamefilename != os.path.abspath(query):
500  shutil.copy(query, self.filenamefilename)
501  else:
502  self.filenamefilename = os.path.abspath(os.path.join(self.working_dirworking_dir,
503  '%s.fasta' % HHblits.OUTPUT_PREFIX))
504  ost.io.SaveSequence(query, self.filenamefilename)
505  else:
506  self.needs_cleanupneeds_cleanup = True
507  if isinstance(query, str):
508  self.working_dirworking_dir = tempfile.mkdtemp()
509  self.filenamefilename = os.path.abspath(os.path.join(
510  self.working_dirworking_dir, os.path.basename(query)))
511  shutil.copy(query, self.filenamefilename)
512  else:
513  tmp_dir = utils.TempDirWithFiles((query,))
514  self.working_dirworking_dir = tmp_dir.dirname
515  self.filenamefilename = tmp_dir.files[0]
516 
517  def BuildQueryMSA(self, nrdb, options={}, a3m_file=None):
518  """Builds the MSA for the query sequence.
519 
520  This function directly uses hhblits of hhtools. While in theory it would
521  be possible to do this by PSI-blasting on our own, hhblits is supposed
522  to be faster. Also it is supposed to prevent alignment corruption. The
523  alignment corruption is caused by low-scoring terminal alignments that
524  draw the sequences found by PSI-blast away from the optimum. By removing
525  these low scoring ends, part of the alignment corruption can be
526  suppressed.
527 
528  hhblits does **not** call PSIPRED on the MSA to predict the secondary
529  structure of the query sequence. This is done by addss.pl of hhtools.
530  The predicted secondary structure is stored together with the sequences
531  identified by hhblits.
532 
533  The produced A3M file can be parsed by :func:`ParseA3M`. If the file was
534  already produced, hhblits is not called again and the existing file path
535  is returned.
536 
537  :param nrdb: Database to be align against; has to be an hhblits database
538  :type nrdb: :class:`str`
539 
540  :param options: Dictionary of options to *hhblits*, one "-" is added in
541  front of every key. Boolean True values add flag without
542  value. Merged with default options {'cpu': 1, 'n': 1},
543  where 'n' defines the number of iterations.
544  :type options: :class:`dict`
545 
546  :param a3m_file: a path of a3m_file to be used, optional
547  :type a3m_file: :class:`str`
548 
549  :return: The path to the A3M file containing the MSA
550  :rtype: :class:`str`
551  """
552  if a3m_file is None:
553  a3m_file = '%s.a3m' % os.path.splitext(self.filenamefilename)[0]
554  else:
555  a3m_file = os.path.abspath(a3m_file)
556  if os.path.exists(a3m_file):
557  ost.LogInfo('Reusing already existing query alignment (%s)' % a3m_file)
558  return a3m_file
559  ost.LogInfo('Using hhblits from "%s"' % self.hhsuite_roothhsuite_root)
560  full_nrdb = os.path.join(os.path.abspath(os.path.split(nrdb)[0]),
561  os.path.split(nrdb)[1])
562  # create MSA
563  opts = {'cpu' : 1, # no. of cpus used
564  'n' : 1} # no. of iterations
565  opts.update(options)
566  opt_cmd, _ = _ParseOptions(opts)
567  hhblits_cmd = '%s -e 0.001 -i %s -oa3m %s -d %s %s' % \
568  (self.hhblits_binhhblits_bin, self.filenamefilename, a3m_file, full_nrdb,
569  opt_cmd)
570  job = subprocess.Popen(hhblits_cmd, shell=True, cwd=self.working_dirworking_dir,
571  stdout=subprocess.PIPE, stderr=subprocess.PIPE)
572  sout, _ = job.communicate()
573  lines = sout.decode().splitlines()
574  for line in lines:
575  ost.LogVerbose(line.strip())
576  if not os.path.exists(a3m_file):
577  ost.LogWarning('Building query profile failed, no output')
578  return a3m_file
579  # add secondary structure annotation
580  addss_cmd = "%s %s" % (os.path.join(self.hhsuite_roothhsuite_root,
581  'lib/hh/scripts/addss.pl'),
582  a3m_file)
583  env = dict(os.environ)
584  env.update({'PERL5LIB' : os.path.join(self.hhsuite_roothhsuite_root,
585  'lib/hh/scripts'),
586  'HHLIB' : self.hhlib_dirhhlib_dir,
587  'PATH' : '%s:%s' % (os.path.join(self.hhsuite_roothhsuite_root, 'bin'),
588  os.environ['PATH'])})
589  job = subprocess.Popen(addss_cmd, shell=True, cwd=self.working_dirworking_dir,
590  env=env, stdout=subprocess.PIPE,
591  stderr=subprocess.PIPE)
592  sout, serr = job.communicate()
593  lines = sout.decode().splitlines()
594  for line in lines:
595  if 'error' in line.lower():
596  ost.LogWarning('Predicting secondary structure for MSA '+
597  '(%s) failed, on command: %s' % (a3m_file, line))
598  return a3m_file
599  return a3m_file
600 
601  def A3MToProfile(self, a3m_file, hhm_file=None):
602  """
603  Converts the A3M alignment file to a hhm profile. If hhm_file is not
604  given, the output file will be set to <:attr:`a3m_file`-basename>.hhm.
605 
606  The produced HHM file can be parsed by :func:`ParseHHM`.
607 
608  If the file was already produced, the existing file path is returned
609  without recomputing it.
610 
611  :param a3m_file: Path to input MSA as produced by :meth:`BuildQueryMSA`
612  :type a3m_file: :class:`str`
613 
614  :param hhm_file: Desired output file name
615  :type hhm_file: :class:`str`
616 
617  :return: Path to the profile file
618  :rtype: :class:`str`
619  """
620  hhmake = os.path.join(self.bin_dirbin_dir, 'hhmake')
621  if not hhm_file:
622  hhm_file = '%s.hhm' % os.path.splitext(a3m_file)[0]
623  if os.path.exists(hhm_file):
624  return hhm_file
625  ost.LogVerbose('converting %s to %s' % (a3m_file, hhm_file))
626  os.putenv('HHLIB', self.hhlib_dirhhlib_dir)
627  job = subprocess.Popen('%s -i %s -o %s' % (hhmake, a3m_file, hhm_file),
628  shell=True, stdout=subprocess.PIPE,
629  stderr=subprocess.PIPE)
630  sout, serr = job.communicate()
631  lines = serr.decode().splitlines()
632  for line in lines:
633  ost.LogWarning(line)
634  lines = sout.decode().splitlines()
635  for line in lines:
636  ost.LogVerbose(line)
637  if job.returncode !=0:
638  raise IOError('could not convert a3m to hhm file')
639  return hhm_file
640 
641  def A3MToCS(self, a3m_file, cs_file=None, options={}):
642  """
643  Converts the A3M alignment file to a column state sequence file. If
644  cs_file is not given, the output file will be set to
645  <:attr:`a3m_file`-basename>.seq219.
646 
647  If the file was already produced, the existing file path is returned
648  without recomputing it.
649 
650  :param a3m_file: Path to input MSA as produced by :meth:`BuildQueryMSA`
651  :type a3m_file: :class:`str`
652 
653  :param cs_file: Output file name (may be omitted)
654  :type cs_file: :class:`str`
655 
656  :param options: Dictionary of options to *cstranslate*, one "-" is added
657  in front of every key. Boolean True values add flag
658  without value.
659  :type options: :class:`dict`
660 
661  :return: Path to the column state sequence file
662  :rtype: :class:`str`
663  """
664  cstranslate = os.path.join(self.hhlib_dirhhlib_dir, 'bin', 'cstranslate')
665  if not cs_file:
666  cs_file = '%s.seq219' % os.path.splitext(a3m_file)[0]
667  if os.path.exists(cs_file):
668  return cs_file
669  opt_cmd, _ = _ParseOptions(options)
670  cs_cmd = '%s -i %s -o %s %s' % (
671  cstranslate,
672  os.path.abspath(a3m_file),
673  os.path.abspath(cs_file),
674  opt_cmd)
675  ost.LogVerbose('converting %s to %s' % (a3m_file, cs_file))
676  job = subprocess.Popen(cs_cmd, shell=True, cwd=self.working_dirworking_dir,
677  stdout=subprocess.PIPE, stderr=subprocess.PIPE)
678  sout, _ = job.communicate()
679  if b'Wrote abstract state sequence to' in sout:
680  return cs_file
681 
682  ost.LogWarning('Creating column state sequence file (%s) failed' % \
683  cs_file)
684 
685  def Cleanup(self):
686  """Delete temporary data.
687 
688  Delete temporary data if no working dir was given. Controlled by
689  :attr:`needs_cleanup`.
690  """
691  if self.needs_cleanupneeds_cleanup and os.path.exists(self.working_dirworking_dir):
692  shutil.rmtree(self.working_dirworking_dir)
693 
694  def CleanupFailed(self):
695  '''In case something went wrong, call to make sure everything is clean.
696 
697  This will delete the working dir independently of :attr:`needs_cleanup`.
698  '''
699  store_needs_cleanup = self.needs_cleanupneeds_cleanup
700  self.needs_cleanupneeds_cleanup = True
701  self.CleanupCleanup()
702  self.needs_cleanupneeds_cleanup = store_needs_cleanup
703 
704  def Search(self, a3m_file, database, options={}, prefix=''):
705  """
706  Searches for templates in the given database. Before running the search,
707  the hhm file is copied. This makes it possible to launch several hhblits
708  instances at once. Upon success, the filename of the result file is
709  returned. This file may be parsed with :func:`ParseHHblitsOutput`.
710 
711  :param a3m_file: Path to input MSA as produced by :meth:`BuildQueryMSA`
712  :type a3m_file: :class:`str`
713 
714  :param database: Search database, needs to be the common prefix of the
715  database files
716  :type database: :class:`str`
717 
718  :param options: Dictionary of options to *hhblits*, one "-" is added in
719  front of every key. Boolean True values add flag without
720  value. Merged with default options {'cpu': 1, 'n': 1},
721  where 'n' defines the number of iterations.
722  :type options: :class:`dict`
723 
724  :param prefix: Prefix to the result file
725  :type prefix: :class:`str`
726 
727  :return: The path to the result file
728  :rtype: :class:`str`
729  """
730  opts = {'cpu' : 1, # no. of cpus used
731  'n' : 1} # no. of iterations
732  opts.update(options)
733  opt_cmd, opt_str = _ParseOptions(opts)
734  base = os.path.basename(os.path.splitext(a3m_file)[0])
735  hhr_file = '%s%s_%s.hhr' % (prefix, base, opt_str)
736  hhr_file = os.path.join(self.working_dirworking_dir, hhr_file)
737  search_cmd = '%s %s -e 0.001 -Z 10000 -B 10000 -i %s -o %s -d %s' % (
738  self.hhblits_binhhblits_bin,
739  opt_cmd,
740  os.path.abspath(a3m_file),
741  os.path.abspath(hhr_file),
742  os.path.join(os.path.abspath(os.path.split(database)[0]),
743  os.path.split(database)[1]))
744  ost.LogInfo('searching %s' % database)
745  ost.LogVerbose(search_cmd)
746  job = subprocess.Popen(search_cmd, shell=True, cwd=self.working_dirworking_dir,
747  stdout=subprocess.PIPE, stderr=subprocess.PIPE)
748  sout, serr = job.communicate()
749  if job.returncode != 0:
750  lines = sout.decode().splitlines()
751  for line in lines:
752  ost.LogError(line.strip())
753  lines = serr.decode().splitlines()
754  for line in lines:
755  ost.LogError(line.strip())
756  return None
757  return hhr_file
758 
759 
760 def _ParseOptions(opts):
761  """
762  :return: Tuple of strings (opt_cmd, opt_str), where opt_cmd can be
763  passed to command ("-" added in front of keys, options
764  separated by space) and opt_str (options separated by "_")
765  can be used for filenames.
766  :param opts: Dictionary of options, one "-" is added in front of every
767  key. Boolean True values add flag without value.
768  """
769  opt_cmd = list()
770  opt_str = list()
771  for k, val in opts.items():
772  if type(val) == type(True):
773  if val == True:
774  opt_cmd.append('-%s' % str(k))
775  opt_str.append(str(k))
776  else:
777  opt_cmd.append('-%s %s' % (str(k), str(val)))
778  opt_str.append('%s%s' % (str(k), str(val)))
779  opt_cmd = ' '.join(opt_cmd)
780  opt_str = '_'.join(opt_str)
781  return opt_cmd, opt_str
782 
783 
784 __all__ = ['HHblits', 'HHblitsHit', 'HHblitsHeader',
785  'ParseHHblitsOutput', 'ParseA3M', 'ParseHHM',
786  'ParseHeaderLine']
787 
788 # LocalWords: HHblits MSA hhblits hhtools PSIPRED addss param nrdb str
789 # LocalWords: cpu hhm func ParseHHblitsOutput ss pred conf msa hhsuite dir
790 # LocalWords: attr basename rtype cstranslate tuple HHblitsHeader meth aln
791 # LocalWords: HHblitsHit iterable evalue pvalue neff hmms datetime
792 # LocalWords: whitespace whitespaces
def __init__(self, hit_id, aln, score, ss_score, evalue, pvalue, prob)
Definition: hhblits2.py:61
def A3MToCS(self, a3m_file, cs_file=None, options={})
Definition: hhblits2.py:641
def __init__(self, query, hhsuite_root, hhblits_bin=None, working_dir=None)
Definition: hhblits2.py:478
def Search(self, a3m_file, database, options={}, prefix='')
Definition: hhblits2.py:704
def BuildQueryMSA(self, nrdb, options={}, a3m_file=None)
Definition: hhblits2.py:517
def A3MToProfile(self, a3m_file, hhm_file=None)
Definition: hhblits2.py:601
def ParseHHblitsOutput(output)
Definition: hhblits2.py:148
def ParseHHM(profile)
Definition: hhblits2.py:369
def ParseHeaderLine(line)
Definition: hhblits2.py:117
def ParseA3M(a3m_file)
Definition: hhblits2.py:295
void DLLEXPORT_OST_IO SaveSequence(const seq::ConstSequenceHandle &sequence, const String &filename, const String &format="auto")