OpenStructure
hhblits3.py
Go to the documentation of this file.
1 '''HHblits wrapper classes and functions.
2 '''
3 
4 import subprocess
5 import datetime
6 import os
7 import shutil
8 import tempfile
9 
10 import ost
11 from ost import settings, seq
12 from ost.bindings import utils
13 
14 class HHblitsHit:
15  """
16  A hit found by HHblits
17 
18  .. attribute:: hit_id
19 
20  String identifying the hit
21 
22  :type: :class:`str`
23 
24  .. attribute:: aln
25 
26  Pairwise alignment containing the aligned part between the query and the
27  target. First sequence is the query, the second sequence the target.
28 
29  :type: :class:`~ost.seq.AlignmentHandle`
30 
31  .. attribute:: score
32 
33  The alignment score
34 
35  :type: :class:`float`
36 
37  .. attribute:: ss_score
38 
39  The secondary structure score
40 
41  :type: :class:`float`
42 
43  .. attribute:: evalue
44 
45  The E-value of the alignment
46 
47  :type: :class:`float`
48 
49  .. attribute:: pvalue
50 
51  The P-value of the alignment
52 
53  :type: :class:`float`
54 
55  .. attribute:: prob
56 
57  The probability of the alignment (between 0 and 100)
58 
59  :type: :class:`float`
60  """
61  def __init__(self, hit_id, aln, score, ss_score, evalue, pvalue, prob):
62  self.hit_idhit_id = hit_id
63  self.alnaln = aln
64  self.scorescore = score
65  self.ss_scoress_score = ss_score
66  self.evalueevalue = evalue
67  self.probprob = prob
68  self.pvaluepvalue = pvalue
69 
71  """Stats from the beginning of search output.
72 
73  .. attribute:: query
74 
75  The name of the query sequence
76 
77  :type: :class:`str`
78 
79  .. attribute:: match_columns
80 
81  Total of aligned Match columns
82 
83  :type: :class:`int`
84 
85  .. attribute:: n_eff
86 
87  Value of the ``-neff`` option
88 
89  :type: :class:`float`
90 
91  .. attribute:: searched_hmms
92 
93  Number of profiles searched
94 
95  :type: :class:`int`
96 
97  .. attribute:: date
98 
99  Execution date
100 
101  :type: :class:`datetime.datetime`
102 
103  .. attribute:: command
104 
105  Command used to run
106 
107  :type: :class:`str`
108  """
109  def __init__(self):
110  self.queryquery = ''
111  self.match_columnsmatch_columns = 0
112  self.n_effn_eff = 0
113  self.searched_hmmssearched_hmms = 0
114  self.datedate = None
115  self.commandcommand = ''
116 
117 def ParseHeaderLine(line):
118  '''Fetch header content.
119 
120  First, we seek the start of the identifier, that is, the first whitespace
121  after the hit number + 1. Since the identifier may contain whitespaces
122  itself, we cannot split the whole line
123 
124  :param line: Line from the output header.
125  :type line: :class:`str`
126 
127  :return: Hit information and query/template offsets
128  :rtype: (:class:`HHblitsHit`, (:class:`int`, :class:`int`))
129  '''
130  for i in range(0, len(line)):
131  if line[i].isdigit():
132  break
133  for i in range(i, len(line)):
134  if line[i] == ' ':
135  break
136  assert len(line)-i >= 31 and line[i+1] != ' '
137  hit_id = line[i+1:i+31].strip()
138  fields = line[i+32:].split()
139  prob = float(fields[0])
140  evalue = float(fields[1])
141  pvalue = float(fields[2])
142  score = float(fields[3])
143  ss_score = float(fields[4])
144  offsets = (int(fields[6].split('-')[0]), int(fields[7].split('-')[0]))
145  return (HHblitsHit(hit_id, None, score, ss_score, evalue, pvalue, prob),
146  offsets)
147 
148 def ParseHHblitsOutput(output):
149  """
150  Parses the HHblits output as produced by :meth:`HHblits.Search` and returns
151  the header of the search results and a list of hits.
152 
153  :param output: Iterable containing the lines of the HHblits output file
154  :type output: iterable (e.g. an open file handle)
155 
156  :return: a tuple of the header of the search results and the hits
157  :rtype: (:class:`HHblitsHeader`, :class:`list` of :class:`HHblitsHit`)
158  """
159  lines = iter(output)
160  def _ParseHeaderSection(lines):
161  value_start_column = 14
162  date_pattern = '%a %b %d %H:%M:%S %Y'
163  header = HHblitsHeader()
164  line = next(lines)
165  assert line.startswith('Query')
166  header.query = line[value_start_column:].strip()
167  line = next(lines)
168  assert line.startswith('Match_columns')
169  header.match_columns = int(line[value_start_column:].strip())
170 
171  line = next(lines)
172  assert line.startswith('No_of_seqs')
173 
174  line = next(lines)
175  assert line.startswith('Neff')
176  header.n_eff = float(line[value_start_column:].strip())
177 
178  line = next(lines)
179  assert line.startswith('Searched_HMMs')
180  header.searched_hmms = int(line[value_start_column:].strip())
181 
182  line = next(lines)
183  assert line.startswith('Date')
184  value = line[value_start_column:].strip()
185  header.date = datetime.datetime.strptime(value, date_pattern)
186 
187  line = next(lines)
188  assert line.startswith('Command')
189  header.command = line[value_start_column:].strip()
190 
191  line = next(lines)
192  assert len(line.strip()) == 0
193  return header
194 
195  def _ParseTableOfContents(lines):
196  line = next(lines)
197  assert line.startswith(' No Hit')
198  hits = []
199  while True:
200  line = next(lines)
201  if len(line.strip()) == 0:
202  return hits
203  hits.append(ParseHeaderLine(line))
204  return hits
205 
206  def _ParseResultBody(query_id, hits, lines):
207  entry_index = None
208  query_str = ''
209  templ_str = ''
210  def _MakeAln(query_id, hit_id, query_string, templ_string,
211  q_offset, t_offset):
212  s1 = seq.CreateSequence(query_id, query_string)
213  s1.offset = q_offset-1
214  s2 = seq.CreateSequence(hit_id, templ_string)
215  s2.offset = t_offset-1
216  return seq.CreateAlignment(s1, s2)
217  try:
218  while True:
219  # Lines which we are interested in:
220  # - "Done!" -> end of list
221  # - "No ..." -> next item in list
222  # - "T <hit_id> <start> <data> <end>"
223  # - "Q <query_id> <start> <data> <end>"
224  # -> rest is to be skipped
225  line = next(lines)
226  if len(line.strip()) == 0:
227  continue
228  if line.startswith('Done!'):
229  if len(query_str) > 0:
230  hits[entry_index][0].aln = _MakeAln(\
231  query_id, hits[entry_index][0].hit_id,
232  query_str, templ_str, *hits[entry_index][1])
233  return [h for h, o in hits]
234  if line.startswith('No '):
235  if len(query_str) > 0:
236  hits[entry_index][0].aln = _MakeAln(\
237  query_id, hits[entry_index][0].hit_id,
238  query_str, templ_str, *hits[entry_index][1])
239  entry_index = int(line[3:].strip())-1
240  line = next(lines)
241  hits[entry_index][0].hit_id = line[1:].strip()
242  query_str = ''
243  templ_str = ''
244  # skip the next line. It doesn't contain information we
245  # don't already know
246  next(lines)
247  continue
248  assert entry_index != None
249  # Skip all "T ..." and "Q ..." lines besides the one we want
250  if line[1:].startswith(' Consensus'):
251  continue
252  if line[1:].startswith(' ss_pred'):
253  continue
254  if line[1:].startswith(' ss_conf'):
255  continue
256  if line[1:].startswith(' ss_dssp'):
257  continue
258  if line.startswith('T '):
259  for start_pos in range(22, len(line)):
260  if line[start_pos].isalpha() or line[start_pos] == '-':
261  break
262  end_pos = line.find(' ', start_pos)
263  # this can fail if we didn't skip all other "T ..." lines
264  if end_pos == -1:
265  error_str = "Unparsable line '%s' for entry No %d" \
266  % (line.strip(), entry_index + 1)
267  raise AssertionError(error_str)
268  templ_str += line[start_pos:end_pos]
269  if line.startswith('Q '):
270  for start_pos in range(22, len(line)):
271  if line[start_pos].isalpha() or line[start_pos] == '-':
272  break
273  end_pos = line.find(' ', start_pos)
274  # this can fail if we didn't skip all other "Q ..." lines
275  if end_pos == -1:
276  error_str = "Unparsable line '%s' for entry No %d" \
277  % (line.strip(), entry_index + 1)
278  raise AssertionError(error_str)
279  query_str += line[start_pos:end_pos]
280  except StopIteration:
281  if len(query_str) > 0:
282  hits[entry_index][0].aln = _MakeAln(query_id,
283  hits[entry_index][0].hit_id,
284  query_str, templ_str,
285  *hits[entry_index][1])
286  return [h for h, o in hits]
287  header = _ParseHeaderSection(lines)
288  # parse the table of contents. This is neccessary as some of the properties
289  # (i.e. start of alignment) we need are only given there. From the TOC we
290  # create a list of hits that is then further filled with data when we parse
291  # the actual result body
292  hits = _ParseTableOfContents(lines)
293  return header, _ParseResultBody(header.query, hits, lines)
294 
295 def ParseA3M(a3m_file):
296  '''
297  Parse secondary structure information and the multiple sequence alignment
298  out of an A3M file as produced by :meth:`HHblits.BuildQueryMSA`.
299 
300  :param a3m_file: Iterable containing the lines of the A3M file
301  :type a3m_file: iterable (e.g. an open file handle)
302 
303  :return: Dictionary containing "ss_pred" (:class:`list`), "ss_conf"
304  (:class:`list`) and "msa" (:class:`~ost.seq.AlignmentHandle`).
305  If not available, "ss_pred" and "ss_conf" entries are set to None.
306  '''
307  profile_dict = dict()
308  state = 'NONE'
309  pred_seq_txt = ''
310  conf_seq_txt = ''
311  msa_seq = list()
312  msa_head = list()
313  for line in a3m_file:
314  if len(line.rstrip()) == 0:
315  continue
316  elif line.startswith('>ss_pred'):
317  state = 'sspred'
318  continue
319  elif line.startswith('>ss_conf'):
320  state = 'ssconf'
321  continue
322  elif line[0] == '>':
323  msa_seq.append('')
324  msa_head.append(line[1:].rstrip())
325  state = 'msa'
326  continue
327 
328  if state == 'sspred':
329  pred_seq_txt += line.rstrip()
330  elif state == 'ssconf':
331  conf_seq_txt += line.rstrip()
332  elif state == 'msa':
333  msa_seq[len(msa_seq)-1] += line.rstrip()
334 
335  if len(pred_seq_txt) > 0:
336  profile_dict['ss_pred'] = list()
337  profile_dict['ss_conf'] = list()
338  for i in range(0, len(pred_seq_txt)):
339  profile_dict['ss_pred'].append(pred_seq_txt[i])
340  profile_dict['ss_conf'].append(int(conf_seq_txt[i]))
341  else:
342  profile_dict['ss_pred'] = None
343  profile_dict['ss_conf'] = None
344 
345  # post processing
346  # MSA
347  profile_dict['msa'] = None
348  if len(msa_seq) > 1:
349  t = msa_seq[0]
350  al = seq.AlignmentList()
351  for i in range(1, len(msa_seq)):
352  qs = ''
353  ts = ''
354  k = 0
355  for c in msa_seq[i]:
356  if c.islower():
357  qs += '-'
358  ts += c.upper()
359  else:
360  qs += t[k]
361  ts += c
362  k += 1
363  nl = seq.CreateAlignment(seq.CreateSequence(msa_head[0], qs),
364  seq.CreateSequence(msa_head[i], ts))
365  al.append(nl)
366  profile_dict['msa'] = seq.alg.MergePairwiseAlignments(\
367  al, seq.CreateSequence(msa_head[0], t))
368  return profile_dict
369 
370 
371 def ParseHHM(profile):
372  '''
373  Parse secondary structure information and the MSA out of an HHM profile as
374  produced by :meth:`HHblits.A3MToProfile`.
375 
376  :param profile: Opened file handle holding the profile.
377  :type profile: :class:`file`
378 
379  :return: Dictionary containing "ss_pred" (:class:`list`), "ss_conf"
380  (:class:`list`), "msa" (:class:`~ost.seq.AlignmentHandle`) and
381  "consensus" (:class:`~ost.seq.SequenceHandle`).
382  If not available, "ss_pred" and "ss_conf" entries are set to None.
383  '''
384  profile_dict = dict()
385  state = 'NONE'
386  pred_seq_txt = ''
387  conf_seq_txt = ''
388  consensus_txt = ''
389  msa_seq = list()
390  msa_head = list()
391  for line in profile:
392  if len(line.rstrip()) == 0:
393  continue
394  if line.rstrip() == '>ss_pred PSIPRED predicted secondary structure':
395  state = 'sspred'
396  continue
397  elif line.rstrip() == '>ss_conf PSIPRED confidence values':
398  state = 'ssconf'
399  continue
400  elif line.rstrip() == '>Consensus':
401  state = 'consensus'
402  continue
403  elif line[0] == '>':
404  if state == 'consensus' or state == 'msa':
405  msa_seq.append('')
406  msa_head.append(line[1:].rstrip())
407  else:
408  raise IOError('Profile file "%s" is missing ' % profile.name+
409  'the "Consensus" section')
410  state = 'msa'
411  continue
412  elif line[0] == '#':
413  state = 'NONE'
414  continue
415 
416  if state == 'sspred':
417  pred_seq_txt += line.rstrip()
418  elif state == 'ssconf':
419  conf_seq_txt += line.rstrip()
420  elif state == 'msa':
421  msa_seq[len(msa_seq)-1] += line.rstrip()
422  elif state == 'consensus':
423  consensus_txt += line.rstrip()
424 
425  if len(pred_seq_txt) > 0:
426  profile_dict['ss_pred'] = list()
427  profile_dict['ss_conf'] = list()
428  for i in range(0, len(pred_seq_txt)):
429  profile_dict['ss_pred'].append(pred_seq_txt[i])
430  profile_dict['ss_conf'].append(int(conf_seq_txt[i]))
431  else:
432  profile_dict['ss_pred'] = None
433  profile_dict['ss_conf'] = None
434 
435  # post processing
436  # MSA
437  profile_dict['msa'] = None
438  if len(msa_seq):
439  t = msa_seq[0]
440  al = seq.AlignmentList()
441  for i in range(1, len(msa_seq)):
442  qs = ''
443  ts = ''
444  k = 0
445  for c in msa_seq[i]:
446  if c.islower():
447  qs += '-'
448  ts += c.upper()
449  else:
450  qs += t[k]
451  ts += c
452  k += 1
453  nl = seq.CreateAlignment(seq.CreateSequence(msa_head[0], qs),
454  seq.CreateSequence(msa_head[i], ts))
455  al.append(nl)
456  profile_dict['msa'] = seq.alg.MergePairwiseAlignments(\
457  al, seq.CreateSequence(msa_head[0], t))
458  # Consensus
459  profile_dict['consensus'] = seq.CreateSequence('Consensus', consensus_txt)
460 
461  return profile_dict
462 
463 
464 class HHblits:
465  """
466  Initialise a new HHblits "search" for the given query. Query may either
467  be a :class:`~ost.seq.SequenceHandle` or a string. In the former case, the
468  query is the actual query sequence, in the latter case, the query is the
469  filename to the file containing the query.
470 
471  :param query: Query sequence as file or sequence.
472  :type query: :class:`~ost.seq.SequenceHandle` or :class:`str`
473  :param hhsuite_root: Path to the top-level directory of your hhsuite
474  installation.
475  :type hhsuite_root: :class:`str`
476  :param hhblits_bin: Name of the hhblits binary. Will only be used if
477  :attr:`hhsuite_root`:file:`/bin/hhblits` does not exist.
478  :type hhblits_bin: :class:`str`
479  :param working_dir: Directory for temporary files. Will be created if not
480  present but **not** automatically deleted.
481  :type working_dir: :class:`str`
482  """
483  OUTPUT_PREFIX = 'query_hhblits'
484  def __init__(self, query, hhsuite_root, hhblits_bin=None, working_dir=None):
485  self.queryquery = query
486  self.hhsuite_roothhsuite_root = hhsuite_root
487  if os.path.exists(os.path.join(self.hhsuite_roothhsuite_root, 'bin/hhblits')):
488  self.bin_dirbin_dir = os.path.join(self.hhsuite_roothhsuite_root, 'bin')
489  self.hhblits_binhhblits_bin = os.path.join(self.hhsuite_roothhsuite_root, 'bin/hhblits')
490  else:
491  self.hhblits_binhhblits_bin = settings.Locate('hhblits',
492  explicit_file_name=hhblits_bin)
493  self.bin_dirbin_dir = os.path.dirname(self.hhblits_binhhblits_bin)
494  # guess root folder (note: this may fail in future)
495  self.hhsuite_roothhsuite_root = os.path.dirname(self.bin_dirbin_dir)
496 
497  if working_dir:
498  self.needs_cleanupneeds_cleanup = False
499  self.working_dirworking_dir = working_dir
500  if not os.path.exists(working_dir):
501  os.mkdir(working_dir)
502  if isinstance(query, str):
503  self.filenamefilename = os.path.abspath(os.path.join(
504  self.working_dirworking_dir, os.path.basename(query)))
505  if self.filenamefilename != os.path.abspath(query):
506  shutil.copy(query, self.filenamefilename)
507  else:
508  self.filenamefilename = os.path.abspath(os.path.join(self.working_dirworking_dir,
509  '%s.fasta' % HHblits.OUTPUT_PREFIX))
510  ost.io.SaveSequence(query, self.filenamefilename)
511  else:
512  self.needs_cleanupneeds_cleanup = True
513  if isinstance(query, str):
514  self.working_dirworking_dir = tempfile.mkdtemp()
515  self.filenamefilename = os.path.abspath(os.path.join(
516  self.working_dirworking_dir, os.path.basename(query)))
517  shutil.copy(query, self.filenamefilename)
518  else:
519  tmp_dir = utils.TempDirWithFiles((query,))
520  self.working_dirworking_dir = tmp_dir.dirname
521  self.filenamefilename = tmp_dir.files[0]
522 
523  def BuildQueryMSA(self, nrdb, options={}, a3m_file=None, assign_ss=True):
524  """Builds the MSA for the query sequence.
525 
526  The produced A3M file can be parsed by :func:`ParseA3M`. If the file was
527  already produced, hhblits is not called again and the existing file path
528  is returned (neglecting the *assign_ss* flag!!!).
529 
530  :param nrdb: Database to be align against; has to be an hhblits database
531  :type nrdb: :class:`str`
532 
533  :param options: Dictionary of options to *hhblits*, one "-" is added in
534  front of every key. Boolean True values add flag without
535  value. Merged with default options
536  {'cpu': 1, 'n': 1, 'e': 0.001}, where 'n' defines the
537  number of iterations and 'e' the E-value cutoff for
538  inclusion of sequences in result alignment.
539  :type options: :class:`dict`
540 
541  :param a3m_file: a path of a3m_file to be used, optional
542  :type a3m_file: :class:`str`
543 
544  :param assign_ss: HHblits does not assign predicted secondary structure
545  by default. You can optionally assign it with the
546  addss.pl script provided by the HH-suite. However,
547  your HH-suite installation requires you to specify
548  paths to PSIRED etc. We refer to the HH-suite user
549  guide for further instructions. Assignment is done
550  by calling :func:`HHblits.AssignSSToA3M`
551 
552  :type assign_ss: :class:`bool`
553 
554  :return: The path to the A3M file containing the MSA
555  :rtype: :class:`str`
556  """
557  if a3m_file is None:
558  a3m_file = '%s.a3m' % os.path.splitext(self.filenamefilename)[0]
559  else:
560  a3m_file = os.path.abspath(a3m_file)
561  if os.path.exists(a3m_file):
562  ost.LogInfo('Reusing already existing query alignment (%s)' % a3m_file)
563  return a3m_file
564  ost.LogInfo('Using hhblits from "%s"' % self.hhsuite_roothhsuite_root)
565  full_nrdb = os.path.join(os.path.abspath(os.path.split(nrdb)[0]),
566  os.path.split(nrdb)[1])
567  # create MSA
568  opts = {'cpu' : 1, # no. of cpus used
569  'n' : 1, # no. of iterations
570  'e' : 0.001} # evalue threshold
571  opts.update(options)
572  opt_cmd, _ = _ParseOptions(opts)
573  hhblits_cmd = '%s -i %s -oa3m %s -d %s %s' % \
574  (self.hhblits_binhhblits_bin, self.filenamefilename, a3m_file, full_nrdb,
575  opt_cmd)
576 
577  p = subprocess.run(hhblits_cmd, shell=True, cwd=self.working_dirworking_dir,
578  stdout=subprocess.PIPE, stderr=subprocess.PIPE)
579 
580  lines = p.stdout.decode().splitlines()
581  for line in lines:
582  ost.LogVerbose(line.strip())
583 
584  lines = p.stderr.decode().splitlines()
585  for line in lines:
586  ost.LogError(line.strip())
587 
588  if not os.path.exists(a3m_file):
589  raise RuntimeError('Building query profile failed, no output')
590 
591  if assign_ss:
592  return self.AssignSSToA3MAssignSSToA3M(a3m_file)
593  else:
594  return a3m_file
595 
596  def AssignSSToA3M(self, a3m_file):
597  """
598  HHblits does not assign predicted secondary structure by default.
599  You can optionally assign it with the addss.pl script provided by the
600  HH-suite. However, your HH-suite installation requires you to specify
601  paths to PSIRED etc. We refer to the HH-suite user guide for further
602  instructions.
603 
604  :param a3m_file: Path to file you want to assign secondary structure to
605  :type a3m_file: :class:`str`
606  """
607 
608  a3m_file = os.path.abspath(a3m_file)
609  addss_cmd = "perl %s %s" % (os.path.join(self.hhsuite_roothhsuite_root,
610  'scripts/addss.pl'),
611  a3m_file)
612  env = dict(os.environ)
613  env.update({'PERL5LIB' : os.path.join(self.hhsuite_roothhsuite_root, 'scripts'),
614  'HHLIB' : os.path.join(self.hhsuite_roothhsuite_root),
615  'PATH' : '%s:%s' % (os.path.join(self.hhsuite_roothhsuite_root, 'bin'),
616  os.environ['PATH'])})
617 
618  p = subprocess.run(addss_cmd, shell=True, cwd=self.working_dirworking_dir,
619  env=env, stdout=subprocess.PIPE,
620  stderr=subprocess.PIPE)
621 
622  lines = p.stdout.decode().splitlines()
623  for line in lines:
624  ost.LogVerbose(line.strip())
625  if 'error' in line.lower() or 'bad interpreter' in line.lower():
626  raise RuntimeError('Predicting secondary structure for MSA '+
627  '(%s) failed, on command: %s' % (a3m_file, line))
628 
629  lines = p.stderr.decode().splitlines()
630  for line in lines:
631  ost.LogError(line.strip())
632  if 'error' in line.lower() or 'bad interpreter' in line.lower():
633  raise RuntimeError('Predicting secondary structure for MSA '+
634  '(%s) failed, on command: %s' % (a3m_file, line))
635 
636  return a3m_file
637 
638  def A3MToProfile(self, a3m_file, hhm_file=None):
639  """
640  Converts the A3M alignment file to a hhm profile. If hhm_file is not
641  given, the output file will be set to <:attr:`a3m_file`-basename>.hhm.
642 
643  The produced HHM file can be parsed by :func:`ParseHHM`.
644 
645  If the file was already produced, the existing file path is returned
646  without recomputing it.
647 
648  :param a3m_file: Path to input MSA as produced by :meth:`BuildQueryMSA`
649  :type a3m_file: :class:`str`
650 
651  :param hhm_file: Desired output file name
652  :type hhm_file: :class:`str`
653 
654  :return: Path to the profile file
655  :rtype: :class:`str`
656  """
657  hhmake = os.path.join(self.bin_dirbin_dir, 'hhmake')
658  if not hhm_file:
659  hhm_file = '%s.hhm' % os.path.splitext(a3m_file)[0]
660  if os.path.exists(hhm_file):
661  return hhm_file
662  ost.LogVerbose('converting %s to %s' % (a3m_file, hhm_file))
663  p = subprocess.run('%s -i %s -o %s' % (hhmake, a3m_file, hhm_file),
664  shell=True, stdout=subprocess.PIPE,
665  stderr=subprocess.PIPE)
666  lines = p.stdout.decode().splitlines()
667  for line in lines:
668  ost.LogVerbose(line.strip())
669  lines = p.stderr.decode().splitlines()
670  for line in lines:
671  ost.LogError(line.strip())
672 
673  if p.returncode != 0:
674  raise IOError('could not convert a3m to hhm file')
675 
676  if not os.path.exists(hhm_file):
677  raise RuntimeError('could not convert a3m to hhm file, no output')
678 
679  return hhm_file
680 
681  def A3MToCS(self, a3m_file, cs_file=None, options={}):
682  """
683  Converts the A3M alignment file to a column state sequence file. If
684  cs_file is not given, the output file will be set to
685  <:attr:`a3m_file`-basename>.seq219.
686 
687  If the file was already produced, the existing file path is returned
688  without recomputing it.
689 
690  :param a3m_file: Path to input MSA as produced by :meth:`BuildQueryMSA`
691  :type a3m_file: :class:`str`
692 
693  :param cs_file: Output file name (may be omitted)
694  :type cs_file: :class:`str`
695 
696  :param options: Dictionary of options to *cstranslate*, one "-" is added
697  in front of every key. Boolean True values add flag
698  without value.
699  :type options: :class:`dict`
700 
701  :return: Path to the column state sequence file
702  :rtype: :class:`str`
703  """
704  cstranslate = os.path.join(self.bin_dirbin_dir, 'cstranslate')
705  if not cs_file:
706  cs_file = '%s.seq219' % os.path.splitext(a3m_file)[0]
707  if os.path.exists(cs_file):
708  return cs_file
709  opt_cmd, _ = _ParseOptions(options)
710  cs_cmd = '%s -i %s -o %s %s' % (
711  cstranslate,
712  os.path.abspath(a3m_file),
713  os.path.abspath(cs_file),
714  opt_cmd)
715  ost.LogVerbose('converting %s to %s' % (a3m_file, cs_file))
716  p = subprocess.run(cs_cmd, shell=True, cwd=self.working_dirworking_dir,
717  stdout=subprocess.PIPE, stderr=subprocess.PIPE)
718 
719  if not os.path.exists(cs_file):
720  raise RuntimeError('Creating column state sequence file failed, ' +
721  'no output')
722 
723  if b'Wrote abstract state sequence to' in p.stdout:
724  return cs_file
725  else:
726  raise RuntimeError('Creating column state sequence file failed')
727 
728  def Cleanup(self):
729  """Delete temporary data.
730 
731  Delete temporary data if no working dir was given. Controlled by
732  :attr:`needs_cleanup`.
733  """
734  if self.needs_cleanupneeds_cleanup and os.path.exists(self.working_dirworking_dir):
735  shutil.rmtree(self.working_dirworking_dir)
736 
737  def CleanupFailed(self):
738  '''In case something went wrong, call to make sure everything is clean.
739 
740  This will delete the working dir independently of :attr:`needs_cleanup`.
741  '''
742  store_needs_cleanup = self.needs_cleanupneeds_cleanup
743  self.needs_cleanupneeds_cleanup = True
744  self.CleanupCleanup()
745  self.needs_cleanupneeds_cleanup = store_needs_cleanup
746 
747  def Search(self, a3m_file, database, options={}, prefix=''):
748  """
749  Searches for templates in the given database. Before running the search,
750  the hhm file is copied. This makes it possible to launch several hhblits
751  instances at once. Upon success, the filename of the result file is
752  returned. This file may be parsed with :func:`ParseHHblitsOutput`.
753 
754  :param a3m_file: Path to input MSA as produced by :meth:`BuildQueryMSA`
755  :type a3m_file: :class:`str`
756 
757  :param database: Search database, needs to be the common prefix of the
758  database files
759  :type database: :class:`str`
760 
761  :param options: Dictionary of options to *hhblits*, one "-" is added in
762  front of every key. Boolean True values add flag without
763  value. Merged with default options {'cpu': 1, 'n': 1},
764  where 'n' defines the number of iterations.
765  :type options: :class:`dict`
766 
767  :param prefix: Prefix to the result file
768  :type prefix: :class:`str`
769 
770  :return: The path to the result file
771  :rtype: :class:`str`
772  """
773  opts = {'cpu' : 1, # no. of cpus used
774  'n' : 1} # no. of iterations
775  opts.update(options)
776  opt_cmd, opt_str = _ParseOptions(opts)
777  base = os.path.basename(os.path.splitext(a3m_file)[0])
778  hhr_file = '%s%s_%s.hhr' % (prefix, base, opt_str)
779  hhr_file = os.path.join(self.working_dirworking_dir, hhr_file)
780  search_cmd = '%s %s -e 0.001 -Z 10000 -B 10000 -i %s -o %s -d %s' % (
781  self.hhblits_binhhblits_bin,
782  opt_cmd,
783  os.path.abspath(a3m_file),
784  os.path.abspath(hhr_file),
785  os.path.join(os.path.abspath(os.path.split(database)[0]),
786  os.path.split(database)[1]))
787  ost.LogInfo('searching %s' % database)
788  ost.LogVerbose(search_cmd)
789  p = subprocess.run(search_cmd, shell=True, cwd=self.working_dirworking_dir,
790  stdout=subprocess.PIPE, stderr=subprocess.PIPE)
791  lines = p.stdout.decode().splitlines()
792  for line in lines:
793  ost.LogVerbose(line.strip())
794  lines = p.stderr.decode().splitlines()
795  for line in lines:
796  ost.LogError(line.strip())
797 
798  if p.returncode != 0:
799  raise RuntimeError('Sequence search failed')
800 
801  if not os.path.exists(hhr_file):
802  raise RuntimeError('Sequence search failed, no output')
803 
804  return hhr_file
805 
806 
807 def _ParseOptions(opts):
808  """
809  :return: Tuple of strings (opt_cmd, opt_str), where opt_cmd can be
810  passed to command ("-" added in front of keys, options
811  separated by space) and opt_str (options separated by "_")
812  can be used for filenames.
813  :param opts: Dictionary of options, one "-" is added in front of every
814  key. Boolean True values add flag without value.
815  """
816  opt_cmd = list()
817  opt_str = list()
818  for k, val in opts.items():
819  if type(val) == type(True):
820  if val == True:
821  opt_cmd.append('-%s' % str(k))
822  opt_str.append(str(k))
823  else:
824  opt_cmd.append('-%s %s' % (str(k), str(val)))
825  opt_str.append('%s%s' % (str(k), str(val)))
826  opt_cmd = ' '.join(opt_cmd)
827  opt_str = '_'.join(opt_str)
828  return opt_cmd, opt_str
829 
830 
831 __all__ = ['HHblits', 'HHblitsHit', 'HHblitsHeader',
832  'ParseHHblitsOutput', 'ParseA3M', 'ParseHHM',
833  'ParseHeaderLine']
834 
835 # LocalWords: HHblits MSA hhblits hhtools PSIPRED addss param nrdb str
836 # LocalWords: cpu hhm func ParseHHblitsOutput ss pred conf msa hhsuite dir
837 # LocalWords: attr basename rtype cstranslate tuple HHblitsHeader meth aln
838 # LocalWords: HHblitsHit iterable evalue pvalue neff hmms datetime
839 # LocalWords: whitespace whitespaces
def __init__(self, hit_id, aln, score, ss_score, evalue, pvalue, prob)
Definition: hhblits3.py:61
def A3MToCS(self, a3m_file, cs_file=None, options={})
Definition: hhblits3.py:681
def AssignSSToA3M(self, a3m_file)
Definition: hhblits3.py:596
def __init__(self, query, hhsuite_root, hhblits_bin=None, working_dir=None)
Definition: hhblits3.py:484
def Search(self, a3m_file, database, options={}, prefix='')
Definition: hhblits3.py:747
def BuildQueryMSA(self, nrdb, options={}, a3m_file=None, assign_ss=True)
Definition: hhblits3.py:523
def A3MToProfile(self, a3m_file, hhm_file=None)
Definition: hhblits3.py:638
def ParseA3M(a3m_file)
Definition: hhblits3.py:295
def ParseHHblitsOutput(output)
Definition: hhblits3.py:148
def ParseHHM(profile)
Definition: hhblits3.py:371
def ParseHeaderLine(line)
Definition: hhblits3.py:117
void DLLEXPORT_OST_IO SaveSequence(const seq::ConstSequenceHandle &sequence, const String &filename, const String &format="auto")