OpenStructure
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
mmcif_reader.hh
Go to the documentation of this file.
1 //------------------------------------------------------------------------------
2 // This file is part of the OpenStructure project <www.openstructure.org>
3 //
4 // Copyright (C) 2008-2011 by the OpenStructure authors
5 //
6 // This library is free software; you can redistribute it and/or modify it under
7 // the terms of the GNU Lesser General Public License as published by the Free
8 // Software Foundation; either version 3.0 of the License, or (at your option)
9 // any later version.
10 // This library is distributed in the hope that it will be useful, but WITHOUT
11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 // FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
13 // details.
14 //
15 // You should have received a copy of the GNU Lesser General Public License
16 // along with this library; if not, write to the Free Software Foundation, Inc.,
17 // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 //------------------------------------------------------------------------------
19 #ifndef OST_MMCIF_READER_HH
20 #define OST_MMCIF_READER_HH
21 
22 #include <map>
23 
24 #include <ost/geom/geom.hh>
25 #include <ost/seq/sequence_list.hh>
27 #include <ost/mol/chain_type.hh>
29 #include <ost/io/mol/io_profile.hh>
30 #include <ost/io/io_exception.hh>
32 #include <ost/io/mol/mmcif_info.hh>
33 
34 namespace ost { namespace io {
35 
60 public:
66 
// Stream-based constructor.
// NOTE(review): the class keeps `mol::EntityHandle&` and `const IOProfile&`
// data members (ent_handle_, profile_); presumably they are bound to
// `ent_handle` and `profile`, in which case both arguments must outlive the
// reader -- confirm in the implementation file.
67  MMCifReader(std::istream& stream, mol::EntityHandle& ent_handle,
68  const IOProfile& profile);
69 
// File-name-based constructor.
// NOTE(review): the class keeps `mol::EntityHandle&` and `const IOProfile&`
// data members (ent_handle_, profile_); presumably they are bound to
// `ent_handle` and `profile`, in which case both arguments must outlive the
// reader -- confirm in the implementation file.
75  MMCifReader(const String& filename, mol::EntityHandle& ent_handle,
76  const IOProfile& profile);
77 
// NOTE(review): defined out of line; presumably sets the reader's members to
// their initial values -- confirm in the implementation file.
80  void Init();
81 
// NOTE(review): defined out of line; presumably resets per-file parsing state
// -- confirm in the implementation file.
83  void ClearState();
84 
// Setter counterpart of GetRestrictChains(), which returns restrict_chains_.
// NOTE(review): defined out of line; presumably stores `restrict_chains` in
// restrict_chains_ -- confirm in the implementation file.
88  void SetRestrictChains(const String& restrict_chains);
89 
95  void SetReadCanonicalSeqRes(bool flag)
96  {
97  seqres_can_ = flag;
98  }
99 
100  const String& GetRestrictChains() const
101  {
102  return restrict_chains_;
103  }
104 
109  void SetAuthChainID(bool id)
110  {
111  auth_chain_id_ = id;
112  }
113 
// NOTE(review): virtual hook, defined out of line; presumably invoked by the
// STAR parser when a data block named `data_name` starts -- confirm.
120  virtual bool OnBeginData(const StringRef& data_name);
121 
// NOTE(review): virtual hook, defined out of line; presumably invoked by the
// STAR parser for each loop header and the place where TryStoreIdx() is used
// to fill indices_ -- confirm in the implementation file.
127  virtual bool OnBeginLoop(const StarLoopDesc& header); // tested
128 
// NOTE(review): virtual hook, defined out of line; presumably invoked by the
// STAR parser once per data row, with `columns` holding the row's values for
// the loop described by `header` -- confirm in the implementation file.
133  virtual void OnDataRow(const StarLoopDesc& header,
134  const std::vector<StringRef>& columns);
135 
// NOTE(review): virtual hook, defined out of line; presumably invoked by the
// STAR parser when the current data block ends -- confirm.
137  virtual void OnEndData();
138 
143  return seqres_;
144  }
145 
149  void SetReadSeqRes(bool flag)
150  {
151  read_seqres_ = flag;
152  }
153 
157  bool GetReadSeqRes() const
158  {
159  return read_seqres_;
160  }
161 
165  const MMCifInfo& GetInfo() { return info_; }
166 
167 protected:
174  void TryStoreIdx(const int mapping,
175  const String& item,
176  const StarLoopDesc& header)
177  {
178  indices_[mapping] = header.GetIndex(item);
179 
180  if (indices_[mapping] == -1) {
181  throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR,
182  "No item '" + item +
183  "' found in '" +
184  header.GetCategory()+
185  "' header",
186  this->GetCurrentLinenum()));
187  }
188  } // tested
189 
// `columns` is the only read-only argument; every other parameter is a
// non-const reference and can therefore be written by the callee.
// NOTE(review): presumably those references are outputs filled from one
// atom_site row, and the bool result tells whether the row should be used --
// confirm in the implementation file.
203  bool ParseAtomIdent(const std::vector<StringRef>& columns,
204  String& auth_chain_name,
205  String& cif_chain_name,
206  StringRef& res_name,
207  mol::ResNum& resnum,
208  bool& valid_res_num,
209  StringRef& atom_name,
210  char& alt_loc);
211 
// NOTE(review): presumably handles one data row of the category tagged
// ATOM_SITE in MMCifCategory and adds the atom to the entity -- confirm.
215  void ParseAndAddAtom(const std::vector<StringRef>& columns);
216 
// NOTE(review): presumably handles one data row of the category tagged
// ENTITY in MMCifCategory (items: EntityItems) -- confirm.
220  void ParseEntity(const std::vector<StringRef>& columns);
221 
// NOTE(review): presumably handles one data row of the category tagged
// ENTITY_POLY in MMCifCategory (items: EntityPolyItems) -- confirm.
225  void ParseEntityPoly(const std::vector<StringRef>& columns);
226 
// NOTE(review): presumably handles one data row of the category tagged
// CITATION in MMCifCategory (items: CitationItems) -- confirm.
230  void ParseCitation(const std::vector<StringRef>& columns);
231 
232  const MMCifInfoStructRefs& GetStructRefs() const { return struct_refs_; }
// NOTE(review): presumably converts the sequence string `seqres` with the
// help of `compound_lib` and returns the converted string -- confirm.
241  String ConvertSEQRES(const String& seqres, conop::CompoundLibPtr compound_lib);
// NOTE(review): presumably handles one data row of the category tagged
// CITATION_AUTHOR in MMCifCategory (items: CitationAuthorItems) -- confirm.
245  void ParseCitationAuthor(const std::vector<StringRef>& columns);
246 
// NOTE(review): presumably handles one data row of the category tagged
// STRUCT_REF in MMCifCategory (items: StructRefItems) -- confirm.
248  void ParseStructRef(const std::vector<StringRef>& columns);
249 
// NOTE(review): presumably handles one data row of the category tagged
// STRUCT_REF_SEQ in MMCifCategory (items: StructRefSeqItems) -- confirm.
251  void ParseStructRefSeq(const std::vector<StringRef>& columns);
252 
// NOTE(review): presumably handles one data row of the category tagged
// STRUCT_REF_SEQ_DIF in MMCifCategory (items: StructRefSeqDifItems) --
// confirm.
254  void ParseStructRefSeqDif(const std::vector<StringRef>& columns);
// NOTE(review): presumably handles one data row of the category tagged
// EXPTL in MMCifCategory (items: ExptlItems) -- confirm.
258  void ParseExptl(const std::vector<StringRef>& columns);
259 
// NOTE(review): presumably handles one data row of the category tagged
// REFINE in MMCifCategory (items: RefineItems) -- confirm.
263  void ParseRefine(const std::vector<StringRef>& columns);
264 
// NOTE(review): presumably handles one data row of the category tagged
// PDBX_STRUCT_ASSEMBLY in MMCifCategory (items: PdbxStructAssemblyItems) --
// confirm.
268  void ParsePdbxStructAssembly(const std::vector<StringRef>& columns);
269 
// NOTE(review): presumably handles one data row of the category tagged
// PDBX_STRUCT_ASSEMBLY_GEN in MMCifCategory (items:
// PdbxStructAssemblyGenItems) -- confirm.
273  void ParsePdbxStructAssemblyGen(const std::vector<StringRef>& columns);
274 
// Returns a list of String lists built from `expression`.
// NOTE(review): presumably expands an OPER_EXPRESSION value (see
// PdbxStructAssemblyGenItems) into blocks of operation ids -- confirm.
// NOTE(review): "Experession" is misspelled; the name is part of the
// interface, so any rename has to be coordinated with all callers.
275  std::vector<std::vector<String> > UnPackOperExperession(StringRef expression);
276 
// `is_range` and `single_block` are non-const references and can be written
// by the callee; `lborder` is passed by value here (StoreRange takes it by
// reference).
// NOTE(review): presumably a helper of UnPackOperExperession() that appends
// the text between `l` and `s` to `single_block` -- confirm.
277  void StoreExpression(const char* l, const char* s,
278  bool& is_range, int lborder,
279  std::vector<String>& single_block);
280 
// `l`, `is_range`, `lborder` and `single_block` are non-const references and
// can be written by the callee.
// NOTE(review): presumably a helper of UnPackOperExperession() dealing with
// range expressions -- confirm in the implementation file.
281  void StoreRange(const char*& l, const char* s, bool& is_range, int& lborder,
282  std::vector<String>& single_block);
283 
// NOTE(review): presumably handles one data row of the category tagged
// PDBX_STRUCT_OPER_LIST in MMCifCategory (items: PdbxStructOperListItems) --
// confirm.
287  void ParsePdbxStructOperList(const std::vector<StringRef>& columns);
288 
// NOTE(review): presumably handles one data row of the category tagged
// DATABASE_PDB_REV in MMCifCategory (items: DatabasePDBRevItems) -- confirm.
292  void ParseDatabasePDBRev(const std::vector<StringRef>& columns);
293 
// NOTE(review): presumably handles one data row of the category tagged
// PDBX_AUDIT_REVISION_HISTORY in MMCifCategory (items:
// PdbxAuditRevisionHistoryItems) -- confirm.
297  void ParsePdbxAuditRevisionHistory(const std::vector<StringRef>& columns);
298 
// NOTE(review): presumably handles one data row of the category tagged
// PDBX_AUDIT_REVISION_DETAILS in MMCifCategory (items:
// PdbxAuditRevisionDetailsItems) -- confirm.
302  void ParsePdbxAuditRevisionDetails(const std::vector<StringRef>& columns);
303 
// NOTE(review): presumably handles one data row of the category tagged
// PDBX_DATABASE_STATUS in MMCifCategory (items: PdbxDatabaseStatusItems) --
// confirm.
307  void ParsePdbxDatabaseStatus(const std::vector<StringRef>& columns);
308 
// NOTE(review): presumably handles one data row of the category tagged
// STRUCT in MMCifCategory (items: StructItems) -- confirm.
312  void ParseStruct(const std::vector<StringRef>& columns);
313 
// NOTE(review): presumably handles one data row of the category tagged
// STRUCT_CONF in MMCifCategory (items: StructConfItems) -- confirm.
317  void ParseStructConf(const std::vector<StringRef>& columns);
318 
// NOTE(review): presumably handles one data row of the category tagged
// STRUCT_SHEET_RANGE in MMCifCategory (items: StructSheetRangeItems) --
// confirm.
322  void ParseStructSheetRange(const std::vector<StringRef>& columns);
323 
// NOTE(review): presumably handles one data row of the category tagged
// PDBX_DATABASE_PDB_OBS_SPR in MMCifCategory (items: PdbxDatabasePDBObsSpr)
// -- confirm.
327  void ParsePdbxDatabasePdbObsSpr(const std::vector<StringRef>& columns);
328 
// Secondary structure element kinds; the return type of
// DetermineSecStructType().
// NOTE(review): the embedded line numbering jumps from 330 to 333, so
// enumerators preceding MMCIF_TURN may be missing from this listing -- check
// the real header before relying on the numeric value of MMCIF_TURN.
330  typedef enum {
333  MMCIF_TURN
334  } MMCifSecStructElement;
335 
// Const member: maps the string `type` to a MMCifSecStructElement value.
// NOTE(review): the accepted strings and the behaviour for unknown ones are
// defined out of line -- confirm in the implementation file.
339  MMCifSecStructElement DetermineSecStructType(const StringRef& type) const;
340 
// NOTE(review): takes the entity handle by value; presumably applies the
// collected helix_list_ / strand_list_ entries to `ent` -- confirm.
344  void AssignSecStructure(mol::EntityHandle ent);
345 
346 private:
/// Magic numbers of the reader. MAX_ITEMS_IN_ROW is the length of the
/// indices_ array.
typedef enum {
  MAX_ITEMS_IN_ROW = 18
} MMCifMagicNos;
351 
/// Item slots for the category tagged ATOM_SITE in MMCifCategory.
/// The 18 enumerators use up exactly MAX_ITEMS_IN_ROW (18) slots.
// NOTE(review): presumably passed as the `mapping` argument of TryStoreIdx().
typedef enum {
  AUTH_ASYM_ID       = 0,
  AS_ID              = 1,
  LABEL_ALT_ID       = 2,
  LABEL_ASYM_ID      = 3,
  LABEL_ATOM_ID      = 4,
  LABEL_COMP_ID      = 5,
  LABEL_ENTITY_ID    = 6,
  LABEL_SEQ_ID       = 7,
  AUTH_SEQ_ID        = 8,
  TYPE_SYMBOL        = 9,
  CARTN_X            = 10,
  CARTN_Y            = 11,
  CARTN_Z            = 12,
  OCCUPANCY          = 13,
  B_ISO_OR_EQUIV     = 14,
  PDBX_PDB_INS_CODE  = 15,
  GROUP_PDB          = 16,
  PDBX_PDB_MODEL_NUM = 17
} AtomSiteItems;
373 
/// Item slots for the category tagged ENTITY in MMCifCategory.
// NOTE(review): presumably passed as the `mapping` argument of TryStoreIdx().
typedef enum {
  E_ID             = 0,
  E_TYPE           = 1,
  PDBX_DESCRIPTION = 2
} EntityItems;
380 
/// Item slots for the category tagged ENTITY_POLY in MMCifCategory.
// NOTE(review): presumably passed as the `mapping` argument of TryStoreIdx().
typedef enum {
  ENTITY_ID                    = 0,
  EP_TYPE                      = 1,
  PDBX_SEQ_ONE_LETTER_CODE     = 2,
  PDBX_SEQ_ONE_LETTER_CODE_CAN = 3
} EntityPolyItems;
388 
/// Item slots for the category tagged CITATION in MMCifCategory.
// NOTE(review): presumably passed as the `mapping` argument of TryStoreIdx().
typedef enum {
  CITATION_ID             = 0,
  ABSTRACT_ID_CAS         = 1,
  BOOK_ID_ISBN            = 2,
  BOOK_TITLE              = 3,
  JOURNAL_ABBREV          = 4,
  JOURNAL_VOLUME          = 5,
  PAGE_FIRST              = 6,
  PAGE_LAST               = 7,
  PDBX_DATABASE_ID_DOI    = 8,
  PDBX_DATABASE_ID_PUBMED = 9,
  YEAR                    = 10,
  TITLE                   = 11
} CitationItems;
404 
/// Item slots for the category tagged CITATION_AUTHOR in MMCifCategory.
// NOTE(review): presumably passed as the `mapping` argument of TryStoreIdx().
typedef enum {
  AUTHOR_CITATION_ID = 0,
  AUTHOR_NAME        = 1,
  ORDINAL            = 2
} CitationAuthorItems;
411 
/// Item slots for the category tagged EXPTL in MMCifCategory.
// NOTE(review): presumably passed as the `mapping` argument of TryStoreIdx().
typedef enum {
  EXPTL_ENTRY_ID = 0,
  METHOD         = 1
} ExptlItems;
417 
/// Item slots for the category tagged REFINE in MMCifCategory.
// NOTE(review): presumably passed as the `mapping` argument of TryStoreIdx().
typedef enum {
  REFINE_ENTRY_ID    = 0,
  LS_D_RES_HIGH      = 1,
  LS_D_RES_LOW       = 2,
  LS_R_FACTOR_R_WORK = 3,
  LS_R_FACTOR_R_FREE = 4
} RefineItems;
426 
/// Item slots for the category tagged PDBX_STRUCT_ASSEMBLY in MMCifCategory.
// NOTE(review): presumably passed as the `mapping` argument of TryStoreIdx().
typedef enum {
  PSA_DETAILS    = 0,
  PSA_ID         = 1,
  METHOD_DETAILS = 2
} PdbxStructAssemblyItems;
433 
/// \enum items of the struct_ref category
// NOTE(review): presumably passed as the `mapping` argument of TryStoreIdx().
typedef enum {
  SR_ENTITY_ID = 0,
  SR_ID        = 1,
  SR_DB_CODE   = 2,
  SR_DB_NAME   = 3,
  SR_DB_ACCESS = 4
} StructRefItems;
442 
/// Item slots for the category tagged STRUCT_REF_SEQ in MMCifCategory.
// NOTE(review): presumably passed as the `mapping` argument of TryStoreIdx().
typedef enum {
  SRS_ALIGN_ID       = 0,
  SRS_STRUCT_REF_ID  = 1,
  SRS_PDBX_STRAND_ID = 2,
  SRS_DB_ALIGN_BEG   = 3,
  SRS_DB_ALIGN_END   = 4,
  SRS_ENT_ALIGN_BEG  = 5,
  SRS_ENT_ALIGN_END  = 6
} StructRefSeqItems;
453 
/// Item slots for the category tagged STRUCT_REF_SEQ_DIF in MMCifCategory.
// NOTE(review): presumably passed as the `mapping` argument of TryStoreIdx().
typedef enum {
  SRSD_ALIGN_ID = 0,
  SRSD_SEQ_RNUM = 1,
  SRSD_DB_RNUM  = 2,
  SRSD_DETAILS  = 3
} StructRefSeqDifItems;
461 
/// Item slots for the category tagged PDBX_STRUCT_ASSEMBLY_GEN in
/// MMCifCategory.
// NOTE(review): presumably passed as the `mapping` argument of TryStoreIdx().
typedef enum {
  ASSEMBLY_ID     = 0,
  ASYM_ID_LIST    = 1,
  OPER_EXPRESSION = 2
} PdbxStructAssemblyGenItems;
468 
/// Item slots for the category tagged PDBX_STRUCT_OPER_LIST in
/// MMCifCategory: id, type, three vector components and a 3x3 matrix.
// NOTE(review): presumably passed as the `mapping` argument of TryStoreIdx().
typedef enum {
  PSOL_ID    = 0,
  PSOL_TYPE  = 1,
  VECTOR_1   = 2,
  VECTOR_2   = 3,
  VECTOR_3   = 4,
  MATRIX_1_1 = 5,
  MATRIX_1_2 = 6,
  MATRIX_1_3 = 7,
  MATRIX_2_1 = 8,
  MATRIX_2_2 = 9,
  MATRIX_2_3 = 10,
  MATRIX_3_1 = 11,
  MATRIX_3_2 = 12,
  MATRIX_3_3 = 13
} PdbxStructOperListItems;
486 
/// Item slots for the category tagged STRUCT in MMCifCategory.
// NOTE(review): presumably passed as the `mapping` argument of TryStoreIdx().
typedef enum {
  STRUCT_ENTRY_ID            = 0,
  PDBX_CASP_FLAG             = 1,
  PDBX_DESCRIPTOR            = 2,
  PDBX_FORMULA_WEIGHT        = 3,
  PDBX_FORMULA_WEIGHT_METHOD = 4,
  PDBX_MODEL_DETAILS         = 5,
  PDBX_MODEL_TYPE_DETAILS    = 6,
  STRUCT_TITLE               = 7
} StructItems;
498 
/// Item slots for the category tagged STRUCT_CONF in MMCifCategory.
/// Enumerators are numbered consecutively from 0 (SC_BEG_AUTH_ASYM_ID) to
/// 9 (SC_ID).
// NOTE(review): presumably passed as the `mapping` argument of TryStoreIdx().
typedef enum {
  SC_BEG_AUTH_ASYM_ID,
  SC_BEG_LABEL_ASYM_ID,
  SC_BEG_LABEL_COMP_ID,
  SC_BEG_LABEL_SEQ_ID,
  SC_CONF_TYPE_ID,
  SC_END_AUTH_ASYM_ID,
  SC_END_LABEL_ASYM_ID,
  SC_END_LABEL_COMP_ID,
  SC_END_LABEL_SEQ_ID,
  SC_ID
} StructConfItems;
512 
/// Item slots for the category tagged STRUCT_SHEET_RANGE in MMCifCategory.
/// Enumerators are numbered consecutively from 0 (SSR_BEG_LABEL_ASYM_ID) to
/// 9 (SSR_END_AUTH_ASYM_ID).
// NOTE(review): presumably passed as the `mapping` argument of TryStoreIdx().
typedef enum {
  SSR_BEG_LABEL_ASYM_ID,
  SSR_BEG_LABEL_COMP_ID,
  SSR_BEG_LABEL_SEQ_ID,
  SSR_END_LABEL_ASYM_ID,
  SSR_END_LABEL_COMP_ID,
  SSR_END_LABEL_SEQ_ID,
  SSR_SHEET_ID,
  SSR_ID,
  SSR_BEG_AUTH_ASYM_ID,
  SSR_END_AUTH_ASYM_ID
} StructSheetRangeItems;
526 
/// Item slots for the category tagged PDBX_DATABASE_PDB_OBS_SPR in
/// MMCifCategory. Enumerators are numbered consecutively from 0 (DATE) to
/// 3 (REPLACE_PDB_ID).
// NOTE(review): presumably passed as the `mapping` argument of TryStoreIdx().
typedef enum {
  DATE,
  PDPOS_ID,
  PDB_ID,
  REPLACE_PDB_ID
} PdbxDatabasePDBObsSpr;
534 
/// Item slots for the category tagged DATABASE_PDB_REV in MMCifCategory.
/// Enumerators are numbered consecutively from 0 (DPI_NUM) to
/// 3 (DPI_STATUS).
// NOTE(review): presumably passed as the `mapping` argument of TryStoreIdx().
typedef enum {
  DPI_NUM,
  DPI_DATE,
  DPI_DATE_ORIGINAL,
  DPI_STATUS
} DatabasePDBRevItems;
542 
/// Item slots for the category tagged PDBX_AUDIT_REVISION_HISTORY in
/// MMCifCategory: PARH_ORDINAL is 0, PARH_REVISION_DATE is 1.
// NOTE(review): presumably passed as the `mapping` argument of TryStoreIdx().
typedef enum {
  PARH_ORDINAL,
  PARH_REVISION_DATE
} PdbxAuditRevisionHistoryItems;
548 
/// Item slots for the category tagged PDBX_AUDIT_REVISION_DETAILS in
/// MMCifCategory: PARD_REVISION_ORDINAL is 0, PARD_TYPE is 1.
// NOTE(review): presumably passed as the `mapping` argument of TryStoreIdx().
typedef enum {
  PARD_REVISION_ORDINAL,
  PARD_TYPE
} PdbxAuditRevisionDetailsItems;
554 
/// Item slots for the category tagged PDBX_DATABASE_STATUS in MMCifCategory.
/// The single enumerator has the value 0.
// NOTE(review): presumably passed as the `mapping` argument of TryStoreIdx().
typedef enum {
  PDS_RECVD_INITIAL_DEPOSITION_DATE
} PdbxDatabaseStatusItems;
559 
/// Categories known to the reader; the type of the category_ member.
/// DONT_KNOW is the last enumerator, and category_counts_ is declared with
/// DONT_KNOW+1 elements, i.e. one per enumerator.
typedef enum {
  ATOM_SITE                   = 0,
  ENTITY                      = 1,
  ENTITY_POLY                 = 2,
  CITATION                    = 3,
  CITATION_AUTHOR             = 4,
  EXPTL                       = 5,
  REFINE                      = 6,
  PDBX_STRUCT_ASSEMBLY        = 7,
  PDBX_STRUCT_ASSEMBLY_GEN    = 8,
  PDBX_STRUCT_OPER_LIST       = 9,
  STRUCT                      = 10,
  STRUCT_CONF                 = 11,
  STRUCT_SHEET_RANGE          = 12,
  PDBX_DATABASE_PDB_OBS_SPR   = 13,
  STRUCT_REF                  = 14,
  STRUCT_REF_SEQ              = 15,
  STRUCT_REF_SEQ_DIF          = 16,
  DATABASE_PDB_REV            = 17,
  PDBX_AUDIT_REVISION_HISTORY = 18,
  PDBX_AUDIT_REVISION_DETAILS = 19,
  PDBX_DATABASE_STATUS        = 20,
  DONT_KNOW                   = 21
} MMCifCategory;
585 
587  typedef struct {
588  mol::ChainType type;
589  String details;
590  String seqres;
591  } MMCifEntityDesc;
592  typedef std::map<String, MMCifEntityDesc> MMCifEntityDescMap;
593 
// Record describing one biounit assembly: an id string, a list of chain
// names and a list of String lists named `operations`.
// NOTE(review): the embedded line numbering skips 599 and 601, so part of
// this definition may be missing from the listing -- check the real header.
595  typedef struct {
596  String biounit_id;
597  std::vector<String> chains;
598  std::vector<std::vector<String> > operations;
600  } MMCifBioUAssembly;
// Sequence of MMCifBioUAssembly records; the type of bu_assemblies_.
602  typedef std::vector<MMCifBioUAssembly> MMCifBioUAssemblyVector;
603 
// Map from a String key to a pair of parallel-looking vectors (ints and
// Strings); the type of authors_map_.
// NOTE(review): presumably citation id -> (author ordinals, author names),
// cf. CitationAuthorItems -- confirm in the implementation file.
604  typedef std::map<String, std::pair<std::vector<int>, std::vector<String> > >
605  MMCifCitationAuthorMap;
606 
608  typedef struct {
609  mol::ResNum start;
610  mol::ResNum end;
611  String chain_name;
612  } MMCifHSEntry;
613  typedef std::vector<MMCifHSEntry> MMCifHSVector;
614 
616  typedef struct {
617  String details;
618  String method_details;
619  } MMCifPSAEntry;
620  typedef std::map<String, MMCifPSAEntry> MMCifPSAMap;
621 
622  // members
623  MMCifCategory category_;
// one counter per MMCifCategory enumerator (DONT_KNOW is the last one)
624  int category_counts_[DONT_KNOW+1];
// filled by TryStoreIdx(): indices_[mapping] holds the index that the loop
// header reported for the requested item
625  int indices_[MAX_ITEMS_IN_ROW];
// reference members: the referenced objects must outlive the reader
626  const IOProfile& profile_;
627  mol::EntityHandle& ent_handle_;
// returned by GetRestrictChains()
628  String restrict_chains_;
// set by SetAuthChainID()
629  bool auth_chain_id_;
// set by SetReadCanonicalSeqRes()
630  bool seqres_can_;
631  mol::ChainHandle curr_chain_;
632  mol::ResidueHandle curr_residue_;
633  int chain_count_;
634  int residue_count_;
635  int atom_count_;
636  bool warned_name_mismatch_;
637  bool warned_rule_based_;
638  String subst_res_id_;
639  bool has_model_;
640  int curr_model_;
// NOTE(review): the embedded line numbering skips 642 after this line, so a
// declaration or comment continuation may be missing from the listing
641  std::vector<std::pair<mol::ChainHandle, String> > chain_id_pairs_;
643  MMCifEntityDescMap entity_desc_map_;
644  seq::SequenceList seqres_;
// set by SetReadSeqRes(), returned by GetReadSeqRes()
645  bool read_seqres_;
// returned by GetInfo()
646  MMCifInfo info_;
647  MMCifCitationAuthorMap authors_map_;
648  MMCifBioUAssemblyVector bu_assemblies_;
649  MMCifPSAMap bu_origin_map_;
650  MMCifHSVector helix_list_;
651  MMCifHSVector strand_list_;
// returned by GetStructRefs()
652  MMCifInfoStructRefs struct_refs_;
653  // for storing revisions
654  std::map<int, String> revision_dates_;
655  std::map<int, String> revision_types_;
656  bool database_PDB_rev_added_;
657 };
658 
659 }}
660 
661 #endif
convenient datatype for referencing character data
Definition: string_ref.hh:39
void SetAuthChainID(bool id)
Enable or disable reading of auth_chain_id instead of label_chain id (default)
container class for additional information from MMCif files
Definition: mmcif_info.hh:869
std::string String
Definition: base.hh:54
const MMCifInfoStructRefs & GetStructRefs() const
seq::SequenceList GetSeqRes() const
Return sequences.
const String & GetRestrictChains() const
Protein or molecule.
pointer_it< T > end(const std::vector< T > &values)
parser for the STAR file format
Definition: star_parser.hh:114
tuple compound_lib
Definition: init.py:190
boost::shared_ptr< CompoundLib > CompoundLibPtr
Definition: compound_lib.hh:34
std::vector< MMCifInfoStructRefPtr > MMCifInfoStructRefs
Definition: mmcif_info.hh:776
std::vector< SequenceImplPtr > SequenceList
void SetReadSeqRes(bool flag)
Toggle reading of SEQRES.
reader for the mmcif file format
Definition: mmcif_reader.hh:59
const MMCifInfo & GetInfo()
Get additional information of the mmCIF file.
#define DLLEXPORT_OST_IO
list of sequences.
void TryStoreIdx(const int mapping, const String &item, const StarLoopDesc &header)
Store an item index from loop header in preparation for reading a row. Throws an exception if the item is not found in the header.
const String & GetCategory() const
Definition: star_parser.hh:92
int GetIndex(const String &name) const
Definition: star_parser.hh:65
void SetReadCanonicalSeqRes(bool flag)
Toggle reading of canonical sequence residues (entity_poly.pdbx_seq_one_letter_code_can instead of entity_poly.pdbx_seq_one_letter_code).
Definition: mmcif_reader.hh:95
bool GetReadSeqRes() const
Check if reading of SEQRES is enabled.