OpenStructure
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
mmcif_reader.hh
Go to the documentation of this file.
1 //------------------------------------------------------------------------------
2 // This file is part of the OpenStructure project <www.openstructure.org>
3 //
4 // Copyright (C) 2008-2011 by the OpenStructure authors
5 //
6 // This library is free software; you can redistribute it and/or modify it under
7 // the terms of the GNU Lesser General Public License as published by the Free
8 // Software Foundation; either version 3.0 of the License, or (at your option)
9 // any later version.
10 // This library is distributed in the hope that it will be useful, but WITHOUT
11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 // FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
13 // details.
14 //
15 // You should have received a copy of the GNU Lesser General Public License
16 // along with this library; if not, write to the Free Software Foundation, Inc.,
17 // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 //------------------------------------------------------------------------------
19 #ifndef OST_MMCIF_READER_HH
20 #define OST_MMCIF_READER_HH
21 
22 #include <map>
23 
24 #include <ost/geom/geom.hh>
25 #include <ost/seq/sequence_list.hh>
27 #include <ost/mol/chain_type.hh>
29 #include <ost/io/mol/io_profile.hh>
30 #include <ost/io/io_exception.hh>
32 #include <ost/io/mol/mmcif_info.hh>
33 
34 namespace ost { namespace io {
35 
58 class DLLEXPORT_OST_IO MMCifReader : public StarParser {
59 public:
63  MMCifReader(std::istream& stream, mol::EntityHandle& ent_handle,
64  const IOProfile& profile);
65 
69  MMCifReader(const String& filename, mol::EntityHandle& ent_handle,
70  const IOProfile& profile);
71 
75  void Init();
76 
78  void ClearState();
79 
83  void SetRestrictChains(const String& restrict_chains);
84 
90  void SetReadCanonicalSeqRes(bool flag)
91  {
92  seqres_can_ = flag;
93  }
94 
95  const String& GetRestrictChains() const
96  {
97  return restrict_chains_;
98  }
99 
104  void SetAuthChainID(bool id)
105  {
106  auth_chain_id_ = id;
107  }
108 
115  virtual bool OnBeginData(const StringRef& data_name);
116 
122  virtual bool OnBeginLoop(const StarLoopDesc& header); // tested
123 
128  virtual void OnDataRow(const StarLoopDesc& header,
129  const std::vector<StringRef>& columns);
130 
132  virtual void OnEndData();
133 
138  return seqres_;
139  }
140 
144  void SetReadSeqRes(bool flag)
145  {
146  read_seqres_ = flag;
147  }
148 
152  bool GetReadSeqRes() const
153  {
154  return read_seqres_;
155  }
156 
160  const MMCifInfo& GetInfo() { return info_; }
161 
162 protected:
169  void TryStoreIdx(const int mapping,
170  const String& item,
171  const StarLoopDesc& header)
172  {
173  indices_[mapping] = header.GetIndex(item);
174 
175  if (indices_[mapping] == -1) {
176  throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR,
177  "No item '" + item +
178  "' found in '" +
179  header.GetCategory()+
180  "' header",
181  this->GetCurrentLinenum()));
182  }
183  } // tested
184 
190  bool IsValidPDBIdent(const StringRef& pdbid);
191 
204  bool ParseAtomIdent(const std::vector<StringRef>& columns,
205  String& auth_chain_name,
206  String& cif_chain_name,
207  StringRef& res_name,
208  mol::ResNum& resnum,
209  bool& valid_res_num,
210  StringRef& atom_name,
211  char& alt_loc);
212 
216  void ParseAndAddAtom(const std::vector<StringRef>& columns);
217 
221  void ParseEntity(const std::vector<StringRef>& columns);
222 
226  void ParseEntityPoly(const std::vector<StringRef>& columns);
227 
231  void ParseCitation(const std::vector<StringRef>& columns);
232 
233  const MMCifInfoStructRefs& GetStructRefs() const { return struct_refs_; }
242  String ConvertSEQRES(const String& seqres, conop::CompoundLibPtr compound_lib);
246  void ParseCitationAuthor(const std::vector<StringRef>& columns);
247 
249  void ParseStructRef(const std::vector<StringRef>& columns);
250 
252  void ParseStructRefSeq(const std::vector<StringRef>& columns);
253 
255  void ParseStructRefSeqDif(const std::vector<StringRef>& columns);
259  void ParseExptl(const std::vector<StringRef>& columns);
260 
264  void ParseRefine(const std::vector<StringRef>& columns);
265 
269  void ParsePdbxStructAssembly(const std::vector<StringRef>& columns);
270 
274  void ParsePdbxStructAssemblyGen(const std::vector<StringRef>& columns);
275 
276  std::vector<std::vector<String> > UnPackOperExperession(StringRef expression);
277 
278  void StoreExpression(const char* l, const char* s,
279  bool& is_range, int lborder,
280  std::vector<String>& single_block);
281 
282  void StoreRange(const char*& l, const char* s, bool& is_range, int& lborder,
283  std::vector<String>& single_block);
284 
288  void ParsePdbxStructOperList(const std::vector<StringRef>& columns);
289 
293  void ParseStruct(const std::vector<StringRef>& columns);
294 
298  void ParseStructConf(const std::vector<StringRef>& columns);
299 
303  void ParseStructSheetRange(const std::vector<StringRef>& columns);
304 
308  void ParsePdbxDatabasePdbObsSpr(const std::vector<StringRef>& columns);
309 
/// \brief Kinds of secondary structure elements; this is the return type of
///        DetermineSecStructType().
enum MMCifSecStructElement {
  MMCIF_HELIX = 0,
  MMCIF_STRAND,
  MMCIF_TURN
};
316 
320  MMCifSecStructElement DetermineSecStructType(const StringRef& type) const;
321 
325  void AssignSecStructure(mol::EntityHandle ent);
326 
327 private:
/// \brief Fixed sizes used inside the reader.
///
/// MAX_ITEMS_IN_ROW is the length of the indices_ array.
/// NOTE(review): PDBID_LEN presumably is the expected length of a PDB
/// identifier (see IsValidPDBIdent()) - confirm.
enum MMCifMagicNos {
  PDBID_LEN = 4,
  MAX_ITEMS_IN_ROW = 18
};
333 
/// \brief Consecutive slot numbers, starting at 0, with 18 entries.
///
/// NOTE(review): the names look like items of the atom_site category and the
/// values are presumably used as positions in indices_ - confirm.
enum AtomSiteItems {
  AUTH_ASYM_ID = 0,
  AS_ID,
  LABEL_ALT_ID,
  LABEL_ASYM_ID,
  LABEL_ATOM_ID,
  LABEL_COMP_ID,
  LABEL_ENTITY_ID,
  LABEL_SEQ_ID,
  AUTH_SEQ_ID,
  TYPE_SYMBOL,
  CARTN_X,
  CARTN_Y,
  CARTN_Z,
  OCCUPANCY,
  B_ISO_OR_EQUIV,
  PDBX_PDB_INS_CODE,
  GROUP_PDB,
  PDBX_PDB_MODEL_NUM
};
355 
/// \brief Consecutive slot numbers, starting at 0.
///
/// NOTE(review): presumably items of the entity category - confirm.
enum EntityItems {
  E_ID = 0,
  E_TYPE,
  PDBX_DESCRIPTION
};
362 
/// \brief Consecutive slot numbers, starting at 0.
///
/// NOTE(review): presumably items of the entity_poly category - confirm.
enum EntityPolyItems {
  ENTITY_ID = 0,
  EP_TYPE,
  PDBX_SEQ_ONE_LETTER_CODE,
  PDBX_SEQ_ONE_LETTER_CODE_CAN
};
370 
/// \brief Consecutive slot numbers, starting at 0, with 12 entries.
///
/// NOTE(review): presumably items of the citation category - confirm.
enum CitationItems {
  CITATION_ID = 0,
  ABSTRACT_ID_CAS,
  BOOK_ID_ISBN,
  BOOK_TITLE,
  JOURNAL_ABBREV,
  JOURNAL_VOLUME,
  PAGE_FIRST,
  PAGE_LAST,
  PDBX_DATABASE_ID_DOI,
  PDBX_DATABASE_ID_PUBMED,
  YEAR,
  TITLE
};
386 
/// \brief Consecutive slot numbers, starting at 0.
///
/// NOTE(review): presumably items of the citation_author category - confirm.
enum CitationAuthorItems {
  AUTHOR_CITATION_ID = 0,
  AUTHOR_NAME,
  ORDINAL
};
393 
/// \brief Consecutive slot numbers, starting at 0.
///
/// NOTE(review): presumably items of the exptl category - confirm.
enum ExptlItems {
  EXPTL_ENTRY_ID = 0,
  METHOD
};
399 
/// \brief Consecutive slot numbers, starting at 0.
///
/// NOTE(review): presumably items of the refine category - confirm.
enum RefineItems {
  REFINE_ENTRY_ID = 0,
  LS_D_RES_HIGH,
  LS_D_RES_LOW
};
406 
/// \brief Consecutive slot numbers, starting at 0.
///
/// NOTE(review): presumably items of the pdbx_struct_assembly category -
/// confirm.
enum PdbxStructAssemblyItems {
  PSA_DETAILS = 0,
  PSA_ID,
  METHOD_DETAILS
};
413 
/// \brief Items of the struct_ref category, numbered consecutively from 0.
enum StructRefItems {
  SR_ENTITY_ID = 0,
  SR_ID,
  SR_DB_CODE,
  SR_DB_NAME,
  SR_DB_ACCESS
};
422 
/// \brief Consecutive slot numbers, starting at 0, with 7 entries.
///
/// NOTE(review): presumably items of the struct_ref_seq category - confirm.
enum StructRefSeqItems {
  SRS_ALIGN_ID = 0,
  SRS_STRUCT_REF_ID,
  SRS_PDBX_STRAND_ID,
  SRS_DB_ALIGN_BEG,
  SRS_DB_ALIGN_END,
  SRS_ENT_ALIGN_BEG,
  SRS_ENT_ALIGN_END
};
433 
/// \brief Consecutive slot numbers, starting at 0.
///
/// NOTE(review): presumably items of the struct_ref_seq_dif category -
/// confirm.
enum StructRefSeqDifItems {
  SRSD_ALIGN_ID = 0,
  SRSD_SEQ_RNUM,
  SRSD_DB_RNUM,
  SRSD_DETAILS
};
/// \brief Consecutive slot numbers, starting at 0.
///
/// NOTE(review): presumably items of the pdbx_struct_assembly_gen category -
/// confirm.
enum PdbxStructAssemblyGenItems {
  ASSEMBLY_ID = 0,
  ASYM_ID_LIST,
  OPER_EXPRESSION
};
447 
/// \brief Consecutive slot numbers, starting at 0, with 14 entries: an id, a
///        type, three vector components and nine matrix components.
///
/// NOTE(review): presumably items of the pdbx_struct_oper_list category -
/// confirm.
enum PdbxStructOperListItems {
  PSOL_ID = 0,
  PSOL_TYPE,
  VECTOR_1,
  VECTOR_2,
  VECTOR_3,
  MATRIX_1_1,
  MATRIX_1_2,
  MATRIX_1_3,
  MATRIX_2_1,
  MATRIX_2_2,
  MATRIX_2_3,
  MATRIX_3_1,
  MATRIX_3_2,
  MATRIX_3_3
};
465 
/// \brief Consecutive slot numbers, starting at 0, with 8 entries.
///
/// NOTE(review): presumably items of the struct category - confirm.
enum StructItems {
  STRUCT_ENTRY_ID = 0,
  PDBX_CASP_FLAG,
  PDBX_DESCRIPTOR,
  PDBX_FORMULA_WEIGHT,
  PDBX_FORMULA_WEIGHT_METHOD,
  PDBX_MODEL_DETAILS,
  PDBX_MODEL_TYPE_DETAILS,
  STRUCT_TITLE
};
477 
/// \brief Consecutive slot numbers, starting at 0, with 10 entries.
///
/// NOTE(review): presumably items of the struct_conf category - confirm.
enum StructConfItems {
  SC_BEG_AUTH_ASYM_ID = 0,
  SC_BEG_LABEL_ASYM_ID,
  SC_BEG_LABEL_COMP_ID,
  SC_BEG_LABEL_SEQ_ID,
  SC_CONF_TYPE_ID,
  SC_END_AUTH_ASYM_ID,
  SC_END_LABEL_ASYM_ID,
  SC_END_LABEL_COMP_ID,
  SC_END_LABEL_SEQ_ID,
  SC_ID
};
491 
/// \brief Consecutive slot numbers, starting at 0, with 10 entries.
///
/// NOTE(review): presumably items of the struct_sheet_range category -
/// confirm.
enum StructSheetRangeItems {
  SSR_BEG_LABEL_ASYM_ID = 0,
  SSR_BEG_LABEL_COMP_ID,
  SSR_BEG_LABEL_SEQ_ID,
  SSR_END_LABEL_ASYM_ID,
  SSR_END_LABEL_COMP_ID,
  SSR_END_LABEL_SEQ_ID,
  SSR_SHEET_ID,
  SSR_ID,
  SSR_BEG_AUTH_ASYM_ID,
  SSR_END_AUTH_ASYM_ID
};
505 
/// \brief Consecutive slot numbers, starting at 0.
///
/// NOTE(review): presumably items of the pdbx_database_PDB_obs_spr category -
/// confirm.
enum PdbxDatabasePDBObsSpr {
  DATE = 0,
  PDPOS_ID,
  PDB_ID,
  REPLACE_PDB_ID
};
513 
/// \brief Categories distinguished by the reader, numbered consecutively
///        from 0.
///
/// DONT_KNOW is the last enumerator; the category_counts_ array is sized
/// DONT_KNOW+1, i.e. it has one slot per enumerator of this type.
enum MMCifCategory {
  ATOM_SITE = 0,
  ENTITY,
  ENTITY_POLY,
  CITATION,
  CITATION_AUTHOR,
  EXPTL,
  REFINE,
  PDBX_STRUCT_ASSEMBLY,
  PDBX_STRUCT_ASSEMBLY_GEN,
  PDBX_STRUCT_OPER_LIST,
  STRUCT,
  STRUCT_CONF,
  STRUCT_SHEET_RANGE,
  PDBX_DATABASE_PDB_OBS_SPR,
  STRUCT_REF,
  STRUCT_REF_SEQ,
  STRUCT_REF_SEQ_DIF,
  DONT_KNOW
};
535 
537  typedef struct {
538  mol::ChainType type;
539  String details;
540  String seqres;
541  } MMCifEntityDesc;
542  typedef std::map<String, MMCifEntityDesc> MMCifEntityDescMap;
543 
545  typedef struct {
546  MMCifInfoBioUnit biounit;
547  std::vector<std::vector<String> > operations;
548 
549  } MMCifBioUAssembly;
550  typedef std::vector<MMCifBioUAssembly> MMCifBioUAssemblyVector;
551 
552  typedef std::map<String, std::pair<std::vector<int>, std::vector<String> > >
553  MMCifCitationAuthorMap;
554 
556  typedef struct {
557  mol::ResNum start;
558  mol::ResNum end;
559  String chain_name;
560  } MMCifHSEntry;
561  typedef std::vector<MMCifHSEntry> MMCifHSVector;
562 
563  // members
564  MMCifCategory category_;
565  int category_counts_[DONT_KNOW+1];
566  int indices_[MAX_ITEMS_IN_ROW];
567  const IOProfile& profile_;
568  mol::EntityHandle& ent_handle_;
569  String restrict_chains_;
570  bool auth_chain_id_;
571  bool seqres_can_;
572  mol::ChainHandle curr_chain_;
573  mol::ResidueHandle curr_residue_;
574  int chain_count_;
575  int residue_count_;
576  int atom_count_;
577  bool warned_name_mismatch_;
578  bool warned_rule_based_;
579  String subst_res_id_;
580  bool has_model_;
581  int curr_model_;
582  std::vector<std::pair<mol::ChainHandle, String> > chain_id_pairs_;
584  MMCifEntityDescMap entity_desc_map_;
585  seq::SequenceList seqres_;
586  bool read_seqres_;
587  MMCifInfo info_;
588  MMCifCitationAuthorMap authors_map_;
589  MMCifBioUAssemblyVector bu_assemblies_;
590  std::map<String, String> bu_origin_map_;
591  MMCifHSVector helix_list_;
592  MMCifHSVector strand_list_;
593  MMCifInfoStructRefs struct_refs_;
594 };
595 
596 }}
597 
598 #endif