OpenStructure
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
mmcif_reader.hh
Go to the documentation of this file.
1 //------------------------------------------------------------------------------
2 // This file is part of the OpenStructure project <www.openstructure.org>
3 //
4 // Copyright (C) 2008-2011 by the OpenStructure authors
5 //
6 // This library is free software; you can redistribute it and/or modify it under
7 // the terms of the GNU Lesser General Public License as published by the Free
8 // Software Foundation; either version 3.0 of the License, or (at your option)
9 // any later version.
10 // This library is distributed in the hope that it will be useful, but WITHOUT
11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 // FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
13 // details.
14 //
15 // You should have received a copy of the GNU Lesser General Public License
16 // along with this library; if not, write to the Free Software Foundation, Inc.,
17 // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 //------------------------------------------------------------------------------
19 #ifndef OST_MMCIF_READER_HH
20 #define OST_MMCIF_READER_HH
21 
22 #include <map>
23 
24 #include <ost/geom/geom.hh>
25 #include <ost/seq/sequence_list.hh>
27 #include <ost/mol/chain_type.hh>
29 #include <ost/io/mol/io_profile.hh>
30 #include <ost/io/io_exception.hh>
32 #include <ost/io/mol/mmcif_info.hh>
33 
34 namespace ost { namespace io {
35 
60 public:
/// \brief Construct a reader that takes its input from \p stream.
///
/// \param stream      input stream handed to the reader
/// \param ent_handle  entity handle, taken by non-const reference
/// \param profile     IO profile, taken by const reference
///
/// NOTE(review): the class stores `mol::EntityHandle& ent_handle_` and
/// `const IOProfile& profile_` as reference members, so presumably both
/// arguments must outlive the reader - confirm in the implementation file.
64  MMCifReader(std::istream& stream, mol::EntityHandle& ent_handle,
65  const IOProfile& profile);
66 
/// \brief Construct a reader whose input is named by \p filename.
///
/// \param filename    name of the input file
/// \param ent_handle  entity handle, taken by non-const reference
/// \param profile     IO profile, taken by const reference
70  MMCifReader(const String& filename, mol::EntityHandle& ent_handle,
71  const IOProfile& profile);
72 
// The three functions below are only declared here; their definitions are
// not part of this header.

/// NOTE(review): presumably the set-up shared by both constructors - confirm.
76  void Init();
77 
/// NOTE(review): presumably resets the per-parse bookkeeping members
/// (current chain/residue, counters, ...) - confirm in the implementation.
79  void ClearState();
80 
/// \param restrict_chains  chain restriction string; presumably stored in
///        restrict_chains_, which GetRestrictChains() returns - confirm.
84  void SetRestrictChains(const String& restrict_chains);
85 
/// \brief Store \p flag in seqres_can_.
///
/// NOTE(review): judging by the names, this presumably selects
/// PDBX_SEQ_ONE_LETTER_CODE_CAN over PDBX_SEQ_ONE_LETTER_CODE when reading
/// SEQRES - confirm where seqres_can_ is consumed.
91  void SetReadCanonicalSeqRes(bool flag)
92  {
93  seqres_can_ = flag;
94  }
95 
/// \brief Read-only access to restrict_chains_.
///
/// \return const reference to the member; it refers to this object's storage.
96  const String& GetRestrictChains() const
97  {
98  return restrict_chains_;
99  }
100 
/// \brief Store \p id in auth_chain_id_.
///
/// \param id  boolean switch (not an identifier, despite the name).
///        NOTE(review): presumably chooses the author chain name
///        (AUTH_ASYM_ID) over the label one (LABEL_ASYM_ID) - confirm.
105  void SetAuthChainID(bool id)
106  {
107  auth_chain_id_ = id;
108  }
109 
// Virtual parser callbacks, declared here and defined out of line.
// NOTE(review): the base class is not visible in this listing; presumably
// these override hooks of a STAR parser base - confirm.

/// \param data_name  name of the data block being entered
116  virtual bool OnBeginData(const StringRef& data_name);
117 
/// \param header  description of the loop that starts
123  virtual bool OnBeginLoop(const StarLoopDesc& header); // tested
124 
/// \param header   description of the loop the row belongs to
/// \param columns  the row's values, one StringRef per column
129  virtual void OnDataRow(const StarLoopDesc& header,
130  const std::vector<StringRef>& columns);
131 
133  virtual void OnEndData();
134 
139  return seqres_;
140  }
141 
/// \brief Store \p flag in read_seqres_.
///
/// \param flag  new value; GetReadSeqRes() returns it afterwards.
145  void SetReadSeqRes(bool flag)
146  {
147  read_seqres_ = flag;
148  }
149 
/// \brief Return the current value of read_seqres_ (set by SetReadSeqRes()).
153  bool GetReadSeqRes() const
154  {
155  return read_seqres_;
156  }
157 
/// \brief Read-only access to info_.
///
/// Const-qualified so it can be called on a const reader, matching the other
/// read-only accessors of this class.
///
/// \return const reference to the member; it refers to this object's storage.
161  const MMCifInfo& GetInfo() const { return info_; }
162 
163 protected:
/// \brief Look up the column index of \p item in \p header and cache it.
///
/// Writes header.GetIndex(item) into indices_[mapping]. If that value is -1,
/// an IOException is thrown; its message is built with FormatDiagnostic
/// (STAR_DIAG_ERROR, current line number) and names the item and the
/// header's category.
///
/// \param mapping  slot of indices_ to fill. It is not range-checked here, so
///                 it must lie in [0, MAX_ITEMS_IN_ROW), the size of indices_.
/// \param item     item name to search for in \p header
/// \param header   loop description that is queried
///
/// \throws IOException if \p header has no index for \p item
170  void TryStoreIdx(const int mapping,
171  const String& item,
172  const StarLoopDesc& header)
173  {
174  indices_[mapping] = header.GetIndex(item);
175 
     // The slot has already been written at this point, so on failure it
     // keeps the value -1.
176  if (indices_[mapping] == -1) {
177  throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR,
178  "No item '" + item +
179  "' found in '" +
180  header.GetCategory()+
181  "' header",
182  this->GetCurrentLinenum()));
183  }
184  } // tested
185 
/// \param pdbid  candidate PDB identifier
/// NOTE(review): presumably checks against PDBID_LEN (4) among other things;
/// the definition is not in this header - confirm.
191  bool IsValidPDBIdent(const StringRef& pdbid);
192 
/// \brief Extract the identifying fields of one atom row.
///
/// \p columns is the only input. All other parameters are non-const
/// references, i.e. outputs filled by the function.
///
/// \return bool; NOTE(review): presumably false means the row should not be
///         turned into an atom - confirm in the implementation.
205  bool ParseAtomIdent(const std::vector<StringRef>& columns,
206  String& auth_chain_name,
207  String& cif_chain_name,
208  StringRef& res_name,
209  mol::ResNum& resnum,
210  bool& valid_res_num,
211  StringRef& atom_name,
212  char& alt_loc);
213 
// Row handlers, declared here and defined out of line. Each receives the
// values of one data row in \p columns.
// NOTE(review): presumably dispatched from OnDataRow() according to
// category_, with column positions taken from indices_ - confirm.

217  void ParseAndAddAtom(const std::vector<StringRef>& columns);
218 
222  void ParseEntity(const std::vector<StringRef>& columns);
223 
227  void ParseEntityPoly(const std::vector<StringRef>& columns);
228 
232  void ParseCitation(const std::vector<StringRef>& columns);
233 
/// \brief Read-only access to struct_refs_.
///
/// \return const reference to the member; it refers to this object's storage.
234  const MMCifInfoStructRefs& GetStructRefs() const { return struct_refs_; }
/// \param seqres        sequence string to convert
/// \param compound_lib  compound library, passed by value
/// \return the converted sequence as a new String
/// NOTE(review): the conversion rules live in the implementation file.
243  String ConvertSEQRES(const String& seqres, conop::CompoundLibPtr compound_lib);
// Row handlers for further categories; each receives one data row in
// \p columns. Declarations only - the definitions are not in this header.
247  void ParseCitationAuthor(const std::vector<StringRef>& columns);
248 
250  void ParseStructRef(const std::vector<StringRef>& columns);
251 
253  void ParseStructRefSeq(const std::vector<StringRef>& columns);
254 
256  void ParseStructRefSeqDif(const std::vector<StringRef>& columns);
260  void ParseExptl(const std::vector<StringRef>& columns);
261 
265  void ParseRefine(const std::vector<StringRef>& columns);
266 
270  void ParsePdbxStructAssembly(const std::vector<StringRef>& columns);
271 
275  void ParsePdbxStructAssemblyGen(const std::vector<StringRef>& columns);
276 
/// \param expression  operator expression, passed by value
/// \return a list of blocks, each block being a list of strings. This is the
///         same type as MMCifBioUAssembly::operations.
/// Note: "Experession" is the spelling of the declared name and has to be
/// used as is by callers.
277  std::vector<std::vector<String> > UnPackOperExperession(StringRef expression);
278 
/// Helper taking \p l and \p lborder by value; \p is_range and
/// \p single_block are in/out references.
/// NOTE(review): presumably \p l and \p s delimit a character range of the
/// expression being unpacked - confirm in the implementation.
279  void StoreExpression(const char* l, const char* s,
280  bool& is_range, int lborder,
281  std::vector<String>& single_block);
282 
/// Unlike StoreExpression(), \p l and \p lborder are taken by reference here,
/// so this function can update the caller's values.
283  void StoreRange(const char*& l, const char* s, bool& is_range, int& lborder,
284  std::vector<String>& single_block);
285 
// Row handlers for the remaining categories; each receives one data row in
// \p columns. Declarations only - the definitions are not in this header.
289  void ParsePdbxStructOperList(const std::vector<StringRef>& columns);
290 
294  void ParseDatabasePDBRev(const std::vector<StringRef>& columns);
295 
299  void ParseStruct(const std::vector<StringRef>& columns);
300 
304  void ParseStructConf(const std::vector<StringRef>& columns);
305 
309  void ParseStructSheetRange(const std::vector<StringRef>& columns);
310 
314  void ParsePdbxDatabasePdbObsSpr(const std::vector<StringRef>& columns);
315 
/// \brief Secondary structure element kinds, as returned by
///        DetermineSecStructType().
// NOTE(review): the listing numbering jumps from 317 to 320 here, so
// enumerators preceding MMCIF_TURN appear to be missing from this capture -
// check against the real header.
317  typedef enum {
320  MMCIF_TURN
321  } MMCifSecStructElement;
322 
/// \param type  textual type of a secondary structure record
/// \return the matching MMCifSecStructElement
/// Const member: does not modify the reader.
326  MMCifSecStructElement DetermineSecStructType(const StringRef& type) const;
327 
/// \param ent  entity handle, passed by value
/// NOTE(review): presumably applies the collected helix_list_ and
/// strand_list_ ranges to \p ent - confirm in the implementation.
331  void AssignSecStructure(mol::EntityHandle ent);
332 
333 private:
/// \brief Numeric constants used by the reader.
///
/// MAX_ITEMS_IN_ROW is the size of the indices_ array. The largest item enum
/// in this class, AtomSiteItems, has exactly 18 enumerators, so adding an
/// item there requires raising this value as well.
/// NOTE(review): PDBID_LEN is presumably the expected length of a PDB
/// identifier (see IsValidPDBIdent) - confirm.
335  typedef enum {
336  PDBID_LEN=4,
337  MAX_ITEMS_IN_ROW=18,
338  } MMCifMagicNos;
339 
/// \brief Item identifiers for the atom_site category (category name taken
///        from the enum name).
///
/// Values run from 0 (AUTH_ASYM_ID) to 17 (PDBX_PDB_MODEL_NUM), which fills
/// an array of MAX_ITEMS_IN_ROW (18) entries completely.
/// NOTE(review): presumably used as the `mapping` argument of TryStoreIdx()
/// and as subscripts of indices_ - confirm in the implementation.
341  typedef enum {
342  AUTH_ASYM_ID,
343  AS_ID,
344  LABEL_ALT_ID,
345  LABEL_ASYM_ID,
346  LABEL_ATOM_ID,
347  LABEL_COMP_ID,
348  LABEL_ENTITY_ID,
349  LABEL_SEQ_ID,
350  AUTH_SEQ_ID,
351  TYPE_SYMBOL,
352  CARTN_X,
353  CARTN_Y,
354  CARTN_Z,
355  OCCUPANCY,
356  B_ISO_OR_EQUIV,
357  PDBX_PDB_INS_CODE,
358  GROUP_PDB,
359  PDBX_PDB_MODEL_NUM
360  } AtomSiteItems;
361 
/// \brief Item identifiers for the entity category (category name taken from
///        the enum name). Values start at 0.
363  typedef enum {
364  E_ID,
365  E_TYPE,
366  PDBX_DESCRIPTION
367  } EntityItems;
368 
/// \brief Item identifiers for the entity_poly category (category name taken
///        from the enum name). Values start at 0.
370  typedef enum {
371  ENTITY_ID,
372  EP_TYPE,
373  PDBX_SEQ_ONE_LETTER_CODE,
374  PDBX_SEQ_ONE_LETTER_CODE_CAN
375  } EntityPolyItems;
376 
/// \brief Item identifiers for the citation category (category name taken
///        from the enum name). Values start at 0.
378  typedef enum {
379  CITATION_ID,
380  ABSTRACT_ID_CAS,
381  BOOK_ID_ISBN,
382  BOOK_TITLE,
383  JOURNAL_ABBREV,
384  JOURNAL_VOLUME,
385  PAGE_FIRST,
386  PAGE_LAST,
387  PDBX_DATABASE_ID_DOI,
388  PDBX_DATABASE_ID_PUBMED,
389  YEAR,
390  TITLE
391  } CitationItems;
392 
/// \brief Item identifiers for the citation_author category (category name
///        taken from the enum name). Values start at 0.
394  typedef enum {
395  AUTHOR_CITATION_ID,
396  AUTHOR_NAME,
397  ORDINAL
398  } CitationAuthorItems;
399 
/// \brief Item identifiers for the exptl category (category name taken from
///        the enum name). Values start at 0.
401  typedef enum {
402  EXPTL_ENTRY_ID,
403  METHOD
404  } ExptlItems;
405 
/// \brief Item identifiers for the refine category (category name taken from
///        the enum name). Values start at 0.
407  typedef enum {
408  REFINE_ENTRY_ID,
409  LS_D_RES_HIGH,
410  LS_D_RES_LOW
411  } RefineItems;
412 
/// \brief Item identifiers for the pdbx_struct_assembly category (category
///        name taken from the enum name). Values start at 0.
414  typedef enum {
415  PSA_DETAILS,
416  PSA_ID,
417  METHOD_DETAILS
418  } PdbxStructAssemblyItems;
419 
420  /// \brief Item identifiers for the struct_ref category. Values start at 0.
421  typedef enum {
422  SR_ENTITY_ID,
423  SR_ID,
424  SR_DB_CODE,
425  SR_DB_NAME,
426  SR_DB_ACCESS
427  } StructRefItems;
428 
/// \brief Item identifiers for the struct_ref_seq category (category name
///        taken from the enum name). Values start at 0.
430  typedef enum {
431  SRS_ALIGN_ID,
432  SRS_STRUCT_REF_ID,
433  SRS_PDBX_STRAND_ID,
434  SRS_DB_ALIGN_BEG,
435  SRS_DB_ALIGN_END,
436  SRS_ENT_ALIGN_BEG,
437  SRS_ENT_ALIGN_END
438  } StructRefSeqItems;
439 
/// \brief Item identifiers for the struct_ref_seq_dif category (category name
///        taken from the enum name). Values start at 0.
441  typedef enum {
442  SRSD_ALIGN_ID,
443  SRSD_SEQ_RNUM,
444  SRSD_DB_RNUM,
445  SRSD_DETAILS
446  } StructRefSeqDifItems;
447 
/// \brief Item identifiers for the pdbx_struct_assembly_gen category
///        (category name taken from the enum name). Values start at 0.
449  typedef enum {
450  ASSEMBLY_ID,
451  ASYM_ID_LIST,
452  OPER_EXPRESSION
453  } PdbxStructAssemblyGenItems;
454 
/// \brief Item identifiers for the pdbx_struct_oper_list category (category
///        name taken from the enum name). Values start at 0.
///
/// Besides id and type there are three vector components and nine matrix
/// components, the latter named MATRIX_<row>_<column>.
456  typedef enum {
457  PSOL_ID,
458  PSOL_TYPE,
459  VECTOR_1,
460  VECTOR_2,
461  VECTOR_3,
462  MATRIX_1_1,
463  MATRIX_1_2,
464  MATRIX_1_3,
465  MATRIX_2_1,
466  MATRIX_2_2,
467  MATRIX_2_3,
468  MATRIX_3_1,
469  MATRIX_3_2,
470  MATRIX_3_3
471  } PdbxStructOperListItems;
472 
/// \brief Item identifiers for the struct category (category name taken from
///        the enum name). Values start at 0.
474  typedef enum {
475  STRUCT_ENTRY_ID,
476  PDBX_CASP_FLAG,
477  PDBX_DESCRIPTOR,
478  PDBX_FORMULA_WEIGHT,
479  PDBX_FORMULA_WEIGHT_METHOD,
480  PDBX_MODEL_DETAILS,
481  PDBX_MODEL_TYPE_DETAILS,
482  STRUCT_TITLE
483  } StructItems;
484 
/// \brief Item identifiers for the struct_conf category (category name taken
///        from the enum name). Values start at 0.
///
/// The SC_BEG_* and SC_END_* enumerators come in matching pairs for the two
/// ends of a record.
486  typedef enum {
487  SC_BEG_AUTH_ASYM_ID,
488  SC_BEG_LABEL_ASYM_ID,
489  SC_BEG_LABEL_COMP_ID,
490  SC_BEG_LABEL_SEQ_ID,
491  SC_CONF_TYPE_ID,
492  SC_END_AUTH_ASYM_ID,
493  SC_END_LABEL_ASYM_ID,
494  SC_END_LABEL_COMP_ID,
495  SC_END_LABEL_SEQ_ID,
496  SC_ID,
497  } StructConfItems;
498 
/// \brief Item identifiers for the struct_sheet_range category (category name
///        taken from the enum name). Values start at 0.
///
/// The SSR_BEG_* and SSR_END_* enumerators come in matching pairs for the two
/// ends of a range.
500  typedef enum {
501  SSR_BEG_LABEL_ASYM_ID,
502  SSR_BEG_LABEL_COMP_ID,
503  SSR_BEG_LABEL_SEQ_ID,
504  SSR_END_LABEL_ASYM_ID,
505  SSR_END_LABEL_COMP_ID,
506  SSR_END_LABEL_SEQ_ID,
507  SSR_SHEET_ID,
508  SSR_ID,
509  SSR_BEG_AUTH_ASYM_ID,
510  SSR_END_AUTH_ASYM_ID,
511  } StructSheetRangeItems;
512 
/// \brief Item identifiers for the pdbx_database_PDB_obs_spr category
///        (category name taken from the enum name). Values start at 0.
// Unlike the other item enums, this type name carries no "Items" suffix.
514  typedef enum {
515  DATE,
516  PDPOS_ID,
517  PDB_ID,
518  REPLACE_PDB_ID,
519  } PdbxDatabasePDBObsSpr;
520 
/// \brief Item identifiers for the database_PDB_rev category (category name
///        taken from the enum name). Values start at 0.
522  typedef enum {
523  DPI_NUM,
524  DPI_DATE,
525  DPI_DATE_ORIGINAL,
526  DPI_STATUS,
527  } DatabasePDBRevItems;
528 
/// \brief Categories the reader distinguishes; the type of category_.
///
/// DONT_KNOW must stay the last enumerator: category_counts_ is declared
/// with DONT_KNOW+1 elements, i.e. one slot per enumerator including
/// DONT_KNOW itself.
530  typedef enum {
531  ATOM_SITE,
532  ENTITY,
533  ENTITY_POLY,
534  CITATION,
535  CITATION_AUTHOR,
536  EXPTL,
537  REFINE,
538  PDBX_STRUCT_ASSEMBLY,
539  PDBX_STRUCT_ASSEMBLY_GEN,
540  PDBX_STRUCT_OPER_LIST,
541  STRUCT,
542  STRUCT_CONF,
543  STRUCT_SHEET_RANGE,
544  PDBX_DATABASE_PDB_OBS_SPR,
545  STRUCT_REF,
546  STRUCT_REF_SEQ,
547  STRUCT_REF_SEQ_DIF,
548  DATABASE_PDB_REV,
549  DONT_KNOW
550  } MMCifCategory;
551 
/// \brief Per-entity record: chain type, free-text details and a SEQRES
///        string.
553  typedef struct {
554  mol::ChainType type;
555  String details;
556  String seqres;
557  } MMCifEntityDesc;
/// Map from a String key to its entity record; the type of entity_desc_map_.
/// NOTE(review): the key is presumably the entity id - confirm.
558  typedef std::map<String, MMCifEntityDesc> MMCifEntityDescMap;
559 
/// \brief Intermediate record for one biounit assembly: its id, a list of
///        chain names and a list of operation blocks.
561  typedef struct {
562  String biounit_id;
563  std::vector<String> chains;
564 
     // Same type as the return value of UnPackOperExperession().
565  std::vector<std::vector<String> > operations;
566 
567  } MMCifBioUAssembly;
/// List of assembly records; the type of bu_assemblies_.
568  typedef std::vector<MMCifBioUAssembly> MMCifBioUAssemblyVector;
569 
/// \brief Map from a String key to a pair of parallel-typed lists
///        (ints, Strings); the type of authors_map_.
/// NOTE(review): presumably citation id -> (ordinals, author names), going by
/// CitationAuthorItems - confirm in the implementation.
570  typedef std::map<String, std::pair<std::vector<int>, std::vector<String> > >
571  MMCifCitationAuthorMap;
572 
/// \brief A residue range (start, end) on a named chain.
574  typedef struct {
575  mol::ResNum start;
576  mol::ResNum end;
577  String chain_name;
578  } MMCifHSEntry;
/// List of ranges; the type of both helix_list_ and strand_list_.
579  typedef std::vector<MMCifHSEntry> MMCifHSVector;
580 
/// \brief Two free-text fields; the names match PSA_DETAILS and
///        METHOD_DETAILS of PdbxStructAssemblyItems.
582  typedef struct {
583  String details;
584  String method_details;
585  } MMCifPSAEntry;
/// Map from a String key to its entry; the type of bu_origin_map_.
/// NOTE(review): the key is presumably the assembly id (PSA_ID) - confirm.
586  typedef std::map<String, MMCifPSAEntry> MMCifPSAMap;
587 
588  // members
589  MMCifCategory category_;
     // One counter per MMCifCategory enumerator, DONT_KNOW included.
590  int category_counts_[DONT_KNOW+1];
     // Column indices cached by TryStoreIdx(); -1 marks "item not found".
591  int indices_[MAX_ITEMS_IN_ROW];
     // Reference members: the referenced objects are owned elsewhere.
592  const IOProfile& profile_;
593  mol::EntityHandle& ent_handle_;
594  String restrict_chains_; // returned by GetRestrictChains()
595  bool auth_chain_id_; // written by SetAuthChainID()
596  bool seqres_can_; // written by SetReadCanonicalSeqRes()
597  mol::ChainHandle curr_chain_;
598  mol::ResidueHandle curr_residue_;
599  int chain_count_;
600  int residue_count_;
601  int atom_count_;
602  bool warned_name_mismatch_;
603  bool warned_rule_based_;
604  String subst_res_id_;
605  bool has_model_;
606  int curr_model_;
607  std::vector<std::pair<mol::ChainHandle, String> > chain_id_pairs_;
609  MMCifEntityDescMap entity_desc_map_;
610  seq::SequenceList seqres_;
611  bool read_seqres_; // written by SetReadSeqRes(), read by GetReadSeqRes()
612  MMCifInfo info_; // returned by GetInfo()
613  MMCifCitationAuthorMap authors_map_;
614  MMCifBioUAssemblyVector bu_assemblies_;
615  MMCifPSAMap bu_origin_map_;
616  MMCifHSVector helix_list_;
617  MMCifHSVector strand_list_;
618  MMCifInfoStructRefs struct_refs_; // returned by GetStructRefs()
619 };
620 
621 }}
622 
623 #endif