OpenStructure
omf.hh
Go to the documentation of this file.
1 //------------------------------------------------------------------------------
2 // This file is part of the OpenStructure project <www.openstructure.org>
3 //
4 // Copyright (C) 2008-2020 by the OpenStructure authors
5 //
6 // This library is free software; you can redistribute it and/or modify it under
7 // the terms of the GNU Lesser General Public License as published by the Free
8 // Software Foundation; either version 3.0 of the License, or (at your option)
9 // any later version.
10 // This library is distributed in the hope that it will be useful, but WITHOUT
11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 // FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
13 // details.
14 //
15 // You should have received a copy of the GNU Lesser General Public License
16 // along with this library; if not, write to the Free Software Foundation, Inc.,
17 // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 //------------------------------------------------------------------------------
19 #ifndef OST_IO_OMF_HH
20 #define OST_IO_OMF_HH
21 
22 #include <unordered_map>
23 #include <unordered_set>
24 #include <fstream>
25 #include <boost/iostreams/filtering_stream.hpp>
26 #include <ost/mol/entity_handle.hh>
27 #include <ost/geom/mat4.hh>
28 #include <ost/io/io_exception.hh>
29 #include <ost/io/mmcif_info.hh>
30 
31 namespace ost { namespace io {
32 
// Version number of the OMF format; this is the value returned by
// OMF::GetCurrentOMFVersion().
33 const int OMF_VERSION = 3;
34 
// Forward declarations. ChainData is defined further down with the `struct`
// class-key, so it is forward-declared with the same key here: mixing `class`
// and `struct` for one type triggers -Wmismatched-tags (clang) / C4099 (MSVC)
// and can lead to link errors with MSVC.
35 struct ChainData;
36 class OMF;
// Shared-ownership pointer aliases for OMF and ChainData.
// NOTE(review): boost::shared_ptr is used without a direct include of
// <boost/shared_ptr.hpp> in this header; presumably it arrives through the
// ost headers - confirm.
37 typedef boost::shared_ptr<OMF> OMFPtr;
38 typedef boost::shared_ptr<ChainData> ChainDataPtr;
39 
// Rule record for one sidechain atom: three anchor indices, an angle and a
// selector for the dihedral to use. Presumably describes how the atom is
// placed relative to its anchors - TODO confirm. Instances are exposed via
// ResidueDefinition::GetSidechainAtomRules().
// NOTE(review): the comment inside this struct refers to base_dihedral, and
// ResidueDefinition::_AddAtomRule additionally takes a_idx and bond_length,
// but no such members are declared in the lines visible here. The listing
// numbering skips 41, 43 and 50, so those declarations were presumably
// dropped - confirm against the original header.
40 struct SidechainAtomRule {
    // the three anchors; presumably indices into the atoms of the owning
    // residue definition - TODO confirm
42  int anchor_idx[3];
44  Real angle;
45  // 0: chi1, 1: chi2, 2: chi3, 3: chi4, 4: 0.0
46  int dihedral_idx;
47  // the value of the dihedral above will be added to base_dihedral to get
48  // the final dihedral angle. If you want to have the effect of chi3 + M_PI
49  // you define dihedral_idx as 2 and base_dihedral = M_PI.
51 };
52 
// Four integer indices describing one chi angle. Presumably these are the
// indices (within a ResidueDefinition) of the four atoms spanning the
// dihedral - TODO confirm. Instances are exposed via
// ResidueDefinition::GetChiDefinitions() and take the same four values that
// ResidueDefinition::_AddChiDefinition() accepts.
53 struct ChiDefinition{
54  int idx_one;
55  int idx_two;
56  int idx_three;
57  int idx_four;
58 };
59 
// Description of one residue type: name and classification characters,
// per-atom names / elements / hetatm flags, bonds with their orders, and
// sidechain related data (chi definitions, sidechain atom rules).
// NOTE(review): the listing numbering skips 62 and 64 right after the opening
// brace; the index at the end of this listing mentions a
// ResidueDefinition(const ost::mol::ResidueHandle &res) constructor that is
// presumably declared there - confirm against the original header.
// NOTE(review): std::map, std::set and std::vector are used below although
// this header does not include <map>, <set> or <vector> directly; presumably
// they arrive through the ost headers - confirm.
60 struct ResidueDefinition {
61 
63 
65 
    // Equality compares name, olc, chem_type, chem_class, anames, elements,
    // is_hetatm, bonds and bond_orders only. idx_mapper, rotameric_atoms,
    // chi_definitions, sidechain_atom_rules and critical_sidechain_angles
    // do not take part in the comparison.
66  bool operator==(const ResidueDefinition& other) const {
67  return (name == other.name &&
68  olc == other.olc &&
69  chem_type == other.chem_type &&
70  chem_class == other.chem_class &&
71  anames == other.anames &&
72  elements == other.elements &&
73  is_hetatm == other.is_hetatm &&
74  bonds == other.bonds &&
75  bond_orders == other.bond_orders);
76  }
77 
    // Negation of operator==.
78  bool operator!=(const ResidueDefinition& other) const {
79  return !(*this == other);
80  }
81 
    // Stream serialization; ToStream writes to, FromStream reads from the
    // given stream. The format is defined in the implementation file.
82  void ToStream(std::ostream& stream) const;
83 
84  void FromStream(std::istream& stream);
85 
    // Presumably returns the index of the atom named aname - TODO confirm
    // the behaviour for unknown names in the implementation.
86  int GetIdx(const String& aname) const;
87 
88  const std::set<int>& GetRotamericAtoms() const;
89 
90  const std::vector<ChiDefinition>& GetChiDefinitions() const;
91 
92  const std::vector<SidechainAtomRule>& GetSidechainAtomRules() const;
93 
94  int GetNChiAngles() const;
95 
    // const, yet idx_mapper is declared mutable below - presumably this
    // fills idx_mapper on demand; confirm in the implementation.
96  void _InitIdxMapper() const;
97 
98  void _AddChiDefinition(int idx_one, int idx_two, int idx_three,
99  int idx_four);
100 
101  void _AddAtomRule(int a_idx, int anch_one_idx,
102  int anch_two_idx, int anch_three_idx,
103  Real bond_length, Real angle, int dihedral_idx,
104  Real base_dihedral);
105 
     // members compared by operator==
106  String name;
     // presumably the one letter code - TODO confirm
107  char olc;
108  char chem_type;
109  char chem_class;
     // per-atom data; presumably parallel vectors indexed by atom index -
     // TODO confirm
110  std::vector<String> anames;
111  std::vector<String> elements;
112  std::vector<bool> is_hetatm;
     // bonds and their orders; the layout of bonds (e.g. flattened index
     // pairs) is not visible from this header - TODO confirm
113  std::vector<int> bonds;
114  std::vector<int> bond_orders;
     // members NOT compared by operator==
     // String -> int lookup; mutable so const member functions can modify it
115  mutable std::map<String, int> idx_mapper;
116  std::set<int> rotameric_atoms;
117  std::vector<ChiDefinition> chi_definitions;
118  std::vector<SidechainAtomRule> sidechain_atom_rules;
119  std::set<int> critical_sidechain_angles;
120 };
121 
// Per-chain data: chain name, per-residue vectors, per-atom vectors and the
// inter-residue bonds, together with stream (de)serialization.
// NOTE(review): the listing numbering skips 124, 126, 146 and 157. According
// to the index at the end of this listing, the dangling parameter list below
// belongs to a ChainData(const ost::mol::ChainHandle &chain, ...) constructor
// and the struct also has the members `ost::mol::ChainType chain_type`
// (line 146) and `geom::Vec3List positions` (line 157) - confirm against the
// original header.
122 struct ChainData {
123 
125 
     // remaining constructor parameters (the header line with the first
     // parameter is not visible here); atom_idx_mapper is taken by non-const
     // reference, so the constructor can modify it
127  const std::vector<ResidueDefinition>& residue_definitions,
128  const std::unordered_map<unsigned long, int>& res_idx_map,
129  const std::vector<std::pair<unsigned long, unsigned long> >&
130  inter_residue_bonds,
131  const std::vector<int>& inter_residue_bond_orders,
132  std::unordered_map<long, int>& atom_idx_mapper);
133 
     // Writes this chain to stream. res_def supplies the residue definitions;
     // max_error and the three flags carry the same names as the OMF options
     // (AVG_BFACTORS, ROUND_BFACTORS, SKIP_SS) - their exact effect is
     // defined in the implementation file.
134  void ToStream(std::ostream& stream,
135  const std::vector<ResidueDefinition>& res_def,
136  Real max_error, bool avg_bfactors, bool round_bfactors,
137  bool skip_ss) const;
138 
     // Reads this chain from stream; additionally takes the format version
     // of the data being read.
139  void FromStream(std::istream& stream,
140  const std::vector<ResidueDefinition>& res_def,
141  int version, Real max_error, bool avg_bfactors,
142  bool round_bfactors, bool skip_ss);
143 
144  // chain features
145  String ch_name;
147 
148  // residue features
     // presumably one entry per residue; res_def_indices presumably indexes
     // into the residue definition vector passed to the functions above -
     // TODO confirm
149  std::vector<int> res_def_indices;
150  std::vector<int> rnums;
151  std::vector<char> insertion_codes;
152  std::vector<char> sec_structures;
153 
154  // atom features
155  std::vector<Real> occupancies;
156  std::vector<Real> bfactors;
158 
159  // bond features - only for bonds that are inter-residue
160  // e.g. peptide bonds
161  std::vector<int> bonds;
162  std::vector<int> bond_orders;
163 };
164 
165 
// Holder of a vector of residue definitions that is accessed through a
// single function-local static instance. The default constructor, copy
// constructor and copy assignment are private, so code outside the class can
// neither create nor copy instances.
// Presumably this is the default set of peptide residue definitions selected
// by the DEFAULT_PEPLIB option - TODO confirm.
// NOTE(review): the listing numbering skips 168; per the index at the end of
// this listing that line is `static DefaultPepLib & Instance()`, the header
// of the accessor whose body follows - confirm against the original header.
166 class DefaultPepLib{
167 public:
     // body of the accessor: returns the function-local static instance
169  static DefaultPepLib instance;
170  return instance;
171  }
172  std::vector<ResidueDefinition> residue_definitions;
173 
174 private:
     // private to restrict construction and copying to the class itself
175  DefaultPepLib();
176  DefaultPepLib(DefaultPepLib const& copy);
177  DefaultPepLib& operator=(DefaultPepLib const& copy);
178 };
179 
180 class OMF {
181 
182 public:
183 
185  SKIP_SS = 8, INFER_PEP_BONDS = 16};
186 
187  bool OptionSet(OMFOption opt) const {
188  return (opt & options_) == opt;
189  }
190 
192  Real max_error = 0.0,
193  uint8_t options = 0);
194 
195  static OMFPtr FromFile(const String& fn);
196 
197  static OMFPtr FromString(const String& s);
198 
199  void ToFile(const String& fn) const;
200 
201  String ToString() const;
202 
204 
206  return this->GetAU();
207  }
208 
210 
212  return this->GetAUChain(name);
213  }
214 
215  int GetVersion() const { return version_; }
216 
217  static int GetCurrentOMFVersion() { return OMF_VERSION; }
218 
219  Real GetMaxError() const { return 0.001 * max_error_; }
220 
221  // data access without requirement of generating a full
222  // OpenStructure entity
223 
224  String GetName() const { return name_; }
225 
226  std::vector<String> GetChainNames() const;
227 
228  const geom::Vec3List& GetPositions(const String& cname) const;
229 
230  const std::vector<Real>& GetBFactors(const String& cname) const;
231 
232  std::vector<Real> GetAvgBFactors(const String& cname) const;
233 
234  String GetSequence(const String& cname) const;
235 
236 private:
237  // only construct with static functions
238  OMF(): options_(0) { }
239 
240  void ToStream(std::ostream& stream) const;
241 
242  void FromStream(std::istream& stream);
243 
244  void FillChain(const ChainDataPtr data, ost::mol::XCSEditor& ed,
245  ost::mol::ChainHandle& chain) const;
246 
247  String name_;
248  uint16_t max_error_;
249  std::vector<ResidueDefinition> residue_definitions_;
250  std::map<String, ChainDataPtr> chain_data_;
251 
252  // bond features - only for bonds that are inter-chain
253  // given n bonds, bond_chain_names_ and bond_atoms_ have length 2*n and are
254  // organized as follows:
255  // [bond1_at1_x, bond1_at2_x, ..., bondn_at1_x, bondn_at2_x]
256  // bond_orders_ on the other hand has length n
257  std::vector<String> bond_chain_names_;
258  std::vector<int> bond_atoms_;
259  std::vector<int> bond_orders_;
260 
261  // bitfield with options
262  uint8_t options_;
263 
264  int version_;
265 };
266 
267 }} //ns
268 
269 #endif
std::vector< ResidueDefinition > residue_definitions
Definition: omf.hh:172
static DefaultPepLib & Instance()
Definition: omf.hh:168
ost::mol::EntityHandle GetAUChain(const String &name) const
static OMFPtr FromFile(const String &fn)
std::vector< String > GetChainNames() const
static OMFPtr FromEntity(const ost::mol::EntityHandle &ent, Real max_error=0.0, uint8_t options=0)
bool OptionSet(OMFOption opt) const
Definition: omf.hh:187
const std::vector< Real > & GetBFactors(const String &cname) const
String GetName() const
Definition: omf.hh:224
ost::mol::EntityHandle GetEntity() const
Definition: omf.hh:205
ost::mol::EntityHandle GetAU() const
Real GetMaxError() const
Definition: omf.hh:219
std::vector< Real > GetAvgBFactors(const String &cname) const
String ToString() const
static int GetCurrentOMFVersion()
Definition: omf.hh:217
int GetVersion() const
Definition: omf.hh:215
@ ROUND_BFACTORS
Definition: omf.hh:184
@ DEFAULT_PEPLIB
Definition: omf.hh:184
@ INFER_PEP_BONDS
Definition: omf.hh:185
@ AVG_BFACTORS
Definition: omf.hh:184
@ SKIP_SS
Definition: omf.hh:185
String GetSequence(const String &cname) const
void ToFile(const String &fn) const
const geom::Vec3List & GetPositions(const String &cname) const
ost::mol::EntityHandle GetEntityChain(const String &name) const
Definition: omf.hh:211
static OMFPtr FromString(const String &s)
linear chain of residues
Definition: chain_handle.hh:52
Protein or molecule.
external coordinate system editor
Definition: xcs_editor.hh:36
float Real
Definition: base.hh:44
std::string String
Definition: base.hh:54
boost::shared_ptr< ChainData > ChainDataPtr
Definition: omf.hh:38
const int OMF_VERSION
Definition: omf.hh:33
boost::shared_ptr< OMF > OMFPtr
Definition: omf.hh:36
@ CHAINTYPE_UNKNOWN
guess what
Definition: chain_type.hh:47
Definition: base.dox:1
unsigned short uint16_t
Definition: stdint_msc.hh:79
unsigned char uint8_t
Definition: stdint_msc.hh:78
std::vector< int > bond_orders
Definition: omf.hh:162
ChainData(const ost::mol::ChainHandle &chain, const std::vector< ResidueDefinition > &residue_definitions, const std::unordered_map< unsigned long, int > &res_idx_map, const std::vector< std::pair< unsigned long, unsigned long > > &inter_residue_bonds, const std::vector< int > &inter_residue_bond_orders, std::unordered_map< long, int > &atom_idx_mapper)
std::vector< int > res_def_indices
Definition: omf.hh:149
std::vector< int > rnums
Definition: omf.hh:150
ost::mol::ChainType chain_type
Definition: omf.hh:146
String ch_name
Definition: omf.hh:145
void FromStream(std::istream &stream, const std::vector< ResidueDefinition > &res_def, int version, Real max_error, bool avg_bfactors, bool round_bfactors, bool skip_ss)
std::vector< char > insertion_codes
Definition: omf.hh:151
std::vector< int > bonds
Definition: omf.hh:161
geom::Vec3List positions
Definition: omf.hh:157
std::vector< char > sec_structures
Definition: omf.hh:152
void ToStream(std::ostream &stream, const std::vector< ResidueDefinition > &res_def, Real max_error, bool avg_bfactors, bool round_bfactors, bool skip_ss) const
std::vector< Real > occupancies
Definition: omf.hh:155
std::vector< Real > bfactors
Definition: omf.hh:156
std::vector< int > bond_orders
Definition: omf.hh:114
const std::vector< SidechainAtomRule > & GetSidechainAtomRules() const
std::vector< String > anames
Definition: omf.hh:110
bool operator==(const ResidueDefinition &other) const
Definition: omf.hh:66
ResidueDefinition(const ost::mol::ResidueHandle &res)
int GetIdx(const String &aname) const
void _AddChiDefinition(int idx_one, int idx_two, int idx_three, int idx_four)
std::vector< SidechainAtomRule > sidechain_atom_rules
Definition: omf.hh:118
std::set< int > rotameric_atoms
Definition: omf.hh:116
const std::vector< ChiDefinition > & GetChiDefinitions() const
void ToStream(std::ostream &stream) const
std::set< int > critical_sidechain_angles
Definition: omf.hh:119
void FromStream(std::istream &stream)
std::vector< int > bonds
Definition: omf.hh:113
void _AddAtomRule(int a_idx, int anch_one_idx, int anch_two_idx, int anch_three_idx, Real bond_length, Real angle, int dihedral_idx, Real base_dihedral)
const std::set< int > & GetRotamericAtoms() const
std::vector< ChiDefinition > chi_definitions
Definition: omf.hh:117
bool operator!=(const ResidueDefinition &other) const
Definition: omf.hh:78
std::vector< bool > is_hetatm
Definition: omf.hh:112
std::vector< String > elements
Definition: omf.hh:111
std::map< String, int > idx_mapper
Definition: omf.hh:115