OpenStructure
profile_handle.hh
Go to the documentation of this file.
1 //------------------------------------------------------------------------------
2 // This file is part of the OpenStructure project <www.openstructure.org>
3 //
4 // Copyright (C) 2008-2020 by the OpenStructure authors
5 //
6 // This library is free software; you can redistribute it and/or modify it under
7 // the terms of the GNU Lesser General Public License as published by the Free
8 // Software Foundation; either version 3.0 of the License, or (at your option)
9 // any later version.
10 // This library is distributed in the hope that it will be useful, but WITHOUT
11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 // FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
13 // details.
14 //
15 // You should have received a copy of the GNU Lesser General Public License
16 // along with this library; if not, write to the Free Software Foundation, Inc.,
17 // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 //------------------------------------------------------------------------------
19 
20 /*
21  Author: Gerardo Tauriello, Gabriel Studer
22  */
23 
24 #ifndef OST_SEQ_PROFILE_HANDLE_HH
25 #define OST_SEQ_PROFILE_HANDLE_HH
26 
27 #include <ost/base.hh>
28 #include <ost/stdint.hh>
29 #include <ost/message.hh>
30 #include <ost/seq/module_config.hh>
31 
32 #include <string.h> // for memcpy, etc
33 #include <vector>
34 #include <map>
35 #include <fstream>
36 #include <boost/shared_ptr.hpp>
37 
38 namespace ost { namespace seq {
39 
40 class ProfileHandle;
41 class ProfileColumn;
42 class HMMData;
43 class ProfileDB;
44 typedef boost::shared_ptr<ProfileHandle> ProfileHandlePtr;
45 typedef boost::shared_ptr<HMMData> HMMDataPtr;
46 typedef std::vector<ProfileHandlePtr> ProfileHandleList;
47 typedef boost::shared_ptr<ProfileDB> ProfileDBPtr;
48 typedef std::vector<ProfileColumn> ProfileColumnList;
49 
50 typedef enum {
51  HMM_M2M = 0, HMM_M2I = 1, HMM_M2D = 2,
52  HMM_I2M = 3, HMM_I2I = 4,
53  HMM_D2M = 5, HMM_D2D = 6
55 
57 public:
58  HMMData() {
59  memset(trans_, 0, 7*sizeof(Real));
60  trans_[HMM_M2M] = 1.0;
61  trans_[HMM_I2M] = 1.0;
62  trans_[HMM_D2M] = 1.0;
63  neff_ = 1.0;
64  neff_i_ = 1.0;
65  neff_d_ = 1.0;
66  }
67 
68  HMMData(const HMMData& rhs) {
69  memcpy(trans_, rhs.trans_, 7*sizeof(Real));
70  neff_ = rhs.neff_;
71  neff_i_ = rhs.neff_i_;
72  neff_d_ = rhs.neff_d_;
73  }
74 
75  Real GetProb(HMMTransition transition) const {
76  return trans_[transition];
77  }
78 
79  void SetProb(HMMTransition transition, Real prob) {
80  trans_[transition] = prob;
81  }
82 
83  Real GetNeff() const {
84  return neff_;
85  }
86 
87  void SetNeff(Real neff) {
88  neff_ = neff;
89  }
90 
91  Real GetNeff_I() const {
92  return neff_i_;
93  }
94 
95  void SetNeff_I(Real neff) {
96  neff_i_ = neff;
97  }
98 
99  Real GetNeff_D() const {
100  return neff_d_;
101  }
102 
103  void SetNeff_D(Real neff) {
104  neff_d_ = neff;
105  }
106 
107  bool operator==(const HMMData& rhs) const {
108  return (!memcmp(trans_, rhs.trans_, sizeof(trans_)) &&
109  neff_ == rhs.neff_ &&
110  neff_i_ == rhs.neff_i_ &&
111  neff_d_ == rhs.neff_d_);
112  }
113  bool operator!=(const HMMData& rhs) const { return !(rhs == (*this)); }
114 
115  HMMData& operator=(const HMMData& rhs) {
116  memcpy(trans_, rhs.trans_, 7*sizeof(Real));
117  neff_ = rhs.neff_;
118  neff_i_ = rhs.neff_i_;
119  neff_d_ = rhs.neff_d_;
120  return *this;
121  }
122 
123  friend std::ofstream& operator<<(std::ofstream& os, HMMData& dat) {
124 
125  int16_t p_data[7];
126  for (uint i = 0; i < 7; ++i) {
127  p_data[i] = static_cast<int16_t>(dat.trans_[i]*10000);
128  }
129  os.write(reinterpret_cast<char*>(p_data), 7*sizeof(int16_t));
130 
131  float neff_data[3];
132  neff_data[0] = dat.neff_;
133  neff_data[1] = dat.neff_i_;
134  neff_data[2] = dat.neff_d_;
135  os.write(reinterpret_cast<char*>(neff_data), 3*sizeof(float));
136 
137  return os;
138  }
139 
140  friend std::ifstream& operator>>(std::ifstream& is, HMMData& dat) {
141 
142  int16_t p_data[7];
143  is.read(reinterpret_cast<char*>(p_data), 7*sizeof(int16_t));
144  for (uint i = 0; i < 7; ++i) {
145  dat.trans_[i] = p_data[i] * 0.0001;
146  }
147 
148  float neff_data[3];
149  is.read(reinterpret_cast<char*>(neff_data), 3*sizeof(float));
150  dat.neff_ = neff_data[0];
151  dat.neff_i_ = neff_data[1];
152  dat.neff_d_ = neff_data[2];
153 
154  return is;
155  }
156 
157 private:
158  Real trans_[7];
159  Real neff_;
160  Real neff_i_;
161  Real neff_d_;
162 };
163 
169 public:
170 
173  memset(freq_, 0, sizeof(freq_));
174  }
175 
177  memcpy(freq_, rhs.freq_, sizeof(freq_));
178  if(rhs.hmm_data_) {
179  // do deep copy
180  hmm_data_ = HMMDataPtr(new HMMData(*rhs.hmm_data_));
181  }
182  }
183  ProfileColumn& operator= (const ProfileColumn& rhs) {
184  memcpy(freq_, rhs.freq_, sizeof(freq_));
185  if(rhs.hmm_data_) {
186  // do deep copy
187  hmm_data_ = HMMDataPtr(new HMMData(*rhs.hmm_data_));
188  } else if(hmm_data_) {
189  hmm_data_ = HMMDataPtr();
190  }
191  return *this;
192  }
193 
196 
198  hmm_data_ = p;
199  }
200 
202  if(!hmm_data_) {
203  throw Error("ProfileColumn has no HMM data set!");
204  }
205  return hmm_data_;
206  }
207 
208  Real GetTransProb(HMMTransition transition) const {
209  if(!hmm_data_) {
210  throw Error("ProfileColumn has no HMM data set!");
211  }
212  return hmm_data_->GetProb(transition);
213  }
214 
216  static int GetIndex(char ch);
217 
218  Real GetFreq(char ch) const;
219 
220  void SetFreq(char ch, Real freq);
221 
222  bool operator==(const ProfileColumn& rhs) const {
223  return !memcmp(freq_, rhs.freq_, sizeof(freq_));
224  }
225  bool operator!=(const ProfileColumn& rhs) const { return !(rhs == (*this)); }
226 
227  Real* freqs_begin() { return freq_; }
228  Real* freqs_end() { return freq_ + 20; }
229  const Real* freqs_begin() const { return freq_; }
230  const Real* freqs_end() const { return freq_ + 20; }
231 
233  Real GetEntropy() const;
234 
237  const ProfileColumn& null_model) const;
238 
239  // functions to feed streams with limited accuracy of internal data
240  // not intended for python export
241 
242  friend std::ofstream& operator<<(std::ofstream& os, ProfileColumn& col) {
243  int16_t data[20];
244  //transform aa_freq
245  for (uint i = 0; i < 20; ++i) {
246  data[i] = static_cast<int16_t>(col.freq_[i]*10000);
247  }
248  os.write(reinterpret_cast<char*>(data), sizeof(data));
249 
250  if(col.hmm_data_) {
251  bool has_hmm_data = true;
252  os.write(reinterpret_cast<char*>(&has_hmm_data), 1);
253  os << *col.hmm_data_;
254  } else {
255  bool has_hmm_data = false;
256  os.write(reinterpret_cast<char*>(&has_hmm_data), 1);
257  }
258 
259  return os;
260  }
261 
262  friend std::ifstream& operator>>(std::ifstream& is, ProfileColumn& col) {
263  int16_t data[20];
264  is.read(reinterpret_cast<char*>(data), sizeof(data));
265  //transform aa_freq
266  for (uint i = 0; i < 20; ++i) {
267  col.freq_[i] = data[i] * 0.0001;
268  }
269 
270  bool has_hmm_data;
271  is.read(reinterpret_cast<char*>(&has_hmm_data), 1);
272  if(has_hmm_data) {
273  is >> *col.hmm_data_;
274  }
275 
276  return is;
277  }
278 
279 private:
280  Real freq_[20];
281  HMMDataPtr hmm_data_;
282 };
283 
292 public:
294  ProfileHandle(): null_model_(ProfileColumn::HHblitsNullModel()), neff_(1.0) {}
295 
296  // uses compiler-generated copy- and assignment operators (work here!)
297 
298  const std::vector<ProfileColumn>& GetColumns() const { return columns_; }
299 
300  const ProfileColumn& GetNullModel() const { return null_model_; }
301 
302  void SetNullModel(const ProfileColumn& null_model) {
303  null_model_ = null_model;
304  }
305 
306  String GetSequence() const { return seq_; }
307 
308  void SetSequence(const String& seq) {
309  if (seq.length() != columns_.size()) {
310  throw Error("ProfileHandle - Inconsistency between number of columns and "
311  " seq. length.");
312  }
313  seq_ = seq;
314  }
315 
316  Real GetNeff() const { return neff_; }
317 
318  void SetNeff(Real neff) { neff_ = neff; }
319 
324 
327 
331  Real GetAverageScore(const ProfileHandle& other, uint offset = 0) const;
332 
333  // \brief Can only add column with an associated olc
334  void AddColumn(const ProfileColumn& c, char olc='X') {
335  columns_.push_back(c);
336  seq_ += olc;
337  }
338 
339  // some functions to make it behave like a vector
340 
341  void clear() { seq_ = ""; columns_.clear(); }
342 
343  size_t size() const { return columns_.size(); }
344 
345  bool empty() const { return columns_.empty(); }
346 
347  ProfileColumn& operator[](size_t index) { return columns_[index]; }
348 
349  const ProfileColumn& operator[](size_t index) const { return columns_[index]; }
350 
351  ProfileColumn& at(size_t index) { return columns_.at(index); }
352 
353  const ProfileColumn& at(size_t index) const { return columns_.at(index); }
354 
355  ProfileColumn& back() { return columns_.back(); }
356 
357  const ProfileColumn& back() const { return columns_.back(); }
358 
359  bool operator==(const ProfileHandle& other) const {
360  return seq_ == other.seq_ &&
361  columns_ == other.columns_ &&
362  null_model_ == other.null_model_;
363  }
364 
365  bool operator!=(const ProfileHandle& other) const {
366  return !(other == (*this));
367  }
368 
369  ProfileColumnList::iterator columns_begin() { return columns_.begin(); }
370  ProfileColumnList::iterator columns_end() { return columns_.end(); }
371  ProfileColumnList::const_iterator columns_begin() const {
372  return columns_.begin();
373  }
374  ProfileColumnList::const_iterator columns_end() const {
375  return columns_.end();
376  }
377 
378  // functions to feed streams with limited accuracy of internal data
379  // not intended for python export
380 
381  friend std::ofstream& operator<<(std::ofstream& os, ProfileHandle& prof) {
382  // null model
383  os << prof.null_model_;
384  // num. columns/residues
385  uint32_t size = prof.size();
386  os.write(reinterpret_cast<char*>(&size), sizeof(uint32_t));
387  // sequence
388  if (prof.seq_.length() != size) {
389  throw Error("ProfileHandle - Inconsistency between number of columns and "
390  " seq. length.");
391  }
392  os.write(prof.seq_.c_str(), size);
393  // columns
394  for(uint i = 0; i < size; ++i){
395  os << prof.columns_[i];
396  }
397 
398  return os;
399  }
400 
401  friend std::ifstream& operator>>(std::ifstream& is, ProfileHandle& prof) {
402  // null model
403  is >> prof.null_model_;
404  // num. columns/residues
405  uint32_t size;
406  is.read(reinterpret_cast<char*>(&size), sizeof(uint32_t));
407  // sequence
408  std::vector<char> tmp_buf(size);
409  is.read(&tmp_buf[0], size);
410  prof.seq_.assign(&tmp_buf[0], size);
411  // columns
412  prof.columns_.resize(size);
413  for(uint i = 0; i < size; ++i){
414  is >> prof.columns_[i];
415  }
416 
417  return is;
418  }
419 
420 private:
421  String seq_;
422  ProfileColumn null_model_;
423  ProfileColumnList columns_;
424  Real neff_;
425 };
426 
429 public:
432  void Save(const String& filename) const;
433 
434  static ProfileDBPtr Load(const String& filename);
435 
436  void AddProfile(const String& name, ProfileHandlePtr prof);
437 
438  ProfileHandlePtr GetProfile(const String& name) const;
439 
440  size_t size() const { return data_.size(); }
441 
442  std::vector<String> GetNames() const;
443 
444 private:
445  std::map<String, ProfileHandlePtr> data_;
446 };
447 
448 }}
449 
450 #endif
void SetNeff(Real neff)
HMMData & operator=(const HMMData &rhs)
HMMData(const HMMData &rhs)
void SetNeff_D(Real neff)
Real GetNeff_D() const
bool operator==(const HMMData &rhs) const
Real GetProb(HMMTransition transition) const
bool operator!=(const HMMData &rhs) const
friend std::ifstream & operator>>(std::ifstream &is, HMMData &dat)
void SetNeff_I(Real neff)
Real GetNeff() const
friend std::ofstream & operator<<(std::ofstream &os, HMMData &dat)
void SetProb(HMMTransition transition, Real prob)
Real GetNeff_I() const
Defines profile of 20 frequencies for one residue.
const Real * freqs_end() const
Real GetScore(const ProfileColumn &other, const ProfileColumn &null_model) const
Get column score as in Soeding-2005.
friend std::ofstream & operator<<(std::ofstream &os, ProfileColumn &col)
static ProfileColumn BLOSUMNullModel()
static ProfileColumn HHblitsNullModel()
ProfileColumn(const ProfileColumn &rhs)
void SetHMMData(HMMDataPtr p)
Real GetFreq(char ch) const
static int GetIndex(char ch)
Translate one-letter-code to index (0-indexing).
const Real * freqs_begin() const
bool operator==(const ProfileColumn &rhs) const
friend std::ifstream & operator>>(std::ifstream &is, ProfileColumn &col)
void SetFreq(char ch, Real freq)
HMMDataPtr GetHMMData() const
ProfileColumn()
Construct a profile with all frequencies set to 0.
Real GetTransProb(HMMTransition transition) const
bool operator!=(const ProfileColumn &rhs) const
Real GetEntropy() const
Get entropy for this column.
Contains a DB of profiles (identified by a unique name (String)).
static ProfileDBPtr Load(const String &filename)
size_t size() const
void AddProfile(const String &name, ProfileHandlePtr prof)
std::vector< String > GetNames() const
ProfileHandlePtr GetProfile(const String &name) const
void Save(const String &filename) const
Saves all profiles in DB with limited accuracy of internal data. Binary format with fixed-width integ...
Provides a profile for a sequence.
void AddColumn(const ProfileColumn &c, char olc='X')
ProfileColumnList::iterator columns_end()
bool operator==(const ProfileHandle &other) const
void SetNullModel(const ProfileColumn &null_model)
bool operator!=(const ProfileHandle &other) const
const ProfileColumn & GetNullModel() const
ProfileColumnList::iterator columns_begin()
Real GetAverageEntropy() const
Compute average entropy over all columns.
const ProfileColumn & at(size_t index) const
ProfileColumnList::const_iterator columns_begin() const
ProfileColumnList::const_iterator columns_end() const
const ProfileColumn & operator[](size_t index) const
ProfileColumn & at(size_t index)
Real GetAverageScore(const ProfileHandle &other, uint offset=0) const
Compute score comparing columns other[i] and this->at(i+offset) Column score as in Soeding-2005,...
friend std::ifstream & operator>>(std::ifstream &is, ProfileHandle &prof)
void SetSequence(const String &seq)
ProfileHandle()
Constructs an empty profile handle (sequence = '', 0 columns).
const std::vector< ProfileColumn > & GetColumns() const
ProfileColumn & operator[](size_t index)
const ProfileColumn & back() const
ProfileHandlePtr Extract(uint from, uint to) const
Extract subset of profile for columns from until to-1 (0-indexing). Null model is copied from this pr...
ProfileColumn & back()
String GetSequence() const
friend std::ofstream & operator<<(std::ofstream &os, ProfileHandle &prof)
unsigned int uint
Definition: base.hh:29
float Real
Definition: base.hh:44
std::string String
Definition: base.hh:54
boost::shared_ptr< HMMData > HMMDataPtr
std::vector< ProfileColumn > ProfileColumnList
boost::shared_ptr< ProfileHandle > ProfileHandlePtr
boost::shared_ptr< ProfileDB > ProfileDBPtr
std::vector< ProfileHandlePtr > ProfileHandleList
Definition: base.dox:1
#define DLLEXPORT_OST_SEQ
signed short int16_t
Definition: stdint_msc.hh:76
unsigned int uint32_t
Definition: stdint_msc.hh:80