OpenStructure
hmm_pseudo_counts.hh
Go to the documentation of this file.
1 //------------------------------------------------------------------------------
2 // This file is part of the OpenStructure project <www.openstructure.org>
3 //
4 // Copyright (C) 2008-2020 by the OpenStructure authors
5 //
6 // This library is free software; you can redistribute it and/or modify it under
7 // the terms of the GNU Lesser General Public License as published by the Free
8 // Software Foundation; either version 3.0 of the License, or (at your option)
9 // any later version.
10 // This library is distributed in the hope that it will be useful, but WITHOUT
11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 // FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
13 // details.
14 //
15 // You should have received a copy of the GNU Lesser General Public License
16 // along with this library; if not, write to the Free Software Foundation, Inc.,
17 // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 //------------------------------------------------------------------------------
19 
20 #ifndef OST_SEQ_ALG_HMM_PSEUDO_COUNTS_HH
21 #define OST_SEQ_ALG_HMM_PSEUDO_COUNTS_HH
22 
24 
25 namespace ost{ namespace seq{ namespace alg{
26 
27 
28 class ContextProfileDB;
29 typedef boost::shared_ptr<ContextProfileDB> ContextProfileDBPtr;
30 
31 
33 
34 public:
35 
36 ContextProfile(int length): length_(length),
37  data_(ContextProfile::DataSize(length), 0.0) { }
38 
39 ContextProfile(int length, Real* data): length_(length),
40  data_(ContextProfile::DataSize(length), 0.0) {
41  memcpy(&data_[0], data, data_.size() * sizeof(Real));
42 }
43 
44 void SetWeight(int pos, char olc, Real weight) {
45  if(pos >= length_) {
46  throw Error("Tried to access invalid pos in ContextProfile");
47  }
48  int olc_idx = ProfileColumn::GetIndex(olc);
49  if(olc_idx != -1) {
50  data_[pos*20 + olc_idx] = weight;
51  } else {
52  throw Error("Invalid one letter code in ContextProfile");
53  }
54 }
55 
56 void SetPseudoCount(char olc, Real count) {
57  int olc_idx = ProfileColumn::GetIndex(olc);
58  if(olc_idx != -1) {
59  data_[length_*20 + olc_idx] = count;
60  } else {
61  throw Error("Invalid one letter code in ContextProfile");
62  }
63 }
64 
65 void SetBias(Real bias) { data_.back() = bias; }
66 
67 const Real* GetWeights(int pos) const{
68  if(pos >= length_) {
69  throw Error("Tried to access invalid pos in ContextProfile");
70  }
71  return &data_[pos*20];
72 }
73 
74 Real GetWeight(int pos, char olc) {
75  if(pos >= length_) {
76  throw Error("Tried to access invalid pos in ContextProfile");
77  }
78  int olc_idx = ProfileColumn::GetIndex(olc);
79  if(olc_idx != -1) {
80  return data_[pos*20 + olc_idx];
81  } else {
82  throw Error("Invalid one letter code in ContextProfile");
83  }
84 }
85 
86 const Real* GetPseudoCounts() const { return &data_[length_*20]; }
87 
88 Real GetPseudoCount(char olc) {
89  int olc_idx = ProfileColumn::GetIndex(olc);
90  if(olc_idx != -1) {
91  return data_[length_*20 + olc_idx];
92  } else {
93  throw Error("Invalid one letter code in ContextProfile");
94  }
95 }
96 
97 Real GetBias() const { return data_.back(); }
98 
99 const std::vector<Real>& GetData() const { return data_; }
100 
101 int GetLength() const { return length_; }
102 
103 static int DataSize(int length) { return (length+1)*20+1; }
104 
105 private:
106 int length_;
107 // data organisation:
108 // context weights in chunks of 20 (length_ chunks)
109 // followed by 20 elements representing the context pseudo counts
110 // last element is the bias
111 std::vector<Real> data_;
112 };
113 
114 
116 
117 public:
118 
120 
121 void Save(const String& filename) const;
122 
123 static ContextProfileDBPtr Load(const String& filename);
124 
125 static ContextProfileDBPtr FromCRF(const String& filename);
126 
127 void AddProfile(const ContextProfile& profile){
128 
129  // enforce same length for all profiles
130  if(!profiles_.empty()) {
131  if(profile.GetLength() != profiles_[0].GetLength()) {
132  throw Error("Require all profiles to be of same length");
133  }
134  }
135  profiles_.push_back(profile);
136 }
137 
138 const ContextProfile& operator [](int idx) const {
139  return profiles_[idx];
140 }
141 
142 const ContextProfile& at(int idx) const {
143  return profiles_.at(idx);
144 }
145 
146 size_t size() const {
147  return profiles_.size();
148 }
149 
150 size_t profile_length() const {
151  if(profiles_.empty()) {
152  throw Error("DB must contain profiles to get profile length");
153  }
154  return profiles_[0].GetLength();
155 }
156 
157 private:
158 std::vector<ContextProfile> profiles_;
159 };
160 
162  Real gapb = 1.0, Real gabd = 0.15,
163  Real gape = 1.0);
164 
166  Real a = 1.0, Real b = 1.5, Real c = 1.0);
167 
169  const ContextProfileDB& db,
170  Real a = 0.9, Real b = 4.0, Real c = 1.0);
171 
173 
174 }}} // ns
175 
176 #endif
static int GetIndex(char ch)
Translate one-letter-code to index (0-indexing).
Provides a profile for a sequence.
void AddProfile(const ContextProfile &profile)
static ContextProfileDBPtr FromCRF(const String &filename)
const ContextProfile & at(int idx) const
const ContextProfile & operator[](int idx) const
void Save(const String &filename) const
static ContextProfileDBPtr Load(const String &filename)
ContextProfile(int length, Real *data)
const Real * GetWeights(int pos) const
void SetWeight(int pos, char olc, Real weight)
Real GetWeight(int pos, char olc)
const Real * GetPseudoCounts() const
void SetPseudoCount(char olc, Real count)
static int DataSize(int length)
const std::vector< Real > & GetData() const
float Real
Definition: base.hh:44
std::string String
Definition: base.hh:54
void AddAAPseudoCounts(ost::seq::ProfileHandle &profile, Real a=1.0, Real b=1.5, Real c=1.0)
void AddNullPseudoCounts(ost::seq::ProfileHandle &profile)
void AddTransitionPseudoCounts(ost::seq::ProfileHandle &profile, Real gapb=1.0, Real gabd=0.15, Real gape=1.0)
boost::shared_ptr< ContextProfileDB > ContextProfileDBPtr
Definition: base.dox:1