Victor
 All Data Structures Functions Variables Friends Pages
Profile.h
1 /* This file is part of Victor.
2 
3  Victor is free software: you can redistribute it and/or modify
4  it under the terms of the GNU General Public License as published by
5  the Free Software Foundation, either version 3 of the License, or
6  (at your option) any later version.
7 
8  Victor is distributed in the hope that it will be useful,
9  but WITHOUT ANY WARRANTY; without even the implied warranty of
10  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11  GNU General Public License for more details.
12 
13  You should have received a copy of the GNU General Public License
14  along with Victor. If not, see <http://www.gnu.org/licenses/>.
15  */
16 
17 
18 #ifndef __Profile_H__
19 #define __Profile_H__
20 
21 #include <Alignment.h>
22 #include <AminoAcidCode.h>
23 #include <Debug.h>
24 #include <IoTools.h>
25 #include <string>
26 #include <vector>
27 
28 namespace Victor { namespace Align2{
29 
/// Calculate a frequency profile or PSSM.
35  class Profile {
36  public:
37 
38  // CONSTRUCTORS:
39 
/// Default constructor.
41  Profile();
42 
/// Copy constructor.
/// NOTE(review): presumably delegates to copy(); the definition is not
/// visible in this header -- confirm in Profile.cc.
44  Profile(const Profile &orig);
45 
/// Destructor.
47  virtual ~Profile();
48 
49 
50  // OPERATORS:
51 
/// Assignment operator.
53  Profile& operator =(const Profile &orig);
54 
55 
56  // PREDICATES:
57 
/// Return the frequency of the aminoacid amino for position i.
59  virtual double getAminoFrequencyFromCode(AminoAcidCode amino,
60  unsigned int i);
61 
/// Return the frequency of the aminoacid amino (one-letter code) for
/// position i.
63  virtual double getAminoFrequency(char amino, unsigned int i);
64 
/// Return the frequency of the most frequent aminoacid for position i.
66  virtual double getFreqMaxAminoFrequency(unsigned int i);
67 
/// Return the code of the most frequent aminoacid for position i.
69  virtual AminoAcidCode getAminoMaxFrequencyCode(unsigned int i);
70 
/// Return the most frequent aminoacid (one-letter code) for position i.
72  virtual char getAminoMaxFrequency(unsigned int i);
73 
/// Return the number of gaps for position i.
75  virtual unsigned int getNumGap(unsigned int i);
76 
/// Return the number of sequences in the profile.
78  virtual unsigned int getNumSequences();
79 
/// Return the length of sequences in the profile.
81  virtual unsigned int getSequenceLength();
82 
/// Return the master sequence.
84  virtual const string getSeq();
85 
/// Return the consensus of the profile.
87  virtual string getConsensus();
88 
89 
90  // MODIFIERS:
91 
/// Copy orig object to this object ("deep copy").
93  virtual void copy(const Profile &orig);
94 
/// Construct a new "deep copy" of this object.
96  virtual Profile* newCopy();
97 
/// Set the frequency of the aminoacid amino for position i.
99  virtual void setFrequency(double freq, AminoAcidCode amino, int i);
100 
/// Set the frequency of the aminoacid amino (one-letter code) for
/// position i.
102  virtual void setFrequency(double freq, char amino, int i);
103 
/// Set the number of gaps for position j.
105  virtual void setNumGap(int numGap, int j);
106 
/// Set the number of sequences in the profile.
108  virtual void setNumSequences(int i);
109 
/// Set the master sequence.
111  virtual void setSeq(string master);
112 
/// Set the profile with or without gaps in the master sequence.
114  virtual void setProfile(Alignment &ali);
115 
/// Overload of setProfile that also receives an input stream.
/// NOTE(review): how the stream is consumed is not visible in this
/// header -- confirm in Profile.cc.
117  virtual void setProfile(Alignment &ali, istream &is);
118 
/// Set whether to include/exclude gaps in the master sequence.
120  virtual void setAllowGaps(bool g);
121 
/// Reverse profile.
123  virtual void reverse();
124 
125 
126  // HELPERS:
127 
/// Calculate the raw (ie. unnormalized) aminoacids frequencies for
/// position i.
129  virtual void pCalculateRawFrequency(vector<double> &freq, double &gapFreq,
130  Alignment &ali, unsigned int i);
131 
/// Construct data from alignment.
133  virtual void pConstructData(Alignment &ali);
134 
/// Reset all data.
136  virtual void pResetData();
137 
138 
139  // ATTRIBUTES:
140 
/// Aminoacids frequencies, indexed as [position][aminoacid code].
141  vector< vector<double> > profAliFrequency;
/// Gaps frequencies, indexed by position.
142  vector<double> gapFreq;
/// Master sequence.
143  string seq;
/// Length of sequences.
144  unsigned int seqLen;
/// Number of sequences.
145  unsigned int numSeq;
/// If true, consider gaps in the master sequence.
146  bool gap;
147 
148 
149  protected:
150 
151 
152  private:
153 
154  };
155 
156  // -----------------------------------------------------------------------------
157  // Profile
158  // -----------------------------------------------------------------------------
159 
160  // PREDICATES:
161 
162  inline double
163  Profile::getAminoFrequencyFromCode(AminoAcidCode amino, unsigned int i) {
164  return profAliFrequency[i][amino];
165  }
166 
167  inline double
168  Profile::getAminoFrequency(char amino, unsigned int i) {
169  return getAminoFrequencyFromCode(aminoAcidOneLetterTranslator(amino), i);
170  }
171 
172  inline double
175  }
176 
177  inline AminoAcidCode
179  AminoAcidCode amino = XXX;
180  double max = 0;
181 
182  for (AminoAcidCode j = ALA; j <= TYR; j++) {
183  double tmp = profAliFrequency[i][j];
184  if (tmp > max) {
185  amino = j;
186  max = tmp;
187  }
188  }
189 
190  return amino;
191  }
192 
193  inline char
195  return aminoAcidOneLetterTranslator(getAminoMaxFrequencyCode(i));
196  }
197 
198  inline unsigned int
199  Profile::getNumGap(unsigned int i) {
200  return static_cast<int> (gapFreq[i]);
201  }
202 
203  inline unsigned int
205  return numSeq;
206  }
207 
208  inline unsigned int
210  return profAliFrequency.size();
211  }
212 
213  inline const string
215  return seq;
216  }
217 
218  inline string
220  string consensus;
221 
222  for (unsigned int i = 0; i < getSequenceLength(); i++) {
223  char amino = getAminoMaxFrequency(i);
224  consensus += amino;
225  }
226 
227  return consensus;
228  }
229 
230 
231  // MODIFIERS:
232 
233  inline void
234  Profile::setFrequency(double freq, AminoAcidCode amino, int i) {
235  profAliFrequency[i][amino] = freq;
236  }
237 
238  inline void
239  Profile::setFrequency(double freq, char amino, int i) {
240  setFrequency(freq, aminoAcidOneLetterTranslator(amino), i);
241  }
242 
243  inline void
244  Profile::setNumGap(int numGap, int j) {
245  gapFreq[j] = numGap;
246  }
247 
248  inline void
250  numSeq = i;
251  }
252 
253  inline void
254  Profile::setSeq(string master) {
255  seq = master;
256  }
257 
258  inline void
260  gap = g;
261  }
262 
263 }} // namespace
264 
265 #endif
virtual void setAllowGaps(bool g)
Set whether to include/exclude gaps in the master sequence.
Definition: Profile.h:259
Profile()
Default constructor.
Definition: Profile.cc:29
vector< vector< double > > profAliFrequency
Aminoacids frequencies.
Definition: Profile.h:141
virtual void copy(const Profile &orig)
Copy orig object to this object ("deep copy").
Definition: Profile.cc:58
virtual void setSeq(string master)
Set the master sequence.
Definition: Profile.h:254
unsigned int numSeq
Number of sequences.
Definition: Profile.h:145
Calculate a frequency profile or PSSM.
Definition: Profile.h:35
vector< double > gapFreq
Gaps frequencies.
Definition: Profile.h:142
bool gap
If true, consider gaps in the master sequence.
Definition: Profile.h:146
virtual unsigned int getNumSequences()
Return the number of sequences in the profile.
Definition: Profile.h:204
string seq
Master sequence.
Definition: Profile.h:143
virtual Profile * newCopy()
Construct a new "deep copy" of this object.
Definition: Profile.cc:78
Implement a simple alignment type.
Definition: Alignment.h:32
virtual double getAminoFrequencyFromCode(AminoAcidCode amino, unsigned int i)
Return the frequency of the aminoacid amino for position i.
Definition: Profile.h:163
virtual void pCalculateRawFrequency(vector< double > &freq, double &gapFreq, Alignment &ali, unsigned int i)
Calculate the raw (ie. unnormalized) aminoacids frequencies for position i.
Definition: Profile.cc:168
virtual double getFreqMaxAminoFrequency(unsigned int i)
Return the frequency of the most frequent aminoacid for position i.
Definition: Profile.h:173
virtual ~Profile()
Destructor.
Definition: Profile.cc:37
virtual char getAminoMaxFrequency(unsigned int i)
Return the most frequent aminoacid for position i.
Definition: Profile.h:194
virtual const string getSeq()
Return the master sequence.
Definition: Profile.h:214
virtual void reverse()
Reverse profile.
Definition: Profile.cc:135
virtual AminoAcidCode getAminoMaxFrequencyCode(unsigned int i)
Return the code of the most frequent aminoacid for position i.
Definition: Profile.h:178
virtual void setNumSequences(int i)
Set the number of sequences in the profile.
Definition: Profile.h:249
virtual void setNumGap(int numGap, int j)
Set the number of gaps for position j.
Definition: Profile.h:244
Profile & operator=(const Profile &orig)
Assignment operator.
Definition: Profile.cc:44
virtual unsigned int getNumGap(unsigned int i)
Return the number of gaps for position i.
Definition: Profile.h:199
virtual double getAminoFrequency(char amino, unsigned int i)
Return the frequency of the aminoacid amino for position i.
Definition: Profile.h:168
virtual void pConstructData(Alignment &ali)
Construct data from alignment.
Definition: Profile.cc:186
unsigned int seqLen
Length of sequences.
Definition: Profile.h:144
virtual string getConsensus()
Return the consensus of the profile.
Definition: Profile.h:219
virtual unsigned int getSequenceLength()
Return the length of sequences in the profile.
Definition: Profile.h:209
virtual void setProfile(Alignment &ali)
Set the profile with or without gaps in the master sequence.
Definition: Profile.cc:87
virtual void pResetData()
Reset all data.
Definition: Profile.cc:210
virtual void setFrequency(double freq, AminoAcidCode amino, int i)
Set the frequency of the aminoacid amino for position i.
Definition: Profile.h:234