Victor
 All Data Structures Functions Variables Friends Pages
AlignmentBase.h
1 
2 /* This file is part of Victor.
3 
4  Victor is free software: you can redistribute it and/or modify
5  it under the terms of the GNU General Public License as published by
6  the Free Software Foundation, either version 3 of the License, or
7  (at your option) any later version.
8 
9  Victor is distributed in the hope that it will be useful,
10  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  GNU General Public License for more details.
13 
14  You should have received a copy of the GNU General Public License
15  along with Victor. If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #ifndef __AlignmentBase_H__
19 #define __AlignmentBase_H__
20 
21 #include <Debug.h>
22 #include <string>
23 #include <vector>
24 
25 namespace Victor { namespace Align2{
26 
/**
 * @brief Abstract base class for all sorts of alignments.
 *
 * Stores one target sequence (with its name and amino-acid offset) and a
 * list of template sequences (each with its own name and offset).
 * Short accessors are defined inline below the class; the remaining
 * members are defined in AlignmentBase.cc.
 */
class AlignmentBase {
public:

    // CONSTRUCTORS:

    /// Default constructor.
    AlignmentBase();

    /// Copy constructor.
    AlignmentBase(const AlignmentBase &orig);

    /// Destructor.
    virtual ~AlignmentBase();


    // OPERATORS:

    /// Assignment operator.
    AlignmentBase& operator=(const AlignmentBase &orig);


    // PREDICATES:

    /// Return size of alignment (the number of stored template sequences).
    unsigned int size() const;

    /// Return length of alignment (the length of the target string).
    unsigned int getLength();

    /// Return length of seq.
    unsigned int getSequenceLength(const string &seq);

    /// Return target name.
    virtual string getTargetName() const;

    /// Return target sequence.
    virtual string getTarget() const;

    /// Return target position p.
    virtual char getTargetPos(unsigned int p) const;

    /// Return target aa offset (only needed for alignment, default = 0).
    virtual int getTargetAminoAcidOffset() const;

    /// Return template index name.
    virtual string getTemplateName(unsigned int index = 0) const;

    /// Return template index sequence.
    virtual string getTemplate(unsigned int index = 0) const;

    /// Return template index position p.
    virtual char getTemplatePos(unsigned int p, unsigned int index = 0) const;

    /// Return template index aa offset (counting from zero).
    virtual int getTemplateAminoAcidOffset(unsigned int index = 0) const;

    /// Calculate pairwise identity between seq1 and seq2.
    virtual double calculatePairwiseIdentity(const string &seq1,
            const string &seq2);

    /// Calculate overall identity.
    virtual double calculateIdentity();

    /// Check for conservation. If index is 9999 check on all templates.
    virtual bool isConserved(unsigned int p, unsigned int index = 9999) const;

    /// Check for insertion at position p.
    virtual bool isInsertion(unsigned int p, unsigned int index = 0) const;

    /// Check for deletion at position p.
    virtual bool isDeletion(unsigned int p) const;

    /// Check for gap at position p.
    virtual bool isGap(unsigned int p, unsigned int index = 0) const;

    /// NOTE(review): no description is available for this member here;
    /// it is defined in AlignmentBase.cc.
    virtual vector< vector<int> > getMatchSubset();

    /// Return a new vector with positions shifted, depending on new position.
    virtual vector<int> shiftMatchSubset(vector<int> inputVector,
            int newStartPos);

    /// NOTE(review): described only as a companion to shiftMatchSubset();
    /// confirm the exact contract against AlignmentBase.cc.
    virtual double matchPositionVector(vector<int> CeTarget,
            vector<int> CeTemplate, vector<int> seqTarget, vector<int> seqTemplate);

    /// Save single sequence in FASTA format.
    static void saveFasta(string t, string tName, ostream &output);

    /// Stream-only overload of saveFasta(); defined outside this header.
    virtual void saveFasta(ostream &output) const;

    /// Save single line in CLUSTAL format.
    static void saveClustal(string t, string tName, ostream &output,
            unsigned int from);

    /// Stream-only overload of saveClustal(); defined outside this header.
    virtual void saveClustal(ostream &output) const;


    // MODIFIERS:

    /// Copy orig object to this object ("deep copy").
    virtual void copy(const AlignmentBase &orig);

    /// Construct a new "deep copy" of this object.
    virtual AlignmentBase* newCopy();

    /// Set target to t.
    virtual void setTarget(string t, string tName = "target");

    /// Set target residue to res.
    virtual void setTargetPos(unsigned int p, char res);

    /// Set target aa offset (only needed for alignment, default = 0).
    virtual void setTargetAminoAcidOffset(int orig);

    /// Set template to t.
    virtual void setTemplate(string t, string tName = "template");

    /// Set template index residue to res.
    virtual void setTemplatePos(unsigned int p, char res,
            unsigned int index = 0);

    /// Swap templates index1 and index2.
    virtual void swapTemplate(unsigned int index1, unsigned int index2);

    /// Set template index aa offset (counting from zero).
    virtual void setTemplateAminoAcidOffset(unsigned int index, int val);

    /// Insert character c in target and all templates at position p.
    void insertCharacter(unsigned int p, char c);

    /// Insert character '-' in target and all templates at position p.
    void insertDash(unsigned int p);

    /// Delete character (or '-') from target and all templates at position p.
    void deletePos(unsigned int p);

    /// Delete all gaps from target and all templates.
    void purgeTargetInsertions();

    /// Remove all templates below index.
    virtual void cutTemplate(unsigned int index);

    /// Clear template data.
    virtual void clearTemplate();

    /// Combine two multiple sequence alignments of same target.
    virtual void addAlignment(const AlignmentBase &other);

    /// Clear alignment data.
    virtual void clearAlignment();


    // HELPERS:

    /// Return sequence without '-' characters.
    static string getPureSequence(const string &s);

    /// Return position of index if '-' would not be there (counting from zero).
    static unsigned int getOrigPos(const string &s, unsigned int p);

    /// Return position of original index if '-' are now present.
    static unsigned int getNewPos(const string &s, unsigned int p);

    /// Return vector of words of a line of text.
    vector<string> getTokens(const string &text);

    /// Delete n-th character.
    string deleteChar(const string &s, unsigned int n);


protected:

    // ATTRIBUTES:

    string targetName;             ///< Target name.
    vector<string> seqTemplateName; ///< Template names.
    string target;                 ///< Target sequence.
    vector<string> seqTemplate;    ///< Template sequences.
    int startAaTarget;             ///< Start target aa offset.
    vector<int> startAaTemplates;  ///< Start templates aa offsets.


private:

};
222 
223  // -----------------------------------------------------------------------------
224  // AlignmentBase
225  // -----------------------------------------------------------------------------
226 
227  // PREDICATES:
228 
229  inline unsigned int
231  return seqTemplate.size();
232  }
233 
234  inline unsigned int
236  return target.length();
237  }
238 
239  inline string
241  return targetName;
242  }
243 
244  inline string
246  return target;
247  }
248 
249  inline char
250  AlignmentBase::getTargetPos(unsigned int p) const {
251  if (p >= target.length())
252  ERROR("AlignmentBase::getTargetPos() Invalid position requested.", exception);
253  return target[p];
254  }
255 
256  inline int
258  return startAaTarget;
259  }
260 
261  inline string
262  AlignmentBase::getTemplateName(unsigned int index) const {
263  if (index >= seqTemplateName.size())
264  ERROR("AlignmentBase::getTemplateName() Invalid template requested.", exception);
265  return seqTemplateName[index];
266  }
267 
268  inline string
269  AlignmentBase::getTemplate(unsigned int index) const {
270  if (index >= seqTemplate.size())
271  ERROR("AlignmentBase::getTemplate() Invalid template requested.", exception);
272  return seqTemplate[index];
273  }
274 
275  inline char
276  AlignmentBase::getTemplatePos(unsigned int p, unsigned int index) const {
277  if (index >= seqTemplate.size())
278  ERROR("AlignmentBase::getTemplatePos() Invalid template requested.", exception);
279  if (p >= seqTemplate[index].length())
280  ERROR("AlignmentBase::getTemplatePos() Invalid position requested.", exception);
281  return seqTemplate[index][p];
282  }
283 
284  inline int
285  AlignmentBase::getTemplateAminoAcidOffset(unsigned int index) const {
286  return startAaTemplates[index];
287  }
288 
289  inline void
290  AlignmentBase::saveFasta(string t, string tName, ostream &output) {
291  output << ">" << tName << "\n";
292 
293  for (unsigned int i = 0; i < t.length(); i++) {
294  if ((i != 0) && ((i % 60) == 0))
295  output << "\n";
296  output << t[i];
297  }
298 
299  output << "\n";
300  }
301 
302  inline void
303  AlignmentBase::saveClustal(string t, string tName, ostream &output,
304  unsigned int from) {
305  output << tName;
306 
307  for (int i = 0; i < (17 - static_cast<int> (tName.length())); ++i)
308  output << " ";
309 
310  unsigned int max = ((from + 60) < t.length()) ? from + 60 : t.length();
311  for (unsigned int i = from; i < max; i++)
312  output << t[i];
313 
314  output << "\n";
315  }
316 
317 
318  // MODIFIERS:
319 
320  inline void
321  AlignmentBase::setTarget(string t, string tName) {
322  if (seqTemplate.size() > 0)
323  if (t.length() != seqTemplate[0].length())
324  ERROR("AlignmentBase::setTarget() Target length does not match template.", exception);
325  targetName = tName;
326  target = t;
327  }
328 
329  inline void
330  AlignmentBase::setTargetPos(unsigned int p, char res) {
331  if (p >= target.length())
332  ERROR("AlignmentBase::getTargetPos() Invalid position requested.", exception);
333  target[p] = res;
334  }
335 
336  inline void
338  startAaTarget = orig;
339  }
340 
341  inline void
342  AlignmentBase::setTemplatePos(unsigned int p, char res, unsigned int index) {
343  if (index >= seqTemplate.size())
344  ERROR("AlignmentBase::getTemplatePos() Invalid template requested.", expection);
345  if (p >= seqTemplate[index].length())
346  ERROR("AlignmentBase::getTemplatePos() Invalid position requested.", exception);
347  seqTemplate[index][p] = res;
348  }
349 
// Set template index aa offset (counting from zero).
// The PRECOND line requires index to be below the number of stored
// template offsets before the assignment runs.
// NOTE(review): what PRECOND does on failure is defined in Debug.h and is
// not visible in this listing.
350  inline void
351  AlignmentBase::setTemplateAminoAcidOffset(unsigned int index, int val) {
352  PRECOND(index < startAaTemplates.size(), exception);
353  startAaTemplates[index] = val;
354  }
355 
356  inline void
358  seqTemplate.clear();
359  seqTemplateName.clear();
360  startAaTemplates.clear();
361  }
362 
363  inline void
365  targetName = "";
366  target = "";
367  startAaTarget = 0;
368  clearTemplate();
369  }
370 
371 }} // namespace
372 
373 #endif
virtual double calculatePairwiseIdentity(const string &seq1, const string &seq2)
Calculate pairwise identity between seq1 and seq2.
Definition: AlignmentBase.cc:73
virtual string getTarget() const
Return target sequence.
Definition: AlignmentBase.h:245
unsigned int getLength()
Return length of alignment.
Definition: AlignmentBase.h:235
vector< string > seqTemplate
Template sequences.
Definition: AlignmentBase.h:214
virtual bool isGap(unsigned int p, unsigned int index=0) const
Check for gap at position p.
Definition: AlignmentBase.cc:186
vector< string > getTokens(const string &text)
Return vector of words of a line of text.
Definition: AlignmentBase.cc:794
virtual int getTemplateAminoAcidOffset(unsigned int index=0) const
Return template index aa offset (counting from zero).
Definition: AlignmentBase.h:285
AlignmentBase & operator=(const AlignmentBase &orig)
Assignment operator.
Definition: AlignmentBase.cc:48
virtual void setTemplatePos(unsigned int p, char res, unsigned int index=0)
Set template index residue to res.
Definition: AlignmentBase.h:342
virtual vector< int > shiftMatchSubset(vector< int > inputVector, int newStartPos)
Return a new vector with positions shifted, depending on new position.
Definition: AlignmentBase.cc:235
vector< int > startAaTemplates
Start templates aa offsets.
Definition: AlignmentBase.h:216
void insertCharacter(unsigned int p, char c)
Insert character c in target and all templates at position p.
Definition: AlignmentBase.cc:377
AlignmentBase()
Default constructor.
Definition: AlignmentBase.cc:29
virtual int getTargetAminoAcidOffset() const
Return target aa offset (only needed for alignment, default = 0).
Definition: AlignmentBase.h:257
static unsigned int getNewPos(const string &s, unsigned int p)
Return position of original index if '-' are now present.
Definition: AlignmentBase.cc:771
virtual void setTargetAminoAcidOffset(int orig)
Set target aa offset (only needed for alignment, default = 0).
Definition: AlignmentBase.h:337
virtual AlignmentBase * newCopy()
Construct a new "deep copy" of this object.
Definition: AlignmentBase.cc:341
void insertDash(unsigned int p)
Insert character '-' in target and all templates at position p.
Definition: AlignmentBase.cc:390
string target
Target sequence.
Definition: AlignmentBase.h:213
virtual char getTemplatePos(unsigned int p, unsigned int index=0) const
Return template index position p.
Definition: AlignmentBase.h:276
virtual string getTemplate(unsigned int index=0) const
Return template index sequence.
Definition: AlignmentBase.h:269
virtual void addAlignment(const AlignmentBase &other)
Combine two multiple sequence alignments of same target.
Definition: AlignmentBase.cc:463
virtual vector< vector< int > > getMatchSubset()
Definition: AlignmentBase.cc:200
string deleteChar(const string &s, unsigned int n)
Delete n-th character.
Definition: AlignmentBase.cc:819
virtual void copy(const AlignmentBase &orig)
Copy orig object to this object ("deep copy").
Definition: AlignmentBase.cc:322
virtual void setTarget(string t, string tName="target")
Set target to t.
Definition: AlignmentBase.h:321
Victor::Align2::AlignmentBase
Abstract base class for all sorts of alignments.
Definition: AlignmentBase.h:32
static string getPureSequence(const string &s)
Return sequence without '-' characters.
Definition: AlignmentBase.cc:721
virtual void setTemplate(string t, string tName="template")
Set template to t.
Definition: AlignmentBase.cc:351
static unsigned int getOrigPos(const string &s, unsigned int p)
Return position of index if '-' would not be there (counting from zero).
Definition: AlignmentBase.cc:741
int startAaTarget
Start target aa offset.
Definition: AlignmentBase.h:215
void purgeTargetInsertions()
Delete all gaps from target and all templates.
Definition: AlignmentBase.cc:411
virtual void setTargetPos(unsigned int p, char res)
Set target residue to res.
Definition: AlignmentBase.h:330
unsigned int size() const
Return size of alignment.
Definition: AlignmentBase.h:230
static void saveClustal(string t, string tName, ostream &output, unsigned int from)
Save single line in CLUSTAL format.
Definition: AlignmentBase.h:303
virtual double calculateIdentity()
Calculate overall identity.
Definition: AlignmentBase.cc:100
virtual void clearAlignment()
Clear alignment data.
Definition: AlignmentBase.h:364
vector< string > seqTemplateName
Template names.
Definition: AlignmentBase.h:212
string targetName
Target name.
Definition: AlignmentBase.h:211
virtual bool isInsertion(unsigned int p, unsigned int index=0) const
Check for insertion at position p.
Definition: AlignmentBase.cc:155
unsigned int getSequenceLength(const string &seq)
Return length of seq.
Definition: AlignmentBase.cc:63
virtual string getTemplateName(unsigned int index=0) const
Return template index name.
Definition: AlignmentBase.h:262
virtual void clearTemplate()
Clear template data.
Definition: AlignmentBase.h:357
virtual string getTargetName() const
Return target name.
Definition: AlignmentBase.h:240
virtual ~AlignmentBase()
Destructor.
Definition: AlignmentBase.cc:37
virtual bool isDeletion(unsigned int p) const
Check for deletion at position p.
Definition: AlignmentBase.cc:170
virtual void setTemplateAminoAcidOffset(unsigned int index, int val)
Set template index aa offset (counting from zero).
Definition: AlignmentBase.h:351
virtual void swapTemplate(unsigned int index1, unsigned int index2)
Swap templates index1 and index2.
Definition: AlignmentBase.cc:364
virtual bool isConserved(unsigned int p, unsigned int index=9999) const
Check for conservation. If index is 9999 check on all templates.
Definition: AlignmentBase.cc:134
void deletePos(unsigned int p)
Delete character (or '-') from target and all templates at position p.
Definition: AlignmentBase.cc:398
virtual char getTargetPos(unsigned int p) const
Return target position p.
Definition: AlignmentBase.h:250
virtual double matchPositionVector(vector< int > CeTarget, vector< int > CeTemplate, vector< int > seqTarget, vector< int > seqTemplate)
Companion class to the previous.
Definition: AlignmentBase.cc:262
virtual void cutTemplate(unsigned int index)
Remove all templates below index.
Definition: AlignmentBase.cc:426
static void saveFasta(string t, string tName, ostream &output)
Save single sequence in FASTA format.
Definition: AlignmentBase.h:290