Difference between revisions of "AlignTest"

From victor
Jump to: navigation, search
(Created page with "A simple program to test the basic alignment features. #include <ScoringS2S.h> #include <NWAlign.h> #include <FSAlign.h> #include <SWAlign.h> #include <SubMatrix.h> #in...")
 
m
 
(8 intermediate revisions by the same user not shown)
Line 16: Line 16:
 
  using namespace Biopool;
 
  using namespace Biopool;
  
  /// Show command line options and help text.
+
  ///''' Show command line options and help text.'''
  void
+
  void  sShowHelp(){
  sShowHelp(){
+
  cout << "\nALIGNMENT TEST"
cout << "\nALIGNMENT TEST"
+
  << "\nA simple program to test the basic alignment features.\n"
    << "\nA simple program to test the basic alignment features.\n"
+
  << "\nOptions:"
    << "\nOptions:"
+
  << "\n"
    << "\n"
+
  << "\n  [--in <name>]    \t Name of input FASTA file (alternative to s1 & s2)"
    << "\n  [--in <name>]    \t Name of input FASTA file (alternative to s1 & s2)"
+
  << "\n  [--s1 <seq>]      \t Target sequence (default = HEAGAWGHEE)"
    << "\n  [--s1 <seq>]      \t Target sequence (default = HEAGAWGHEE)"
+
  << "\n  [--s2 <seq>]      \t Template sequence (default = PAWHEAE)"
    << "\n  [--s2 <seq>]      \t Template sequence (default = PAWHEAE)"
+
  << "\n  [--out <name>]    \t Name of output file (default = to screen)"
    << "\n  [--out <name>]    \t Name of output file (default = to screen)"
+
  << "\n"
    << "\n"
+
  << "\n  [-m <name>]      \t Name of substitution matrix file (default = blosum62.dat)"
    << "\n  [-m <name>]      \t Name of substitution matrix file (default = blosum62.dat)"
+
  << "\n  [-M <name>]      \t Name of structural substitution matrix file (default = secid.dat)"
    << "\n  [-M <name>]      \t Name of structural substitution matrix file (default = secid.dat)"
+
  << "\n"
    << "\n"
+
  << "\n  [-o <double>]    \t Open gap penalty (default = 12.00)"
    << "\n  [-o <double>]    \t Open gap penalty (default = 12.00)"
+
  << "\n  [-e <double>]    \t Extension gap penalty (default = 3.00)"
    << "\n  [-e <double>]    \t Extension gap penalty (default = 3.00)"
+
  << "\n"
    << "\n"
+
  << "\n  [--sec <name>]    \t Name of secondary structure FASTA file"
    << "\n  [--sec <name>]    \t Name of secondary structure FASTA file"
+
  << "\n  [--cSeq <double>] \t Coefficient for sequence alignment (default = 0.80)"
    << "\n  [--cSeq <double>] \t Coefficient for sequence alignment (default = 0.80)"
+
  << "\n  [--cStr <double>] \t Coefficient for structural alignment (default = 0.20)"
    << "\n  [--cStr <double>] \t Coefficient for structural alignment (default = 0.20)"
+
  << "\n"
    << "\n"
+
  << "\n  [--verbose]      \t Verbose mode"
    << "\n  [--verbose]      \t Verbose mode"
+
  << "\n" << endl;
    << "\n" << endl;
+
 
  }
 
  }
 
  
 
  int main(int argc, char **argv){
 
  int main(int argc, char **argv){
// Default parameters
+
  // Default parameters
string inputFileName, outputFileName, matrixFileName, matrixStrFileName, secFileName;
+
  string inputFileName, outputFileName, matrixFileName, matrixStrFileName, secFileName;
string seq1, seq2, sec1, sec2;
+
  string seq1, seq2, sec1, sec2;
double openGapPenalty, extensionGapPenalty;
+
  double openGapPenalty, extensionGapPenalty;
double cSeq, cStr;
+
  double cSeq, cStr;
bool verbose;
+
  bool verbose;
// --------------------------------------------------
+
// 0. Treat options
+
// --------------------------------------------------
+
  
if (getArg("h", argc, argv)) {
+
  // --------------------------------------------------
sShowHelp();
+
  // '''0. Treat options'''
return 1;
+
  // --------------------------------------------------
}
+
  if (getArg("h", argc, argv)) {
 +
  sShowHelp();
 +
  return 1;
 +
  }
 +
  getArg("-in", inputFileName, argc, argv, "!");
 +
  getArg("-s1", seq1, argc, argv, "HEAGAWGHEE");
 +
  getArg("-s2", seq2, argc, argv, "PAWHEAE");
 +
  getArg("-out", outputFileName, argc, argv, "!");
 +
  getArg("m", matrixFileName, argc, argv, "blosum62.dat");
 +
  getArg("M", matrixStrFileName, argc, argv, "secid.dat");
 +
  getArg("o", openGapPenalty, argc, argv, 12.00);
 +
  getArg("e", extensionGapPenalty, argc, argv, 3.00);
 +
  getArg("-sec", secFileName, argc, argv, "!");
 +
  getArg("-cSeq", cSeq, argc, argv, 0.80);
 +
  getArg("-cStr", cStr, argc, argv, 0.20);
 +
  verbose = getArg("-verbose", argc, argv);
  
getArg("-in", inputFileName, argc, argv, "!");
+
// --------------------------------------------------
getArg("-s1", seq1, argc, argv, "HEAGAWGHEE");
+
// '''1. Load data'''
getArg("-s2", seq2, argc, argv, "PAWHEAE");
+
// --------------------------------------------------
getArg("-out", outputFileName, argc, argv, "!");
+
  string path = getenv("VICTOR_ROOT");
getArg("m", matrixFileName, argc, argv, "blosum62.dat");
+
  if (path.length() < 3)
getArg("M", matrixStrFileName, argc, argv, "secid.dat");
+
    cout << "Warning: environment variable VICTOR_ROOT is not set." << endl;
getArg("o", openGapPenalty, argc, argv, 12.00);
+
  string examplesPath; 
getArg("e", extensionGapPenalty, argc, argv, 3.00);
+
  string dataPath = path + "data/";
getArg("-sec", secFileName, argc, argv, "!");
+
  if (inputFileName != "!"){
getArg("-cSeq", cSeq, argc, argv, 0.80);
+
    inputFileName = examplesPath + inputFileName;
getArg("-cStr", cStr, argc, argv, 0.20);
+
    ifstream inputFile(inputFileName.c_str());
verbose = getArg("-verbose", argc, argv);
+
    if (!inputFile)
 +
      ERROR("Error opening input FASTA file.", exception);
 +
    Alignment ali;
 +
    ali.loadFasta(inputFile);
 +
    if (ali.size() < 1)
 +
      ERROR("Input FASTA file must contain two sequences.", exception);
 +
    seq1 = Alignment::getPureSequence(ali.getTarget());
 +
    seq2 = Alignment::getPureSequence(ali.getTemplate());
 +
  }
 +
  matrixFileName = dataPath + matrixFileName;
 +
  ifstream matrixFile(matrixFileName.c_str());
 +
  if (!matrixFile)
 +
    ERROR("Error opening substitution matrix file.", exception);
 +
  matrixStrFileName = dataPath + matrixStrFileName;
 +
  ifstream matrixStrFile(matrixStrFileName.c_str());
 +
  if (!matrixStrFile)
 +
    ERROR("Error opening structural substitution matrix file.", exception);
 +
  if (secFileName != "!"){
 +
    secFileName = examplesPath + secFileName;
 +
    ifstream secFile(secFileName.c_str());
 +
    if (!secFile)
 +
      ERROR("Error opening secondary structure FASTA file.", exception);
 +
    Alignment aliSec;
 +
    aliSec.loadFasta(secFile);
 +
    if (aliSec.size() < 1)
 +
      ERROR("Secondary structure FASTA file must contain two sequences.", exception);
 +
    sec1 = Alignment::getPureSequence(aliSec.getTarget());
 +
    sec2 = Alignment::getPureSequence(aliSec.getTemplate());
 +
  }
  
// --------------------------------------------------
+
// --------------------------------------------------
// 1. Load data
+
// ''' 2. Output test data'''
// --------------------------------------------------
+
// --------------------------------------------------
 +
  if (verbose){
 +
    fillLine(cout);
 +
    if (secFileName != "!")
 +
        cout << "\nTarget sequence:\n"                << seq1
 +
        << "\n\nTarget secondary structure:\n"  << sec1
 +
        << "\n\nTemplate sequence:\n"            << seq2
 +
        << "\n\nTemplate secondary structure:\n" << sec2
 +
        << "\n" << endl;
 +
    else
 +
        cout << "\nTarget sequence:\n"                << seq1
 +
        << "\n\nTemplate sequence:\n"            << seq2
 +
        << "\n" << endl;
 +
  }
  
string path = getenv("VICTOR_ROOT");
+
// --------------------------------------------------
if (path.length() < 3)
+
// '''3. Calculate alignments'''
cout << "Warning: environment variable VICTOR_ROOT is not set." << endl;
+
  // --------------------------------------------------
string examplesPath;  
+
  SubMatrix sub(matrixFile);
string dataPath = path + "data/";
+
  SubMatrix subStr(matrixStrFile);
if (inputFileName != "!"){
+
  AGPFunction gf(openGapPenalty, extensionGapPenalty);
inputFileName = examplesPath + inputFileName;
+
  AlignmentData *ad;
ifstream inputFile(inputFileName.c_str());
+
  Structure *str;
if (!inputFile)
+
  ScoringScheme *ss;
ERROR("Error opening input FASTA file.", exception);
+
  if (secFileName != "!") {
Alignment ali;
+
    ad = new SecSequenceData(4, seq1, seq2, sec1, sec2);
ali.loadFasta(inputFile);
+
    str = new Sec(&subStr, ad, cStr);
if (ali.size() < 1)
+
    ss = new ScoringS2S(&sub, ad, str, cSeq);
ERROR("Input FASTA file must contain two sequences.", exception);
+
  }
seq1 = Alignment::getPureSequence(ali.getTarget());
+
  else{
seq2 = Alignment::getPureSequence(ali.getTemplate());
+
    ad = new SequenceData(2, seq1, seq2);
}
+
    ss = new ScoringS2S(&sub, ad, 0, 1.00);
matrixFileName = dataPath + matrixFileName;
+
  }
ifstream matrixFile(matrixFileName.c_str());
+
  NWAlign nwAlign(ad, &gf, ss);
if (!matrixFile)
+
  SWAlign swAlign(ad, &gf, ss);
ERROR("Error opening substitution matrix file.", exception);
+
  FSAlign fsAlign(ad, &gf, ss);
matrixStrFileName = dataPath + matrixStrFileName;
+
ifstream matrixStrFile(matrixStrFileName.c_str());
+
if (!matrixStrFile)
+
ERROR("Error opening structural substitution matrix file.", exception);
+
if (secFileName != "!"){
+
secFileName = examplesPath + secFileName;
+
ifstream secFile(secFileName.c_str());
+
if (!secFile)
+
ERROR("Error opening secondary structure FASTA file.", exception);
+
Alignment aliSec;
+
aliSec.loadFasta(secFile);
+
if (aliSec.size() < 1)
+
ERROR("Secondary structure FASTA file must contain two sequences.", exception);
+
sec1 = Alignment::getPureSequence(aliSec.getTarget());
+
sec2 = Alignment::getPureSequence(aliSec.getTemplate());
+
}
+
  
 
+
// --------------------------------------------------
// --------------------------------------------------
+
// '''4. Output alignments'''
// 2. Output test data
+
// --------------------------------------------------
// --------------------------------------------------
+
  if (outputFileName != "!"){
 
+
    outputFileName = examplesPath + outputFileName;
if (verbose){
+
    ofstream outputFile(outputFileName.c_str());
fillLine(cout);
+
    if (!outputFile)
if (secFileName != "!")
+
      ERROR("Error opening output file.", exception);
cout << "\nTarget sequence:\n"                << seq1
+
    cout << "\nSaving output to file: " << outputFileName << "\n" <<endl;
    << "\n\nTarget secondary structure:\n"  << sec1
+
    outputFile << "\nGLOBAL" << endl;
    << "\n\nTemplate sequence:\n"            << seq2
+
    nwAlign.doMatch(outputFile);
    << "\n\nTemplate secondary structure:\n" << sec2
+
    outputFile << "\nLOCAL:" << endl;
    << "\n" << endl;
+
    swAlign.doMatch(outputFile);
else
+
    outputFile << "\nFS:" << endl;
cout << "\nTarget sequence:\n"                << seq1
+
    fsAlign.doMatch(outputFile);
    << "\n\nTemplate sequence:\n"            << seq2
+
  }
    << "\n" << endl;
+
  else{
}
+
    cout << "\nGLOBAL" << endl;
 
+
    nwAlign.doMatch(cout);
// --------------------------------------------------
+
    cout << "\nLOCAL" << endl;
// 3. Calculate alignments
+
    swAlign.doMatch(cout);
// --------------------------------------------------
+
    cout << "\nFS" << endl;
 
+
    fsAlign.doMatch(cout);
SubMatrix sub(matrixFile);
+
  }
SubMatrix subStr(matrixStrFile);
+
  return 0;
AGPFunction gf(openGapPenalty, extensionGapPenalty);
+
}
 
+
AlignmentData *ad;
+
Structure *str;
+
ScoringScheme *ss;
+
 
+
if (secFileName != "!") {
+
ad = new SecSequenceData(4, seq1, seq2, sec1, sec2);
+
str = new Sec(&subStr, ad, cStr);
+
ss = new ScoringS2S(&sub, ad, str, cSeq);
+
}
+
else{
+
ad = new SequenceData(2, seq1, seq2);
+
ss = new ScoringS2S(&sub, ad, 0, 1.00);
+
}
+
 
+
NWAlign nwAlign(ad, &gf, ss);
+
SWAlign swAlign(ad, &gf, ss);
+
FSAlign fsAlign(ad, &gf, ss);
+
 
+
 
+
// --------------------------------------------------
+
// 4. Output alignments
+
// --------------------------------------------------
+
 
+
if (outputFileName != "!"){
+
outputFileName = examplesPath + outputFileName;
+
ofstream outputFile(outputFileName.c_str());
+
if (!outputFile)
+
ERROR("Error opening output file.", exception);
+
cout << "\nSaving output to file: " << outputFileName
+
    << "\n" <<endl;
+
 
+
outputFile << "\nGLOBAL" << endl;
+
nwAlign.doMatch(outputFile);
+
 
+
outputFile << "\nLOCAL:" << endl;
+
swAlign.doMatch(outputFile);
+
 
+
outputFile << "\nFS:" << endl;
+
fsAlign.doMatch(outputFile);
+
 
+
}
+
else{
+
cout << "\nGLOBAL" << endl;
+
nwAlign.doMatch(cout);
+
 
+
cout << "\nLOCAL" << endl;
+
swAlign.doMatch(cout);
+
 
+
cout << "\nFS" << endl;
+
fsAlign.doMatch(cout);
+
 
+
}
+
 
+
return 0;
+
}
+

Latest revision as of 10:31, 19 August 2014

A simple program to test the basic alignment features.

#include <ScoringS2S.h>
#include <NWAlign.h>
#include <FSAlign.h>
#include <SWAlign.h>
#include <SubMatrix.h>
#include <AGPFunction.h>
#include <Sec.h>
#include <Alignment.h>
#include <AlignmentBase.h>
#include <SequenceData.h>
#include <SecSequenceData.h>
#include <GetArg.h>
#include <iostream>
using namespace Biopool;
/// Show command line options and help text.
void  sShowHelp(){
 cout << "\nALIGNMENT TEST"
 << "\nA simple program to test the basic alignment features.\n"
 << "\nOptions:"
 << "\n"
 << "\n   [--in <name>]     \t Name of input FASTA file (alternative to s1 & s2)"
 << "\n   [--s1 <seq>]      \t Target sequence (default = HEAGAWGHEE)"
 << "\n   [--s2 <seq>]      \t Template sequence (default = PAWHEAE)"
 << "\n   [--out <name>]    \t Name of output file (default = to screen)"
 << "\n"
 << "\n   [-m <name>]       \t Name of substitution matrix file (default = blosum62.dat)"
 << "\n   [-M <name>]       \t Name of structural substitution matrix file (default = secid.dat)"
 << "\n"
 << "\n   [-o <double>]     \t Open gap penalty (default = 12.00)"
 << "\n   [-e <double>]     \t Extension gap penalty (default = 3.00)"
 << "\n"
 << "\n   [--sec <name>]    \t Name of secondary structure FASTA file"
 << "\n   [--cSeq <double>] \t Coefficient for sequence alignment (default = 0.80)"
 << "\n   [--cStr <double>] \t Coefficient for structural alignment (default = 0.20)"
 << "\n"
 << "\n   [--verbose]       \t Verbose mode"
 << "\n" << endl;
}
int main(int argc, char **argv){
 // Default parameters
 string inputFileName, outputFileName, matrixFileName, matrixStrFileName, secFileName;
 string seq1, seq2, sec1, sec2;
 double openGapPenalty, extensionGapPenalty;
 double cSeq, cStr;
 bool verbose;
 // --------------------------------------------------
 // 0. Treat options
 // --------------------------------------------------
 if (getArg("h", argc, argv))	{
  sShowHelp();
  return 1;
 }
 getArg("-in", inputFileName, argc, argv, "!");
 getArg("-s1", seq1, argc, argv, "HEAGAWGHEE");
 getArg("-s2", seq2, argc, argv, "PAWHEAE");
 getArg("-out", outputFileName, argc, argv, "!");
 getArg("m", matrixFileName, argc, argv, "blosum62.dat");
 getArg("M", matrixStrFileName, argc, argv, "secid.dat");
 getArg("o", openGapPenalty, argc, argv, 12.00);
 getArg("e", extensionGapPenalty, argc, argv, 3.00);
 getArg("-sec", secFileName, argc, argv, "!");
 getArg("-cSeq", cSeq, argc, argv, 0.80);
 getArg("-cStr", cStr, argc, argv, 0.20);
 verbose = getArg("-verbose", argc, argv); 
// --------------------------------------------------
// 1. Load data
// --------------------------------------------------
 string path = getenv("VICTOR_ROOT");
 if (path.length() < 3)
   cout << "Warning: environment variable VICTOR_ROOT is not set." << endl;
 string examplesPath;  
 string dataPath = path + "data/";
 if (inputFileName != "!"){
   inputFileName = examplesPath + inputFileName;
   ifstream inputFile(inputFileName.c_str());
   if (!inputFile)
     ERROR("Error opening input FASTA file.", exception);
   Alignment ali;
   ali.loadFasta(inputFile);
   if (ali.size() < 1)
     ERROR("Input FASTA file must contain two sequences.", exception);
   seq1 = Alignment::getPureSequence(ali.getTarget());
   seq2 = Alignment::getPureSequence(ali.getTemplate());
 }
 matrixFileName = dataPath + matrixFileName;
 ifstream matrixFile(matrixFileName.c_str());
 if (!matrixFile)
   ERROR("Error opening substitution matrix file.", exception);
 matrixStrFileName = dataPath + matrixStrFileName;
 ifstream matrixStrFile(matrixStrFileName.c_str());
 if (!matrixStrFile)
   ERROR("Error opening structural substitution matrix file.", exception);
 if (secFileName != "!"){
   secFileName = examplesPath + secFileName;
   ifstream secFile(secFileName.c_str());
   if (!secFile)
      ERROR("Error opening secondary structure FASTA file.", exception);
   Alignment aliSec;
   aliSec.loadFasta(secFile);
   if (aliSec.size() < 1)
     ERROR("Secondary structure FASTA file must contain two sequences.", exception);
   sec1 = Alignment::getPureSequence(aliSec.getTarget());
   sec2 = Alignment::getPureSequence(aliSec.getTemplate());
  }
// --------------------------------------------------
//  2. Output test data
// --------------------------------------------------
  if (verbose){
    fillLine(cout);
    if (secFileName != "!")
       cout << "\nTarget sequence:\n"                << seq1
       << "\n\nTarget secondary structure:\n"   << sec1
       << "\n\nTemplate sequence:\n"            << seq2
       << "\n\nTemplate secondary structure:\n" << sec2
       << "\n" << endl;
    else
       cout << "\nTarget sequence:\n"                << seq1
       << "\n\nTemplate sequence:\n"            << seq2
       << "\n" << endl;
  }
// --------------------------------------------------
// 3. Calculate alignments
// --------------------------------------------------
  SubMatrix sub(matrixFile);
  SubMatrix subStr(matrixStrFile);
  AGPFunction gf(openGapPenalty, extensionGapPenalty);
  AlignmentData *ad;
  Structure *str;
  ScoringScheme *ss;
  if (secFileName != "!") {
    ad = new SecSequenceData(4, seq1, seq2, sec1, sec2);
    str = new Sec(&subStr, ad, cStr);
    ss = new ScoringS2S(&sub, ad, str, cSeq);
  }
  else{
    ad = new SequenceData(2, seq1, seq2);
    ss = new ScoringS2S(&sub, ad, 0, 1.00);
  }
  NWAlign nwAlign(ad, &gf, ss);
  SWAlign swAlign(ad, &gf, ss);
  FSAlign fsAlign(ad, &gf, ss);
// --------------------------------------------------
// 4. Output alignments
// --------------------------------------------------
  if (outputFileName != "!"){
    outputFileName = examplesPath + outputFileName;
    ofstream outputFile(outputFileName.c_str());
    if (!outputFile)
      ERROR("Error opening output file.", exception);
    cout << "\nSaving output to file: " << outputFileName << "\n" <<endl;
    outputFile << "\nGLOBAL" << endl;
    nwAlign.doMatch(outputFile);
    outputFile << "\nLOCAL:" << endl;
    swAlign.doMatch(outputFile);
    outputFile << "\nFS:" << endl;
    fsAlign.doMatch(outputFile);
  }
  else{
    cout << "\nGLOBAL" << endl;
    nwAlign.doMatch(cout);
    cout << "\nLOCAL" << endl;
    swAlign.doMatch(cout);
    cout << "\nFS" << endl;
    fsAlign.doMatch(cout);
  }
  return 0;
}