mirror of
http://43.156.76.180:8026/YuuMJ/EukPhylo.git
synced 2025-12-28 02:50:25 +08:00
88 lines
2.6 KiB
C++
88 lines
2.6 KiB
C++
// $Id: recognizeFormat.cpp 6780 2009-09-15 00:55:05Z itaymay $
|
|
|
|
#include "recognizeFormat.h"
|
|
#include "maseFormat.h"
|
|
#include "sequenceContainer.h"
|
|
#include "molphyFormat.h"
|
|
#include "phylipFormat.h"
|
|
#include "nexusFormat.h"
|
|
#include "fastaFormat.h"
|
|
#include "clustalFormat.h"
|
|
#include "nexusFormat.h"
|
|
#include "phylipSequentialFormat.h"
|
|
|
|
|
|
// Reads the sequences found in `infile` via readUnAligned() and then calls
// makeSureAllSeqAreSameLengthAndGetLen() on the resulting container before
// handing it back to the caller.
//
// infile - stream positioned at the sequence data.
// alph   - alphabet forwarded unchanged to readUnAligned().
sequenceContainer recognizeFormat::read(istream &infile, const alphabet* alph) {
	sequenceContainer alignedSeqs(readUnAligned(infile, alph));
	alignedSeqs.makeSureAllSeqAreSameLengthAndGetLen();
	return alignedSeqs;
}
|
|
|
|
// Recognizes the format of the sequence data in `infile` from its first
// significant character and returns the container produced by the matching
// format reader:
//   '#'   -> nexusFormat
//   '>'   -> fastaFormat
//   'C'   -> clustalFormat
//   ';'   -> maseFormat
//   digit -> molphyFormat, phylipFormat or phylipSequentialFormat, decided by
//            probing the second and third lines (see below)
// Any other leading character is reported through errorMsg::reportError
// together with the first line of the input.
//
// infile - input stream; a stream in a failed state is reported through
//          errorMsg::reportError. The digit branch rewinds the stream with
//          seekg, so it presumably needs a seekable stream.
// alph   - alphabet forwarded unchanged to the selected reader.
sequenceContainer recognizeFormat::readUnAligned(istream &infile, const alphabet* alph) {
	sequenceContainer sc;
	if (!infile){
		string tmp = "error unable to open sequence input file ";
		errorMsg::reportError(tmp);
	}

	// Skip leading blanks, tabs and line breaks (including '\r' so that files
	// with CRLF line endings are handled as well).
	// The peeked value is kept as int: narrowing it to char would turn bytes
	// >= 0x80 into negative values, which isdigit() must not be given.
	int check = infile.peek();
	while ((check == ' ') || (check == '\n') || (check == '\t') || (check == '\r')) {
		infile.get();
		check = infile.peek();
	}

	switch (check){
	case '#':
		sc=nexusFormat::readUnAligned(infile,alph);
		break;
	case '>':
		sc=fastaFormat::readUnAligned(infile,alph);
		break;
	case 'C':
		sc=clustalFormat::readUnAligned(infile,alph);
		break;
	case ';':
		sc=maseFormat::readUnAligned(infile,alph);
		break;

	default:
		if (isdigit(check)){
			// Here it can be either MOLPHY format or one of the PHYLIP type
			// formats (interleaved, sequential). In PHYLIP format there are
			// lines that are not empty, but the first 10 characters are space.
			string s;
			getline(infile,s, '\n' ); // first line: numbers in both formats
			getline(infile,s, '\n' ); // second line

			// A space anywhere except as the very last character of the
			// second line marks the file as PHYLIP; no space at all (or only
			// a trailing one) means MOLPHY.
			const string::size_type firstSpace = s.find_first_of(' ');
			const bool isPhylip = (firstSpace != string::npos) && (firstSpace + 1 != s.size());

			if (!isPhylip) {
				// The probing reads may have hit end-of-file; reset the state
				// flags so that the rewind below cannot silently fail.
				infile.clear();
				infile.seekg(0, ios::beg); // file return to the beginning
				sc=molphyFormat::readUnAligned(infile,alph);
			} else {
				// The third line decides between the two PHYLIP readers. It is
				// read into a fresh string so that a failed read leaves it
				// empty instead of re-testing the second line.
				string thirdLine;
				getline(infile,thirdLine, '\n' );
				const bool startsWithSpace = !thirdLine.empty() && (thirdLine[0] == ' ');

				infile.clear();
				infile.seekg(0, ios::beg); // file return to the beginning

				// NOTE(review): the earlier comment here said "interleaved
				// will begin with a space, sequential not", which is the
				// opposite of this dispatch. The dispatch is kept unchanged -
				// confirm against the two format readers.
				if (startsWithSpace)
					sc = phylipSequentialFormat::readUnAligned(infile, alph);
				else
					sc = phylipFormat::readUnAligned(infile,alph);
			}
		}
		else{
			string line;
			getline(infile, line, '\n');
			string tmp2 = "The program can't recognise your format!";
			tmp2+="\nThis is the first line in your format:\n";
			tmp2+=line;
			errorMsg::reportError(tmp2);
		}
		break;
	}
	return sc;
}
|