Katzlab dd76ab1d12 Added PTL2 Scripts
These are PTL2 files from Auden 2/9
2023-02-14 11:20:52 -05:00

75 lines
2.0 KiB
C++

// $Id: fastaFormat.cpp 10280 2012-02-06 09:45:26Z itaymay $
#include "fastaFormat.h"
#include "someUtil.h"
#include "errorMsg.h"
#include "ConversionUtils.h"
#include <algorithm>
using namespace std;
// Reads FASTA-formatted sequences that are expected to be aligned.
// Parsing is delegated to readUnAligned(); afterwards
// makeSureAllSeqAreSameLengthAndGetLen() is invoked on the resulting container
// (its return value is not used here).
//   infile - stream holding the FASTA text
//   alph   - alphabet handed on to readUnAligned()
// Returns the populated container.
sequenceContainer fastaFormat::read(istream &infile, const alphabet* alph) {
	sequenceContainer alignedSeqs = readUnAligned(infile, alph);
	alignedSeqs.makeSureAllSeqAreSameLengthAndGetLen();
	return alignedSeqs;
}
// Parses FASTA-formatted text into a sequenceContainer without requiring the
// sequences to share a common length.
//
// Expected layout: a header line starting with '>' (the rest of that line,
// trimmed, becomes the sequence name), followed by zero or more lines of
// sequence data that are concatenated until the next header or the end of
// the input. Blank lines are skipped; spaces and tabs are removed from the
// sequence data.
//
//   infile - stream holding the FASTA text
//   alph   - alphabet passed to every constructed sequence
// Returns the container with one sequence per header, with ids assigned
// 0,1,2,... in file order.
// Errors are reported through errorMsg::reportError when no lines could be
// read, or when a non-blank line appears where a '>' header is expected.
sequenceContainer fastaFormat::readUnAligned(istream &infile, const alphabet* alph) {
	sequenceContainer mySeqData;
	vector<string> seqFileData;
	putFileIntoVectorStringArray(infile,seqFileData);
	if (seqFileData.empty()){
		errorMsg::reportError("unable to open file, or file is empty in fasta format");
	}

	const vector<string>::const_iterator itEnd = seqFileData.end();
	vector<string>::const_iterator it1 = seqFileData.begin();
	int localid=0;
	while (it1 != itEnd) {
		if (it1->empty()) {++it1; continue;} // skip blank lines between records

		string remark; // never filled in by this reader; passed on empty
		string name;
		if ((*it1)[0] == '>') {
			// Everything after the leading '>' is the (untrimmed) name.
			name = it1->substr(1);
			++it1;
		} else {
			LOG(0,<<"problem in line: "<<*it1<<endl);
			errorMsg::reportError("Error reading fasta file, error finding sequence name starting with >",1);
		}

		// Skip blank lines after the header. The end check is essential: a header
		// on the last line (or followed only by blank lines) would otherwise
		// dereference the past-the-end iterator.
		while (it1 != itEnd && it1->empty()) ++it1;

		// Gather sequence lines up to the next header or the end of the input.
		string str;
		while (it1 != itEnd) {
			if (!it1->empty() && (*it1)[0] == '>') break;
			str += *it1;
			++it1;
		}

		// remove spaces and tabs from str
		str = takeCharOutOfString(" \t", str);
		name = trim(name);
		mySeqData.add(sequence(str,name,remark,localid,alph));
		localid++;
	}
	return mySeqData;
}
void fastaFormat::write(ostream &out, const sequenceContainer& sd) {
for (sequenceContainer::constTaxaIterator it5=sd.constTaxaBegin();it5!=sd.constTaxaEnd();++it5) {
out<<">"<<(it5)->name()<<endl;
out<<it5->toString()<<endl;
}
}