mirror of
http://43.156.76.180:8026/YuuMJ/EukPhylo.git
synced 2025-12-28 07:10:24 +08:00
1364 lines
40 KiB
C++
1364 lines
40 KiB
C++
// $Id: tree.cpp 13572 2015-11-24 15:21:53Z haim $
|
|
#include "definitions.h"
|
|
#include "tree.h"
|
|
#include "treeUtil.h"
|
|
#include "logFile.h"
|
|
#include "someUtil.h"
|
|
#include <cassert>
|
|
#include <algorithm>
|
|
#include <iostream>
|
|
#include <iomanip>
|
|
#include <fstream>
|
|
#include <ctime>
|
|
|
|
using namespace std;
|
|
|
|
const MDOUBLE tree::FLAT_LENGTH_VALUE = 0.3f;
|
|
const int tree::TREE_NULL = -1;
|
|
const MDOUBLE tree::SHORT_LENGTH_VALUE = 0.000001f;
|
|
|
|
|
|
//removeSon: remove pSon from sons list.
|
|
//does not delete pSon
|
|
void tree::TreeNode::removeSon(TreeNode* pSon) {
|
|
vector<nodeP>::iterator vec_iter = remove(_sons.begin(), _sons.end(), pSon);
|
|
_sons.erase(vec_iter,_sons.end()); // pg 1170, primer.
|
|
}
|
|
|
|
void tree::TreeNode::claimSons(){
|
|
for(int i=0;i<getNumberOfSons();i++) {
|
|
getSon(i)->setFather(this);
|
|
}
|
|
}
|
|
|
|
/********************************************************************************************
|
|
getDistance2ROOT()
|
|
*********************************************************************************************/
|
|
MDOUBLE tree::TreeNode::getDistance2ROOT(){
|
|
if(this->isRoot())
|
|
return 0.0;
|
|
else
|
|
return ( this->dis2father() + this->father()->getDistance2ROOT() );
|
|
}
|
|
/********************************************************************************************
|
|
getMinimalDistance2OTU()
|
|
*********************************************************************************************/
|
|
MDOUBLE tree::TreeNode::getMinimalDistance2OTU(){
|
|
if(this->isLeaf())
|
|
return 0.0;
|
|
else{
|
|
int numberOfSons = this->getNumberOfSons();
|
|
switch (numberOfSons)
|
|
{
|
|
case 0:
|
|
LOGnOUT(3, <<"ERROR: number of sons for node is zero, but not return leaf\n");
|
|
return -1;
|
|
break;
|
|
case 1:
|
|
return ( this->getSon(0)->dis2father() + this->getSon(0)->getMinimalDistance2OTU() );
|
|
break;
|
|
case 2:
|
|
return ( min(
|
|
this->getSon(0)->dis2father() + this->getSon(0)->getMinimalDistance2OTU(),
|
|
this->getSon(1)->dis2father() + this->getSon(1)->getMinimalDistance2OTU()
|
|
) );
|
|
break;
|
|
case 3:
|
|
return ( min(min(
|
|
this->getSon(0)->dis2father() + this->getSon(0)->getMinimalDistance2OTU(),
|
|
this->getSon(1)->dis2father() + this->getSon(1)->getMinimalDistance2OTU()),
|
|
this->getSon(2)->dis2father() + this->getSon(2)->getMinimalDistance2OTU() )
|
|
);
|
|
break;
|
|
case 4:
|
|
return ( min(min(min(
|
|
this->getSon(0)->dis2father() + this->getSon(0)->getMinimalDistance2OTU(),
|
|
this->getSon(1)->dis2father() + this->getSon(1)->getMinimalDistance2OTU()),
|
|
this->getSon(2)->dis2father() + this->getSon(2)->getMinimalDistance2OTU()),
|
|
this->getSon(3)->dis2father() + this->getSon(3)->getMinimalDistance2OTU() )
|
|
);
|
|
break;
|
|
default:
|
|
LOGnOUT(3, <<"ERROR: number of sons for node "<< numberOfSons<<" is not implemented in getMinimalDistance2OTU\n");
|
|
return -1;
|
|
}
|
|
|
|
|
|
}
|
|
}
|
|
/********************************************************************************************
|
|
getMinimalDistance2OTU()
|
|
This implementation is only for binary trees and tr-furcating.
|
|
Can easily be generalized to arbitrary number of sons.
|
|
*********************************************************************************************/
|
|
int tree::TreeNode::getMinimalNumOfNodes2OTU(){
|
|
//int minimalNumOfNodes2OTU = 1;
|
|
if(this->isLeaf())
|
|
return 0;
|
|
else{
|
|
int numberOfSons = this->getNumberOfSons();
|
|
switch (numberOfSons)
|
|
{
|
|
case 0:
|
|
LOGnOUT(3, <<"ERROR: number of sons for node is zero, but not return leaf\n");
|
|
return -1;
|
|
break;
|
|
case 1:
|
|
return ( 1 + this->getSon(0)->getMinimalNumOfNodes2OTU());
|
|
break;
|
|
case 2:
|
|
return ( min(
|
|
1 + this->getSon(0)->getMinimalNumOfNodes2OTU(),
|
|
1 + this->getSon(1)->getMinimalNumOfNodes2OTU()
|
|
) );
|
|
break;
|
|
case 3:
|
|
return ( min(min(
|
|
1 + this->getSon(0)->getMinimalNumOfNodes2OTU(),
|
|
1 + this->getSon(1)->getMinimalNumOfNodes2OTU() ),
|
|
1 + this->getSon(2)->getMinimalNumOfNodes2OTU() )
|
|
);
|
|
break;
|
|
case 4:
|
|
return ( min(min(min(
|
|
1 + this->getSon(0)->getMinimalNumOfNodes2OTU(),
|
|
1 + this->getSon(1)->getMinimalNumOfNodes2OTU()),
|
|
1 + this->getSon(2)->getMinimalNumOfNodes2OTU()),
|
|
1 + this->getSon(3)->getMinimalNumOfNodes2OTU() )
|
|
);
|
|
break;
|
|
default:
|
|
LOGnOUT(3, <<"ERROR: number of sons for node "<< numberOfSons<<" is not implemented in getMinimalNumOfNodes2OTU\n");
|
|
return -1;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
//*******************************************************************************
|
|
// Constructors Destructors
|
|
//*******************************************************************************
|
|
tree::tree() {
|
|
_root=NULL;
|
|
}
|
|
|
|
// this function will accept "-" for cases where the input in from the standard input (cin)
|
|
tree::tree(const string& treeFileName, vector<char>& isFixed) {
|
|
ifstream in;
|
|
istream* inPtr = &cin; // default
|
|
if (treeFileName != "-"){
|
|
in.open(treeFileName.c_str());
|
|
if (! in.is_open())
|
|
errorMsg::reportError(string("Error - unable to open tree file ")+treeFileName,1);
|
|
inPtr = ∈
|
|
}
|
|
if (readPhylipTreeTopology(*inPtr,isFixed)) {
|
|
if (in.is_open())
|
|
in.close();
|
|
create_names_to_internal_nodes();
|
|
makeSureAllBranchesArePositive();
|
|
return;
|
|
}
|
|
if (in.is_open())
|
|
in.close();
|
|
errorMsg::reportError(string("Unable to read tree from the file ")+treeFileName,1);
|
|
}
|
|
|
|
// this function will accept "-" for cases where the input in from the standard input (cin)
|
|
tree::tree(const string& treeFileName) {
|
|
ifstream in;
|
|
istream* inPtr = &cin; // default
|
|
if (treeFileName != "-"){
|
|
in.open(treeFileName.c_str());
|
|
if (! in.is_open())
|
|
errorMsg::reportError(string("Error - unable to open tree file ")+treeFileName,1);
|
|
inPtr = ∈
|
|
}
|
|
if (readPhylipTreeTopology(*inPtr)) {
|
|
if (in.is_open())
|
|
in.close();
|
|
create_names_to_internal_nodes();
|
|
makeSureAllBranchesArePositive();
|
|
return;
|
|
}
|
|
if (in.is_open())
|
|
in.close();
|
|
errorMsg::reportError(string("Unable to read tree from the file ")+treeFileName,1);
|
|
}
|
|
|
|
tree::tree(istream &in) {
|
|
if (readPhylipTreeTopology(in)) {
|
|
create_names_to_internal_nodes();
|
|
makeSureAllBranchesArePositive();
|
|
return;
|
|
}
|
|
errorMsg::reportError("Unable to read phylip tree file",1);// also quit the program
|
|
}
|
|
|
|
tree::tree(istream &in,vector<char>& isFixed) {
|
|
if (readPhylipTreeTopology(in,isFixed)) {
|
|
create_names_to_internal_nodes();
|
|
makeSureAllBranchesArePositive();
|
|
return;
|
|
}
|
|
errorMsg::reportError("Unable to read phylip tree file",1);// also quit the program
|
|
}
|
|
|
|
tree::tree(const vector<char>& tree_contents) {
|
|
readPhylipTreeTopology(tree_contents);
|
|
create_names_to_internal_nodes();
|
|
makeSureAllBranchesArePositive();
|
|
return;
|
|
}
|
|
|
|
tree::tree(const vector<char>& tree_contents, vector<char>& isFixed) {
|
|
readPhylipTreeTopology(tree_contents,isFixed);
|
|
create_names_to_internal_nodes();
|
|
makeSureAllBranchesArePositive();
|
|
return;
|
|
}
|
|
|
|
tree::tree(const tree &otherTree) {
|
|
_root = NULL;
|
|
if (otherTree._root == NULL)
|
|
return; // if tree to copy is empty.
|
|
createRootNode();
|
|
_root->setName(otherTree._root->name());
|
|
_root->setID(otherTree._root->id());
|
|
_root->setComment(otherTree._root->getComment());
|
|
for (int i=0; i <otherTree._root->getNumberOfSons(); ++i) {
|
|
recursiveBuildTree( _root, otherTree.getRoot()->getSon(i));
|
|
}
|
|
}
|
|
|
|
|
|
tree& tree::operator=(const tree &otherTree) {
|
|
if (this == &otherTree)
|
|
return *this;
|
|
if (otherTree._root == NULL) {
|
|
clear();
|
|
return *this; // if tree to copy is empty.
|
|
}
|
|
createRootNode();
|
|
_root->setName(otherTree._root->name());
|
|
_root->setComment(otherTree._root->getComment());
|
|
for (int i=0; i <otherTree._root->getNumberOfSons(); ++i) {
|
|
recursiveBuildTree( _root, otherTree.getRoot()->getSon(i));
|
|
}
|
|
return *this;
|
|
}
|
|
|
|
void tree::clear() {
|
|
vector<nodeP> vec;
|
|
getAllNodes(vec, _root);
|
|
|
|
for (int k=0; k < vec.size(); k++) {
|
|
delete(vec[k]);
|
|
}
|
|
|
|
_nodes = 0;
|
|
_leaves =0;
|
|
_root = NULL;
|
|
|
|
}
|
|
|
|
//*******************************************************************************
|
|
// questions on the tree topology
|
|
//*******************************************************************************
|
|
|
|
//stores the father and sons of node inNodeP in vNeighbourVector
|
|
void tree::getNeigboursOfNode(vector<nodeP> &vNeighbourVector, const nodeP inNodeP) const {
|
|
vNeighbourVector.clear();
|
|
for (int i=0; i < inNodeP->getNumberOfSons();++i) {
|
|
vNeighbourVector.push_back(inNodeP->getSon(i));
|
|
}
|
|
if (getRoot() != inNodeP)
|
|
vNeighbourVector.push_back(inNodeP->father());
|
|
}
|
|
|
|
|
|
// get nodePTR from name
|
|
// "myNode" is a pointer to the root of the subtree in which we want to find the node "inName"
|
|
tree::nodeP tree::findNodeByName(const string inName, nodeP myNode) const{
|
|
if (myNode==NULL) myNode=_root;
|
|
if (myNode->name() == inName) return myNode;
|
|
for (int i=0 ; i < myNode->getNumberOfSons(); i++ ) {
|
|
nodeP answer = findNodeByName(inName, myNode->getSon(i));
|
|
if (answer!=NULL) return answer;
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
|
|
// get nodePTR from id
|
|
// similar to tree::findNodeByName
|
|
// "myNode" is a pointer to the root of the subtree in which we want to find the node "inId"
|
|
tree::nodeP tree::findNodeById(const int inId, nodeP myNode) const{
|
|
if (myNode==NULL) myNode=_root;
|
|
if (myNode->id() == inId) return myNode;
|
|
for (int i=0 ; i < myNode->getNumberOfSons(); i++ ) {
|
|
nodeP answer = findNodeById(inId, myNode->getSon(i));
|
|
if (answer!=NULL) return answer;
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
//getPathBetweenAnyTwoNodes: store all nodes on the path from node1 to node2 in path
|
|
//the first node in path is node1. the last node is node2
|
|
//1. store all nodes from node1 to the root and node2 to the root
|
|
//2. starting from the root - finds the first node (common_father) which is father to both node1 and node2
|
|
//3. store in <path> all nodes in the path from node1 to common_father, from node2 to common_father and common_father itself
|
|
void tree::getPathBetweenAnyTwoNodes(vector<nodeP> &path, const nodeP node1, const nodeP node2) const {
|
|
|
|
path.clear();
|
|
vector<nodeP> pathMatrix1;
|
|
vector<nodeP> pathMatrix2;
|
|
|
|
nodeP nodeup = node1;
|
|
while (nodeup != _root) {
|
|
pathMatrix1.push_back(nodeup);
|
|
nodeup = nodeup->father();
|
|
}
|
|
pathMatrix1.push_back(_root);
|
|
|
|
nodeup = node2;
|
|
while (nodeup != _root) {
|
|
pathMatrix2.push_back(nodeup);
|
|
nodeup = nodeup->father();
|
|
}
|
|
pathMatrix2.push_back(_root);
|
|
|
|
int tmp1 = pathMatrix1.size()-1;
|
|
int tmp2 = pathMatrix2.size()-1;
|
|
|
|
while ((tmp1 >= 0) && (tmp2 >= 0)) {
|
|
if (pathMatrix1[tmp1] != pathMatrix2[tmp2])
|
|
break;
|
|
tmp1--;
|
|
tmp2--;
|
|
}
|
|
|
|
for (int y=0; y <= tmp1; ++y)
|
|
path.push_back(pathMatrix1[y]);
|
|
path.push_back(pathMatrix1[tmp1+1]); // pushing once, the TreeNode that was common father to both.
|
|
for (int j=tmp2; j >= 0; --j) {
|
|
path.push_back(pathMatrix2[j]);
|
|
}
|
|
return;
|
|
}
|
|
|
|
|
|
void tree::getFromLeavesToRoot(vector<nodeP> &vNeighbourVector) const {
|
|
getFromRootToLeaves(vNeighbourVector);
|
|
reverse(vNeighbourVector.begin(),vNeighbourVector.end());
|
|
}
|
|
|
|
|
|
void tree::getFromRootToLeaves(vector<nodeP> &vec) const {
|
|
getFromNodeToLeaves(vec,_root);
|
|
}
|
|
|
|
|
|
void tree::getFromNodeToLeaves(vector<nodeP> &vec, const nodeP fromHereDown) const {
|
|
vec.push_back(fromHereDown);
|
|
for (int k=0; k < fromHereDown->getNumberOfSons(); k++) {
|
|
getFromNodeToLeaves(vec, fromHereDown->getSon(k));
|
|
}
|
|
return;
|
|
}
|
|
|
|
|
|
void tree::getAllHTUs(vector<nodeP> &vec, const nodeP fromHereDown ) const {
|
|
vec.clear();
|
|
getAllHTUsPrivate(vec,fromHereDown);
|
|
}
|
|
|
|
|
|
void tree::getAllHTUsPrivate(vector<nodeP> &vec, const nodeP fromHereDown ) const {
|
|
if (fromHereDown == NULL) return;
|
|
if (fromHereDown->isInternal()) vec.push_back(fromHereDown);
|
|
for (int k=0; k < fromHereDown->getNumberOfSons(); k++) {
|
|
getAllHTUsPrivate(vec,fromHereDown->getSon(k));
|
|
}
|
|
return;
|
|
}
|
|
|
|
|
|
void tree::getAllNodes(vector<nodeP> &vec, const nodeP fromHereDown ) const {
|
|
vec.clear();
|
|
getAllNodesPrivate(vec,fromHereDown);
|
|
}
|
|
|
|
|
|
void tree::getAllNodesPrivate(vector<nodeP> &vec, const nodeP fromHereDown ) const {
|
|
//DFS: depth first search
|
|
if (fromHereDown == NULL)
|
|
return;
|
|
vec.push_back(fromHereDown);
|
|
for (int k=0; k < fromHereDown->getNumberOfSons(); k++) {
|
|
getAllNodesPrivate(vec,fromHereDown->getSon(k));
|
|
}
|
|
return;
|
|
}
|
|
|
|
|
|
void tree::getAllLeaves(vector<nodeP> &vec, const nodeP fromHereDown ) const {
|
|
vec.clear();
|
|
getAllLeavesPrivate(vec,fromHereDown);
|
|
}
|
|
|
|
|
|
void tree::getAllLeavesPrivate(vector<nodeP> &vec, const nodeP fromHereDown ) const {
|
|
if (fromHereDown == NULL) return;
|
|
if (fromHereDown->isLeaf()) vec.push_back(fromHereDown);
|
|
for (int k=0; k < fromHereDown->getNumberOfSons(); k++) {
|
|
getAllLeavesPrivate(vec,fromHereDown->getSon(k));
|
|
}
|
|
return;
|
|
}
|
|
|
|
MDOUBLE tree::findLengthBetweenAnyTwoNodes(const nodeP node1, const nodeP node2) const {
|
|
vector<nodeP> pathMatrix;
|
|
MDOUBLE sumOfDistances =0;
|
|
getPathBetweenAnyTwoNodes(pathMatrix, node1, node2);
|
|
for (int i=0; i < pathMatrix.size() ; i++) {
|
|
// two cases: first, the previous node is closer to the root
|
|
// than the current one. NOTE: this can not be the case for the
|
|
// first node in the path
|
|
if (i>0 && pathMatrix[i]->father() == pathMatrix[i-1])
|
|
sumOfDistances += pathMatrix[i]->dis2father();
|
|
else
|
|
// else: the next node is closer to the root than this node
|
|
// again, it can not be the last node in the path
|
|
if (i<pathMatrix.size()-1 && pathMatrix[i]->father() == pathMatrix[i+1])
|
|
sumOfDistances += pathMatrix[i]->dis2father();
|
|
// if both cases are false, then the current node is the
|
|
// closest to the root over the path, and therefor the
|
|
// distance to its father is not in the path at all.
|
|
}
|
|
return sumOfDistances;
|
|
}
|
|
|
|
// simular to above, but for all nodes at once. O(n^3) or so, but this should not be an issue
|
|
// in any reasonable scenario
|
|
// only disTab[i][j] is filled. disTab[j][i] remains zero.
|
|
void tree::getTreeDistanceTableAndNames(VVdouble& disTab, vector <string>& vNames) const {
|
|
vector<nodeP> nodepV;
|
|
getAllLeaves(nodepV, _root);
|
|
disTab.resize(nodepV.size());
|
|
vNames.resize(nodepV.size());
|
|
for (int i=0;i<nodepV.size();++i) {
|
|
disTab[i].resize(nodepV.size());
|
|
vNames[i]=nodepV[i]->name();
|
|
for(int j=i+1;j<nodepV.size();++j){
|
|
disTab[i][j]=findLengthBetweenAnyTwoNodes(nodepV[i],nodepV[j]);
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
// find length between two neighbouring nodes only
|
|
MDOUBLE tree::lengthBetweenNodes(const nodeP i, const nodeP j) const {
|
|
if (i->father() == j)
|
|
return i->dis2father();
|
|
assert (j->father() == i);
|
|
return j->dis2father();
|
|
}
|
|
|
|
//*******************************************************************************
|
|
// change tree topoplogy parameters - should be applied carefully
|
|
//*******************************************************************************
|
|
|
|
//set the new root at p_iNewRoot
|
|
// The method doesn't convert an "unrooted tree" = "a tree in which the root has 3 sons"
|
|
// to a rooted one = "a tree in which the root has <= 2 sons".
|
|
// The new root will still have 3 sons.
|
|
void tree::rootAt(const nodeP p_iNewRoot) {
|
|
if (_root == p_iNewRoot)
|
|
return;
|
|
vector<nodeP> pathMatrix;
|
|
getPathBetweenAnyTwoNodes(pathMatrix, _root, p_iNewRoot);
|
|
//pathMatrix size is always bigger than 2.
|
|
|
|
for (int i = 0; i < pathMatrix.size() - 1 ; i++) {
|
|
pathMatrix[i]->_father = pathMatrix[i+1];
|
|
pathMatrix[i]->setDisToFather( pathMatrix[i+1]->dis2father() );
|
|
pathMatrix[i]->removeSon(pathMatrix[i+1]);
|
|
pathMatrix[i+1]->_sons.push_back(pathMatrix[i+1]->father());
|
|
pathMatrix[i+1]->_father = NULL;
|
|
}
|
|
_root = p_iNewRoot;
|
|
}
|
|
|
|
|
|
void tree::makeSureAllBranchesArePositive() {
|
|
if (!withBranchLength()) {
|
|
LOGnOUT(3,<<"\n WARN: Tree with no branch length! Create Flat tree with all branches= "<<tree::FLAT_LENGTH_VALUE<<endl);
|
|
createFlatLengthMatrix(tree::FLAT_LENGTH_VALUE);
|
|
return;
|
|
}
|
|
vector<nodeP> _nodevec;
|
|
getAllNodes(_nodevec,_root);
|
|
for (int i=0; i < _nodevec.size(); ++i) {
|
|
if (_nodevec[i]!=_root) {
|
|
if (_nodevec[i]->dis2father()<=0) {
|
|
_nodevec[i]->setDisToFather(tree::SHORT_LENGTH_VALUE);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
|
|
void tree::makeSureAllBranchesAreLargerThanEpsilon(MDOUBLE epsilon) {
|
|
vector<nodeP> _nodevec;
|
|
getAllNodes(_nodevec,_root);
|
|
for (int i=0; i < _nodevec.size(); ++i) {
|
|
if (_nodevec[i]!=_root) {
|
|
if (_nodevec[i]->dis2father()<epsilon) {
|
|
LOGnOUT(4,<<" @@@ Warning: brachLength too short:"<<endl
|
|
<<" - the node: "<<_nodevec[i]->name()<<", length: "<<_nodevec[i]->dis2father()<<" is changed to: "<<epsilon<<endl);
|
|
_nodevec[i]->setDisToFather(epsilon);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
MDOUBLE tree::getAllBranchesLengthSum() {
|
|
MDOUBLE totalBranchLength = 0.0;
|
|
vector<nodeP> _nodevec;
|
|
getAllNodes(_nodevec,_root);
|
|
for (int i=0; i < _nodevec.size(); ++i) {
|
|
if (_nodevec[i]!=_root) {
|
|
totalBranchLength += _nodevec[i]->dis2father();
|
|
}
|
|
}
|
|
return totalBranchLength;
|
|
}
|
|
|
|
//create new names to all internal nodes.
|
|
//the new name will be NXX, where XX is htu number
|
|
void tree::create_names_to_internal_nodes() {
|
|
vector<nodeP> htuVec;
|
|
getAllHTUs(htuVec,_root);
|
|
|
|
for (int i=0; i<htuVec.size(); ++i) {
|
|
string name = int2string(i+1);
|
|
htuVec[i]->setName((string)"N" + name);
|
|
}
|
|
}
|
|
|
|
|
|
void tree::multipleAllBranchesByFactor(MDOUBLE InFactor) {
|
|
vector<nodeP> vec;
|
|
getAllNodes(vec,_root );
|
|
for (int i = 0; i < vec.size(); ++i) {
|
|
if (vec[i]->father() != NULL)
|
|
vec[i]->setDisToFather(vec[i]->dis2father() * InFactor);
|
|
}
|
|
_root->setDisToFather(TREE_NULL);
|
|
}
|
|
|
|
|
|
void tree::createFlatLengthMatrix(const MDOUBLE newFlatDistance) {
|
|
vector<nodeP> vec;
|
|
getAllNodes(vec,_root );
|
|
for (int i=0; i< vec.size(); ++i) {
|
|
if (vec[i]->father() != NULL) vec[i]->setDisToFather(newFlatDistance);
|
|
}
|
|
}
|
|
|
|
/*
|
|
void tree::set_length_to_father(nodeP iSon, MDOUBLE dLength) {
|
|
iSon->setDisToFather(dLength);
|
|
}
|
|
*/
|
|
|
|
// helper function
|
|
class eqNameVLOCAL {
|
|
public:
|
|
explicit eqNameVLOCAL(const string& x) : _x(x) {}
|
|
const string& _x;
|
|
bool operator() (const tree::nodeP y){
|
|
return _x == y->name();
|
|
}
|
|
};
|
|
|
|
// removes sonNode from its father according to the name of sonNode
|
|
// this function should ONLY be used when the node, sonNode, is to be recycled soon!
|
|
// because this function does not change the number of leaves nor the number of nodes!
|
|
// nor does it change the father of sonNode.
|
|
void tree::removeNodeFromSonListOfItsFather(nodeP sonNode) {
|
|
vector<tree::nodeP>::iterator vec_iter;
|
|
vec_iter = remove_if(sonNode->_father->_sons.begin(), sonNode->_father->_sons.end(), eqNameVLOCAL(sonNode->name()));
|
|
sonNode->father()->_sons.erase(vec_iter,sonNode->father()->_sons.end()); // pg 1170, primer.
|
|
}
|
|
|
|
|
|
//*******************************************************************************
|
|
// Input-Output
|
|
//*******************************************************************************
|
|
|
|
|
|
void tree::output(string treeOutFile, TREEformats fmt, bool withHTU ) const {
|
|
ofstream os(treeOutFile.c_str());
|
|
output(os, fmt, withHTU);
|
|
os.close();
|
|
}
|
|
|
|
void tree::output(ostream& os, TREEformats fmt, bool withHTU) const {
|
|
os<<setiosflags(ios::fixed);
|
|
if (_root == NULL) {
|
|
LOG(1,<<" empty tree ");
|
|
return;
|
|
}
|
|
if (fmt == PHYLIP)
|
|
outputInPhylipTreeFormat(os, withHTU);
|
|
else if (fmt == PAML)
|
|
outputInPamlTreeFormat(os, withHTU);
|
|
else if (fmt == ANCESTOR)
|
|
outputInAncestorTreeFormat(os,withHTU);
|
|
else if (fmt == ANCESTORID)
|
|
outputInAncestorIdTreeFormat(os,withHTU);
|
|
os<<endl;
|
|
}
|
|
|
|
void tree::outputInAncestorTreeFormat(ostream& treeOutStream, bool distances) const{
|
|
time_t ltime;
|
|
int i,k,spaces;
|
|
vector<nodeP> vec;
|
|
int maxNameLen = 0;
|
|
|
|
getAllLeaves(vec,_root);
|
|
for (int w=0; w<vec.size();++w) {
|
|
if (maxNameLen<vec[w]->name().size()) maxNameLen = vec[w]->name().size();
|
|
}
|
|
maxNameLen++; // this is just the longest name of taxa plus one
|
|
|
|
|
|
|
|
time( <ime );
|
|
treeOutStream<<"# created on "<< ctime( <ime ) ;
|
|
|
|
treeOutStream<<"name";
|
|
spaces = maxNameLen-4;
|
|
for (k=0;k<spaces;++k) treeOutStream<<" ";
|
|
|
|
treeOutStream<<" parent";
|
|
spaces = 7-6;
|
|
for (k=0;k<spaces;++k) treeOutStream<<" ";
|
|
|
|
if (distances) {
|
|
treeOutStream<<"disance to father";
|
|
treeOutStream<<" ";
|
|
}
|
|
|
|
treeOutStream<<" child";
|
|
spaces = maxNameLen-4;
|
|
for (k=0;k<spaces;++k) treeOutStream<<" ";
|
|
|
|
treeOutStream<<endl;
|
|
|
|
|
|
for (i=0; i<vec.size();++i) {
|
|
treeOutStream<<vec[i]->name();
|
|
spaces = maxNameLen-vec[i]->name().size();
|
|
for (k=0;k<spaces;++k) treeOutStream<<" ";
|
|
|
|
if (vec[i] != _root) {
|
|
treeOutStream<<vec[i]->father()->name();
|
|
spaces = 7-vec[i]->father()->name().size();
|
|
for (k=0;k<spaces;++k) treeOutStream<<" ";
|
|
}
|
|
else {
|
|
treeOutStream<<"root!";
|
|
spaces = 7-5;
|
|
for (k=0;k<spaces;++k) treeOutStream<<" ";
|
|
}
|
|
|
|
if ((vec[i] != _root) && distances) {
|
|
treeOutStream<<vec[i]->dis2father();
|
|
}
|
|
|
|
for (int j=0; j < vec[i]->getNumberOfSons(); j++) {
|
|
treeOutStream<<" "<<vec[i]->_sons[j]->name();
|
|
}
|
|
treeOutStream<<endl;
|
|
}
|
|
|
|
vec.clear();
|
|
getAllHTUs(vec,_root );
|
|
|
|
for (i=0; i<vec.size();++i) {
|
|
treeOutStream<<vec[i]->name();
|
|
spaces = maxNameLen-vec[i]->name().size();
|
|
for (k=0;k<spaces;++k) treeOutStream<<" ";
|
|
|
|
if (vec[i] != _root) {
|
|
treeOutStream<<vec[i]->father()->name();
|
|
spaces = 7-vec[i]->father()->name().size();
|
|
for (k=0;k<spaces;++k) treeOutStream<<" ";
|
|
}
|
|
else {
|
|
treeOutStream<<"root!";
|
|
spaces = maxNameLen-5;
|
|
for (k=0;k<spaces;++k) treeOutStream<<" ";
|
|
}
|
|
|
|
if (vec[i] != _root && distances) treeOutStream<<vec[i]->dis2father();
|
|
|
|
for (int j=0; j < vec[i]->getNumberOfSons(); j++) {
|
|
treeOutStream<<" "<<vec[i]->_sons[j]->name();
|
|
}
|
|
treeOutStream<<endl;
|
|
}
|
|
}
|
|
|
|
void tree::outputInPhylipTreeFormat(ostream& os, bool withHTU ) const {
|
|
// special case of a tree with 1 or 2 taxa.
|
|
if (getLeavesNum() == 1) {
|
|
os<<"("<<_root->name()<<")"<<endl;
|
|
return;
|
|
}
|
|
else if ((getLeavesNum() == 2) && (_root->getNumberOfSons()==1)) { // very special case of a root with one son.
|
|
os<<"("<<_root->name()<<":0.0";
|
|
if (_root->getComment().length()) os << "[&&NHX" << _root->getComment() <<"]";
|
|
os<<",";
|
|
os<<_root->getSon(0)->name()<<":" <<dec<<_root->getSon(0)->dis2father();
|
|
if (_root->getSon(0)->getComment().length()) os << "[&&NHX" << _root->getSon(0)->getComment() <<"]";
|
|
os <<")"<<endl;
|
|
return;
|
|
}
|
|
// ========================================
|
|
os<<"(";
|
|
// going over all the son
|
|
int i;
|
|
for (i=0; i<_root->getNumberOfSons()-1; ++i)
|
|
{
|
|
print_from(_root->getSon(i),os, withHTU);
|
|
os<<",";
|
|
}
|
|
|
|
print_from(_root->getSon(i),os, withHTU);
|
|
os<<")";
|
|
if (withHTU==true) os<<_root->name();
|
|
if (_root->getComment().length()) os << "[&&NHX" << _root->getComment() <<"]";
|
|
char c=';';// 59 is dot-line
|
|
os<<c;
|
|
}
|
|
|
|
string tree::stringTreeInPhylipTreeFormat(bool withHTU ) const {
|
|
string treeString = "";
|
|
// special case of a tree with 1 or 2 taxa.
|
|
if (getLeavesNum() == 1) {
|
|
treeString += "(" + _root->name() + ")" + "\n";
|
|
return (treeString);
|
|
}
|
|
else if ((getLeavesNum() == 2) && (_root->getNumberOfSons()==1)) { // very special case of a root with one son.
|
|
treeString += "(" + _root->name() + ":0.0";
|
|
if (_root->getComment().length()) treeString += "[&&NHX" + _root->getComment() + "]";
|
|
treeString += ",";
|
|
treeString +=_root->getSon(0)->name() + ":" + double2string(_root->getSon(0)->dis2father());
|
|
if (_root->getSon(0)->getComment().length()) treeString += "[&&NHX" + _root->getSon(0)->getComment() + "]";
|
|
treeString += ")\n";
|
|
return (treeString);
|
|
}
|
|
// ========================================
|
|
treeString += "(";
|
|
// going over all the son
|
|
int i;
|
|
for (i=0; i<_root->getNumberOfSons()-1; ++i)
|
|
{
|
|
string_print_from(_root->getSon(i),treeString, withHTU);
|
|
treeString += ",";
|
|
}
|
|
|
|
string_print_from(_root->getSon(i),treeString, withHTU);
|
|
treeString += ")";
|
|
if (withHTU==true) treeString += _root->name();
|
|
if (_root->getComment().length()) treeString += "[&&NHX" + _root->getComment() + "]";
|
|
treeString += ";";
|
|
return (treeString);
|
|
}
|
|
|
|
//this format is like phylip format except first line is the number of leaves in the tree and the number of trees (1)
|
|
void tree::outputInPamlTreeFormat(ostream& os, bool withHTU ) const {
|
|
// special case of a tree with 1 or 2 taxa.
|
|
if (getLeavesNum() == 1) {
|
|
os<<"("<<_root->name()<<")"<<endl;
|
|
return;
|
|
}
|
|
else if ((getLeavesNum() == 2) && (_root->getNumberOfSons()==1)) { // very special case of a root with one son.
|
|
os<<"("<<_root->name()<<":0.0";
|
|
if (_root->getComment().length()) os << "[&&NHX" << _root->getComment() <<"]";
|
|
os<<",";
|
|
os<<_root->getSon(0)->name()<<":" <<dec<<_root->getSon(0)->dis2father();
|
|
if (_root->getSon(0)->getComment().length()) os << "[&&NHX" << _root->getSon(0)->getComment() <<"]";
|
|
os <<")"<<endl;
|
|
return;
|
|
}
|
|
// ========================================
|
|
vector<nodeP> vec;
|
|
getAllLeaves(vec, _root);
|
|
int num = vec.size();
|
|
os<<num<<" 1"<<endl;
|
|
os<<"(";
|
|
// going over all the son
|
|
int i;
|
|
for (i=0; i<_root->getNumberOfSons()-1; ++i)
|
|
{
|
|
print_from(_root->getSon(i),os, withHTU);
|
|
os<<",";
|
|
}
|
|
|
|
print_from(_root->getSon(i),os, withHTU);
|
|
os<<")";
|
|
if (withHTU==true) os<<_root->name();
|
|
if (_root->getComment().length()) os << "[&&NHX" << _root->getComment() <<"]";
|
|
char c=';';// 59 is dot-line
|
|
os<<c;
|
|
}
|
|
|
|
|
|
int tree::print_from(nodeP from_node, ostream& os, bool withHTU ) const {
|
|
int i;
|
|
if (from_node->isLeaf())
|
|
os<<from_node->name();
|
|
else {
|
|
os<<"(";
|
|
for (i=0; i<from_node->getNumberOfSons()-1; ++i) {
|
|
print_from(from_node->getSon(i),os,withHTU);
|
|
os<<",";
|
|
}
|
|
print_from(from_node->getSon(i),os,withHTU);
|
|
os<<")";
|
|
if (withHTU==true)
|
|
os<<from_node->name();
|
|
}
|
|
os<<":"<<dec<<from_node->dis2father();
|
|
if (from_node->getComment().length()) os << "[&&NHX" << from_node->getComment() <<"]";
|
|
|
|
return 0;
|
|
}
|
|
|
|
int tree::string_print_from(nodeP from_node, string& s, bool withHTU ) const {
|
|
int i;
|
|
if (from_node->isLeaf())
|
|
s += from_node->name();
|
|
else {
|
|
s += "(";
|
|
for (i=0; i<from_node->getNumberOfSons()-1; ++i) {
|
|
string_print_from(from_node->getSon(i),s,withHTU);
|
|
s += ",";
|
|
}
|
|
string_print_from(from_node->getSon(i),s,withHTU);
|
|
s += ")";
|
|
if (withHTU==true)
|
|
s += from_node->name();
|
|
}
|
|
s += ":" + double2string(from_node->dis2father());
|
|
if (from_node->getComment().length()) s += "[&&NHX" + from_node->getComment() + "]";
|
|
|
|
return 0;
|
|
}
|
|
|
|
bool tree::readPhylipTreeTopology(istream &in) {
|
|
const vector<char> tree_contents = PutTreeFileIntoVector(in);
|
|
return readPhylipTreeTopology(tree_contents);
|
|
}
|
|
|
|
bool tree::readPhylipTreeTopology(istream &in,vector<char>& isFixed) {
|
|
const vector<char> tree_contents = PutTreeFileIntoVector(in);
|
|
return readPhylipTreeTopology(tree_contents,isFixed);
|
|
}
|
|
|
|
|
|
|
|
bool tree::readPhylipTreeTopology(const vector<char>& tree_contents) {
|
|
vector<char> isFixed;
|
|
return readPhylipTreeTopology(tree_contents,isFixed);
|
|
}
|
|
|
|
string getName(vector<char>::const_iterator& p_itCurrent) {
|
|
string tmpname;
|
|
tmpname.erase();
|
|
while (((*p_itCurrent)!=')') &&
|
|
((*p_itCurrent)!='(') &&
|
|
((*p_itCurrent)!=':') &&
|
|
((*p_itCurrent)!=',') &&
|
|
((*p_itCurrent)!='}') &&
|
|
((*p_itCurrent)!='{')) {
|
|
tmpname +=(*p_itCurrent);
|
|
++p_itCurrent;
|
|
}
|
|
return tmpname;
|
|
}
|
|
|
|
bool tree::readPhylipTreeTopology(const vector<char>& tree_contents,vector<char>& isFixed) {
|
|
|
|
|
|
int nextFreeID =0; // to give id's for nodes.
|
|
_leaves = GetNumberOfLeaves(tree_contents);
|
|
_root = new TreeNode(nextFreeID);
|
|
if (_leaves == 1) {// very special case of a tree that is only 1 leaf...
|
|
vector<char>::const_iterator itCurrent = tree_contents.begin();
|
|
itCurrent++;
|
|
_root->setName(getName(itCurrent));
|
|
return true;
|
|
}
|
|
|
|
++nextFreeID;
|
|
_nodes = GetNumberOfInternalNodes(tree_contents) + _leaves;
|
|
|
|
isFixed.resize(_nodes,0); // 0 = not fixed, 1 = fixed.
|
|
nodeP conection2part=NULL;
|
|
vector<char>::const_iterator itCurrent = tree_contents.begin();
|
|
|
|
if (verifyChar(itCurrent,OPENING_BRACE)||verifyChar(itCurrent,OPENING_BRACE2)){
|
|
do {
|
|
itCurrent++;
|
|
conection2part = readPart(itCurrent,nextFreeID,isFixed);
|
|
// readPart returns a pointer to himself
|
|
_root->_sons.push_back(conection2part);
|
|
conection2part->_father = _root;
|
|
|
|
} while (verifyChar(itCurrent, COMMA));
|
|
}
|
|
if (!(verifyChar(itCurrent, CLOSING_BRACE)||verifyChar(itCurrent, CLOSING_BRACE2))) {
|
|
errorMsg::reportError("Bad format in tree file.",1); // also quit
|
|
} else itCurrent++; // skip closing brace
|
|
_root->setComment(readPosibleComment(itCurrent));
|
|
if (verifyChar(itCurrent, SEMI_COLLON)) itCurrent++;
|
|
// this part is for the cases where all the edges are fixed. In such case - this part changes
|
|
// all the branches to not fixed.
|
|
int z=0;
|
|
bool allFixed = true;
|
|
for (z=1; z< isFixed.size(); ++z) {
|
|
if (isFixed[z] == 0) {
|
|
allFixed = false;
|
|
break;
|
|
}
|
|
}
|
|
if (allFixed) {
|
|
for (z=1; z< isFixed.size(); ++z) {
|
|
isFixed[z] = 0;
|
|
}
|
|
}
|
|
|
|
|
|
return true;
|
|
}
|
|
|
|
|
|
|
|
// isFixed is actually a bool vector. Sometimes we want to fix a subtree of the tree, for example
|
|
// "human and chimp" so we won't try any topologies that interrupt with this constraint.
|
|
// When isFixed[i] == 1, it means that the branch above node i is fixed. This happens for every leaf,
|
|
// and for nodes indicated by CLOSING_BRACE2 which is '}'.
|
|
tree::nodeP tree::readPart( vector<char>::const_iterator& p_itCurrent,
|
|
int& nextFreeID,
|
|
vector<char> & isFixed) {
|
|
if ( IsAtomicPart(p_itCurrent) ) {
|
|
// read the name, i.e. - the content from the file
|
|
nodeP newLeaf = new TreeNode(nextFreeID);
|
|
isFixed[nextFreeID] = 1; // all edges to the leaves are fixed...
|
|
++nextFreeID;
|
|
|
|
string tmpname = getName(p_itCurrent);
|
|
newLeaf->setName(tmpname);
|
|
|
|
// if a number(==distance) exists on the right-hand, update the distance table
|
|
if ( DistanceExists(p_itCurrent) )
|
|
newLeaf->setDisToFather(getDistance(p_itCurrent));
|
|
// clearPosibleComment(p_itCurrent);
|
|
newLeaf->setComment(readPosibleComment(p_itCurrent));
|
|
return newLeaf;
|
|
|
|
}
|
|
else // this is a complex part
|
|
{
|
|
nodeP newHTU = new TreeNode(nextFreeID);
|
|
++nextFreeID;
|
|
nodeP conection2part=NULL;
|
|
|
|
do {
|
|
++p_itCurrent;
|
|
conection2part = readPart(p_itCurrent,nextFreeID,isFixed);
|
|
conection2part->_father = newHTU;
|
|
newHTU->_sons.push_back(conection2part);
|
|
} while (verifyChar(p_itCurrent, COMMA));
|
|
if (verifyChar(p_itCurrent, CLOSING_BRACE)) {
|
|
isFixed[newHTU->id()] = 1;
|
|
} else if (verifyChar(p_itCurrent, CLOSING_BRACE2)) {
|
|
isFixed[newHTU->id()] = 0;
|
|
} else {
|
|
errorMsg::reportError("Bad format in tree file (2)");
|
|
}
|
|
++p_itCurrent;
|
|
|
|
// if a number(==distance) exists on the right-hand, update the distance table
|
|
if ( DistanceExists(p_itCurrent) )
|
|
newHTU->setDisToFather(getDistance(p_itCurrent));
|
|
// clearPosibleComment(p_itCurrent);
|
|
newHTU->setComment(readPosibleComment(p_itCurrent));
|
|
return newHTU;
|
|
|
|
}
|
|
}
|
|
|
|
//copy the information from other_nodePTR to a new node, and set the father to father_nodePTR
|
|
//does not update the number of nodes and leaves
|
|
tree::nodeP tree::recursiveBuildTree(tree::nodeP father_nodePTR, const tree::nodeP other_nodePTR) {
|
|
|
|
tree::nodeP childPTR = createNode(father_nodePTR, other_nodePTR->id());
|
|
childPTR->setName(other_nodePTR->name());
|
|
childPTR->setComment(other_nodePTR->getComment());
|
|
childPTR->setDisToFather(other_nodePTR->dis2father());
|
|
for (int k = 0 ; k < other_nodePTR->getNumberOfSons() ; ++k) {
|
|
recursiveBuildTree(childPTR, other_nodePTR->getSon(k));
|
|
}
|
|
return childPTR;
|
|
}
|
|
|
|
|
|
|
|
void tree::updateNumberofNodesANDleaves() {
|
|
vector<nodeP> vec;
|
|
getAllLeaves(vec,getRoot());
|
|
_leaves = vec.size();
|
|
vec.clear();
|
|
getAllNodes(vec,getRoot());
|
|
_nodes = vec.size();
|
|
}
|
|
|
|
//removeLeaf: removes nodePTR from tree. also deletes nodePTR
|
|
void tree::removeLeaf(nodeP nodePTR) {
|
|
if (!(nodePTR->isLeaf())) {
|
|
errorMsg::reportError("Error in function deleteLeaf - Unable to remove a node, which is not a leaf ");
|
|
}
|
|
|
|
if (getNodesNum() == 1) {
|
|
delete getRoot();
|
|
_root = NULL;
|
|
}
|
|
|
|
if (nodePTR->isRoot()) {
|
|
assert (nodePTR->getNumberOfSons() == 1);
|
|
nodeP sonOfRoot = nodePTR->getSon(0);
|
|
rootAt(sonOfRoot);
|
|
}
|
|
|
|
// leaf is not the root:
|
|
nodeP fatheOfLeafToRemove = nodePTR->father();
|
|
fatheOfLeafToRemove->removeSon(nodePTR);
|
|
delete nodePTR;
|
|
|
|
int tmpSons = fatheOfLeafToRemove->getNumberOfSons();
|
|
|
|
if ((_root == fatheOfLeafToRemove) && (tmpSons == 1)) {
|
|
//in case the tree was rooted and the removed leaf was one of the root' sons:
|
|
//we have to remove the root and reroot the tree at the second root son
|
|
nodeP newRoot = _root->getSon(0);
|
|
delete fatheOfLeafToRemove;
|
|
_root = NULL;
|
|
rootAt(newRoot);
|
|
}
|
|
else if (tmpSons == 1)
|
|
shrinkNode(fatheOfLeafToRemove);
|
|
else if ((_root == fatheOfLeafToRemove) && (tmpSons == 2)) {
|
|
nodeP tmp = _root;
|
|
rootAt(_root->getSon(0));
|
|
shrinkNode(tmp);
|
|
}
|
|
if (_root->isLeaf() && _root->getNumberOfSons() >0 )
|
|
rootAt(_root->getSon(0));
|
|
updateNumberofNodesANDleaves();
|
|
return;
|
|
}
|
|
|
|
|
|
//getAllBranches: returns two vectors such that nodesUp[i] is the father of nodesDown[i]
|
|
void tree::getAllBranches(vector<nodeP> &nodesUp, vector<nodeP> & nodesDown){
|
|
vector<nodeP> localVec;
|
|
getAllNodes(localVec, _root);
|
|
for (int i=0 ; i < localVec.size() ; i++) {
|
|
if (localVec[i]->father() != NULL) {
|
|
nodesUp.push_back(localVec[i]->father());
|
|
nodesDown.push_back(localVec[i]);
|
|
}
|
|
}
|
|
return;
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// the idea is that if we have a node with only one son (a tree like: node1---node2---node3)
|
|
// we can eliminate node2 (which is nodePTR)
|
|
void tree::shrinkNode(nodeP nodePTR) {
|
|
|
|
if (nodePTR->getNumberOfSons() != 1) {
|
|
vector<string> err;
|
|
err.push_back("you requested to eliminate a node with more than 1 sons.");
|
|
err.push_back(" error in function shrink node");
|
|
errorMsg::reportError(err); // also quit the program.
|
|
}
|
|
|
|
|
|
nodeP fatherNode = nodePTR->father();
|
|
nodeP sonNode = nodePTR->getSon(0);
|
|
|
|
if( (nodePTR->isRoot())&&(nodePTR->getNumberOfSons() == 1) ) // refering the root to be sonNode.
|
|
{
|
|
MDOUBLE dis2root = sonNode->dis2father();
|
|
sonNode->setFather(NULL);
|
|
delete(_root);
|
|
_root = sonNode;
|
|
|
|
for (int i=0; i < sonNode->getNumberOfSons(); ++i)
|
|
{
|
|
MDOUBLE oldDis2Father = sonNode->getSon(i)->dis2father();
|
|
sonNode->getSon(i)->setDisToFather(oldDis2Father + dis2root);
|
|
}
|
|
|
|
_root->setDisToFather(TREE_NULL);
|
|
|
|
updateNumberofNodesANDleaves();
|
|
return;
|
|
}
|
|
|
|
// taking care of the son node:
|
|
sonNode->_father = fatherNode;
|
|
sonNode->setDisToFather(sonNode->dis2father() + nodePTR->dis2father());//if it is the root dont add the distance
|
|
|
|
// takind car of father node
|
|
fatherNode->removeSon(nodePTR);
|
|
fatherNode->_sons.push_back(sonNode);
|
|
|
|
// delete the nodePTR
|
|
delete nodePTR;
|
|
updateNumberofNodesANDleaves();
|
|
}
|
|
|
|
|
|
//createRootNode: erase the current tree and create a tree with one node.
|
|
void tree::createRootNode() {
|
|
clear();
|
|
_root = new TreeNode(0);
|
|
_leaves=1;
|
|
_nodes=1;
|
|
}
|
|
|
|
|
|
tree::nodeP tree::createNode(nodeP fatherNode, const int id) {
|
|
nodeP tmp = new TreeNode(id);
|
|
_nodes++;
|
|
if (!fatherNode->isLeaf()) {
|
|
// if fatherNode is a leaf then we remove one leaf and add one leaf, so no change.
|
|
++_leaves;
|
|
}
|
|
// there is one case when your father IS a leaf and yet you have to increase the number of leaves
|
|
// this is when you father is the root, and you add the first child
|
|
if (fatherNode->isRoot() && fatherNode->getNumberOfSons()==0) {
|
|
++_leaves;
|
|
}
|
|
tmp->_father = fatherNode;
|
|
fatherNode->setSon(tmp);
|
|
return tmp;
|
|
}
|
|
|
|
// check whether the tree contains information about branch length
|
|
bool tree::withBranchLength() const{
|
|
if (_root->_sons.empty()) return false;
|
|
else if (_root->getSon(0)->dis2father() != TREE_NULL) return true;
|
|
return false;
|
|
}
|
|
|
|
ostream &operator<<(ostream &out, const tree &tr){
|
|
tr.output(out,tree::ANCESTOR);
|
|
return out;
|
|
}
|
|
|
|
/*
|
|
void tree::fillNodesID() {
|
|
vector<nodeP> vec;
|
|
getAllNodes(vec,_root );
|
|
for (int i=0; i< vec.size(); ++i) {
|
|
vec[i]->setID( i);
|
|
}
|
|
}
|
|
*/
|
|
|
|
|
|
|
|
/*
|
|
void tree::cut_tree_in_two_leaving_interMediate_node(nodeP node2split,tree &small1,tree &small2) const {
|
|
tree tmpCopyOfThisTree = (*this);
|
|
nodeP node2splitOnNewTree = tmpCopyOfThisTree.getNodeByName(node2split->name());
|
|
string interNode = "interNode";
|
|
assert(node2split->father() != NULL);
|
|
nodeP tmp = tmpCopyOfThisTree.makeNodeBetweenTwoNodes(node2splitOnNewTree->father(),node2splitOnNewTree, interNode);
|
|
tmpCopyOfThisTree.rootAt(tmp);
|
|
tmpCopyOfThisTree.cut_tree_in_two_special(tmp, small1,small2);
|
|
nodeP toDel1 = small1.getNodeByName(interNode);
|
|
};
|
|
*/
|
|
|
|
|
|
void tree::outputInAncestorIdTreeFormat(
|
|
ostream& treeOutStream, bool distances) const{
|
|
time_t ltime;
|
|
int i,k,spaces;
|
|
vector<nodeP> vec;
|
|
int maxNameLen = 0;
|
|
|
|
getAllLeaves(vec,_root);
|
|
for (int w=0; w<vec.size();++w) {
|
|
if (maxNameLen<vec[w]->name().size()) maxNameLen = vec[w]->name().size();
|
|
}
|
|
maxNameLen++; // this is just the longest name of taxa plus one
|
|
maxNameLen+=5; // MN
|
|
|
|
|
|
time( <ime );
|
|
treeOutStream<<"# created on "<< ctime( <ime ) ;
|
|
|
|
treeOutStream<<"name";
|
|
spaces = maxNameLen-4;
|
|
for (k=0;k<spaces;++k) treeOutStream<<" ";
|
|
|
|
treeOutStream<<"father";
|
|
spaces = 7-6;
|
|
for (k=0;k<spaces;++k) treeOutStream<<" ";
|
|
|
|
if (distances) {
|
|
treeOutStream<<"disance to father";
|
|
treeOutStream<<" ";
|
|
}
|
|
|
|
treeOutStream<<" sons";
|
|
spaces = maxNameLen-4;
|
|
for (k=0;k<spaces;++k) treeOutStream<<" ";
|
|
|
|
treeOutStream<<endl;
|
|
|
|
|
|
for (i=0; i<vec.size();++i) {
|
|
treeOutStream<<vec[i]->name()<<"("<<vec[i]->id()<<")";
|
|
int len=3; if (vec[i]->id()>=10) len++;if (vec[i]->id()>=100) len++;
|
|
spaces = maxNameLen-vec[i]->name().size()-len;
|
|
for (k=0;k<spaces;++k) treeOutStream<<" ";
|
|
|
|
if (vec[i] != _root) {
|
|
treeOutStream<<vec[i]->father()->name();
|
|
spaces = 7-vec[i]->father()->name().size();
|
|
for (k=0;k<spaces;++k) treeOutStream<<" ";
|
|
}
|
|
else {
|
|
treeOutStream<<"root!";
|
|
spaces = 7-5;
|
|
for (k=0;k<spaces;++k) treeOutStream<<" ";
|
|
}
|
|
|
|
if ((vec[i] != _root) && distances) {
|
|
treeOutStream<<vec[i]->dis2father();
|
|
}
|
|
//else treeOutStream<<" ";
|
|
|
|
for (int j=0; j < vec[i]->getNumberOfSons(); j++) {
|
|
treeOutStream<<" "<<vec[i]->_sons[j]->name();
|
|
}
|
|
treeOutStream<<endl;
|
|
}
|
|
|
|
vec.clear();
|
|
getAllHTUs(vec,_root );
|
|
|
|
for (i=0; i<vec.size();++i) {
|
|
treeOutStream<<vec[i]->name()<<"("<<vec[i]->id()<<")";
|
|
int len=3; if (vec[i]->id()>=10) len++;if (vec[i]->id()>=100) len++;
|
|
spaces = maxNameLen-vec[i]->name().size()-len;
|
|
for (k=0;k<spaces;++k) treeOutStream<<" ";
|
|
|
|
if (vec[i] != _root) {
|
|
treeOutStream<<vec[i]->father()->name();
|
|
spaces = 7-vec[i]->father()->name().size();
|
|
for (k=0;k<spaces;++k) treeOutStream<<" ";
|
|
}
|
|
else {
|
|
treeOutStream<<"root!";
|
|
spaces = maxNameLen-5;
|
|
for (k=0;k<spaces;++k) treeOutStream<<" ";
|
|
}
|
|
|
|
if (vec[i] != _root && distances) treeOutStream<<vec[i]->dis2father();
|
|
|
|
for (int j=0; j < vec[i]->getNumberOfSons(); j++) {
|
|
treeOutStream<<" "<<vec[i]->_sons[j]->name();
|
|
}
|
|
treeOutStream<<endl;
|
|
}
|
|
}
|
|
|
|
//1. remove one of the root's sons. this node is called "toRemove"
|
|
//2. attach the sons of toRemove to the root.
|
|
//toRemove must have 2 sons so that the the root will have 3 sons.
|
|
//3. change the distToFather of the root's other son to be the sum of the distances of the root and its two sons
|
|
//in practice: this func erase the root and makes toRemove the new root
|
|
void tree::rootToUnrootedTree() {
|
|
if (getRoot()->getNumberOfSons() > 2) return; // tree is already unrooted!
|
|
if (getLeavesNum() <= 2) return; // Cannot be unrooted if the tree has less than 3 leaves.
|
|
|
|
if (getRoot()->getSon(0)->getNumberOfSons() == 0) {
|
|
tree::nodeP toRemove = getRoot()->getSon(1);
|
|
getRoot()->getSon(0)->setDisToFather(getRoot()->getSon(1)->dis2father() + getRoot()->getSon(0)->dis2father());
|
|
getRoot()->setSon(toRemove->getSon(0));
|
|
for (int k = 1; k < toRemove->getNumberOfSons(); ++k) {
|
|
getRoot()->setSon(toRemove->getSon(k));
|
|
}
|
|
delete toRemove;
|
|
getRoot()->removeSon(getRoot()->getSon(1));
|
|
getRoot()->claimSons();
|
|
}
|
|
else {
|
|
tree::nodeP toRemove = getRoot()->getSon(0);
|
|
getRoot()->getSon(1)->setDisToFather(getRoot()->getSon(0)->dis2father() + getRoot()->getSon(1)->dis2father());
|
|
getRoot()->setSon(toRemove->getSon(0));
|
|
for (int k = 1; k < toRemove->getNumberOfSons(); ++k) {
|
|
getRoot()->setSon(toRemove->getSon(k));
|
|
}
|
|
delete toRemove;
|
|
getRoot()->removeSon(getRoot()->getSon(0));
|
|
getRoot()->claimSons();
|
|
}
|
|
updateNumberofNodesANDleaves();
|
|
}
|
|
|
|
//check if the distances from the root to all leaves are equal up to the given tollerance
|
|
bool tree::isUltrametric(MDOUBLE tol, bool bErrorIfNot) const
|
|
{
|
|
vector<nodeP> nodes;
|
|
getAllLeaves(nodes, _root);
|
|
MDOUBLE dist0 = getDistanceFromNode2ROOT(nodes[0]);
|
|
for (int t = 1; t < nodes.size(); ++t)
|
|
{
|
|
MDOUBLE dist = getDistanceFromNode2ROOT(nodes[t]);
|
|
if (!DEQUAL(dist, dist0, tol))
|
|
{
|
|
if (bErrorIfNot)
|
|
{
|
|
string error = "Error: tree is not ultrametric\n";
|
|
error += "the distance from " + nodes[0]->name() + " to the root is: " + double2string(dist0) +"\n";
|
|
error += "the distance from " + nodes[t]->name() + " to the root is: " + double2string(dist) +"\n";
|
|
errorMsg::reportError(error);
|
|
}
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|