Compare commits

..

No commits in common. "main" and "v6.0" have entirely different histories.
main ... v6.0

51 changed files with 500 additions and 932604 deletions

View File

@ -1,5 +1,5 @@
# Last updated Sept 19th 2023
# Author: Xyrus Maurer-Alcala and Auden Cote-L'Heureux
# Author: Xyrus Maurer-Alcala
# This script classifies translated CDS into gene families by
# similarity-searching using Diamond against a reference database of

View File

@ -2,12 +2,12 @@
# Author: Auden Cote-L'Heureux
# This script produces both taxon- and sequence-level statistics to describe the ReadyToGo files
# output by EukPhylo Part 1, as well as some OG-level information from the Hook (OG reference)
# output by PhyloToL Part 1, as well as some OG-level information from the Hook (OG reference)
# database. It relies on the utility script CUB.py to calculate composition statistics (GC content,
# Effective Number of Codons, etc.). Both sequence level and taxon-level stats are summarized in tab-separated
# outputs written to the Output folder. This script requires that the OG reference database is available as an
# amino acid fasta file in the Databases/db_OG folder with the same file name as the .dmnd file used in script 4.
# This script is intended to be run as part of the EukPhylo Part 1 pipeline using the script wrapper.py.
# This script is intended to be run as part of the PhyloToL 6 Part 1 pipeline using the script wrapper.py.
import os, sys
import argparse
@ -30,7 +30,7 @@ def get_args():
description = "Updated March 31th, 2023 by Auden Cote-L'Heureux"
)
parser.add_argument('-i', '--input', type = str, required = True, help = 'Input path to the "Output" folder produced by EukPhylo Part 1. This folder should contain both the "ReadyToGO" and "Intermediate" folders.')
parser.add_argument('-i', '--input', type = str, required = True, help = 'Input path to the "Output" folder produced by PhyloToL Part 1. This folder should contain both the "ReadyToGO" and "Intermediate" folders.')
parser.add_argument('-d', '--databases', type = str, default = '../Databases', help = 'Path to databases folder')
parser.add_argument('-r', '--r2g_jf', action = 'store_true', help = 'Create ReadyToGo files filtered to only include sequences between the 25th and 75th percentile of silent-site GC content. Please be aware that these are not necessarily the correct or non-contaminant sequences; examine the GC3xENc plots carefully before using these data.')

View File

@ -1,15 +1,12 @@
# Last updated Sept 2023
# Author: Xyrus Maurer-Alcalá and Auden Cote-L'Heureux
# Author: Xyrus Maurer-Alcalá
# The aim of this script is to generate lots of codon usage statistics to aid in
# identifying useful characteristics for de novo ORF calling. It is intended to be
# stored in the 'Scripts' folder for the EukPhylO Part 1 pipeline scripts, and is
# stored in the 'Scripts' folder for the PhyloToL Part 1 pipeline scripts, and is
# called by Script 5b to calculate composition statistics for Part 1 output files.
# It should not be run separately.
# Users should consider adding a start/stop constraint, because the default includes all
# sequences, which can capture pseudogenes
# Dependencies:
# Python3, numpy, BioPython

View File

@ -1,7 +1,7 @@
# Last updated Nov 2023
# Author: Auden Cote-L'Heureux
# This script is run as the first step of the EukPhylo Part 1 GENOMES pipeline,
# This script is run as the first step of the PhyloToL 6 Part 1 GENOMES pipeline,
# before any sequence data are actually processed. It checks to ensure that the input
# CDS files and databases are properly located and formatted.

View File

@ -1,7 +1,7 @@
# Last updated Sept 2023
# Author: Auden Cote-L'Heureux
# This script is a WRAPPER for the EukPhylo Part 1 GENOMES pipeline. Users should
# This script is a WRAPPER for the PhyloToL Part 1 GENOMES pipeline. Users should
# use this script to run the pipeline, rather than running any of the sub-scripts (number 1a through 5b)
# independently. To run an individual step in the pipeline, use --script X where X is the number (1 through 5).
# To run multiple sets (usually all of them), use --first_script 1 --last_script 5, or whichever first
@ -19,8 +19,8 @@ import CheckSetup
def get_args():
parser = argparse.ArgumentParser(
prog = 'EukPhylo Part 1 for GenBank Genomes',
description = "Updated January 19th, 2023 by Auden Cote-L'Heureux. Link to GitHub: https://github.com/Katzlab/EukPhylo"
prog = 'PhyloToL v6.0 Part 1 for GenBank Genomes',
description = "Updated January 19th, 2023 by Auden Cote-L'Heureux. Link to GitHub: https://github.com/AudenCote/PhyloToL_v6.0"
)
parser.add_argument('-s', '--script', default = -1, type = int, choices = { 1, 2, 3, 4, 5 }, help = 'Script to run if you are only running one script')

View File

@ -1,45 +1,24 @@
#!/bin/bash
## Last updated Jan 2025 by Auden Cote-L'Heureux
## This script is intended to be used to process genomic CDS with EukPhylo part 1 on an HPC that uses the Slurm workload manager.
## The first part of the script contains Slurm-specific parameters that should be adjusted by users to fit their resource allocation
## needs and restrictions, followed by some example commands taken from the GitHub Wiki, more detail for which can be found
## here: https://github.com/Katzlab/EukPhylo/wiki/EukPhylo-Part-1:-GF-assignment
## Slurm specific code
#SBATCH --job-name=EukPhylo
#SBATCH --output=EukPhylo.%j.out # Stdout (%j expands to jobId)
#
#SBATCH --job-name=PTL1_genome
#SBATCH --output=PTL1.%j.out # Stdout (%j expands to jobId)
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --ntasks-per-node=64 # #change to double number of srun when running multiple instances
#SBATCH --ntasks-per-node=64 ##change to number of srun when running multiple instances
#SBATCH --mem=160G
#SBATCH --mail-type=ALL
#SBATCH --mail-user=email@email.edu
#SBATCH --mail-user=YOUREMAIL@smith.edu
module purge #Cleans up any loaded modules
module use /gridapps/modules/all #make sure module locations is loaded
#Unity server
module use /gridapps/modules/all
module load conda/latest
module load uri/main
module load diamond/2.1.7
module load VSEARCH/2.22.1-GCC-11.3.0
conda activate /work/pi_lkatz_smith_edu/Conda_PTL6p1
#Grid server
module use /gridapps/modules/all
module load slurm
module load tqdm/4.66.1-GCCcore-12.3.0
module load Biopython/1.79-gfbf-2023a
module load BLAST+/2.14.1-gompi-2023a
module load DIAMOND/2.1.8-GCC-12.3.0
module load VSEARCH/2.25.0-GCC-12.3.0
module load tqdm
module load Biopython/1.75-foss-2019b-Python-3.7.4
module load BLAST+/2.9.0-gompi-2019b
module load DIAMOND/0.9.30-GCC-8.3.0
parent='/Your/Home/Folder/'
## Example run command
# Start at script 1 and go through script 5 (the final script) using the Universal genetic code
srun -D ${parent}Scripts python3 ${parent}Scripts/wrapper.py -1 1 -2 5 --cds ${parent}Input -o ${parent}Output --genetic_code Universal --databases ${parent}Databases > log.out
path='/beegfs/fast/katzlab/PTL1/Genomes/'
srun -D ${path}Scripts python3 ${path}Scripts/wrapper.py -1 1 -2 5 --cds ${path}PTL1GenomesBatches/PTL1GenomesBatch2 -o ${path}Output/PTL1Genomes_OutputBatch2 --genetic_code Universal --databases ${path}Databases &
wait

View File

@ -3,7 +3,7 @@
# This script is intended to remove transcripts below or above a given
# size range from a transcriptome assembly. It should be run as part
# of Part 1 of the EukPhylo pipeline, using the script wrapper.py.
# of Part 1 of the PhyloToL version 6 pipeline, using the script wrapper.py.
# Prior to running this script, ensure that you have assembled your
# transcriptome and renamed the assembled transcripts in the format of

View File

@ -5,7 +5,7 @@
# by removing sequences with low coverage relative to other
# very similar sequences from samples sequenced on the same
# plate. This script is optional, but to be run as part of the
# EukPhylo Part 1 pipeline using the script wrapper.py
# PhyloToL version 6 Part 1 pipeline using the script wrapper.py
# The specifics of parameters are described below and include removing seqs 1/10
# the coverage of the most highly expressed, and keeping all seqs with coverage >50.

View File

@ -5,12 +5,12 @@
# all length-filtered assembled transcripts against a reference database. It then
# writes these sequences into a separate file, removing them from the remainder
# of the sequences that will go forwards for gene family assignment. This script
# should be in Part 1 of the EukPhylo pipeline using the script wrapper.py.
# should be in Part 1 of the PhyloToL version 6 pipeline using the script wrapper.py.
# You must run Script 1a before this step. Optionally, you may also have run Script 1b.
# Before running this script, ensure that you have a properly formatted rRNA reference
# BLAST database in the Databases/db_BvsE/SSULSUdb folder; it is relatively narrow in scope
# and could be easily replaced or updated to better capture a user's target taxa
# and could be easily replaced
#Dependencies
import argparse, os, sys

View File

@ -9,7 +9,7 @@
# to a prokaryotic sequence, it is labeled with an "E"; if its best hit to a prokaryotic
# sequence has an e-value >1000 times that of its best hit to a eukaryotic sequence, it is
# labeled with a "P". Anything else gets a "U". This script should be run as part of the
# EukPhylo Part 1 pipeline using the script wrapper.py.
# PhyloToL version 6 Part 1 pipeline using the script wrapper.py.
# Prior to running this script, ensure that you have run scripts 1a (and optionally
# script 1b) and 2a, and that your prokaryote and reference databases (or the default

View File

@ -6,7 +6,7 @@
# gene families. We provide the Hook database on the GitHub, but this
# may be replaced with a custom reference database by REPLACING the
# .dmnd and .fasta files in the Databases/db_OG folder. This script
# is intended to be run as part of the EukPhylo Part 1 pipeline using
# is intended to be run as part of the PhyloToL 6 Part 1 pipeline using
# the script wrapper.py.

View File

@ -7,7 +7,7 @@
# frequencies in all reading frames; it then reports these frequencies in a spreadsheet
# (gcodes_output.tsv) for the user to inspect in deciding which genetic codes to use,
# if unsure. This step can be skipped if genetic codes were input from the beginning. This
# script should be run through the EukPhylo Part 1 pipeline using the script wrapper.py.
# script should be run through the PhyloToL 6 Part 1 pipeline using the script wrapper.py.
#----------------------------------------- NOTES -----------------------------------------#
#

View File

@ -4,7 +4,7 @@
# This script is intended to translate nucleotide sequences. It does this using
# the gcode_output.tsv file output by script 4 and containing in-frame stop codon
# frequencies. The user can use this stop codon information to fill in the last
# column in this file with the genetic code for each taxon, as outlined in the Wiki on Github. If the user input a
# column in this file with the genetic code for each taxon. If the user input a
# genetic code or list of genetic codes to script 1, then the gcode_output.tsv will
# be filled automatically. Sequences are translated using the Diamond BLASTp results
# from OG assignment as a starting point for determining coding sequence boundaries.
@ -14,7 +14,7 @@
# of transcriptomic data, poor genetic code assignment or low-quality/partial data can
# interfere with this process).
# This script is intended to be run using the wrapper.py as part of the EukPhylo Part 1
# This script is intended to be run using the wrapper.py as part of the PhyloToL 6 Part 1
# pipeline. It requires that the setup of the 'Output' folder be that as output by script 4
# of this pipeline.

View File

@ -5,9 +5,8 @@
# First, all sequences shorter than 33% or longer than 150% the average length of sequences
# from the same OG in the Hook database are removed. Then, for each transcriptomic sample,
# all sequences within an OG are compared at the nucleotide level to the sequence with the
# highest “score” (defined as k-mer coverage multiplied by length) using BLAST, and sequences that
# are 98% identical to the master sequence are removed. The script should be run
# as part of the EukPhylo Part 1 pipeline using the script wrapper.py. It requires that the
# highest “score” (defined as k-mer coverage multiplied by length). The script should be run
# as part of the PhyloToL 6 Part 1 pipeline using the script wrapper.py. It requires that the
# structure of the 'Output' folder be as output by script 5, and that the Databases/db_OG folder
# contains a .fasta file containing all amino acid sequences in the OG reference database (Hook)
# with the same file name (until the extension) as the .dmnd file for the reference database used

View File

@ -3,8 +3,8 @@
# This script does not process sequence data in any way. It only renames the outputs of
# script 6 to the 10-digit taxon code which prefixes the file names, and then moves output
# 'ReadyToGo' files into a separate folder. It is intended to be run as part of the EukPhylo
# Part 1 pipeline using the script wrapper.py.
# 'ReadyToGo' files into a separate folder. It is intended to be run as part of the PhyloToL
# 6 Part 1 pipeline using the script wrapper.py.
import argparse, os, sys
from argparse import RawTextHelpFormatter,SUPPRESS

View File

@ -2,13 +2,13 @@
# Author: Auden Cote-L'Heureux
# This script produces both taxon- and sequence-level statistics to describe the ReadyToGo files
# output by EukPhylo Part 1, as well as some OG-level information from the Hook (OG reference)
# output by PhyloToL Part 1, as well as some OG-level information from the Hook (OG reference)
# database and the original input assembled transcripts. It relies on the utility script CUB.py
# to calculate composition statistics (GC content, Effective Number of Codons, etc.). Both sequence
# level and taxon-level stats are summarized in tab-separated outputs written to the Output folder.
# This script requires that the OG reference database is available as an amino acid fasta file
# in the Databases/db_OG folder with the same file name as the .dmnd file used in script 3. This script
# is intended to be run as part of the EukPhylo Part 1 pipeline using the script wrapper.py.
# is intended to be run as part of the PhyloToL 6 Part 1 pipeline using the script wrapper.py.
import os, sys
import argparse
@ -31,7 +31,7 @@ def get_args():
description = "Updated March 31th, 2023 by Auden Cote-L'Heureux"
)
parser.add_argument('-i', '--input', type = str, required = True, help = 'Input path to the "Output" folder produced by EukPhylo Part 1. This folder should contain both the "ReadyToGO" and "Intermediate" folders.')
parser.add_argument('-i', '--input', type = str, required = True, help = 'Input path to the "Output" folder produced by PhyloToL Part 1. This folder should contain both the "ReadyToGO" and "Intermediate" folders.')
parser.add_argument('-d', '--databases', type = str, default = '../Databases', help = 'Path to databases folder')
parser.add_argument('-r', '--r2g_jf', action = 'store_true', help = 'Create ReadyToGo files filtered to only include sequences between the 25th and 75th percentile of silent-site GC content. Please be aware that these are not necessarily the correct or non-contaminant sequences; examine the GC3xENc plots carefully before using these data.')

View File

@ -3,12 +3,10 @@
# The aim of this script is to generate lots of codon usage statistics to aid in
# identifying useful characteristics for de novo ORF calling. It is intended to be
# stored in the 'Scripts' folder for the EukPhylo Part 1 pipeline scripts, and is
# stored in the 'Scripts' folder for the PhyloToL Part 1 pipeline scripts, and is
# called by Script 7b to calculate composition statistics for Part 1 output files.
# It should not be run separately.
# Users should consider adding a start/stop constraint, because the default includes all
# sequences, which can capture pseudogenes
# Dependencies:
# Python3, numpy, BioPython

View File

@ -1,7 +1,7 @@
# Last updated Nov 2023
# Author: Auden Cote-L'Heureux
# This script is run as the first step of the EukPhylo Part 1 TRANSCRIPTOMES pipeline,
# This script is run as the first step of the PhyloToL 6 Part 1 TRANSCRIPTOMES pipeline,
# before any sequence data are actually processed. It checks to ensure that the input
# assembled transcripts files, databases, genetic codes, and conspecific names files (the latter
# used only with cross-plate contamination, script 1b) are properly located and formatted.

View File

@ -1,7 +1,7 @@
# Last updated Sept 2023
# Author: Auden Cote-L'Heureux
# This script is a WRAPPER for the EukPhylo Part 1 TRANSCRIPTOMES pipeline. Users should
# This script is a WRAPPER for the PhyloToL Part 1 TRANSCRIPTOMES pipeline. Users should
# use this script to run the pipeline, rather than running any of the sub-scripts (number 1a through 7b)
# independently. To run an individual step in the pipeline, use --script X where X is the number (1 through 7).
# To run multiple sets (usually all of them), use --first_script 1 --last_script 7, or whichever first
@ -21,8 +21,8 @@ import CheckSetup
def get_args():
parser = argparse.ArgumentParser(
prog = 'EukPhylo Part 1 for Transcriptomes',
description = "Updated September 29th, 2023 by Auden Cote-L'Heureux. Link to GitHub: https://github.com/Katzlab/EukPhylo"
prog = 'PhyloToL v6.0 Part 1 for Transcriptomes',
description = "Updated September 29th, 2023 by Auden Cote-L'Heureux. Link to GitHub: https://github.com/AudenCote/PhyloToL_v6.0"
)
parser.add_argument('-s', '--script', default = -1, type = int, choices = { 1, 2, 3, 4, 5, 6, 7 }, help = 'Script to run if you are only running one script')
@ -92,7 +92,7 @@ def script_four(args):
if os.path.exists(args.databases + '/Taxa_with_few_sequences.txt'):
with open(args.databases + '/Taxa_with_few_sequences.txt', 'r') as f:
content = f.read()
print(f'These samples did not run through EukPhylo part1 because they have no good hits to the hook database or the Diamond sequence aligner ran out of memory. We suggest you remove them and restart.')
print(f'These samples do not run through PTL6p1, perhaps because they has no good hits to the hook. We suggest you remove them and restart.')
print(content)
print('Stopping Run.')
os.remove(args.databases + '/Taxa_with_few_sequences.txt')

View File

@ -1,57 +1,29 @@
#!/bin/bash
## Last updated Jan 2025 by Auden Cote-L'Heureux
## This script is intended to be used to process assembled transcripts with EukPhylo part 1 on an HPC that uses the Slurm workload manager.
## The first part of the script contains Slurm-specific parameters that should be adjusted by users to fit their resource allocation
## needs and restrictions, followed by some example commands taken from the GitHub Wiki, more detail for which can be found
## here: https://github.com/Katzlab/EukPhylo/wiki/EukPhylo-Part-1:-GF-assignment
## SLURM-SPECIFIC SETUP BELOW
############### FOR SMITH GRID HPC ############### (DELETE section if not applicable):
## Slurm specific code
#SBATCH --job-name=EukPhylo
#SBATCH --output=EukPhylo.%j.out # Stdout (%j expands to jobId)
#
#SBATCH --job-name=PTL1_GBF
#SBATCH --output=PTL1.%j.out # Stdout (%j expands to jobId)
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --ntasks-per-node=1 ##change to number of srun when running multiple instances
#SBATCH --ntasks-per-node=64 ##change to number of srun when running multiple instances
#SBATCH --mem=160G
#SBATCH --mail-type=ALL
#SBATCH --mail-user=email@xxx.edu ##add your email address for job updates
#SBATCH --mail-user=YOUREMAIL@smith.edu
module purge #Cleans up any loaded modules
module use /gridapps/modules/all
module use /gridapps/modules/all #make sure module locations is loaded
module load slurm
module load tqdm/4.66.1-GCCcore-12.3.0
module load Biopython/1.79-gfbf-2023a
module load BLAST+/2.14.1-gompi-2023a
module load DIAMOND/2.1.8-GCC-12.3.0
module load VSEARCH/2.25.0-GCC-12.3.0
module load tqdm
module load Biopython/1.75-foss-2019b-Python-3.7.4
module load BLAST+/2.9.0-gompi-2019b
module load DIAMOND/0.9.30-GCC-8.3.0
module load VSEARCH/2.21.1-GCC-10.3.0
############### FOR UMASS UNITY HPC ############### (DELETE section if not applicable):
## Slurm specific code
#SBATCH --job-name=EukPhylo
#SBATCH --output=EukPhylo.%j.out # Stdout (%j expands to jobId)
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --ntasks-per-node=64
#SBATCH --mem=40G
#SBATCH --mail-type=ALL
#SBATCH --mail-user=email@xxx.edu
module purge #Cleans up any loaded modules
module use /gridapps/modules/all
module load conda/latest
module load uri/main
module load diamond/2.1.7
module load VSEARCH/2.22.1-GCC-11.3.0
conda activate /work/pi_lkatz_smith_edu/Conda_PTL6p1
parent='/beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams/'
## PROVIDE YOUR PARENT PATH
parent='/Your/Home/Folder/'
## EXAMPLE RUN COMMANDS BELOW
# A simple run that goes from script 1 to script 7 (the last script) using the Universal genetic code
srun -D ${parent}Scripts python3 ${parent}Scripts/wrapper.py --first_script 1 --last_script 7 --assembled_transcripts ${parent}AssembledTranscripts -o ${parent}Out --genetic_code ${parent}Gcode.txt --databases ${parent}Databases > log.out
# Including the cross-plate contamination step, using conspecific names
srun -D ${parent} python3 ${parent}Scripts/wrapper.py --first_script 1 --last_script 7 --assembled_transcripts ${parent}AssembledTranscripts --output . --genetic_code ${parent}Gcode.txt --databases ${parent}Databases --xplate_contam --conspecific_names ${parent}Conspecific.txt > log.out
srun -D ${parent}Scripts python3 ${parent}Scripts/wrapper.py -1 1 -2 7 -x --assembled_transcripts ${parent}AssembledTranscripts -o ${parent} -n ${parent}Conspecific.txt --genetic_code Universal &
#srun -D ${parent}HQ/Scripts python3 ${parent}HQ/Scripts/wrapper.py -1 2 -2 7 -x --assembled_transcripts ${parent}Plate4/Assembled_Transcripts -o ${parent}Plate4 -n ${parent}Plate4/Conspecific.txt --genetic_code ${parent}Plate4/Gcodes.txt &
#srun -D ${parent}HQ/Scripts python3 ${parent}HQ/Scripts/wrapper.py -1 2 -2 7 -x --assembled_transcripts ${parent}Plate7/Assembled_Transcripts -o ${parent}Plate7 -n ${parent}Plate7/Conspecific.txt --genetic_code ${parent}Plate7/Gcodes.txt &
#srun -D ${parent}HQ/Scripts python3 ${parent}HQ/Scripts/wrapper.py -1 1 -2 7 -x --assembled_transcripts ${parent}Plate11/Assembled_Transcripts -o ${parent}Plate11 -n ${parent}Plate11/Conspecific.txt --genetic_code ${parent}Plate11/Gcodes.txt &
#srun -D ${parent}HQ/Scripts python3 ${parent}HQ/Scripts/wrapper.py -1 2 -2 7 -x --assembled_transcripts ${parent}Plate18/Assembled_Transcripts -o ${parent}Plate18 -n ${parent}Plate18/Conspecific.txt --genetic_code ${parent}Plate18/Gcodes.txt &
wait

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,89 @@
>NODE_2069_length_1109_cov_20.350386_g2025_i0
GTATCAACGCAGAGTACGGGGGATGAAGATCCAGGTAATGTAGATTATCGTAGTATAGGTGGTTTAAATGATCAAATACGTGAAATACGTGAATCAATTGAATTACCATTAACTAATCCAGAATTATTTAAACGTGTTGGTATAAAAGCACCAAAAGGTGTATTATTATATGGACCACCAGGTACTGGTAAAACATTATTAGCACGTTGTATGGCAAATACTATGGATTGTAGATTTTTAAAAGTTGTTGCTAGTGGTATAGTTGATAAATATATTGGTGAATCAGCACGTATTATAAGAGAAATGTTTGGTTATGCAAGAGAAAATGCACCATGTATTATATTTATGGATGAAATTGATGCTATTGGTGGTAAAAGATTTAGTCAAGGTACATCTGCTGATAGAGAAATACAACGTACATTAATGGAATTATTAAATCAAATTGATGGATTTGAAGAATTAGGACGTGTTAAAATTATTATGGCTACAAATAGACCTGATGTATTAGATCCTGCATTATTACGTCCAGGTAGATTAGATAGAAAAATTGAAATACCATTACCAAATGAATCAGCAAGAATTGATATATTAAAAATTCATGCTTCAAAATTAACTAAATCTGAAAATATTGATTATGAAGCTATTGTTAAACTATCAGATGGTTTTAATGGTGCTGATATACGTAATATTTGTACTGAAGCTGGTATGATGGCTATAAGAGCTGATAGAAATTTTGTAAATCAAGAAGATTTTATGAAAGCTGTAAGAAAATTAAAAGATGCTAAAAAATTAGAATCAAAATTAGAATATAAAAAATTGTAAACATTTCTAATTGTGGTAATATAATTAACGTTTTCAAAATTAATTTTCCAATATTATTTAATTCACTTTTGTCTTAATATAAATTGTGGGGTTTTTTTATAAAAACAAAATTATAAATTTTACGCAAAATTTAAACATTTAAAATTAATTAATTTTCATCGTCCTGATCAACAAAATCAATAAGTTTACTTTGTTGTTTAGCTCGTATTTTTTCCATTTCAATTGCTGATGTATTACGTTCTTGTTTTTGTGATGAATCTTCTTGTCTTCTTTTAGCTCTATCATAG
>NODE_2305_length_1021_cov_17.683544_g2261_i0
GGTATCAACGCAGAGTACGGGCATTTGAAGAAGAACAAAAACATTATTTAGCTAGTTTAGAAAATAATGGTCTAACAACACAGTTTAATCCTTTAATTTATTGCAAATTTCCATCTAAAGATATTACAAAAAATTTACAAGATCCTAGAAAATTATTTGTTGAATCAATGAAAAATTATTTGAATAATAATAATAAATTAATAGGTGCATTAGCTGGTACTATTGGCCAAAAATATATACCTGTATTTAATTCATATTCAAAAAAATTTAATATACAATTTTCACTTCCAAAAATGTAGTTAACATATACAATCTAATTTTGTAAGGTTTATAAAAGTTTAAATAATTTAATTAATTAAATTTGTTTGTTTTGTTATTTGTGTAATTGTTGTATTAATGCTTTGTTTTTATTAAAATATATTTAAATCACATGAAAGGAAAAATATTAAAATTATGAACAAATTATTTAAGACTTTATTATATAAATTAACGTACATTAAAACACATATATATTAATTTAAATTTAATTTAAATATGATGACCATCCATTAAAGAATCATCATATGTATTATATCTTACAAATGGTGGTATACCTGTTGTATATATATCAGTTGCTAATTCTTCAGAAGTTAACATTCCATCTTTTTCCGCATCACTTGATTCTTTATCCACTAATTTTCTTATTTTTGTTTCCATATTTTTTAATTCTTTCTCATCAAGCCAATTTTGTTCTAATATCATATTTCTTATATATCGTAAAGGATCAAATTGTGTTCTGTATTCTTGAACTTCTTCATTATTTCTATAGGATATACCTGGATCAGACATACTATGACCATGATATCTATAAGTCATAGCTTCCATAACAATAGGACCTTTTCCTGATCGACACCAATCTGCTGCAAAACGAGTAGCCTCACGAACACATAAAACATTCATTCCATCTACTTGAATACCAGGAATATAATCTCCTCTTGTATAAAATTCAGTACTAGCGGATGCTCTTTCAATTGAAGTACCC
>NODE_2310_length_1019_cov_82.463002_g2266_i0
ACAGATTATAGATTAAGACGTAGACTAGTTCAACAAGATAAAAGAAAATATAATGCTCCAAAATGGAGATTAGTTGTAAGAATTACAAATAGAGATGTTATTGCGCAAATAGCATGTGCTAAAGTGCAAGGTGATCATATATTATGTGCTGCATATGCACATGAATTAAAAAGATATGGATTAAAAGTAGGTTTAACAAATTATGCAGCTTGTTATGCTACTGGTTTATTATTAGCAAGACGTTTATTAACAAAATTAGGTTTAGATAAATATTATCCAGGTAAAAAATCAATAGATGGTAAAAGATTTATTAGTAGAGTAACAAATGTAAAATGGCAACCAGATCAAAAAGTTTATAGACCTTTTAAATGTATATTAGATAAAGGTTTAGCACGTGGTACATCAGGTGCAAGAATATTTGGTGTAATGAAAGGTGCTATTGATGGTGGTTTAGATGTACCACACAGTGTTAATCGTTTTCCTGGATTTAAAAAAGGAGAAGACGGAGCAAAAGATGAATACAATGCAGAACGATTATCAGAACGTATTTATGGTCAACATGTTGCACAAAAAATGAGACATTTACAAGAAAATGATCCAGATAAATATAGTAGACATTTTAGTCAATATATTAAAGAAGGTATTGAACCTGATAATTTAGAAGAAATGTATGAAAAAGTTCATCGTGCTATACGTAAAGATCCAAGTGCACCACAAAAAGTTAAAAAAGATTATAATAATATGGATGTTAGAAAAAAACAAGCTAGATGGACACAAGAACAAAAAACTGATCAAAAAGTTCAGAAAATTGCTGATTTAAGATTAAAATTTAGTAAACAAGATAAAAAGAAAAATAATAATATGGATGTTGATGAATAATTTAAATATATGTTTGTTTGCTTGTTAATTGTTGGAAATCTTTTGCAGTATATTCTGATCAATTACCAATCTATTACGCACAAAATAGTCATGGTCCCATATTTCATTGACAATTTATCTATCGACCGACAGAACATGCA
>NODE_2554_length_947_cov_14.820366_g2508_i0
GGTATCAACGCAGAGTACGGGAATGACAAATACAAGTGGTAAATGGGATCAAGTATCAGGCATTTTACGAGATGTAAATGTCGATATGGCAATGGAACAGGCTCAAACAAATTTATTGTACAAAAAATTGTTGGAAGATATGAGAGAACGAACAAAAGAAAGAGAATGGGATGATCCAAATCGTGTAACTATAACATCTGGTGAAAAAAACAATTCAAATAAAAATAATAACGATGATGAAGATTCTGATTCTGAATTTGACGATTTTTTCGATGATCCTGAAATGGATAAATTACATCAACAACGACTTGAACAATTAAAACAACAATACAAAGAAAAAAAAAAAAAATTAGAAAAGGGTCATGGTCATTATGATGAAATGAATGAACGAGATATGTTAAAAATGGCATGTGATACAGATTATGTAGTTGTTAATTTTTATAATGATGAATTTGAAAGATGTAAAATAATTGATAAACATTTTCGTATATTAGCACAAAAACATATTGAAGCACGTTTTGTTCGTGTTAATGTAAAAAAATCACCATTTATAGTATCAAGATGGAAAATTAAAACATTACCAACAATAGGAGTTATTATTAATGGTTTATTTGCTGATAAAATAATAGGATTTGCAGATTTTGGAAATAAAGATGATTTTCCAACTGCTGCACTTGCTAAAAGATTAATAAAAACTGGTGTATTAAAAAATTTACAAACTGGAAAACGTAAATTAAGAAATAAAAGAACAACTGCATTAACAGATATTGAATAAACATATCAACGTATTAACATTGTAATCAAATTTATAATAATGTTAATATTTAACCAATTTTTGTGATCATTTTGTTGTTGTTTTTTTTTAATTTTGAAAAAAAATATTGAAATTTTTTCAAATTGTGATTGTGTAAACTGTTTTTAAATATCGAGATATATCATAACAAAGG
>NODE_2573_length_940_cov_56.809689_g2527_i0
GAGTACGGGCAGAAGAAAACGTAAAGCAAATTGTTTAAAGAAAAAATTAAAATTAAGCATACCAGCACCTGAAGGTTTAAAAAAAGCCTGGGAAAGAGAAAACCGTATTAAAAAATTAAGATCAGAAAGAGCACAAAAAATATTAAAAAATTTACCAAACAAAAGAGATAAACAAGCAAAACGTATTGAGAATTATGAAGCAAAATATTCAAATATAGTACGTAGAAGACAAGAAAATGAGTTAAAAGCACGATTAAATGGAAATTTTTATAAACCAGAAGATTCACGAGTAATGGTAGTAGTAAGAATTAGAGGTATAAATCGTGTATCACCTAAAGCAAGAAAAGTATTACAATTATTTCGTTTATTACAAATTCATAATGCAGTATTTGTTAGAGTAAATAAAGCAACAATATCTATGTTAAAATTAATTCAACCTTATGTAGCTTATGGTTATCCATCAGTAAATAGTATAAGAGCATTAATTTATAAAAGAGGATATGCTAAAATAAGACATAGACCAGGTAGTATATCAAGAATACCAATTATGTCAAATAAATTAATTGAAAAACATTTAGGTAAATATGGTATTGAAACTATTGAAGATATGGTATTTCAAATATATACTGGTGGTAAATTCTTTCGTCAAGCATCTAACTTTTTATGGCCTTTTAAATTAAATTGTCCAAAAGGTGGTTATAGAGGTAGAAAACGAAGACATTTTAACGAAGGTGGTACTTATGGAAATTGGGAAAATAATATTGGTAATTTAGTCAAAAGAATGATTTAAATTATACTTGCGTTTGTTGTTATTAATTCAAATTTTAATTCAGTTTGATTATATTTCGAATTCGCATTGTTTACAATTATTGCGAATCATCGATATACTATTAAATGAAAATTTGTCTTCTAGATTGACAATTCTTGATAAAATTAATTC
>NODE_2620_length_930_cov_17.252042_g2574_i0
AAAAAAAAAAAAATCAATTGAAAAGTTTTACTTTTAAAGTTTCACACAACACGCACATAAAAAACAAAGTTAAAGAGAAGTAATATTAGCAACACCAGGTAATACTTTACCTTCTAATAATTCTAATGAAGCTCCACCACCAGTAGAAACATGACTAACTTTATCTTTTAATCCAGCTTTTTTAATAGCAGATGCACTATCACCACCACCAATAATAGTAACAGTACCATTTTGAGTAGCTAATGCAAATGCATTACATAAAGCAGTAGTACCTTTACTTGTTTTTTCCCATTCAAATACACCAGGTGGTCCATTCCATACAATAGTTTTAGCACTATTTATACTATTTTCAATTAATCTAATGGTTTCAGGTCCAATATCTAATCCCATCCAATTATCAGGTATACCATCTTTAGCAGTACATGTTTTTATTTGACAATTTGGATCAAATTTATCACCAATTACAAAATCAACTGGTAATATTATTTTAACATTATTTTTTTTAGCTTTTTCTACTAAATCAGATACTGTTTTTGCACCTTCTTCATCATATAATGATGTACCAATTGACATATTATTTATTTGTTTTAAAAATGTATATGCCATACCACCTGCTATAACCATTTCATTTACACGATCTAATAATGAACTAATTACTTTTATTTTATCTCTTACTTTTGCACCACCTAATATTGCTAAATATGGTTTTTTTGGTACTGCTAATGCTTGATTAAAATAATTTAATTCTTTTTCTACTAATAAACCACTTACACGTAATGGTACATCAACACCAACCATACTACTATGACCACGATGACATGTACCAAATGCATCATTAACATATAAATCTGATATTGATGTCATTACACTTCTTAATGAAGATATTTCTTCTTTTGATGCCCCGTACTCTGCGTTGATACCCTGTCTCTT
>NODE_2637_length_926_cov_4.390387_g2591_i0
GGTATCAACGCAGAGTACGGGTGCATCCATGGTGTAATAAAAATATATCTATATGGAAAAAATATAATAAATATAATAGTAATTCATTTACACAAATAAATTTTAATCCTTCGGTTCCTCAAAGGGTATTATTATATAATAATTATCATTTAGTTTTATTAATTTTAAATAAAAATTTACAAAAACATAATTTTGATTTGTTTGATATTAATATAAATCATTCAAATAAACGAAGAAAATTAAATAATAATAATATTAGTCAATATAATAATAAAAAACACAACAATACAAACAAAAATAATTATAAAATCAATGAATTAAAAGTAATAAAAAAATATCAAAATATTTTATTATCAAGTTTTATAAATTGTGATGAGTTATTACTAATTGAAATGAAATGGGATAATATTACTCAAAAATTAAAAAATCCAATATATTCAAAAAAATATGGTATTTAATTAATCGACATATTGAAATTTAGTTATTTTTGTATGTGTTTTATTGATTTTGTATTTAATTTGATAATTTTTTAGCTAAATTGATATTAAAATTACAAACAATCTATACTTCTATATAAGATTTATTACTAACATTTCCACTACTAATTAAAGATTTTGGTCTTTTATCACAATAAACACGTCGACAATTATACATTAATCCTTGATCGTATGGATTTACATTTTGTGAATAAGTATTTTTAACATTTTCATTTGTAGTTTGATTTATAATAATTAAATGTAAATGATAAAATGCTAATGAAGATACACTAAAAAATATTAATATACCGAATACAAATAAACCAATAGCAAATGGATTATCTTCTATGCTTGAAAATAATTCATCTGACCAACTACGATGTTTATTATTGTTATCATGATTATTATTAAATGAATTTGCTTTTTCAATTGCTTTTCGATAAATAACCC
>NODE_2714_length_902_cov_47.945718_g2668_i0
AGAGACAGGGTATCAACGCAGAGTACGGGGAATTATTAAAAACAGAAAATTGTGAATTATTATTTACAGATGATGCAATTGATAAAATAGCAGAAATAGCAGTTAGATGTAATGAAAATATTGAAAATATAGGTGCTAGAAGATTAGTTACAGTATTAGAAAAAGTTATGGAAGAAATTAATGTTAATGCTTCAGATGAAACTTCTAATAAATATGTTATTGATGTACCATATGTAGAAAAACAAGTTGAGGATATATTTAAAGGAGATGATTTACAAAAATGGATTCTTTAAATGTAAATTATTCAATAAATTCATTTTTTGATTCACAAGTAATGTACTTCAATTAAACCAGTTAGCTATTCACAACTGTATACGTTAATTTCATGTACTGAATTTCTGCAACTATGAATTATTTAATTAAACAATATCCCTCAATTGAACATTAAATGATTTATTTACAAATTGAACTATATCGACGTCATTCATCAGTATTTTGAGTTGTATTATCTTCCTCAACAACTTCATTGTCTTTCTTAGTGTTGTCTTCATTTACTTCATCATTATTATTACTGATTTGTGTCGTATTATCATTGTTAGTCTCAGTTTCAGAATTGTTGTTATTTTGTTTTGATTGATTTTCATTATCATTTTGAGTTTCTGTTTTATCTTCTTTTTCTTCGTTGTCTTGTTCATCACCACAAAAATATTGGTTCATTTTTTTTAATGTGTATTCTCTCAAAACTAAATTATCTTCCTCTTTATCACCAAATCCACTAGCTGTACCATTAAATTCTAAAATATATTCTTCATCTTTGGTTGTTACTAAAACATCTACTGTAACAATATCCATACCACCTGCAAATTTAGCACATTCATCTGCCCAGTACTCTGCGTTGATAC
>NODE_2774_length_888_cov_5.245399_g2728_i0
ATTTTCATCCAAAACACACAACATCCAAAATTTACAAAATTGAATTTCGAATAATAATTTACAATTTAGCTTGTTTAGCAGCCAGATTATACATAATTTCTTCACTACCAGCTGCAATAGCACTTGCTCTAACACCTCTATAACCTTCTTCAACTCTACCAGCTCTACCACCTCTAACATAACTTCTACCACCAAATATTTGTGAACAGTCAATTAATATTTTTTGATATGATTTTGTAATATGTACTTTCATTAATCCTACATTACGTGGTATACTTTTATCTTTACGTCCATATTGATCAAATGATAAATCGTATGCCATTTTTTCTAAAAATAATTGACAAGTAATAGCTAATCTTGAAATATCAGCAATTTTATGTTTAATTACTTGATGATCCATTAAACGTTTACCAAATGTTTTTCTTTCTTTTGCCCATAATATTGTATCTTCTATTGCTGCTCTCATACCTGCTACTGCATCTGCACATACTACAAATCTTTCAAAATTAAAATTATACATTAATGGTTTAAATCCTTTATTTTCTGTACCAATGAGATTTTCAACAGGTACTTTCACATTTACGAATGTAACAGATGCTGTATCATTTAAATCAGCTCCTTGCATATTTAATTTTGATGTATATACACCTTTACAACGTGGTATTAACATTAATGATACTTGACCTGGTCGATCACCTGTTTTACATAATGTTGTAAAATAATCTGCTCGGGCGCCACCTGTTATCCAATATTTTGAACCATTTACTACGTAATATTTTCCATCTTCAGATTTAACAGCAGTAGTTTTAATTCTAGCAACATCACTACCACCAGTCATTTCACTAATAGCTAAAGATATTAATTTATCCCCGTACTCTGCGTTGATAC
>NODE_3004_length_845_cov_42.306995_g2957_i0
GTATCAACGCAGAGTACGGGGTAACAAAAATGGATTTCGCCTCCCCAAAAGGTGGTTTATCCCGTGTAGATGGTTCAAGTGTAACAGATTTCGCCAAAGATGAAGAATGTGTTCAATTTACCAAAGATGTTTTGAAGAATCGTGACAATAAAGACAATCCAGGTGAAATAGATACAATCGCATTATCCAAAATAAATAGTAAAGATTACAATGTAATATTTTTTGCAGGTGGTCATGGTACAATGTGGGATTTTCGTGATAATAAAGATGTAAATAGATTAGCAAAAGAAATATATGAAAATGGTGGTATTGTTAGTGCTGTATGTCATGGTCCATGTGCATTATTAGGTATTAAATTATCTAATGGTGAATATTTAATTAAAGATAAATTAGTATGTGGTTTTACCAATGATGAAGAAGAAGCTGTTAATTTAACTAAAGTTATGCCATTTTTATTAGAAACGGAAATGAAAAAAATTGATGGAAAATTTGTTTCAAGAAAAAAACTGGTCTTGTTGTGCTGTATTAGATAGAAGAGTTGTTACTGGACAAAATCCTGCTAGTGCTGGTAGATGTGCAGAATTAATTTTATCTTGTTTTAAACCGGATGAATTAATTGAAGAAGAAGAAGAACAACCAAAACAAAATATGATGTTTGATGATGATGAATAAAGTTGTGTTTAACTTGAAGTCAGATTTGTAAATAGCTAAATTAGTGACTGTTTCTTGTAGCATTATTTTTAAACTTCGACTCAGAAAATGAGCAACATATTTTTGACCTTGAAATGTTTTGGTTTTGTTTGTTCACTAACTTTACTTAATGCACTTTGTGACAGCTGTCTCTT
>NODE_3014_length_842_cov_25.146944_g2967_i0
GTATCAACGCAGAGTACGGGGGAAGAGGAAGAGGTAGAGGTGGTAAATCATTAGAACAATGGCAACCAATTACTAAATTAGGTAGATTAGTTAAAGATAGTAAAATAAATACTTTTGAAGAAATATTTTTACATAGTATTAAAATTAAAGAACCTGAAATTGTTGATTTCTTGCTTGAAAAACTTGGATACGAATTGAAAGATGAAATAATGAAAATAAAACCAGTCCAAAAACAAACAACAGCAGGTCAACGTACACGTTTTAAAGCGTGGGTAGCAGTAGGTGATAGTAAAGGTCATATAGGATTAGGTCAAAAATGTGCAAGTGAAGTAGGTATAGCAATTCGTGGAGCACATATATTAGCAAAATTATCATTAGTACCAATACGTCGTGGTTATTATTTATCAAAATTACGTGATCCTCATACAGTACCTGGTAAATTAACAGGACAATGTGGTAGTGCTAGAGTAAGATTAATACCTGCACCTCGTGGTACTGGGTTAGTTGCTGCTGGTGTATGTAAAAAAATGTTAGGTATGAGTGGTATTGAAGATATTTATGTATCTGCAAGAGGTCAAACTAGAACTACTGGTAATTTTATTACTGCACTTTTCCTTGCATTAAGAAAAACTTATAAATTTATTACTCCTGATTTATGGGCACCATTTGCATTAAGAGATAATCCATTAGATAAATATCAAATTAAACAAGAAGATGATCAAAGAAGTTAAAATTAATATCATTATGACAATTTTGAAAAAATATTATCAATTGATAAAATTGTTGGTTTTTTTTGTAACAAGTTGAAGTTTTTGCATCATATGTTTTGCATGTTATGTCTC
>NODE_3028_length_840_cov_13.483703_g2981_i0
GAGACAGCTCGAATGCGCACTGAATTAAGAACGCAAAATAAGGTTTAATTCACATTTACACAATTTTGAATGACAATAACAGACTAAGATAGTGTATTTTCCAAAGTGCCATCGTGGTTGTTACAGTACTTGCAGTATAAAAGAATTAAACAGACACGTTAACATTTGCACAAAATACAACTTTCTCCCATTTTTCGTTTAATTCTAAATCATAAAATATATCATTACCGCTAATATTGCAATCAAAACAATGATCAAACAAATTTTTTTTCCTTCAGAATGACCCATTAATTTATCTAATTTTTTGGTAAGAGAGTTTATTTTACCCATTGCTGTGTCCATTTCAGTATCAACTTCGTCCAACAAAATTAATTGATCTTCAAGTTCTGCTTTAATATTGTTTCCATGAACACCTAATCGTCGCAATGTACCCAACATATCATCTAATACTAAGTCTTGTTCGTCCCTTTGTATTTGTTTTTGGGTTTGTGTATCCTCAATATAATTTTGATTTATTTCTTCTACCAATCTGTTCTTAGGTTCTGTGTTCTGTTTTTCTGATTTTATACGGTCTTGTTTTAGTTTTTCTCTAGTTCGATCAGATTGCATATTATGACGACATTCATCAATATATTTTTGAGTATTGTTAAGAAATTCTTCTCGAGATTCTAATTCATATTTACTTATATGTGGATAATTTTCAGGATTATTACGCACAGAATCGACTGTAGATTTACGAATTTCTTTTAATGCATGTTTAATATCTTTATATTTTCCTCTTAAATTATCGGTTAATTCTTTAAATCGTTTATTTTTTACCCCGTACTCTGCGTTGATACC
>NODE_3108_length_826_cov_13.136786_g3061_i0
GAAAGAAATGTGAACTTTCAATTTTTGTTTGAGAATTTCAACAAAAATTTTTGCAATTTTAAACACAACAAGCAGTACTAAAAAGAAAAATAATACGAATTCTAATAGTAATAAATTTAATTTTTTTTTCTTTGTGTTTTTGTTTTTTGTTTTTGTTGATTTCTTTTTTTACTACCTTTATTTTTCTTTTTTGAACCTTTTGATTTTTTTTTACTACCTTTATTACTTTTTTTATTATCATCAACATCCATTACATTTTCACCTCTTTCTCTTCTTTGTTTTCTTTCAATTTGTTTACGTTTTTGTTCTTTAATTCTATCTTTTTTAGTAGTACCAAAAAATTCTTTTTTTTCAACAGCAGTTTGAAATCTACCAACACCCATTTTACTCGATGTATCAATAAATTTAAGATTAATTTCTTCAGATGCACCTGAACCACTTGGTAATACAACAGGTTTTCTTAAAGTAATTGGTCTTTTTCTTGGACCAGGACAAGTACCTTTTATCATAATATAATCTTGTTTAACAATACCATAATTAACAAAACCACCAAGTGGAGTTATATTTTTGGATGTTAAATCATTTTCAGTTGATGCATTAAATGATGTTGGTTTACCATTTTCATCATATTCAATTGCTTTACCAATTCTATAAATTTTTTTATGAATTTCTGTTCTATGATGATAACCTTTTTGACCTTCTCTAGCAATTTGATAACCAACACGTGCTGGATGCCATGAACCAATACATGCTACTTTTCTTAAACCTCTATGTGTTTTTCTAGGTAATCTAGTTACACCCCCGTACTCTGCGTTGATACTGTCTC
>NODE_3121_length_824_cov_4.390146_g3074_i0
GACAGGTATCAACGCAGAGTACGGGGGTATATTTAATTTTGAAGGTGGTTGTTATGCTAAAACATCTAAATTATCATTAGATACTGAACCTGAAATTTATCGAGCTGTAAAATTTAACGCATTGATGGAAAATGTTTGGATATCACCATACAGTCATGATATTGATTATTTTAATTTATCAATAACAGAAAATGGTCGTGTATCATATCCAATTGAACATATAGATAATCGTGAAGATTCATTAGCAGGTGGACATCCAGAATATATAATATTTTTATGTTGTGATGCATTTGGTGTATTACCACCAATAGCAAAATTAAATGCAGGTCAAAGTATGTATCATTTTATAAGTGGTTATACAGCTAAAGTAGCAGGTACTGAAAGAGGTATTAAAGAACCACAAGCAACATTTAGTCCATGCTATGGTGCTGCTTTCTTAACATTACATCCAATGGAATATGCAAGATTAATGAAAAAAAAATTAGAAAATCATAATGTTGATTGCTATTTAGTAAATACTGGTTGGACTGGTGGTCCATATGGTGTTGGTGAAAGAATGAGTATTAAAACTACTAGAAATTGTATTAATGCTATATTTAATGGTGCTATCAAAAAATCGAGATTTAGAGAAGATAATTTATTTAAATTTAGTGTACCTGAAAATATACCAAATGTTGATAGTAATTTATTAAATCCTAGAAATACATGGAGTGATAAAAATGCATATGATGAAGCTGCTTTAGATTTAGCGGATAGATTTGCTGTAAATATTGCACAATATACGGATGATGTTAATGAATATGAAGGATGTGGACCTATTGGAC
>NODE_3128_length_822_cov_12.560748_g3081_i0
ATCATGTTGAATTAAGTCTAACAAATGTTCTGTAGACCATAATTCAACATGATCACTGAATCCAAATGAAGTAAATAAACAAAAAACAAGCAAAACACTAGACATTTTATTACTCTACTTTAGTTTTAAATTAATTTGGAATTGTCCAATTAATTTCATCTTTTCCTAATTTTTTCAATAATGAATTTGTTTTTGAATATGGCTTACTTCCATAAAAACCTCTTGAAGCAGACAAACCTGATGGATGCGCAGATTTAACTATTTTATGCAAATTAGTATTTATAATTAATTCTTTTTTTTGTGCTTGTTTACCCCATAATATAAATACAACACCATCTTTTTTTTTATTTGATATTGTTTTAATAACACTATCTGTAAATTCTAACCAACCAAATTTCTTATGGCTATTTGCTTTGTGAGCTTCTACAGTTAATGCAGTGTTTAATAATAATACACCTTGTTCTGCCCATGTAACTAAATTACCATGATTAGGCCTTTTAAATGATTTACCTAAATCACTTTCTGCTTCTTTATACATATTTCTTAATGATGATGGAACTTTAATACCTTTTGGTACACTAAAACATAAACCTTCTGCTTGTCCATCATCATGATATGGATCTTGACCTACTATAACAACTTTTAACTTTTCCCATGTACATAATTCAAATGCTCGATATACTTGATGTTTTGGTGGAAATACCTCTATTTTCGGATCTGATTCTACATTTTTTAAATTTTTAATTAATTTTAAAAAATATGGTTTTTTAAATTCATTTTGCAACATTTCTTTCCATCCCCCGTACTCTGCGTTGATACCAC
>NODE_3147_length_818_cov_35.135570_g3100_i0
TTCAATGAATTGAAACTAAACCACAAAAAAAATCAAAAAACAATAAAATTGCACCCCAATAGACAAGACTTTAAGAGTCAAGAAAAAATTCAAAAAGAAACATGTGCAACAAACAACAAACTAATTAACAAAAATGAACTTACAAAAAAAAAAAAAATAAAATAATTTAATTTTCACCACATTTTGAAATATTTAATTACCACCACGTAATCTTAATACTAAATGTAAAGTACTTTCTTTTTGAATATTATAATCACTCAAAGTACGATTATCTTCTAATTGTTTACCTGCAAATATTAATCTTTGCTGTTCTGGTGGTATTCCTTCTTTGTCTTGAATTTTTGCTTTGACATTTTGAATTGTATCATTAGCTTCAACATCTAATGTAATTGTTTTACCTGTTAATGTTTTTACAAATATTTGCATTGCACCACCACGTAATCTTAATACTAAATGTAATGTACTTTCTTTTTGGATATTGTAATCACTTAAAGTACGATTGTCTTCTAGTTGCTTTCCTGCAAATATTAAACGTTGTTGTTCAGGTGGTATACCTTCTTTATCTTGAATTTTAGCTTTAACATTTTGAATAGTATCATTAGCTTCAACATCTAAAGTGATAGTTTTACCTGTTAATGTTTTCACAAATATTTGCATAGCACCACCACGTAATCTTAATACCAAATGTAATGTTGACTCTTTTTGAATATTATAATCACTTAATGTACGATTATCTTCTAATTGTTTTCCTGCAAATATTAAACGTTGTTGTTCAGGTGGTATACCTTCTTTATCTTGAATTTTAGCTTTAACATTTT
>NODE_3153_length_817_cov_5.100806_g3106_i0
CCACAAATTCTAAAAATAAAAAATATGTTTCCTTGATTCTTGTTTTATTTTTATACAAGATCAGGTACTGCTTCATCATCTTCTTCATCTTCACCATCTTGAACATTATTATCTAAATTAGCTAAATCTTTATTTAACATTTCATTAATAAAATCTTTTTCTTCATCTTTTTCATTAATAATATCTCCACGTGCTTGTAAATTATTCAAAATATTATCACTTGCTTCATTATTTTCTTCAGATTTAACAGGATATTTTTGTCTTAATTCTTCTCTTCTTTTTTCAATTTTTTCTAATTCTTCTTGTCTTTGTTTAGTAATATTTTTCATACATTTATCTAACATATTTTGTGATCTTCAGCATTTGGTGTTAATCTTAATACTTTTTGAAATGCAGTTGCTGCATCTTTATATTTATTTAATTTCATTAATACCATACCACGTAAATGATGTCCTTTGCCATAATTTTCTAATGGATCTAATTCACGAATTTTTTGACAATCTTCTAATGCTTTTTCGTATTGATTATTTTGATAAAACATTAATACTCTATTGCTATATAATATAATATTTGTAGGATGTTTTTCAATTGCTTTTGATACTTTTTCTATTGCTGATGTGTAATTTTTTTCATTAAAATCTTTATTTGCATCTGTTTTTATTGTTTGTGCTTCTGACCAAAGAACATTTAATTCATCATTAGTTCTAAATTCAAGTGGAGTACTTTTTCTTAATTTATTTAATTCTAGTTTAATTTGTTGATATGTCCAATTTTCACATTTTTTATTTCCTATTCTAATTAATTGACTACCAATAGG
>NODE_3211_length_808_cov_24.370068_g2924_i1
GATAGAACATTATCAGATTATAATATTTAAAAAGAAAGTACATTACATTTAGTTTTAAGATTACGTGGTGGTGCTATGCAAATATTTGTAAAAACATTAACAGGTAAAACCATTACTTTAGATGTAGAAGCTAATGATACTATTCAAAATGTCAAAGCTAAAATCCAAGATAAAGAAGGTATACCACCAGAACAACAACGTTTAATATTTGCTGGTAAACAATTAGAAGATAACAGAACATTAAGTGATTACAACATTCAAAAAGAAAGCACATTACATTTGGTATTACGTTTAAGAGGTGGTGCAATGCAAATATTTGTTAAAACGTTAACTGGTAAAACTATTACATTAGATGTTGAAGCAAACGATACAATTCAAAACGTAAAAGCAAAAATTCAAGATAAAGAAGGAATACCACCAGAACAACAACGTTTAATATTTGCTGGTAAACAATTAGAAGATAATCGTACTTTAAGTGATTACAATATTCAAAAAGAAAGTACATTACATTTAGTATTAAGATTAAGAGGTGGAGCAATGCAAATATTCGTAAAAACATTAACTGGTAAAACAATAACATTAGATGTTGAAGCAAATGATACAATTCAAAATGTAAAAGCAAAAATTCAAGATAAAGAAGGAATACCACCAGAACAACAAAGATTAATATTTGCAGGTAAACAATTAGAAGATAATCGTACTTTGAGTGATTATAATATTCAAAAAGAAAGTACTTTACATTTAGTATTAAGATTACGTGGTGGTAATTAAGCTAGAAACCAAATCAATAAATCACTCAAATATTTTC
>NODE_3261_length_799_cov_53.852617_g3212_i0
GTTCTAAACCTCGTGGACCTGGTATGGAAAGATCAGATTTATTACGTGATAATGGAAATATATTTGAAAAATTAGGAAAAGTAATGAATACAAAAGCAGCAAGATATTGTAGAACTACAGTAGTAGGTAATCCATGTAATACAAATTGTTTAATATTAGCAAGCAATTGTCCTGATATTGATCGTAATAATTTTACTGCTATGACTAGATTAGATCATGATAGAGGTTTATCATTAATATCAAGTAAAGTATGTTTACCTGTAAATGAAATAAATTATTTTAGTATTTGGGGAAATCATAGTGCAAGTTTATTTCCTGATTTATCAAATACATTAATACATGGTGTTGAATGGGACGAATTAATTGGTAAATATAAATCTGATAGATTTTTTAGAAATGAATTTATTCCAAGAGTACAACAGCGTGGTGCAACTATTATAGATGTTAGAGGATCATCTAGTGCTGCTAGTGCTGGAAGTGCATGTTTAGCTCATACAAGAGATTGGATATTTGGTACACCACAACCTGATTGGACATCTATGGCAATATTTAGTAATGGCGAATATAATGTACCAAATAATTTAGTATTTTCTTTTCCTGTATGGTGTAAAAATGGTTATTATCAAGTAGCAAGTACACCATATCAAATAAATGCATTTCAACAATATTGGATTGAGAAAAATATTCAAGAATTGAAAGATGAGCGAGATATGGTTTCTAATTTTGTTAGATAATTAATTCTAATGTAAAAATTTTAGAAAACCTTTTTGACTTTTTGTGTTTTTTTTGAGTAATTTAT
>NODE_3269_length_798_cov_24.885517_g3220_i0
ATATAAAAGCGCAAAGATTCCCAAGCCAGGAGAAAAAAAAAAAGAAAACACAAGCAACAAAACAAAAAAACGAATGAAGTAGAAAACTAAAAATAAACTATCACAAAAATATAACGCAAAATGATAAGATTGTATTCAATGAATAATGATGTTTAAAAGATTTTTTATGACTGTGTTTAATTGATATTACTATTTCATAAATTAGTTCTTATGAACTACTGATACAACCTTACCAAGCATAATTAATGAATTACTATCCATAGCAGCAACTCTACCTAATGGTTTACAATCATCAAATGCACATACAACAAATGGCATTTTAGGTTTAAATACAACTTCTGCTTGATCACCAGCTTCAATATATGGTGGCATTTCAACTTTTTGATTATTTGTTGATTTACCACTTTTCCATTTTATTTCAACCATTTGACATGGTGCTTTAGCAGTTCTAATATGTATACTTGGTGTAAAACCACCTTTATATTCATCATTTTGTTTTTTAGCACATTTTAATTGTCCAGGATGATCTTGTACAAATACTAATGCTGTAAATGTATCAGCTTGTTTTGGTGGTTTAGGATCATCTTCATCATCATTACACATAACATCTCCTACTTTTGGCATATTTTCTTTTTTTAAACCTTTGACATTTACACCTACATTATCACCATGGACTGCTTTTTTTACTGTTTTATGATGCATTTCTATGCTGAATGCTTTACCTTTTACACCAGATGGATAAAATCTTACATTTACATCTGGTACTAATTTACCTTGTTCAATACGTCCCCCGTACTC
>NODE_3271_length_798_cov_13.121379_g3222_i0
TAATTTTTGTGTGCTTTATCGAGTACAAAAAAGTTTAACGTTAACACTAACACTAGAACACTAAATCAACGAACAATTATAATATGACGTGCATGACTGTACGTTAATATCAATAGTCTTAGTAAATATTATATAATATGTGAATTTGAATACACAGGTTAATCTAACTCAACAGAAAAAGCAATAATAAATATAATATAAGCAGCACATAATATATATCCATGAAATAATTTTAACGTTAATGATGATTTATACAATAAAAAACATAATACTATTAATAAAACACCTAATGCCAATAAAAATAATTCAAATCCAGTATCTGATTCAACATCTAATGTTTTAAAACCTATCATAACTGATTTTAAAAAAAATGATAATCCAACACATATACAAATATCAAATACATTTGAACCTAATGCATTTGATACTGCCATAGTACCTTTACCTTGTTTTGCAACTAATATACTACTAAAACAATCTGGTAAAGAAGAACCAATTGCTAATAATGTTAATCCCATAACATCTGCATTTAATTTAACACAATTACCGATTTTATTTGCACAATCAACTGCTAAAAATGTTAAAAAACCCATCCATACTATTGATGCTATAAATACTGCTGATAATTTACAATATGTTATTATTTCTGATATTGGACGATCATTTGCATCATGTGGATCATGTTGTACTGCTTGAGAATTTGATGATTTATTTATACTTGTATGATAACCAGGTGAATTTAAATCTGGTATTGTATAACTAAATATAATTCGAAATGGTATTAACAATATTTTAAGT
>NODE_3272_length_798_cov_8.382069_g3223_i0
AAAAAACAATACAAAATGAAATAATATCTCAAATATCCAAATCTGAATCATCTTCTGATTCTTCTTGTTTTATTCTTTTTTTTTCTTGTTTTAATTTTTTTTCAAGACCAAGAAAAGTTTCTTCAAAAAATCCGTATTCATACAATTTTTCTTCTAATTTAATTGTTTCTAATTTACCATCTGGTGCAACCCAATCTAATCCAACTAATTTTCTATCAACTTTAAATTTTTTGGAACAAACTAATGTTGGTAACATCCATATATTTAATTTTTCCATTAAATAACTTGCATTTTTTGCTTCAATTTCTACAAATTTACATTCAATATGTTTTCGAGCCAATAATGTTAAATGTTCCCTTAATGTATTACACCATTTATTTGTTTTATTAAAAAAATGACATATGACATATTCTGATGTTTTAACTTCATTAAAAAATTCACGTTGATCTGGTAATAACACCATACGTCCATGATTTGACGACATCCATTTTTTTCCCCGTACTCTGCGTTGATACNNNNNNNNNNGTTTTAGTTGCTCTAAACGTTTTTTCCTTAGTTCTTGAACATCTTCTTTTGAAAGATTATTTAATCGTTCAATTTCTTCATCAACTTCTCGTTCTTTGTCATTTAAAACTTTTTGCATTGTTGTAGTGACTTTGTCACGAACTAATTGTTCCATTTGCCATTTTTTGTATTGGTTTTGAAGGTCTTGTTGACTTTGTTTTGACATTGCTCACCACACAAGTTTTAAAAAGTTTGTTTTGTACCCGTACTCTGCGTTGATACCACCTGTCTCTT
>NODE_3363_length_786_cov_44.642356_g3313_i0
TCTATATAAAGCTAAACCGATCATTTGTATAGATAAAAAAATGTCCACAAACACAACAAACAAACACAGATGACTCACACCATTAGTACATATGATCAATGACAACCATGACATGCAAAATAGATCCATATGTACACTGAAAATTAATATTAAATTATAAATTTACTTATAAAAATTGTCTTAAAATGTTTTCTAATTTAACGATATCATCAGCAAATCTACGAATACCTTCAGCTAATTTATCATTTGCCATAGGATCTCTACACATACCATATCTAAAATTAGGTTCTGTAACATTTAATTGTTGTTTATAATCACTTTTTTGACTTGGTTCTAATACTCTTGTTATTTGATCATTACTATTAGTTAATTGTTCTAAAAATTTAGGACCAATTGTTAATCTATCACATCCAGCTAATGCTAATATTTGTTCTTTATTTCTAAAAGATGCACCCATTACAATTGTTTTAAAACCAAATGTTTTATAATATCTATAAATTTCTAATACATTTTTAGGACCTGGATCATCTTTAATATCAAAACCGTCAACACCTTGATTTTTTTTATGCCAATCTGTTATTCTTCCAACAAAAGGTGAAATTAAGTAAGCACTACCAATTTTAGCAGCAGCTGCTGCTTGCCATATATTAAATAATAATGTCATATTACAATTAATATTAAATGCATGTAATCTTTGACATGCTTGAATACCTTCCCAAGTTGATGCTATTTTAATAAGTATTCTATTCTTCGCATCTTTAATACCCCCGTACTCTGCGTTGATAC
>NODE_3371_length_785_cov_53.504213_g3321_i0
GAGTACGGGGTGGTCAAAGATGTACATCATTAAGAAGATTATTTTTACATGAATCAATATATGATGAATTTTTAAATAAATTAATGAATAAATATAAAACAATTAAAATAGGTAATCCATTAGAAAATGATACATTATGTGGTCCAATGATAAATAAAGAGGCTGTACAAGATTATGTGAATGGTATAAATTTAATTAAAAAATCATCAAAATCTAAAATATTATGTGGAGGTAATGTTTTAGATAATATGAAAGGTAATTTTGTTGAACCAACTATTGTTCAAACAGAACATACTGAACCATTTGTAAATGAAGAATTATTTGCACCTGTATTATATGTTATGAAATTTAAAACATATGATGAGGTAGTAAAAATGCATAATAGTGTAATACATGGATTAAGTAGTTCATTATTTACTAAATCACATACTAATATATTTAAATGGTTAGGACCAACTGGTAGTGATTGTGGTATTGTTAATGTTAATATTGGTACAAGTGGTGCTGAAATTGGTGGTGCTTTTGGAGGAAATAAATATACTGGAAATGGAAGAGAAAGTGGATCAGATTCTTGGAAACAGTATTGTAGACAAAGCACATGTACCATTAATTATTCAGATGATTTACCGTTAGCACAAGGTATCAATTTTGGCTCTGATGAATAAATTAAGTCTTATTTGTTTGTGAGTGTGTTAATTGCTCACACGTCGTTGTTTTGTTTGTTGAGTGAAATAAATAATGCTTATTTAAAAATTAAAATTTAAAATCAAGTTTTGTGTATGTTT
>NODE_3405_length_781_cov_5.103107_g3355_i0
CCAAAAAGGGGAGTATAGCGACCCTTTGCGGCTAGTATGAATAATAAAACTTAATTATTTTGGTCAACAGGATTTCTACAAATAGGACAAATATGATTTCTTTGCAACCATGTATCAATTTCTTTAGTATGAAATATATGTAAACATGGTAATCTTCTAATTTCATCACCTTCTTTAAATTTTTCTAAACAAATACAACATTTTGAATTTTCATCATTTTGATTATTATTATTATTATTTTTTTTATTATCATCAATTGATGTTGTTTTTTTTTCATGATATTTATCAGTGGGTAATCTTTCAATATCTTGTTGATTAGCACCTCTAGGTGGATTTGGAAATTGTCTTAATAATTCTTCATATGTCATATTATGTATATCTCTATTTAATGAATTCATACCATAAATATCATTACCCATTAAAGCTCTTAAATTTGGTAATCTTGACAATATATTATATGGATTATCAACGTTTGCATTATCAAAATCATCGTTACTGCCACTAATATCACGCATTAAAAATGAATTCGTACCTAAATCAAATGTATTACTATCCATATTATCACTATTATTATTATTATTTCTGCTGTATGTATTTCTAATTGTTGTAAAATTCAATCCGGTGCCATTACTATTATTTCCTGTTGAAGAATAATTATATGAAAATTGTGTACCATCATCATCAGCCCAACTACGTGATTCATCATTATTGTTGTGCATATTTTGTAATCCAAGCAAATTAAAAGGATCTTCACGATCCCCGTACTCTGCGTTGATACCAC
>NODE_3438_length_776_cov_5.551920_g3388_i0
ATCATATATTCTTGTTTTACCTAATTGAATACTTACAACTGCATTTATATGTTCTTCTGCACTTTTAATCAATGGTACATCTTGAATATGTCGTTGTTTTACTAATAATTCTATTTCTGTACAACCTAAAATACAACATTGTGCGCCTTTTATTTTACACAATTCATTTCTTATTACATTAATAAAAAAATTTCGTGATTTTTGTTCAAATTTATTAAAACTTAATTCTTTTTCAATTATTCGCTCCATTTCTATCTGATCAGATTCATTTGATGGTATAACTACTTTTAATCCATGTTGACGTAAACGTGCTTTTAAGTAATCTTGTTGTAAAGTAAAACGAGTGCCAACAAGTCCAACTGTATTAAATCCTTTACTTATAATTGATTTTGCACAACAATCTGCTATATGCAATATTGGAAAATGTGGCAATATTTTAGTCAAATAAGGTACTATCATATGACCAGTATTGGATGCAATAACAAGAAAATCTGCACCTGCATTTCGAACACGTCTAGCAGCATCAGATAATAAACCAACTACTAAATCCATTCGATCTGCTGTCAAGTAAGCAACGTATTCTTCTAAATTTACAGAATACATTACCATTTTGCTTGTATTCCCAGCAAATTTCCATTTCGAACCACTTTGAATTTTTTCATTAATTTGTATGTAATAATCCGCTCCACTAATATGTGATATACCAGTAACAATACCAACAGTTGGCTGAGTAATTACAGTATTCATTTCAACTTGCCCGTACTCTGCGTTGATAC
>NODE_3456_length_773_cov_10.982857_g3406_i0
AGAGACAGAGTAAACACAAATATTTAATTTTGGGTGAAATTTAAATAATTGTCAAAAACAAACAACAAAATATCCCATCCATATACAAAACACGAAAAACGATAAATATTTCATAATGAGATGAAAATATTTAAAAAAAATCAACAATGATGTAACATTTACTCCAAAGAATATTAAGAATAAATATTCCCTTTACGTAACAATATATCGTATGAATATAATATTACAAAATACTAATTTTCATTATTATTCATAACAATATCATCATCTTCCACACTATTATTATTATTATTATTATTATCTGTAGTTTTATCATTTTTATTATCTTCAGTCTCGTTTTCTTTACTTTCTGTTTGATAATTTTGATTATTATTTTCACTATTAGTATTACTATCAGCATTTTCATCAGTTTCATCCCATTTTATAGAAAATCTAGTAACTCTAGGTTTATCAGCTAATACAGTACGTGTAATTTTATTAGGATCTATATTTAAATTTTTTTTTTTATTTGTATTTTCATTTTTATTATTATTATTAGTATTTTCTTCTTTATTATTTTTATTATCGTTAATATTATCATCATTTTTATATTCATTGGATACATAATATCCAATACGTATAAATTCTTCTTCATTATATGAACAAGATAATAAAACTACAGTAACATCCAATAAATCTTGTGGTGATATTAATTTTGAATTTGGTGCTGGTGCTTCAAATACAAATCGATTTTTACCTAAAGATATTGGTCCCCCGTACTCTGCGTTGATACC
>NODE_3462_length_772_cov_13.801144_g3412_i0
AAGAGACAGGTATCAACGCAGAGTACGGGGTACCACAAATTGAAGTTACATTTGATTTAGATGCTAATGGGATTTTATCTGTTTCTGCTAAAGATAAAAAAAATGAAAGTAATAGTAAAAAAATTACAATTGATCAACAAAAAGGTAGATTAAGTGAAGAAGAAATTAAAAAAATAGTTGAAGAAGCAGAAAAATATAAATCTGAAGATGAAGAATTAAAGAAAAAAATAACAGCTAAAAATGATTTAGAATCATTTGCATATCAAATGAGAAATACATTAGATGATGGTAAATTTAAAGATGTAATTAAAAAAGAAGATAAAGAAAAAGTAGAAAAAGCAGTCAAAGAAGTAATTGAATGGGTTGATCAAAATCCAAATGCAGAATTAGATGAATTAGAAGCTAAAAAAAAAGAATTAGAAGATTTATGGAAACCAATTATTATGGAAGCTTATAAATCTACTGGTGGTCAACCTGGACAAGGAGGTATGCCTAATATGGGTGGTATGGGTGGTATGCCTAATATGGGAAATTTTCAGCAACCAACAAATACTCAATCAAATAAAGGACCTGAAATCGATGATGTTGATTAAATTACTATTAATTCATTGATTTATTAAAATACATAATAAAATATTAAATTAAAATATTTTTTTAAAATTATAAATAAGTTTTTGAATTTGGGATTGTGATCTGATTTCTTTTCCAAATTTAAATTTTATATGTTTTGTTTTAATTTGTTTTCATTTTGGTTTTATTGTCGATCTGGCTT
>NODE_3477_length_770_cov_21.413199_g3427_i0
GTGGTATCAACGCAGAGTACGGGGGTAAATGTAAAAGCAGTATTACGAGAATGTGGTGGTAGATCAAGACTAATTGGACCAAATTGTCCAGGAATAATAAAACCAAATGAATGTAAAATAGGTATTATGCCAGGTCATATACATATGCCAGGTAAAATAGGTATTGTTAGTAGAAGTGGTACATTAACATATGAAGCAGTTAATCAAACAACTGGTGTAGGATTAGGTCAATCAACTGTTGTAGGTATTGGAGGTGATCCATTTAATGGTACTAATTTTATAGATGTATTACAAAAATTTAAAGATGATCCTGAAACTATTGGTATTATTATGATAGGAGAAATAGGTGGTGGTGAAGAAGAACGAGCTGCAGAATGGATTAAACAAAATAATTTAACTGAAACTAAACCTATGGTTGGTTTTATATGTGGTGTAACTGCACCTCCAGGAAGACGTATGGGACATGCTGGTGCAATTGTATCAGGAGGTAAAGGTGATGCTAAATCAAAAATGGAAGCTTTAAGATCTGCTGGTGTTGTAGTAAGTGATTCACCTACTATTATGGGTAAAACTATGTTGCGTGTAATGCAAGAAAGAGGATTACACTAAAAATCATCAATTACACCTTATTTATAATTCATATACAATTCAATTACAGTTTGTGTTTTATCAATTTGACTTGTTTGTTTAACTATTTGTTTAAAAAATTAAAAAATTTAATTTTTTTCTTTTTTTGTTTTCACAACAATAAAACGGATAACATATAATAT
>NODE_3481_length_769_cov_29.005747_g3431_i0
GAGTACGGGGGACGTATTGAACAAGGTAAATTAGTACCAGATGTAAATGTAAGATTTTATCCATCTGGTGTAAAAGGTAAAGCATTCAGCATAGAAATGCATCATAAAACAGTAAAAAAAGCAGTCCATGGTGATAATGTAGGTGTAAATGTCAAAGGTTTAAAAAAAGAAAATATGCCAAAAGTAGGAGATGTTATGTGTAATGATGATGAAGATGATCCTAAACCACCAAAACAAGCTGATACATTTACAGCATTAGTATTTGTACAAGATCATCCTGGACAATTAAAATGTGCTAAAAAACAAAATGATGAATATAAAGGTGGTTTTACACCTAGTATTCATATTAGAACTGCTAAAGCACCATGTCAAATGATTGAAATTAAATGGAAATCAGGTAAATCAACTAATAATCAAAAAGTTGAAATGCCACCTTATATTGAAGCTGGTGATCAAGCTGAAGTTGTATTTAAACCTAAAATGCCATTTGTTGTATGTGCTTTTGATGATTGTAAACCATTAGGTAGAGTTGCTGCTATGGATAGTAATTCATTGATTATGTTGGGGAAAGTTGTATCAGTAGTTCATAAAAATTAGATAAAAATTATTAATTTTACAAAGTGAGAAAAAAGGAGCATTTTTTACGTTTTTTTACGTTTTCTTTAATTCTATGTGGTTTGTAACTTTGTTTGTTTGTGTGGGTATGATAAATTGAAATTTTTAATTCTTTTTTTAGCGAAAATTTTTTGCCGGTTCCTAAGCCTCAATG

View File

@ -0,0 +1,50 @@
>NODE_96_length_4637_cov_31.218614_g87_i0
CTTGGATAAAATGGTTATTCTTTTTCCGAATTGTGAGATGATAGAAATCAAAGGATTGACGTTGACTGAGGTTACATTTGAAAATCTTGTTTTATGTGTGGATAATATAAGTATGGAAGTGATTGCTGCACAAGATTTCAATATTTGTACTAAATCATCACAGGCTGGGAAAAAACCTCGTCTTAAGATGGTAAGCAAATTACAAAGGATTTTAATCACCGAACCAGGCATAGAAGTGGATGACATTATGGATGAACTTAATACTATTCGTGAACGTTTCGTAGATTTACATTGGAATATTAAATATTTAGAGAGTTATCATTCTGCAGCATCAATAATTATCACCAGAGATAATGAATATTTTAAAGAACGTGTTCAACAAAAAAAGAAGGAAATTAAGAAACGAATTAAAGCCATCTCTAGTTCAAAGCCACATACGCCACTCAATGAAGCATCAGACAATATATCATCCGATGTACAATCTTCTGCAGTAGCAGGAGAAGAATCTGCTGCTGGTGTTTTAAAAAATGCTGCAGAAAAAGTGGTTGAGGTGAAGAGTGATGATGATGAAAAAAAAGTTGGGGAAGAAGATTTATCTGAAGATGATAATGATGCACAAGATGAAAATGCTTTTATAGAATTTGAAGAAGCAGATTTTGCAAATGATATGCTTGATAATAATGATGACATAAAAGAAGATATGGATGAGGATGATATTGATTGGGATGATTGTCCAGAATATCTACAATTACAAAGTTTTGGATTTGATCGACTATGGGTGAAATGTTCACTCACTTTATACAATAGAAAATTGCAGGATTGTTTGGAATATTTAATATCAGCAGAACATGAAAATGCTAAAAATGCGTATGATGCCATATTAGCTAATGATCCAAATGCATTCACCAATGATAATATTTATAATCCTTTGTGGAAATGTGTAATGTGTGGTTTTAAGAATCGCGGAGCTCAACTCACATGTTTATTATGTTCACTTGGACAAAGACCAGCACCAACCGTTCCTAATACTATGGATTTATTGTTGGATGAAGATGAAGTACTTCCAGCAGTGGTTGAATTAGATTTAGGTCCATGGAATTGTCCATTGTGTTCTTTTGAGAATGAGAATTCTAAATTGGCATTTTGTGATATGTGTCAAAAAGGTAAGAATTCTGTGCAATGGGAACAATTGATATCTGGTGAATCACGCATTGTAAAACGATGTAATTTTAAATCAACAGATGATAGAAATGGTGTAGTATTTTGGATTGGTACACTTTGTGGTCAATCAAAATGGAAAAACCCAGGATTATTCAATCGTATAAGATGTGAAGCATCAACGCTCACTCCCGATTCACATCCAGTGACATCAACACTCACTGATCGAGTAAGTGTACGATGTGTAACAACTGCTACTCCAGGTTCATGGATGATGATTGATTTTGTACATTTTAAAATACGTCCAACACATTATAGTTTGAGACATTATATCTCATGGAGAAGTGAAGCAATGCGTAATTGGGTGTTAGAAGGATCGAACACTCCTTCCAACAATAATAATAATAATTATAATAAAGAATGGATTCAATTGCGTGAACATAAAAAGGATGCATCATTGTTTGGTAAGGGATCAATTTCTACCTGGAAATTGAATGTCACTGATAATGCATATCGTTTTTTTCGTATCAGGCAAACTGGTCTCAATTCTAACAATCATCAATATTTAGCTTGTTCTGGTTTTGAAATGTATGGTATTGTATTTCCTGTTAATGTTTTGCAACCTTTGGTTGAGATTAGAGTTTTAATGCTAAGTGTTGAAGAACCAATTGAAGATATAGATCATGATGAAAGATTATTGCAGAGTATTAAAAAACAACAGATAAATCATTTTATGTCAAATTTGGATTCTAATCTTGATGGACATAAGTTTAATTATAAATCAGATTTCGATGAGAATGGTGTTTTATTTTTTTTAGGTACAGAATATGGAACAGCACCAT
GGAATAATCCAGCATTATCTGGTGTAGTGACTGTTACATCATCTGGGATGGATCCGGAGAGTGTGGCAGCAACTGCTATTTGTGGTAGGAGTGCTGAACGTTGTGTGTGTTTGCCAGTTGATGACAATTGGATAAAGGTTGATTTGAATAATTTCTTCATTCAAATCAGCCATTATACTTTGAGACATTATACTTCAGCACATGAGGCGTTGCGTAATTGGTATTTTGAGGCAAGTGATAATGCTCAGAATTGGACTGTGTTGAAAACGCATAGAAATGATGAGGGTTTGAAGATGGAGAGTGGTAGTACTAAAACGTGGAAATTAATTGGAGAGCGAAGGAGATTTCGTGCTTTTCGTATTCGACAATATTCTAGAAATTCTAATGGGCATATGTGGTTGGCTTGTAGTGGATTGGAATTGTATGGAATTATGTTTTTGCAAAAGTAGGTAAAGGTTGGTAAATGAGAGTAAAATGAAATCATTGAAAACTCAGATGTTTATAAAGGATGTCTGTCTCATAGAAGTGTTGACATTGTTCGCAACAATGTATCTCCTGAATATCAAATACTACAAGTTCTTTCACATGTAAACTTATTAGTTCTATTATTTTATTATTTTATTATATTTCTTGATCTGTTTGTCTCTCGCAACCACAAAAAATTACAAACAATTTCAAACTAGCCTTCCCTTCTATGATCATATTTACTAAAATACATACTGCTGTTGTTGCGCTTTCTGCTGTGACTTAATTGGCAATGAACTTGGATGATTCCACGATGTAGTTCCAGATGTATGATCCACATAATACACAGTACCATTCTCATCATATGATCTCTCCCATCCCGGAGGCAATGGTGCATTCGCATACTGATCATTTGCAACCACTGCAGGTGCAGTAGACATTTGATACACACTCTGTTGACCCAGCTGTTGATCAGCTTCATTGTACGCAAATTGTTTCACATTATATTGTGCTTGAGATTTAGGCCGTGGTTGAATATTATGTGCACGTACATATGATAGAAACTGATCAGGAATCTCAATCAATGTTTCTCTACTCAAATCAGATAACTTATGATACTTATTCATAGGCACAAATTGAACTATATCACGTTTGGCCACTTGTCCTCTGCTATTCATCAATCCATATTGATCACCATCCAATTCATCCATTGCACTAAAATCTGCATTACCAACACCAACAATAATTACAGATATTGGTAAATTCTCATTCGACATTGCAACAATTTCATTGCGAGTTAATTTCATATCATTTATAATACCATCAGTAATAATCAATAATATAAAATATTCCAAATTGCCAGCAGCAGCAGCCGCAATCCCATGAGCTTTTTTCAAAATCGGTGCAAATAATGTTGGACCACTTAAAGCAAAAGAACCACCTCTGATACTCTGTAAATATGTTTGTTCAATACCATATATATCGTAAACTTCAGGATCATGAGAATTAAAATTCAAATTGAAATCATGAAAAACACCATTGTATGTAGTATTCCCAATTCTAATATTACTGAAATGTGCGCCAAATCCCCAAACTGGAAATTTTTGGTCAGAATCGTACACTTTAATAATATTACCAATGGTTCGAATGGCATTCTGATATGGTGATGGATTTTGACCAAAAATATAATGTAAACTTTGATAATCTTTTGGGTGTCCATTAGAGCCAGTGAAATCAATTGCAACCATCAAACTCATATCCAAATCACCATTCATATAATCTAAAAACGAATTCACTGTTTGTGATTCACACGAAACAACATTTAATGTACCATACGTCTTCTTCTTCTTCTGCACTCGTTTCAATGGCATATTTTTCGGCATATTTTTCAATTCGTACAAATTAGTATCAATTTGGCCAATAAAATCATCACTGCCATTGCTATCCCAATCATACACTTTAATGCGTATTGGTCGATATTCATCATTGTTGCATAATCGCTGACTCTCAATTTTGAATGGTTTCCAAACTGGATT
CAACGTGCGTTTGATAAAATTTTCACGATCTCCATAAACGGTAATTTCCTTGCCATCTTCTCTGGTTCTACAGAGAAGAAAATATGGGTCAGCTTTTCCAAACAAACCATCCATTTTTGGTAAATTTTTTGCACTGAATTGCATGGTAATTAGAGTGTTTCCTTGTTCGTTGATTTGTTCTAGTGTTGCTATCACGTGTGAGAAACGTTTGGTTTTTTTATTTTTCAAATTCTTTCCTTTGTACATTAGTTTTTTTGCCATTCGCGAACCAGATGAATGAACCAATTCGCCCAAAACCATAGAACATGATCCAAGAATATCATGATCTTTCAATCGTTTGGAACCTTTCTTATCTTCGTCATAGCAGTCAAATCTAAGAATCTGTTCTTCTTCAAAATAGTAATCCATACGAAATTGAGTGCTAAAATTTGGATTGTGATTATCGTATATTACTTCTGTTCGACCAACTTCTACAAATGATTTATTTTTGCTGCCTTTTACGTACAGTATTACAAATGGGTCTGATTTGCTGTGTACGTCTTTTTTTATCAAATCACGACAGCTGACGAAGATTTCAACACTTTCTTTTAAAATTGGTGCTAAAACGTTGCTATTGTTGTTGTTGTTGTAGTTAACG
>NODE_109_length_4370_cov_38.616755_g82_i2
AGCAATGATGATTGGTCTAAAATATACCCAGGTGTTATTGGTATTGCAGCAGAAACCATGGGTTGGGAAGGAAATAATGCTAGAAATCCTATGCTCGTAAAATTATTCTTGAAAATGATAACTATTGCAATGACCAAAGCCAAAGCTTCTAAAAAAGATACAGGTATGTTTACCATCTATTTATCATGGGCTATGCAGAAATTGCAAGAAAAGAAATTCAATGCAATGGCTGAAGAATGTATAATGGCGACTTGCTTTCAGTATGGACCTAAATTAGTGTTCACTGCAATGGAGAAATTTATTACAGAGCCAGAAAATGCACAAAATGATCCATTTAAGAATGAGAGAGCATTTGGACCAGTGATTAAATTTATGCATCGTATGGTCAAAGAATTTGGTGTGGATAATTGTTATCCATTGCGTATGCTACGTTTGACACATGCGTTGTCTGCCAAATGCAGACAGAAAGATTCTAAAGAAGCATGTTATAATGTGATCACTGAAATGTATCAGCAATTGGGTAAAAATATGAAAGGTGTATGTCTTGGACCATTGGGTAAAGCGCAAGTGAAAGTGTTGACCAAAAGATTTGATGCTATAAAAAATGCAGGAACATTCATGCAATTACGCTGTACACGAACAGAGAAAACACCTGCAGCCAAAGGAGGACAAACAGAAGAATATGAAGAAGAAGTAATTGAGTGGGTTGAAGATACATCAGCAGAAGTGGCAGCAGCACAGGCAGCACAAGCTGAAGTAAAAGCAGCAGAACCAGCTCAAGCTGCTGTTGTTGCGGTTGTTGCGGTTGCTGCTGCACCAGAAGAACCTGAAGTAAAGGAGGAAATAAAAGAACCAGAGAAGTCTCCTACTCCTCCACCACCGGATGGACCAATAAAAAGTACTGCCAAACAAAAAAGTAAACGTTTGAAGAAAGCGAAGGCGAGATACAAACAATTTGAATGGAAGCCTTGGAGTAAAAATGAGATAAAAGAATATGATGCATATTTGTTTCCTAAAAAAGAAGAGGATGAGCCATACAGAGATGAACAGTTTGGTAAGGATTTCAATATCAAGATTGGTTCTTTGAAGAAATGGAAAAAATGCCTTGAGCCAATTGAGATATGGATGGATACAAATTGGAAACAATTCTATTGTGCATCAGATTTGGTTATGTATTGGTCGACATACATTTTTGAGCTTAATCCACAGCCTAATATTGGTAAAGCATTTATTCCTTTGTGGAAGAAGTATTGTGAGCTGTTGCAGGAACATTTGCAGATTATGGATGAAGCAGAAGGAAAATACGTGTTAGCCTTGGTAATAGAGAAAGGTTTCGTTCATCGAGGATTGATTGAAAGTGTCCATGATTTCTTGGATGTTTTTGAAGTAGTTTTCAATCCTGCTCTTTGTATGAAAACATATTTGACCATGGCTGAAAAAAGTAAATCAAATGCAGTAAAATTGCATTGCATGTCACGGTTAGGAATATGCATAGAACGCTATGGTTTGGTGAAATGCGAATCAAAGAAGAATAAATTGAAATCTAGCAAAGCATTCAGAGATGCTAAAGACTTTTCTCGTCAATTTATAAAAATGTATCCCAAATTGATATCAGATAAAAAAACAGCAGTGCAAACTAAAAAGATATTGAAGTATGTGTATGAAGTGATTGGTGATAAAGCATTTTGGAGTGCAATGAAAATCAAAGACAAAACTAAAGACAAAGAAGAAATGACTGATCTCATATCAGATGCAGATAAATTCAAGAAAGGAAAATTTCGTAAACCAGAAGAACATTGGGTGGGAGATATGATCAAAGCTGCCAATAAAAGAAAAGCAAGAGAAGCTGTATTAGGTGGTGGAGCAGGTGGAATGCCAATGGGTGGTCCAGGAATGGGTGGAGGTGAACATGGATTGCTTGATCCAACGCAATATGCACCAAAAGGACCTCTTCCTGCCTTAGTTCTCAATAATTTCACACCAATGCATTTCACTGCACA
ACAATTGTTAGCTTTGCGATATGGTCCAGTTCCACATCTGTGGGAGATAAGTAATATTCGTGATATGGTTGGTAGATTACCAGCAGCTTTCAGCATGGATTGGGAAGAATTCGAACAAAAAAAACAAACAGTTGATACTAAAGCTACCATGCCTTACCTTAGAATTGGGAAAGTGCGAGTGGATGTAGAAACAGGCAAAGAATATCCAGAAATAGTACAAAATGGTAAAATACAATTGTTTGCAAGTGTTCCACCAACAGATGATAGAGCACATCAATTGTTGGTAGAAGAGAAAGAGGCTGCATATATGAATAGATTGAGAAACGATGATAATCATATTCAAGCTATTCGCCAGATTGAAGCAGAAAATTCAGAATATTTTGATCCAGCCAATGATGGTTCATCATGGTTGGCATTGTATGATGTCAATGGTTTTGATGAAACTGAACGTCAAATACAGATGATAGAAATGAGACAGATATATGGGGTAAAGTCATTGCAGACATGGTTGCATACTGGTAAGAATGTTAATGTTATTGAACAACGAGTGGAGAAAATTATTGATAAATTATTGCCGTGGTTTAAAAGTTTGATCAATACTAAAACTGCAATGAGTGATCATCGTAGATATGAGCATGTGTATCAAATATTGCAATTGCTGAAATTTGTTTGCAAGTTTAAAGCGCTTCGATCATTGAATATATCGGTGTTGAATGCGTTTTTTGAGACTTTGTTGAATGCTATCACATATGATAATATGCCTGCGCTTTTCAATCAACATCATAAGCGAGTGCTTGGTGAAATGAATTCAGTTTTATTGACATCTATAAATTACACACCGCCAGTGGAAACATTATGTATTTTAATACAATTGCTATCAAAATGTGATCCAACCAATGATAGTGAATCAAAAAAGTTGTATGCAGAAGCTATTTCTAAATTAATTTACAGACTGTTGAATAAATTCGATCGAGCAATTATGAAAACAGATGAAAGTAAAATAAAAATTCTACAAGAAATTCATCAATTTTTTTCTTTGATATCGTATGAAACATGGCAAATGGCTGATAGTTCGAGGCGTTTTCCATTGAAGATTATTCAAATGGTGATTGCTAGAATTGTGTATTATTCTGGACATGAAACTGCTCAATTGTTGGAAGAGTCAATTCAGCAAATGCAAATCACTCAGAATATGAATGGAGGGCCCACTCCACTCAGAGATACATATGCTAAACGGTTGATATTGAATTTTATTCAATCATGTGAGAATCGAAGATATCAGCAGCAAAATCCGGGGAAAATGAAGAAAGTGCCGAAACCGCATTCGCATTATGATGATATGGAACGAAGGCCAGACAATATTTACCAGGCACAATCTGAAATGCCTTTTGCTCAAGGAAACAGAGGAGGTAGTGTTAGTACGAGTCATCAGCAGGAAGCACCTTCGCAGATTATGCGTGCTAAGAAAGAGATTTCAGTGCAAGCGCAATCGTATGCAAAATCTAATGCTATTGATTTGAATTTCTCGATATGGGCTGGAATGACTGGAAGAGATCGAAGGCAAAACAATTTTGGTGTTTATTAAATGTGAGTTGATAGGAAAAATATATTTATATTGTTTGTGTTGCATCTTGCATGCGTAGTTTTATGTTTTCTGTGCGTTTTTTTTGTTTTCAATCATGAGTTCAAGTAATATCACTGTGCATGTAAATACAGCCACTTTTGCCACGATTTATAAACACTTGCTCCATCAGCATCATTACTACCGGTATGCATATCAATCTCAGAAGTTTCTACGACTAAATTACAAGATTGCTGCTTCTGCTTTTGATCATGCTTGTAATGACTAAAAAATAGATTCATAGTATTTTCATATATAATATCAGACAAGTGATTAATATCCTCTCCTTTGATTGCTGCAATCACTTCCAAAACTTGAATAATATGACATGGCTCTGTTCTTCCTTTCTTCACTTGCAAATAATCTGATGTTTTCTTAT
ATTTCTTTATCTCATTCTTGAAGTGTGTTTGCTGAACATAAGAATATCCAGCAAACGATTTTTTAATTCCACAAAAAGGACAGTCAGTTTCAATCAACAATCGATCATTTGGTATGTATCGAATCATATCCAAATTATCATTGGTCTTCAATGAACATCCATTTATGCCAATGTACATTCCTTCAAATTTCAAAAACTCTTTCATATCTTCAATAGTATCAGTAAAAGAATGCACGACAGCAGTTTTCCATCGATGTCGATTTTTAGACAGGAGTTGAATGAGATCTTTAGTGGATCGTCGGTTGTGTAAAAATATTGGTAAATAATTGCTGGCTTCACACAAATCGAAATGTTTTTCAAAATGTTTGAG
>NODE_156_length_3937_cov_77.750514_g133_i0
TGTTCGGTTGCTGGTATCTCTGATCTTGCTCGCTATGAAGAGCTTACTGAATCATTAATTTTGATGAGAATTGATGAAGGATTACAACATTCTTTATGGGAGATATCATCCGGTATTTTCAATTTAGGTAATATTGAGTTCACTCGCGAAGGCGATGGTTTTGCAGCAATAAACAAAAAATGCCCAAAATTCATCCATGCAGTTGCTGAACTCTGGGGAGTCAAAGATAGTATGATAAATGATCGTTTACTTACTAAGAATATGAGAGTAATGAAGAAGACTATAACCCAACGGATAACTTACGAAAACTCTATTACAAATCGTGATAGTATTGCTAAAGGTATTTATGAGAATATTTTCTTATGGTTGGCGGAGAGAATTAATGCTGAGTTGTATCAAACAGAAGAGGATGTGAAATCTATTCTGTTCATTGGTATTTTAGATGTTTTTGGTTTTGAGAATTTTTATATTAATTCTTTAGAGCAGTTTTGTATTAATTTTACAAATGAGAAATTGCAGCAGTTCTTTAATTATCATATTATTAAATCTGAGCAGGAGGAGTATATTAAGGAGAGTGTGTTTTGGACACCGTTGTCTGTGCCAGATAATATTAATTATGTACATATGGTTGAGAATAAGGATCATGGGTTTTTTGCGTTGTTGGATAGTGCATGTAAAGCACCGAAGCCTTCGGTTGAGGCTTTTATGCAGGAGTTGTTTAAAAAGAATGGGAAGAATCCGTGTTTGGCTACGATTAATGCTCCTGGGGTTGGGATGTTTAGAGGTGGACCAAAGAATGCTAAAAAAAAAAAAAAAGGAAAAGGACGTTCAAGAGGAATATTCACTGGTTTTAGTATTTTACATTTTGCAGATCATGTTGGATATGATGCTAAACATTTCTTAACCAAAAACATGGAATCTGTCCATGCAGATACTGCTAAAATGATGGCCAAATCAAGTAAAGCTTTAACCGCTCAAATCGGTGGTCCAGTGAGTGGCAGTAAGAAATCCAGAAAGAAAAAATCTGTAACTTCCGTATTCTTCAGTGGTATAAAAATATTAATGAAAAACTTATCTGCAACCGAACCATACTTCGTAAGATGTGTGAATCCAAACAAACAGAAATCCAGTAAAGTATGGAATGTAAACTTAGTTAAACACCAATTACGCTGTGGAGGTTTGGTGGAAGCACTAAAAGTATTAAAATTAGGATATCCCACACGTGTACCCTACGCAACCTTATTTGATAAATACCATGGAAATGTCACTAACCCATTGATAAAAAACATGGGACCCGAAGCATTTTCAACTGCATTGTTAATTGCTTTTGATGTAAGTGAAAATGATTACGAATTAGGTTTAACCAAAATATTCTTCAAACCTTCGAAAGCAGCAGTTTTAGAGACAATCATGGGTCAAGCTGGTCAACCCTTAAGTAAAGCACAAAATGAAAAGATCACAAAATGGGTTGTACAAAAACGTATCAAACAAATGATGGGTACTTGCAAAGCGTTCTTGGAATACAGAAAACGTGTACGTTTAACACGTGCTGCTCGCAGATGGCAGTATGCTGGTAGAGTTGCTTCTCTTTTGGGTGGTAGTGTACTCAGTCATTTGTATATGGCTCGTGAGATAATACTAAAACGTAAGAGAGAGGAAGCATCCGTGAAAATGCAATCATTCTTTCGTGGTTCATATGAACGTGGAAGATATATCAAACATATTGTAAAAGTAAAGAAAGCTACTAAAATTGTATGGATATCTTATCGTAGATGGCAAGAGAGAGTCAATCTTCAGATATGGTTGGATGTCAAAGTAGTAGAGACACGTAAAAGAAAAGAAGAGGAAGAGAGAATCCGAAAAGAATTAGAGCGTGAAAGAAAGTTGGAAGAGGAGAGATTAGAAAGAGAGAGGATATTGGAAGAGAAGAGACAGGAAGCAGAACGTTTGAGATTGCAAGCGTTGATGGCAGAAGAGGAAAGACAAGCAGAGATGGAGAG
ATTACGTGATGAAGCAGAACAGTTACGTTTGGAAGCAGACAACAGAAAGGTTGAAGCAGAGAAAGAGAGAATACGTTTGGAAGAGGAAAGAAAAGCAGAGATGGAGAGATTACGTTTGGAAGCTGAGAAATTACGTTTGGAAGCAGAGAAGAGAAGACAAGAGGAAGAAGAAGCAAAACGTGAGGAAGAGCGTATACGTAAGGAAGAAGAAGAGAGAATACGTGCAGAGGAGGAAGCTAAGATATTGGAACAAGAACAGGAGGAGGCTCGTAAGAGACAAGAACAGAAGTCTAAGTCAATCAAACAGGAGAAGAAGATTATTGAGTTAGAGAAACGTGAACATAAGAGAAGAAAACGTGAGGCGAGTATAAAATCGAATAAAAAGAAGAGGAGAAAACGTGAGGATGACAGACAAGATGAAGAAGATGCAAGATTTATAGAGAAGAATTTGATTGAGATGGTTAGTGATACGGATGATGAGTCTGCTAGCAGTGAGACTGACAGTGAAAGTGATGGACCAATTATGAGTGTGAAAGAGTTGTTGAAGAATTTCGATAAAATTGCAAGTACTGGTCAATTGTTTTTGAAGTATACTGGGAAGAGAAGACGTAAACCGCAGGATAGAATTGTGAAAGTGTCGTTTGATAATAATTATAAACCTAAGCAGATCTCATGGGGTAGTGGTTCGAGACATATTGATTTTAGTGATATATTGTATATTGCGCAAGGACATTGGACACCAGTGTTTCAGGCTAGAACAGAGTCTTTGGATGCGAAGTTGTGTTTTAGTGTGGTTGGTAAACAACAGATATTGGATGTGCAAGCACAGACTAAAGATATGTGTGAGTTGTGGGTTAAAGGGTTACGCCGTTTGATTGGACAAACAGATGAACAAGCACTTAAGTTGTGCAAACAGAATTTGGAGAGTGGGAATTTGCCTGGTTTTAAGAGTAAAGATCAGAATGAACAAAAACGTGCAGAAAAGGAGCATAAGAGACGAACGAAATCGTTGATGCTTTTACAGCAGGATTTGTTTGTGATGACAACTACCACTGTTTTTCGTAATTTGGATGAAGAACGCATATGGGATATTGATCAACAAGTGCGTGAACAGTTCAATGCAAAAGTGTTGTATGAGCAAGCGTTGAGAGAAGATATTCCATGGCGACAGTGGAATCACTGGATACGTGAGAAGATTGTTACTTATTTGCGAACGAACAATCGAATTGCAGCTCCACAGCCAATGTATGGTCAACAACAGTTTGGTGGACAACCGCAGTATGGACAGGCACAGCAAATGTATGGTCAACAGCAGTTTCAACAGCCAGTACAACAGTTACAGTATCAGCAGCAGTATAATCAGTATCAACCGCAACAACAGCAGGTGTATGGTGGTGGTCAGCAGTATGGACAGCCACAACAAATGTATGGTCAACAGCAAGCAGCACGGAGTAGTGCTTACAGTCAGGGTGGTGGATATGGTGGAGGAATGATGCAACAGGCACAGCAACAGCAACAACTGCAGCAACAACAGCAACAACAGCAACAGCAGCAGCAACGTCAACAGATGCTGAGACAGCGGCAAAGTAGTCAGGTATCTTTACCGCAGATGAATACTGGGAATTTTAATATGCCTGCTCAACAGCAGACGGATAATTTTGGATATGGTCAACAGCAGCAGCAGGTGCCAATGCAACAACAGCAGATGCCGAATATGGGAGGAGGAGGAGGAGCTGCGTATGGTGGAGGTGGTGGTGGTGGTGGTGGAGGAGGAGAGAATGATGAAAATTGCACATTGATGTGAAAACGATGAAAATCATATCTGAAATTTTTGTGTTCATATAGAAAATGATTGAATTTTGTGCTTCTTATGCTTGAAAAGTACATTTTTTTTATTGGTAATGTTATTTTATTATTTTTTTTTTATTACTTATCCG
>NODE_162_length_3902_cov_67.743317_g126_i1
ATGACTTCCACTTTAGTAACCTTTGAGCCCTGTTCTTGCCCTTCAATCACCACTTTGTTCAAATCACAGCACAAAATGCCGTCGGATATTTCTCAGTCATATGAAAATAAATATTTTATATTGATGTTTCATCACAAATGTCTATTTATTTGTATTCTTATTTCCACTCGCTTTGGATGAACTCTTACCACTGGTGTTCTTACTACTCTTCTTAATGAATGGTGTCATAGGATCATAATTGCACACAGCAGTCGACAAAGAAATATTAAATGCACTATTCTTCACAATCTGTGATGCATGTCCAACACATTGCTCAATAGCAATATCACATCCCATATCCATAGCCTCCTGCAATGGATAACCACTTGACTTACGACTCATATCACCTGAATTGAATTTCTTATTCATAGTCAAAATACGCTTTGGTAATTCATCACGTATTTCAGGCTCCAACAATGCAGAAGATGACAACTGCCGTATCCACAAATGCTTACGCGACATCATTTCCAAATTATGTCTCACCAACGAACCCATAAAAAAAGAATGAGACAATACGTAATATCTGTTGTAAGCTGCTCCCCATCTCTTGGATAATATCGATTGATCTGCTACTTTCATATCTGGACCAGTGTTCTTTGGTGGTTCATAATCTGGTGGATAACACATGAATGTCAATAATTCCACCTTAGATTTAGCCAAACATCGAAGAGGATCAACCACAATACCTGCCCATTTTCTGAATTGCATCTGCCACAATAGCTGACAACTTACATCTATTGCACTGAAAAAGGAATTTGAATCTACACCCACTTCAAATGGATGTGAATGATACCAACCAATAAATCTATCTTTTCTTAGCATTTCAATTCTATCTTGCATACGAGTCATGAAACCTAATACTTTTTCATCATCTGCTACCACTCTTGTTTCTGATCCTTCTACAGGTAATTCAACGCAATCCATAATGATGATAGTTGCATGGTCAATTCTACCCACTAACAATCCCATGATTTCTATGGGTAATCCACCTTTTTTTCTACCTTGATTTACACCTTGTATGGCATGTTTTACCATTTTTGTAGCTGCTAAAACGTTGATTCTACACTTAGTAAAATATCGAGGATTTTTATTCCATGGGGCTTCTTTGTTCAATTGCGCGATTTTCTTATCGTCATAGGTGTATACACTATCTTGTATTGGAATTGCAAATGTTGCTGGTTTTTTTGAACCAGGGTCAGTCTTACGTTCAGAACGCGATGAATTATTTCCTCTGCGCATGATCAAAGTAGTTTTCAAAAAAAAGTAAAATGTAGGCAGAGGACTTGAACAAATACAATCAATCAATCAATCAATCTTTCATAAAAATTCAATAATTTTTCTTCTTTCTTCCCTTGCTATGAATATACCACAATAAAGTTTGAGTATAAATACGAAATTCAAACGTGCCAACTTCCTCATCTCTATTCTTCAAAGTTTTCGACAACAAATCAGGCAACACATTCCAATGTTTCTGATGCTGCTTCAAATCATGTTCACTCCATTTCATCAAATGCTTATAACTCCCCATAAAAAAATCATCATCAGCTTCATCATAGTCATAAACCTTGATATTAATCTCCAATCTCCCACCAGTATTGCCAACAATCCCCCGAGAAATACCACTGTTATCTTTCACAGACAACCCACTATCTCCATATAACTCATCATTCACTCTTTTATTAAAACCTGGTGAACCTGCCCTTGCCCGCTTACTCCCATGCGTCTTCATTTGCATATCAATCAAATCAGGAATATCTTCAACCGGTATTATAAACAACAGTGGTTGTCTCCACTCAGGATTGAGATTCTTTATAATAGTCTCTGTCCGTTTTACTTTCCCAGCAACCGAAACACTAGTGTATGGATCACTTGTACCTGTACTCTCATCATTCTCATCTTCAAAATCATGTGCAGGTAAATTCATTGCACGTTCAACAGCTATAACAATAGGCACTCCTCGA
CCTTCACACATACGCAAAGCATCCTCATGTCTGAATTGTGTCACCAAAGTAGTAGTATAAAACTCCCAAGTGACATCATCGCACACTTTGGCTGCAATCATGAATGTCAACAGTTTGGATTGTTTGGATTTCAATGCATACATCACTGGACTATCATAATCTACCACTGGTAATCTCTCCACAGATTTACCCACCTTCCATAGTAAATCAGTTAAGTATAACTCATTTGTGGTATCATCTGCGATATCTATGAATATCTCTGCCACATCTCTGGCAGATACTACTCCATCATCACTTTGACGAAGGGCAGCACACAATTGAAGAAATTCACGGAAATCGAATGTACGACTTTTATCATCATCGAATGCAAGGAAGAGTTCTTTTATTTCGTTATGGGAACGACTGAAGATTACAGATACCAAACCTTTTGGTGAATCTGCTAACATTTGACCTAAGAAACGACATTCATGAGTGGTCAAATTCTTCAATGAATATGCACTTAATTTTATTGTATGTAGTGTACATTCTGGAGATATGAGTGCTTTAGATACTAATCCAACACTATGTGAATCAAATCCAAAGTCAGTCCAATTCACTGCTGACCGTTTACCTCTCTCAATGGATGGTAAAAGTGGTACAAAATTGAATGAACCTTGCAATACTACTGTCACATATTCATAAAAATGAAGACCTGATTGCTTACTCTCTATTTTAAGTACAACTGTCCATAGAATTAATACTAAAAAAGATGTAATTAGTGATAAAAGAACTGCACCTAAGTCAACATTACTTGGATTTGAACGAGAGAAACGTGCAAAGATATATAATTGTAATACAGTTTGACATATGGTTTCAGCAACCACTTCAGAAACTCTACGCAATTTTTTATACCCTCTTTCAGCATGTGATTCTTCACGCAATCTTTTCTTAGTTATACACAAATAAATTGGTTTGATAATAAAATCTTCAAACAAGAGATAGATATCAATAAATATGAGTATAAGAACACCGATGGGTGCAATTGCAAATAAAATTACAAAGAGTTGTATTTTCTTATCTGGACTTCCAGTCACTTGTTTACTGACAATCTCCTCTGATGAACTTGCGTCATCTTCTTTGTATTTATCATTGCTATCATAATCATAAAATTCTTCTTTTTGTTCGTCAGTTTCACCATCAACAGTGGTGGATACTTTCAATTCTGATTCTGTGGATGATTCCATTGCTAATTTTTGTAGATATCCAAATGCAGCAGCCCATGCAATATAATATGGTGCCAATATAAACACACAACTCAACATAAAAAGCCAATTTTCTTTAGCATCAGATAATGTCTGAGCAACACGAATGTCAGTGATGAGATCTGCAAATATGACACATTCTGAGAAGAAATGTAAGAGTAATATCCCACTGACCACATATATCGGATGTTTTCTTACTTTGACAGTCATCGCTCGTATAAAAGTAATATTATGCAAGAATCCCCATACGCCGGATAACTCTTTTGGTTCAGGAATGTTCACCAATCTCTTCAGTGTTTCTGGTGTTACTTCGGATAGTGTTCCATCGGTTGTGCATGATTTTCTTATCCATCCTTGAACTGGAAGTACTACCTCTAAATATAAACCATTGTCTGCATTTGTTTTACCTGTGGTCACTAACACTTCATACTGCTGAACTAATTTGCATATTTTATAGTCTGCATTTGGTTCGGGATGGATGGGGAAACGCCAATCTTCGTTTACCATATACATTTGACCTTTTTTTAAATCTTTTGGTTTCAAATATGATTGGTCTAGAGTTGGGCCAGAAACTTCAAACGTTATTTCGTTGGCATTTAGAAACTGCTCTGCTGCCTGTTGTACTATAATAGAAGTCATGGTTTGTTTTTGGAATGTAGGA
>NODE_166_length_3897_cov_25.502339_g139_i0
TCACTTAATCATGTCTGGTAAAAAACTATTCAACTTCAAAAAACGTCGCGGCATTTCAACCGACGAATCCCCCAAACAACAAAAACGTAAACACAGCTCCGGCCAAAAACCACGCTCCCAAACCTCATGGGGCCTAATGTCCCTAGCAACCACGCGATCCAACGGCATAAAAAAAACCAGCGCAGTTCCCTCATCAATAATGACAAACGTCCATCGCATGATAAAAACCTCCTCAGCAACCAACACTTCAATAACCCCAACCCCTTTGCACAACCGCAGTATCCACAATTACAATATCAACACTCCCTTGGGTTACTCATCATCAGTATTGAATGCACGCTTAAGATCTCAACACTCCTCACACGCAAACAACAACAACAACAACCCATTGGATAACATCATCAACCAATTGCACAGTCCACGTCGTTCATTACTACCCACACATATGACTCGACCAATGACAGACCGTTCAAAAACCACTTCCAATAAACCTCAACAAAAAATCATCCATAATACAATGTACTCCGTATTCAGCAACGCAGGCATGGAATCATTGATGAATAAATTGCAGGGACAAACCAAAGATACCTTCAGCAGCAATACAAACATCCATCAAACTCCTCGTTTACAACCATATAACAATTTATCCAAGAGTAGTAACAGTCCAACTGCATTTTCATTGCTCAATGGTGGAGGATATGGTCAACGTTCAAGAAATTCAAGTTTGAATGTTGTCAATGGTGAGACATGTACTGTATCCATTGAAACTATGGTTGATAATGTAAGTAATAAATCTGTGAATAGCAATACTAGTGCACCACCCAATAGTTTTATGTTACCTTCGAATGTCATGAAAGCTATGACAGATGATGATGTAAAAGCATGGGAGCAAGAGTTGATTGAATGGGAAGTGAAAGGTGAATGGATTGAATGTAGAGAGACATTGACTGGGAGAGTGATGTGGTATAATTCTTTATCAAATCGTATTATATTTGATTCACCTCCCGATGGCGTACAACCATTACAACATACAATACATACCAAAAGTTTAGGTCCAAAATCGGATGATTACTTTCTAAGTCGACCATTGACACCATACGAAAACGAAGATGTAGTTCAAGGTTTACTAAAATTCGATGATAGCACATGGGACTTCACCAATGATTTGGTAGATGTATATCGTACACGTGCAGCCATGCGAAATCATAAAAAAATGCTACCTAAACCTCAGCAATATTGTTTTCCAGAGTATTTTTCAATGTCTCCATTGCCTGAGTTGCCTGGTGGTCGATTCTTAGCTAAGATACGTTTACCAGAGGAGTTTTCTGCACATCGTAGATACTCAAGTGTGCAAATAAAAGTGGATTACACTGCAGCATCAGATGCTATCAAACAGGGTGTGGATAAATTGGATGAACCATTCAACACTCGCAGACAGGATTACATTCTGAAAGTTGTGGGACAAGAGGCGTATATGTATGGTAGACGTAAGATTATTGATTATGAAGCAGTAAGAGATGCAGTTCGCAATGAGGATGATGTTGAATTTGTACTTATTCAAAGATCAGATTTCAAAGAAAAAGTTGCGGAAGCAAAACAACATCAGTTGGAATATGCTAAGCTTTTTTCTACTGCATATCCTGCGAATTTGGTGGAGTCAGCGAAGGAAACATTGAAATATGATTTCAATAATCTCAGGGATAATTTACCGAATATGAATTCGAAGAAACAACGGTCAATGATAGATTTTGAACCACAGGATTATATATCTTTGTATGATTGCGACTGGCATTATAGAATAAAAATTGAAGGGTTGACTAATGCTACATCTTTGCCGAGATTTGATGATCAGTCAATGAAATCTATTTATGTGGTAGCTGAGTTGTGGATGGGTGATCTTATGTTCGATCATGCGACACTTATGACACGTAATTCATATCCATCGACGAATATTCGTTGGGGACAATGGCTTTCATCTCGTAATCAAACATTTGCGCAGA
TACCGAGGGAATCTGTGCTGTGTTTTATGGTTATGGGTATCAAAGAAGGCGATAATAAACCTCAATGTTTGAGTTATTGTAGATTACCTTTGATTGATCATCGAAATTGTTTGCGTTCTGGTAAATATTTACTTAATATGTGGCAGATTCCAGTGTTCAAAATGGTCAAAGATGGTCCTAAAACTGATCCATATTTAGATCGTCCATTCAGATATCGTGGATGTATGCGTGATAGAAATATGAAAGCAATGGCAGGTAATGATGATGAAGATTATGAACAATGTCAATTGCTCATAGAATTTGATGAGTTTGCATTCGATGTCGTTGCACCCAAGTATTTACCAAAAAAAGATTATAGCGAAGTTGATGTTGGTGGTAAATTGAATCATACACAATTGACCAAACAACAAAAGAGTTCCATACATACAATAATAAATAAAACACCTTTGGAAGTGCTGGAACAAAAAGATAAACATCTTATTTGGCAATCACGTGATTTACTATGGCATGATCCAAGCGCTTTGCCAGCTTTCTTACGTTCTGTGAATTGGACCAATTTGTGTCATATCTCAGAAACACATAAGTATTTGGATTTGTGGGCAAGTCCCAAAAGACCTGAGAACGCTATAGAATTCTTAGATTATAGGTTTGCAGATACGAGAGTGAGAGAGAAAGCTTTGGAATGGTTGGAAGATTTGCATGATGCAGATCTACAAAAATATCTGTTGCAATTGGTTCAGTGTTTGAAATATGAACCTCAGAATGATAGTGCACTCTCTCGATTTCTTATACGTCGTGGACTTAAGAGTCCATATCAGATTGGACACTTTTTATTCTGGCATTTGAAGGCTGAGTATCATAAGGAACAGTATACAGAGAAGTTTGGTTTACTTATGGAAGAATATCTGTTGCATGCTGGAGTACATACAGAACAATTGTTTGTTCAGCATGCATTGCTAAAACGTTTGGAATTGATTGCAGAGAAGATACAACAGGCAAAGAGAAGTATGAGTAGTGATCAATGTAAGAGATTATTTCGTAAGGAATTGTATGCACTGAACAAAGATTTACCAGATATGCCCATACAAATACCATTGAATCCGAAGTGGAGTGCTAAAAAGATAATTATTGATGAATGTCGATATATGAGCAGTAAAAAAGTACCTCTTTGGTTAGTGTTTGAAAATGGTGATGAATATGCTCCACCGATAAAAATTATGTTCAAATCTGGAGATGATTTGAGACAGGATATGCTGACGTTGCAGATTATAACTATTATGGATCGTTTGTGGTTGGACAATAAGTTAGATCTGCATTTGAAGCCGTATTCGGTGATGGCTACGGGTGTCAATCGCCATAATGAGGGAGTTGGTATGCTTGAAATGGTATTGCAATCGTGTACGGTCAATACTATAAATGTTGAATATGGTGGGGCATTCAATGAGAAGACGATTGATTCTTTTTTGCGGAAGTATAATACGTATGATCAGTCATTGAATAAAGCACGAGAGACATTTGCGCGTTCTTGTGCTGGATATTGTGTGGCTACTTGGGTTTTGGGCATTGGTGATCGACATTCGGATAATTATATGGTCACTCAGAATGGGCAATTCTTCCATATTGATTTTGGACACTTTTTGGGGAATTTTAAGTCGAAGTTTGGTTTTAGACGGGAACGGTCACCGTTTGTTTTTACTCCGCAGATGAAGTTTGCCATTGATTCTGGTTTGAGAAAAAATAAATTGTATTATGATTTTCTGGGATGGTGTTCGGAGAGTTATAATGTGTTGCGAGTGAGGAGTCGTTTGTTGTTGGTGTTGTTTTCGTTGATGGTGGCTGCTGAAATGCCGGAGTTGATGAGAGAGTCGGATATTGGGTATTTTCGGCAGATGCTGAA
>NODE_170_length_3872_cov_43.923097_g143_i0
TGTGCTCTTCCGATCTGACTGATTCTTCAACTAGACTCGTAAACTCGGAAACCCATCAAACCCTAGAAAAACAGCCATTCTTAAGAAATATGGATACCGACGACGAAGATGAAGCCCTAGCAAATGCCCCCAAATACGAAAAGAAATTCCTAAACATTGGTGGAATGACCTGCGGTGCCTGCTCTTCAGCAGTAACCAAAATAATCATCGAACAAGAAGGCGTTCAATCTGCAACAGTCTCACTAATGATGCAAAGAGCAGAAGTAATTTTCGATCCACTCATCATTGACCTCCCTCAAATCATTGAAGAAATCGAAGACACAGGTTTTGATGCCAGCGAACTGAAATTATCCACAACCAATTCCAATCAATTCATGATACATATATTATACCCATTAGCATCCACCACTGAAATACAAGTTACTGATATATTACTAACCATAGATGGCGTACTAACTGTAAAAACAGTTGATGAAGGAATTGAGAATCTTGGTCTCATTGATAGTGATTCTATGCGTTCATTATCAATCATAGATGGTGGTGGAATTGGCGCTGATGATTTATATATCTCTAAATATGGTAGCAATGGTGATCGTACATTGGAAAACGCATCAATGTATGTGAACATAAAATTCGATCCAAAAATCACTGGTATGAGAAGTATTACAGAGTGGATCAATTCTGCAGTCAATACTGCCTTCAGATGTAGGATATTGTTTGACAGTAGTGATATTTCACAGCGTAAAAAAAATATTCAACGTGGAAGAGAAGAAGAGATACATAAATGGAGATCACTCTTAAAATTCAGTGCATGTTTTGCTATTCCAGCATTTGTTCTCGCAATGGTATTCCCAATATTCCCAGCATTTCGCAATGCATTTGATACAAAAATACTTCCAGGATGTCTTTTACGCGATGCAATATTATTCTCTTTAGCTACACCCATACAATTTGGTCCACCTGGATTGCTATTCTACAGAGGTGCACATAAATCTTTGAGAGCAGGTGTAGCCAATATGGATGTTCTGGTTGCTTTAGCTACTACCATATCATATATATTTTCATGTTTCAGTATACTTTTATGTATTATCAATGATCATTCATCTGCAGATGAAGAGACAACATTTGAGACATCTGCTTTGTTGATTACAGTTATAATATTAGGTAAGTATATGGAGACTTTAGCAAAAGGTAAGACATCACAAGCTTTAGATAAATTGATGAATTTAGCGCCATCTACAGCGAGATTGGTTGATAATTGGAATGATGATGAAAATGAAGAGAAAATTCATCATCAAGAAGAAGAAAAGTCATCGATGACAGATACTGGCATATTTGCAGCAGAAAAAGTGAATGAAGAGCAACGTGCTTTACACATACGTGAGATTGATGCACGTTTAATACAATTGGGTGATATTGTACAAGTACAACGTGCAATGAAAGTCCCCTGTGATGGCATAATTGTTGAAGGTGTCTCCAGTATCGATGAAAGCTTAATTACAGGTGAGTCATGTCCAGTAAATAAAGAGATCGGTGACCAAGTCATTGGTGCTACAGTAAACATAAGTAATACCATATACTTTCGTGTCAATAAAATTGGAAGTGAAACTGTACTCTCAAAGATTATAACCTTAGTTGAGAATGCACAATCTTCTAAAGCACCCATCCAAAAAACAGCAGATTTGGTGGCCAGTAAGTTTGTACCTGCAGTGGTAGTTATAGCCATATTAGTATTTCTTGGATGGTTTTTTGCTTTGGAATATGGTTTGGCAGATATGCGTTCTTTGTTTGATGTTGCAATGAGAACAAGTGCAGCAGTGTTCTATGCAGTCATATTTGCAGTCACTGTATTAGTAATCTCATGTCCATGTGCATTGGGTTTAGCAACACCTACAGCAGTGATGGTGGCCACTGGAAAAGCAGCAGAGTTAGGTATACTTTTTAAAGGAGGTGAACCATTGGAGATTGCAGGTACTACCAATTGTTTGGTGTTTGATAAA
ACTGGAACTTTGACTGAAGGTAAAATGCAAGTGGTAAATATCATACGTTTGACAGATGGTATGCTGTATAATGTAATACAACAGCCACAGTCATACAATAGTAGTACTTCAGATCATGTGAAAAACAGACATGATTTTTGGAATTTTGTGTACGGTGCTGAATCTCAGAGTGAACATGCAATTGCAACTGCAGTTTGCAAATTCATTGAAGGTAAACCAAATGTTTCTAAATACGTAACTGCTGACAGTAAAACTAACACTATAGATATGAGTATCATAGATAAAAAAAGTAAAACAACAGCATATGAGAGAATAAGTACTGATGATAGCACTATAGATGTTGGTGAACAACAAGAGATAAGTTATAATGCAGATAGTTTTTATAGCATAAGAGATAATGAGTTAGGGAATACATTTGAATTCAAGCAATGGACAGCCAATGAGTTCAATGCAAAAACAGGCAAAGGAGTCAATGCTGTTTATGAATTACCCACTTATTGTCAAATGTTCATTGGCAACATAAAATACATGAAAGAGAACAATATCAGTGCTATATTGATGTCAAAGTTCTTGACCAATGATAGCGTTGACAATGATGTATACTCTGAAGTAAAAAACAGAGCAGATGATGAAGAACAAAAACATGCATACGTAAGTGTTGAAGAAGAGAATGCATATACCATAGTAATGGATAAAAGCAATGAACTAAAACGCAAAGGATATACAGTAATATTTGTAGCATGCAACAAACATTTGACTGCAGTTTTGTCCATTGCAGATAAATTGAAATCCGATTCGTATGCAGTCATACAATATTTGCAGCATGAGATGCATATACCATGTTATATGATCACAGGTGATAATGAATTGACTGCATATGCAATTGGTGATATGTTAGGTATAGATAGAGATCATATCGTTGCTGGTGCACAACCGGAAGATAAACAGAAGATTGTGAAATTACTGCAGCGTACGCGTGCAATACATGTGAGAGAGGGCAGAAAAAATTTCATCACTGCTTTCTTTTCTGGTGGTAGTGGTGGTAGTAAAAAAAGTGTGGAAGCAGTTGAATTTAGTTTGATACAGAATGGAAGTGGTGATGGATATGTTAGCAGTCGTGGTAGCACTAGCAGTAGTGTAAAAATTGATGAAGTGAGAAATATAGTGACATTTGTTGGAGATGGCATCAATGATTCACCATCTTTGGCACAAGCGGATGTTGGAGTTGCAATTGGTGCAGGTACTGATGTTGCTATTGCTTCAGCATCTGTTGTTTTGATGAATGATGGGTTGAGTGATGTTTTGAATGCAATTGATTTATCTAAGGCTACACTTATGCGAATAAAATCTAATTTTGTATGGGCTTTGTTGTATAATACATTGATGATACCGTTTGCTGCGGGACTGTTTTATCCATGGTTGCATTGGGCATTGCCGCCTTTTATGGCTGGTATTTTGATGTGTTTGTCGTCGATCAGTGTTGTGTGTAATTCTATGTTTTTGCGGTTGTACAAACCACCGGATTGGAGGAAGTCGTTTGATGAGAAAGAGAGGAAGAGGAGAAAGATGGGGATTAAGAAATATGGGGATAATCGTAGTCGGGGAAGTCGAAAGAGTAGTTATAAAGTGGAGAGTAAAGCATCGATTAATTTGTTGGAGGAAGATTGAATTTATTGTTTGTTTTTGTGTGTGGAATATTTAGGTGAACTTCTTGATGTGTAGTGAATGAACTGGGTATTAAACATATTTATTTACATTGTACTGTAGTCTGACGTACACTTGTAGTGACAGGGTAGCTTGTTTTTGTATGGAGCCAGTCTCAGTCAGACATCTACTACTTAAATTAAAGTTTTCCACTGAAAAAAAAAAAAA
>NODE_181_length_3796_cov_18.176675_g149_i1
ATAGTACACATGTAAAACCAATATCAACTGCAAATGATATACAATCACAAACTGTTTCTATTCATACATCATCCAAAGATAAAAGTACAACAAATGAAACAAAAATTGAAGAAGAATTGAATACAACTGTGATTGTTGATGATAGCGAAGAAGAAATTGAATATCATATAAGAATATTTTGGGCTGGCGATAGTCGAGCAATACTTATTCGTCATTCATCAAATGTGGAATCAAAAGAAATTCAAAATTTACCATACACAATAAAAAATATTTTACAAAAGAGAGATATCCCCAATGAAAATACAAATCAATCACAGACATATTTCTTTGAAAGTAATACTAATCAATCATCAACGCATACCACCCAACCCAAACAACCCAAACATCAATCACCTCCAACTCTCCCACATTTCATTGATTTAACCATAGACCATTCACCAAAATGCAAATCAGAATACGATCGCGTTCTAGCTGCCCATGGCCAAATAATATCCAACCGCGTTGATGGTAAACTAGCTCTAAGCCGTGCATTCGGTGATAAATCAATGAAAAACAATCCCAACCTATCATTCAACAAACAACGAGTTATCTCTGTATGCGATGTACAAACAATAACTGCCAAATCCAATGATTACTTATTCTTATTCTGTGATGGATTGGTTGAACAATGGAAACATCCCCAACTAATATCTCATCTGTCCACAAATGTTTCTCATTTCAATGACAATGTGTATGCATTGGGTGATACATTCGATGATATCATTGATGCTGGATCAAAAGATAATATGTCTGCAATGCTAATACAATTTACAAATGGTGAACAATATGGAGATATTATGGAAAATATATCAAAACCTAAGACATTTTTACCTGGACCTTTGTATTGTTCGAGACATGATCGTAGATTTGTTACATGTTATATGGAAAATGCAAAGAGATTTGGACATAAGGATAGACCATATCTAAGACGAGCTGCATACAAATCAGATATAAAGTATTTACAGAAGTATGGGATTAAGCATTTGTTGTATGAGCAGAGATGTAATCGTACATGTAAGGATATTATTTCTGATATTCGTAGAGGTATTCATGAGATTGATAAAAGAAGGGAATTGGAGATGAAGAAAATGGAGAAAAAGAAGAATGAGAAACAGGAAAACGTTTTATGTACAAAGAATACAGAAACAATAAATATTTCTAATATAAATACAGAAACGATAGATAAAGAGATTGTTACTGCTGCTGATGATATTGTTATTGGTGATGGTATTGTGGATGAATTGAGTGCAAATGAAATACCTTTAACTCAGGGAAGTGTTGTAAATACAATTGATAGTCAATTGAGTGAAGAAGTTGGAGATGAAGGACCATTGAATATTTTCAGTGATGATGATGAGGATGATGATGAGGTTAGTACTGAGATTATTGATAGTAGTGATTATGATAGTAGTTGTAATTTATTGAGGTATACATCATCGAATATCAGTAGTATTGAATCTCTTTCTCCTGTGAAAGAATCTGTTTCCCCTGTGAAACAATTGCATGAGAATACGGAGTTGAATTTGGAGGATGATGGAATTGAAGATGAAATGAAGATGGATGCTTTGGAAACAGAGGTGGAAGTTGAGGTTGAGGTTGAGGTTGAAGTTGAAGAAGAAGTGGAAGTTGAAGAAGAAGAAGAGATAAATAGCAGTAGTAGTATATCTCCATTCTATAGTGCAAGTGCAATGGTGAAAAGTGCTTTTATATCTGTGAGTGAATTTGCAAGTAATGTGATTAAGAAAGTTATACCGGGTGTTTCTGGGGATAGTATGAGTAGAAAGAGATCATTATCTGAAATATTATCTTCTTCGATTGATTTTACGGATGAAATGTTGCCATCAGCATCGAAAATAATGAATAATACAAATACTATGGAAGATAATGATGGTGGAGTTAATTCAAATGATGCAGAATTATTGAGTCCAGCAACAAAGAAAAGGAAACTTAATAAAATGCAA
TAGAAATATATTTATACTGTTGAAAGATTACAAATTATTAATGTTGCCCTTGAAGTGTAAGTTTTTTATTTAAATGTTTTGTCATTATGACTCTCATTTGACAACTGCTGTTTTTTTCTCTACAATAAACCAAATCAATTGAAATTGTTTTACAAAATGAATTTTTCAAAATCTTCCCCTCTAAATATTTTCCCCAAATGTTCCTCAATATAAACCTCATCAATAACATAAGCCTTCCCTTCCCCCTCTGCAGCTTCAATATTAATATCCTCCATAATCTTCTCAATAACAGTAACCAACCTCCTAGCCCCAATATTCTCCACATTCATATTACAATCAACCGCTGTATCCGCAATACAATTGATTGCACCCTCAGTAAACTCCAACTTACACCCTTCAGTCTCCAACATCTTCACCTGCTGTGAAATCAAATTATACTCCGTTGCAGTCAAAATCCTACACAAATCATCCTTCGTCAACGGTTGCAACTCAACCCTCACCGGCAAACGTCCCTGAAACTCTGGCATCAAATCTGACGGCTTAGAATCATGAAAAGCACCTGCACATATGAACAATATATGATCAGTGTTGATTTTACCATAATTTTGTACAGTGATTGATGTACCCTCAATCAAAGGCAATAAATCTCTTTGTACACCTTCCTGAGATACACGTCTTTCACTTCCACCTCTCATACTATCTCTATTCCCACATATTTTATCTATCTCATCAATGAATACAATTCCATCATTCTCTGCCAATTCTATTGCACTTGCAGCTAAATCTGTGTCAGATAAAAAACTCTTCAATTCTTGTTCCATCAAAATTTGACGAGCTCTCTTTATTGTATGTGACTTCTTTTCATATTTATGTTCACTGTTACTGCCACCTCCACTTCCTGGAGGACCAGCGAACACTAAAAAAGGACTGCGTAGACCACCTCTCAGAAATGAAGATATATCAGATTCACGTTGAACATCTATTTCAACAGTTACAGTGTCCAACAAACCATTGTCCAATTGTGATTGCCAATATTTCTGTTCTGTATCTTGCAATTTACCCATAAGTGAACGGAGGAGTATTTTATCCACGGCTGTTTTTAGTTCTTCACGCATTTCTTTCTCTTTTCTGGATTTTACGAGAATTGCTGCATGTGCTACTAAGTCTTCGATGATAGTGTTCACATCTTTGCCAACATATCCAACTTCGGTGAATTTGGTGGCTTCAACTTTGACGAAAGGTGAGTCAGAGAGACGTGCTAGTCTTCGAGCGACTTCTGTTTTTCCACAACCGGTGGGGCCAATCATTAGTATGTTTTTTGGGGTTACTTCGCGTCTGTAGTCTTCTGGTAATTGTTGGCGACGCCAGCGAGCACGCCATGAAATTGCTACTGCTTTTTTGGCGTCGTTTTGGCCAACGACGAATCTGTCCAATTCTGATATCATTGCGTTTGGTGTCATATTATATGGATCATCATTTGCTTTTTTATCGATGTTGGAGGATTTGTTGGTGTCATCTGTGGTAGTGTCTGTTGGGTTGTTGGTGGTGGTAGTGGTGGTGGGAGGATTTGAAGGGGTATTGGTATTGTTGGTTGTGTTGTCAGATGATGTGAGGGAGGGTGGATTGGATTGTAAGTGATTTAAGTATTGTTGATGGAGTATGGATCTTTGGAGGTTGGGATCATTGAATGCGTTGTTGGTGAGTGATGGGGATGGTTGGATGAAATAATTGTATTTGGAATAAGTTAATATATTATGATGATTGATTTGTAGGAGTTTTTTAAAACAAACAAGTCG
>NODE_194_length_3707_cov_26.335703_g159_i0
ATGAGTTTTGTATTGATATCGATGATGCATTAGGTGATTGGCCATCACCATTAGCATGTTTGAATGGACAAAATCCATTCTCACTTGGTTTTGTTCCTCCAATGAATTCCATGGCTGCAGCAATGATGATTCAACATCCATATATGTCAAAAATGTCACATGCTGCTGGTGGTAATTATCATCGTTCTGCAAGAAATTTGCATATACCACATACAGTAAATCGACGAACAGCTGAAGACCATCACAGTAGAAAAGAAACATTCACTTTATCTGACATTGTAAGCATTACTCGATCTCCTAGAAACTGTGAGGAATCAATATATACTTCTGTTTCTGGCACTTACCCACATACTTCTTCTTACCTTAACCCACCTTTTACAACTACTCTAAAAACATCTCAGTCTCTTATGGAACGTATTTCATCACCAGCATATTCTTCCAGACGTAGAGCAAACACTGGTACACAAAAAGCTCAACGATTATCGTTATGGAAGGAACAAATGCTATATGTTTCTGAACGCCATAATAATTCATCATCGCATGGAAGTCATCCACCAGTTAGTAATAGTGCTTTACCAGGTGGTGGCAATGCTTTATGTTCAATCCCATCTTCGCAGTCTGTTGCTACTTCTCGTGAATCATCTACATTTGCTGTTGTGGAACCACGTGAAACTCAACGGCAACATTTGGTTTTAACGCCAAGTAATTCTGCTCCACCTGCGAGTGAATTACCACAGGAATTGAAGCAACCGTTGATGTCAACGTACGCTAGTGCACAGATATACCATTCGAAAACTGTTGTTGAAGTGGAATATAATATTGGTGATCGTGTATTGGTAGATGGTGGTAAGTTAGGTCAAATTAAGTCGATTGGACAGCAACCTCAGTGGGGAGCTGGTACTTTTTATGGTATTAGATTGGCTGGAAAGAATGGTTCATGTGATGGTGCTTGGAAAGGTATTCGTTGCTTCTCTTGTCCACCAGGCTATGGTGTATTCATAAAGAAACAGCGTATTACACAAAAGTTACCAGATGCGAAGTTTGAATATATGGATGAAAATTTCATTGAAGATAACAAACAAAATACACGCGACCGGCAGTTGAAGGAGATCACTAAAAACGATTCTTCTTCTGATGAAGATCCTGGAGAATCAGACATTCCTACCCCAGGTAATTTTCATCTTTTGCAACACTCAACATCCGAAAGAATCCTATCTTTAACACCAAAAACTGATAAGCGTGTAAAACTTTCATTTGATAAAGCAAGTAAATCATCAAAACCACCATCATCAAAACCACCATCATCTAAAAATAAGACAGCATTGACTCGTAAATGGACTGGTGGTGCACCGCGTACTGTTGTTCTACGTAATAAAATTCCACCAATCAAACTGCATGCTGGATACAAAACATGTCAAGAGGGTATGAAATTACGTGAAGAAGAAAAATATAAATTAGCAATTGAGAAATTTAAGATAGGTATTGCATTATTGCAAGTAACTCTATTGGAAATTGATCGTGGATTGGAAAAACGTAAATGGAGAGGAATAATCGAAGAATTTCGTCATAAAATGAGTCGATGCACATTTGAAATACAGAATTGCAAACGTAATCGCTTTCTAAAAAAACGCAATCAAAATATACAAAGAGCAGCTGAACTAAAAGCACAATTGGAATTAGAGCATCAAAGGAAAGCAATGCAAGAAGCATTATCAAATGCTGTTGGTCGTGAACGTAATTGGACAGTAACTCGTGAAGATATGGAAGTGAAATTGCAAGCAAATCGTGCAGAAATTGCAGCCGCAAATAATGCTGATAGTGATGAGGAAGAAAAATCTGGTAAAAAGAGTAGCAATAAGAAATATATGCGTGATCAGACTTTAGCGAAAACTAAACGAGAATATAGATTAAAAAAGAAAGGCAAAGGAAAAAAAGATTGGAGAGATTATCCAGGATATGGTGGAGATATGGGGAAAACCGGAGATGACAGTGGTGGCGAT
GATGATGATGGTGGAAGTGATGAAGAAGATGGTGGAAAGAAAAAAAAGAAATCTAAATTATCCAAACATGATCAAGAATTGCGTTCCAGAATAGAAGGTGATGTAATGACTGAAGCACCAGATGTTTCATTTTCAGATGTGGTTGGTTTAGCAAATGTAAAGTTAGCACTCTACGAAAGTATTATATTACCTTTTTTGCGACCAGATTTGTTTCAATCTATAAAGAAAAGTACACAAGGTATCCTACTCTTTGGTCCACCTGGAAATGGTAAAACTATGATTGCTAAATGCGTAGCAGCCCAATGTGATTGTACATTCTTCAGTATTTCTGCATCTTCCATAACTTCTAAATTTGTTGGTGAAGCAGAACGTATTATGCGTACATTGTTTGATATGGCTCGTCAACGATCACCTTCAATTATATTTATTGATGAAATTGATTCATTATTGAGAGCACGTGGTGGTGCTAATGAAGCTGAATCTTCGCGTAGAGTAAAAACTGAATTTCTCATTCAATTTGATGGTGTAAAAAGTGCTCAGCAGGCAGATACAACAATAACAGTGATTGGTGCTACAAACTTGCCATCACAATTGGATGATGCAGTACTTCGTCGATTTCCAAAACGTATTTTAGTTCCAAATCCGAATTCTGTTGCACGATATGGTTTAGTTCGCTTATTGATGTGTAAGCAAAATCATGCAATAAATGAGAAACAATTTCAAACAATTGCTAGAAAACTGGATGGTTACTCATGTTCAGATATTGCTATGCTTTGTACAGATGCTAAAATGGGTCCAATACGTTCTATAAAAGGTTCTGATATACTTACAACTCGAAAATCTGAAATACCACCAATATCTATGAATCATTTTCAACAGTCAATAAAAAATATACGCTCTTCATTGTCTGAAGAAGCAGTAAATGCATATCGTGCTTGGGATGATGAGTTTGGTTCGAAACTGTTCTTAACTATGGATGTATTGCCAAAAGATATGTTAGCCAAGGAATTGATGCCTGTTGAAGAAGAAATAGCAAACAAGAAACGAAAAATTAAGGAAGAAGTGGAAAGAATTATTGCATTGGCAGATGCACAAAAGGAGAAAAAGATGCTGAAACAAAAGGAAAAACCAAAGTCGACAGTAACAAATAAGAAGACTGTATCATCATCGTTGTCTCAAACACAACCATCTTCTGAATCGAAATTGAAAGCATCAACGAGTGGAAAATCTGCGAAATCTCAGATGCAATCGAATAAGAAAAAGTCTAAGGCACCTGTAGCAACAGCTCCTGCAACAGTGCAGAGTAAAGCAAAGCGTAGTTCAGTAAAAGGGAAGAATAAAACCAATAGTAATATGAGTGATGCAGTGCAGGTAAAGAAGAATAGCAGTAGTGGTCATAATGGTGGTAGCAGTAGAAGAAAGAAAGGAAGTCTTCGTTTGACTGTTGTAACGAACAATAAAGTTGAATCCAGCAAAAGAAGAAAGAGTTCAAGCAGTGCTAAATAGTTGTGTGATTCATATCATGTATAGGTTGGCAATGCTTAATATAAAATCCGAATAGGAATATATCACCGAAATTTTTTATAATATTTTATATTTAGTTGTTGCTTGTGTTGTTAATAGACAAACTCCATGTACCGTAGGACAGGAAAGTTTTAGAACAATGTAGTGATCTGGCAGTGGTGGTGTGTGGAAGTGGTGGCGGTG
>NODE_200_length_3691_cov_73.986546_g164_i0
TTTTTTTTAGTGTACGTTCTACCACAATATGGTAAAGAAAAAGTAAATCAAAAACAAAAAAGAGCAGAAAAAAAATAGAACGTAAAGGTACTAAAATTTAAATATAAATACTGTATATTATGGAGGGAAGTGCAATCAAATGAAAAAACTTCATTCTCGATTGCTTAACTTCCTTCCTATACAATAAAAATGATTTATAAATATATATATATACTAAAAAAAACTGAGCATAATAAACAATAAGTGCTACAAACAAACCATAAATATTGATATTCAAAGAAAAAAACAAAAACCTATTTATTTTTAGATTTCACTGCAAATTGCTTCAATTTAAGCCATCGATTCTCACGTTGTTCTTTTTTTTGAATCCGCAATTGCTCATTTTCATCAAATTTCTCATCCAACGACTGATGTTGTTTTTCAAGTTTCTCAAAAAACTCTGGATCTCTTCTCTTAAAATCAGACATAATTTGCCGATTAATATTTCGAATGGCAGACAACCAATACTTCTCTTTATTCTCCAACAATGCATTATAAATCATTGGCCATATCTTCTCTCGATGTAAATCAACACAAATACGAACTGTCTCATCTCTCCAAACCATAAGACTCCGTTCAGCAACCTGGTGATGAGGACTAGTCATACAATCCACAAATTGTTTCAAAACTGCAATAGTAATTGTAGCAAATGCAGGATCACTTTGTTTTACATGCGGATGACGTATACATACATTCAGAATATGAACAATCTCAGTAATAAACAATTGCTCTTTCAAAGGAGAGAATTTAGGCCAGAACTTAAGTATCCCACCCATAATCACTGGTGCTGAATGTGGATCTTTAGTTACAAACTGTGTACAACATTGCAACAATTGTTCATGAAATTCATCCAAATGGAAAGACTTATGCAATGGTACAATCACATTACGTAATACCACATGGTAATCAGGTTTTACCGGTACATTCAATCCTTGAAATACACTACAGAATATCTCAAGTATCTCTGGTAATCCTTGCCAAGAACTTGTATCGCGAAATGTGGAAGTATAAATATAGCGATAGCAATAATCCGACAAATGTTTTCGAATCGCTTTACGTAATGCCATAAATCTACCATAAATGGCATGTACAATAATTTTCACATACTGTGGTTCTCTATCATCACAAGAACCAAACAAATGAATGAGATTTTCTAAAAATGCTCCTTGCAAATGTTTTTTCATTGTTTTTTTATCAATGTGTGTGTTGGTAACCACATGAAAAGTGAGTTCGTATATCAATTGTAAATGATCCCATGCTGGATCACGAAAGTCCATTTCATCATCATTTGGGTCTGTACGTATCATATTTTCATCTGGTAATGCACGAAACAAATTCTTCCAAATTGTTTCAATGCATTGTTTGAGCAGTTCAATCGAATTCCATTTATTTCCACTCATACACTGTAGTACTTCCAACAATAAGTCTTTCTTGTTATTTATGCGTATCATCTCTTCTTTGGTTGCTTCTTTGAGTTCAACATTAGTGTTGTTATTGTTGTTGTTGTTGAACTCTTCATCGCTGTTTTCATTCTCATTCTCATCCGTTTCATCCTTGCATAATTCAAAATTGCAAAGTACTGAGCAAAGTTCTAATTTCTCTTTCATGAGAAGTTGTCTTTCATTTACATCAGTAACATCTGACAGTGATTTCATTGAATCACCTGCAGATAATAACTCATGTAACAATCGATTGGTTCTCTCTCTTTGTTTCTCTTTGGATGCATTCAATTTCTCAAACATTGGACTATAATCAGGCAATGGAACTGAAGGTTCATTTCCTCGAAGAGAACCTTTGTTTTTACCTTTCTTTTGTTTCTTTCGAAGAACATTTGGCGAAGATGGACACTCACCAGGTAATACACCACCAACAGCAATGATTTGTTTTTGTGAAAGTACTTTAGCTCCACTAACTTTAGCTTCCTTCTTAGCCATCAATAATTCATGTTTCAATTGAGAAAT
CTCATTATCTTTAGCAATAATCTCACGTTCTTTTCTCTGCAAAATAGAATCTTTTTTGGACAATTGAGCTTCCAAACGATTGATTTTACCATTCAACTGTTGCATATCAGCCAACAATTGCGACTTTTCTCCACGTAACTCACTCACAGGTGGATGATTCATTGCAGTTGAAGGACTATGTCCATTTCTATCATGCAATTGTTTGGCCATTTTAAGATCTTTCTCAAGTGCTTCGATTTTATACTGCAACTCTTGCTTATCTTTGCTGTATCTCTCCAATGATTGTTGAACCATAACTTTATCTCTATTGTGTCGATCTAAAGATAACTGTATGTACTTCTGTCCCAATCGAGCATAGCAACTATAGTGAATTTGATGATGCGGTGGAGGAATCGCAGATTTCAATGTCTTAGCATAATTCATCACTTCATTGCACAATATCTTACCCTCTTTGTATTTGTTGCTGTATTCAAGCAATCGAGAGTAGAAATAACAGATATCACCTGCGAATGGATTGAGTTCCAAAGCTCTTTTATAAAATATTTCAGCATTCTCCAATTTCTCAACCAATACCAAACAATTGGCATAATATGTGAGAGCTTCGGTTGATGTTGGATACTTATCAACTGCTCGTTGATAAATATCCAACGCTTCAACAAACATACCTTGCTCTTCAAAAAACACTCCATGTTTTAGTGGTTCAACTACAATGAACTTTCCATAATCATAGAAATGTGGTTTGGATGCATGTGATTTGAGAACATGAAATTCATGTGCATTCAATAGTATATCATTGTCAGATGTATACCCATTGATTGGTCGAGATTTTCTAAATTTCATCTCAAAAATGGTAGATACATACTGTGACATTTCTCTTATCTTAGCAATCAATTCTTCGTAATTGAAATTGGATGATAGATGAATATGAAAAACAGGTGTTGTGGCAGCAGATGATGTTGTTTGCGTACGTCGTCGTTGATCATTGTCGCAATATATGGCTAACTCAATGGTGGTTGATATCATTTTTATTTTTGTTGTACTCGAAGTCGAAGAAGAACCATTTGTATGTGTGTGCAAGGATGAATGTTTACCTCTTTTACGTCTATGACTTATACCATATGATGCACCATCAGAATCAGAAGAAGATGAACGTGCATGAGTGCTGGGGAGTGCAGCAACCTTTTGAATGCTGGAATTGTTGCATTTTAGAAAATATGCATATTTGGAGCTAGTTTTAAAATAGCGAGTACCTTTGAACATTCCATCACTGTCTCCAATTGCTTTGCGTAACTGTACTCCATAATAGACATCTGAGCAACGATTGGGAGTACCAATGTATCTGATTTGACCAACACTCTTACGACTGCCTACGTACAATTTCACCCAATCTGTGATGCCAATTCTGGAGATGGAGTCAGAGGGCATTACTTCTTGTATGCGAGCGAAACGTAAGAAGAGACCACGTTTAGCTTTGCATTTGAAGTATCGAACATTACCTACCACACCATTTGACTTACCTTTTGATTCATTCAATTCTATGCCGTAGTATATGCCTTTTCTTGCTTGGATCTCTCCTACGAATCGGACTACTCCTAATAAGTCTTTTGTTAAACGGACGCGGTCACCGACTGTTACAACGTCGTTCAAGTTTGTAGTATGGCCGTTTGTTCCAGACATGATTTTACACAATA
>NODE_202_length_3674_cov_52.950345_g165_i0
TTTTCTTTAATGTTATATGAATCTTATCTAAGCACTTTCTCGAAAAAAGAAACCAACCACCCCCAAAAGCCCTTGAGCTAAGTGCCCTAATATAAAAAATATTTTCAGTTTTCGTTGCATGCATTTACCACATGTGTAAACGCTGTGCAATAGAATATATAAATATGATATCATCAAATTATAATCGTATTCTTACGTACAGAATACAACCATAACGTAATCAAACAACAAGCAACAACAATAAAGAAAAGCGTCAGCATAAGCATCAACGATTCCCAAGCAGCCATCCGTTGCTCATGCAAAAATACCTTCCGCACATTCTCCTCCATAACTTTCTGTTCTGCATCTGTACGAATAACACCACCACCTCCTTTATCACCGAATTCCCCACCAACCTCATCACCAAAATAATATTTATCAAAATAATGATTCGCCACTTTAGCATCATAGATATCTTGCATTTCAGATAATCTCTGATTCCTAACACTATCCATCTCCAACTCATCATCAACACCATCACCATCACCAAAACCATATTCATCATCAACAAATCCACCTTCAAAACCATAATTATCATCACCCACATACCCACCATCATCAATTCCATCAACCATCATATATTCCTCAATATCAGCAGCATCTGCATTCATCCCATTACCATTCTCATGTTTATCCACATAATCACTCAACCACTCAGCAGTCTTCTCCTTATCATTATTCAAAAATTTATTGTTCAACATAATCTTCTCTAAAATTGACCGCACAACCCGTACCTTACTATGTATAAGTTTCCTCTTTCCATCATTATCAAAATAATAATCATTTATCTCACGATATACTTTGATATTAGCAAAAGTAGATGCCATACCCGCCTGTCTCGGTTTACCTCTACTTCTCTTCAACATATTCTGCCATACAATACCACGTCCATAACAACGTCTCAAACTTCCAAACCCATGTCCTTTCACATTACGACGTACTTTTGGAGAGAGTACAAAATTGATTGCACGTTGCAACATATATGGATCACGATATACAGCACCAATTGCACCTAAAATCACTGTCTGCACTTCTGTATTTGCACTACCATATATGTATAACAAATTATTGTAAATCAGTTTAGTATTGCGTGACTTCTATGCTTATTTTCATTCATTTTACTTAAGCCAGCTTCAAATATAGCTTGTAAAACATTAGCATCGATATCTGCTTTGAAGTATCCTTCTTTAGTTACTCCAAACTTATGATTGTTTTTGCGCATTATACTCAACAATAAATTATATCCAGTGTTGATTATTGTTTGATCTTCAAATCGTATGAGTGCAGATAAAATTAAGGGACGAAGTAATATATGATCATTTGATTCGCTATTGCTATCTTTTTTATGTCTATGATATGAGCTACTTCCATCTCCATCCCATTTTCCAATTGCTCTCCATATTTTATGTATGATGTTTTTAGCAAAATAACGAAAGTCTTTTCGTAATAATTGTAGAGTCATATCTGGTGAATGGAAGAGTTCACAGTATATATCATCGATATGGATGAATGATTCAATGATCAAACGCCATAATAAATAATTTATTTTTCCGTGTTTACCATTGTTGATTTTACTGACAATTGATGATACAAATGATATATAATTACCAGCTGATATGTAGTTGAACATTAGTAATGCGAATCTGTCTGCAACAATACTGAATTGGTCCACTTTTGATAAATGGTCGAATGATGAGATAATTAGTTTGAGACTTTTTTTATCATAGTGTGTACGAAATAGAGATTTCATTTCTGGATTTATGATATAATATTCGTTACCTTTGAGGCATTTCTTTCTAGCCATAGTTATCTTAAGGGCAGTAGAATCAAACAATCGCTTATTATCATATGTGCTATAACAATTTCTCACACGCAATGGTATACTCCACAACTGATCCATATTAAGTTTAGTTTGCGAATGTAGATTAGGTATCATCCGATCTTGACTCAAAACCAATGCA
ACTCTACCATCACTCATATCTATCGACACTTTCACTATAGGTAAACCAATATTCAACACCCAATCATTCATCATACCTAAAATCTTCTTACGTGTCATATACTTATGCATATTTCCAATCATCGCATTATCCGCTAATATTTCAAACAAATCATCGGTATCTGAATTAGAATATTGATATTTCTTCAGATATGCTTGCAAACCATTCATAAATCTATAATTTCCAAAATATTCACTCAACATATGCAAAAAACCTGCTCCCTTATGATATGTTATACCATCGAACAAAGAACGAACATCTCTTGGACGAACAGCAACTTCACCATTCTGACTCTCTTTATCAACAACAATAGCATGTGTTTCATGCGAACAATCCAACAGCATAGCACGATGTGTATCATAATAGTATTGATCCCAAAAATTCATCTCTGGATATGCATGTTGTGCACCAATAATCTGGAAATAAGCTGCAAATCCTTCATTCAACCATATATGATTCCACCATTTCATAGTTACCAAATTACCAAACCATTGATGTGCTTCTTCGTGTGCAATAACCATTGCTACTTGCATAACATCCATTAGTGTGGAATGTGCTGGGTCTACCAACAAACGTATATCACGATAAGTTACCATTCCCCAATTCTCCATTGCTCCCATTTTAAAATCGCTTACTGCCAATGAATACATTTTAGGTAGTTTATAAGTCATATGAAACAATTTCTCATAATAAGGCATTATGGATAGAGTTGATTTTAAAGCAAATTGAGCACGAATGTGCTTTTGAATGGGGAAGTATATGCGTTGGGATATATTTCCAGATTTACCAGCAACATATTCATAATCGCCAATGGCAAATGCAACTAAATATGTTGACATTTTAGGGGTGGTGTCGAAACGCATTAATTTACATTTGGAAGTATGACCGACATGAGATCGCCAAAGGCAATCATGTTTATCTGTTTTTTCTTCATATTTGACGAGTGTATTGGATATAACAGTTGCTGGATAAGGTGCATATACATTTAGATTGAATTTCGCTTTGAAATGTGGTTCATCAAAACAAGGAAATAAACGACGAGCATTTGTTGCTTCAAAATGTGTGATTGCATTATGTATCATAACACCATTATGCTCATATGAAGTAATGTACAATCCATTCATTTCTGTCTGCATTTTACCCTTGAATTCCAGATATAAAATGGCTGATATATAATCATTATTTTTCTGTTTTCTAAGAATAGATAGTAATTTAGTGTTAGCTAAATTGAAATCGAATTTGACTGTCTGTGCATGTTTATTGTAATGAGCATTTGTTTTATGGTACACTTTATTATGGTTGTTATATAAGTATAAGCGACATTTTATTACTTTTATATCACGTGCGTTGATGGTAATTTGTAAGTTTTCATTGCTGTTTTGTAAATGATCTTTGGTTATGCGAATACTGATGTATTCGAAGGCTTCGATTGTGAGATCACTAAAAAATGGATGAATAGTTATATTATATTTCACAGGGTATATAGTTTTTGATAAGCGATAGGTTGCATGGGATGATGATAATGAAAATGAGGTTATTGATATTAGAAGAAATACTAGGATTGTGTTGATTAAGTATGACATACAGTGTTGAAAAACTGCAGACGCTTAGAACTTTGTGCTAATTCACTCCATT
>NODE_226_length_3556_cov_34.562589_g181_i0
TTGTCAACACATGGAACATGCACGCTTTACGTGCAGTTCAACTTTTTTGTAGGAAATCTTGTTATTCACATCTTCAATCTGTTGCTATCATGACGTCCATGCCAACCGTTATCAAATCTCCTGCATCATCTTCTTCCTTATCTTCATTTCAATCAATGATGTATTTTTTCCACAACAAAGAACAGCAACCTACCCAATTCATTTCCAATTTGAATTCGATTGAAAACTCATCACAAATGTTGTCAATCGATTGGCAAACATTAAAAATACAAGCACAAAAACAACAAACATCTGAAACAGAATCAGAATCAGAATCAAATTCAGAGACAAGAAATATTTCCGTGATTTCCAATTCCAAACGCGGTCTCAAATGGGACAATGGCTATTTTTATAACAAATTACAAACGAAATGGCTTGGACAAACATTTTCATTTTGCGATACTTTGTCTTCCACCCAAACGTTTCTGAAATCAAATGCTCAACAGCTTCCATGCGGAACTCTCTGCACATGCAATACCCAAACATGCGGTCGAGGAAGAGGAGGAAATGTTTGGGAATCTCCGAAAGGCTGTCTCATGTTTTCATTCACTGCTGATTTGCATTCATCGAAAGCACGTTTTTTGCCAGCCATGCAATATTTGATATCTGTAGCAATAGTGCAATCCATCCTACATTTGACTGATGATCAATTGCAACTGCAGCTCAAATGGCCGAATGACATCTACCATGGAAGCACTGAAAAAATTGGAGGAGTGCTTTGCGAAAGTTTATATTTACAGGAAAGCAAGAAATATCGAGTGATTGCTGGAATTGGATTGAATGTCGATAATGAACAACCAACCACTTGCATTTCTAAAATTATCAATGATTTGAATGGTAGTAGAAATGAAAATGAAACTGGAGAGAAACTGTCGATCATATCGAGCATCAATCGCGAAGATGTTCTCAGTACTTTTTGTAATATTTTTGAAGGCATGTTCGATGATTTTTGTTGTCAAGGATTTTCAAATCATCATCGCCAGCAATATTTGCAATATTGGATGCACTCTGGGCAAACAGTGACTGTCAGAGAATCTGAGGATTCGGTGGTGAGCAAGGAAGTGACCATCACTGGTATCAGCAAATACGGATTGTTATTGGCTCACAATGCTCTCGGGGAAGGATTTGAACTACATCCGAATGGAAACTCATTTGATTTTTTGAAAGGTCTGATTTGTAGAAAAATACAAAATATAAGCGTCAAATATTGATATTTATGTAGTGATTTGTTAGGATGAACTATTACTTTTGTTCGTATGCCGTATTTATATCACAACAATACCGTACACACATTTTCAACAAATAATCTATTTCTATGGTAAATGCTCATCAATATTTTTACTAATACCATTTTTTGGATAGATAGCAATGGTATTCTTCTGCACACGAGTCGTTGGTTCCGACTTAGCATAAGTAATCATCCCCAACAAAGGTTCATACTCTGATGATTCACTTCCCTCCAATAAATTCTTTTTCAATCCTCCATCAAATATAACATCACTGCACTGTTTCTCCGGAAATTCCACAATATATTTACTCTCTCTTTCCATCAAAGTACCATACATACAATACTGATACCACTTAAAAGCCAAACAAAACCCACCAGCAATAACAACAACAGCCACCACAAAAGTCATCCATTTCAACACTTTCTCAAATCTCTTAACACTCACAGGTCCACCAATCCGCTTCACCAACATAGCATTCACCGCTTCATGTGCTATTTTATATTCCATAGCCATTGCATAATACTTAGATTGACTCACATCAATATCTGTATATTTCACATCTGCCGACATCAATGATTTATCAACCAATACTTCCTCAAATTCATATTTAATTCCATTCAATCCATAATTCTTCATCCATGGCATCCAAAACCCTCTAAAACGCAATGGATCACTATTGGTATCAAAAGAATTCTCATATTCTTCAACCAAAGTAACAGTAGAAATATCATC
ATATGCCAACAACATATTATACATATATTGATACAAATTTGAAAATCTTATATCATTGCTGAGATCATAATATTCGCAAGGATCACTTACGATATCAAACAAACATGGTTTATCACTGCATGGAACATCATCTTCACTCACAGCACTCTGTTGAACATCATCTGGAATACTACCACATCCCAGATAATCCATATATCCATCACTGTGTGCATTAATTGCGAAATCACTCGTCCAAAATGCATATGCACTATTAACATTTGAAGCACCAATGATTTTATATCTTCCATATCGATATGCACCACATACCTTACCATCCTTACATTCATATGAATCAATATTATACAGAAATTCTCTGTTCTCCAATGATAACTCATTATCAATTGTCCCATATTGCAAAGCATTCCACATATTTTTACCATCGAATACACGTCGATCACTGAATATCTCATCATCTGCCAAACCAATAGCTGATAGAATGGTAGGATACCAATCTGTGACATGCATAACACTATCAAACACTTTACCTTTTCTATCATCATTCAAATATCCACCAGCAATAAATGCAGGTGTACGTACTCCACCTTCATACAAATATGCTTTCCCACCGCGCAATGGATAATTATCACCAGATAATATGAATGCACCATTGTCTGATATACCAATCAACAATGTATTCGACCATACATTTGATAAGGATTTAAGATGGTCTATCATTTGTTTGAGAGCATGATCAGCACATGTGGTCACTGATTGTGCAGCTATTCTCAAATCACATGAGTTGATTGCTGTGAGTAATGCTGGGAAGCCATATTGTTTGAGACAGGTGGTTGATTGTAATTTTTCCATGTATGGTTGTATGCATTCTTCTGTTGTTTCTAAAGGGAAATGGGAAGCATGCATAGCTAAGTAAATGAAGAATGGTTTGTTGGTGGATGCAAATGCAGTGGTTATTTGTATGAATTGTTCTGTTTCCCATATACTTAGATATTTATTGTTGTTTTCAATGAAATGATCATTTATTCGTAGATCATAGCCATTGTATATTTTATTATCAACAGTGAGAGAGTAATCATGTGAGTAGTAATGTAACATGGGTGATAATGAACCATGGAATGCATCAAAGCCTCGATGTAAAGGACATAAATCTGGTCGATGGAAACCTAAGTGCCATTTTCCAACTGCTGCTGTTTTGTACTCATTCTCTTGCATGTCATTTGAGATCATTTTCTCAAATACTGATAAACCAAATGGGTAGCCATTCTCGATTACAGATGATTGGAGACCGAAACGTATTGGGTAACGACCTGTGACTAATGCTGAACGAGTGATGGAACATAAAGGGTATACGTAATGGGAATCTATGGTTAGAGCTTCGTTTTTTATGAAATCTGTGAGGAATGGCATTTGGGTGTCTGTGTGTTTGTGCCATGGAACGTCGTTGAATCCCCAATCATCTGTTACTAGTAAAATTACATTGGGTTTGGTATTTTTATCTTTTCCATGTACTTGGAGAGCTAGAAAAAATAAGCAGAGACCTAGAAAAGTAGATGAATGGT
>NODE_238_length_3524_cov_80.640288_g191_i0
CAACAATTTAAAAACAGACATGTACGCGTCTGTTAAAAAATTAATTTTCAAAAGGATCACTCATCGTTCATTTTCTTCTCGTGCGTTTGATGAATGGAACCCACGAATCCACTCTCTATCTGCAAAATATCCAGGGTTGTTCAATCGTCAATCATTTGTCGATGATCAAATAGAACTTCTTAAATCGGCGGAATCTTCCTTGCCTGGAATTGCAGAAGAATACGAGAAAGGAAGGAAAATGGTAAATAATATGATAGGGGATGAATTGTTTCCAAATGTGAATGAGGTTCAACCGCAGAATGAACCAATATTCTCCAATACGTATTTTGAACAAAATTCCACTGAATTTGAACAAAAGCTGAGCACAGGTGAATCAATAATTGCCAAATTGCTTGCTTTGCAGCAATCGAGTGCATCATCGGTTGAAGAATCAAATCTGCACATGATTGAAGTCTCAAACTTTGCCGATGAGATTGAAGAATGGCGTTTATCATTGCTCAGCAATGGAACATTCAATGATACACTTGGGTTTCCAATTTCCGATAAAAAGTTGGATGCATGGGTAACCAATGAATACTACCATGCACCAGATTATGTTCGAGAACAAGGGATGATAACATATGAAGAACATAAGAAGTTAGAGAATCAATGGATGCTCGAGGTTGATGAACGATTGGTAATTAAGTTGAAGAAAGATTGTTCAACCAAAGGAATTGATTTTAATGAAGATGCTTGGGTGAGATTTTGTGGAATAGTGAATGATATCTTCCATCCTCATAACATCGATCATTATACTCTCTCAAATGCAAGTAAATCTTTATTTCGAGCAGCGCTTTGGCAATCTGTCAATAATGATGGTAGTGATCGACCAACAATGGTTGAAGAAAGTATTCTTCAATTGAAAGAGATATCGGAAGATATGCGCGATAATGATTGTCAAATTGTTCGACGGTGCTATGAACAGAATATTCAAATGAAGTATGCATCATTGGTGGCATTGCAATTGCAAATCACTGATCCATTGTTTGCTGCCAAATTATGTGATGAAGTACGGTTGACTCTGGAAAAGGTTGCTTTTGATAGAGCAGTATGTGATGTTATGCGTGATTTGGAGATACCCAGTATTGAAGCAGAGAAGAGAGCAAATGAAGAAGAATTACGAGAGACAATGTCAACAAATCCCGTGAATTTACTGCGCAATATCAAACGATTAGAAAATGAAGCACAAAAGTTCAATATTGAAGTACCAGATTTAGATTTTGTGTTAGATTCTATTTTGCCAATAAAATTATGGGGCAATGATTTCTCTCGAGTGAAGACTGCCATTCTAGCATATGCTACAGGAGATACATCCGTGAAATTGTTGGACATTCATTCTGCGATTGAATCTGTTCTCATTTCTTCGAATTTGGTGCAGTCCATTGAGAATGATGGAGGATTGAATGATGAGGAAATTACAAAATTCAGTGTTGAATTTCATAATGCACGTGCATCAAATCCAGAATGGTGGAATTTGAAGCAGTTTTTGGTGAATCATTGCAATCAAAAACGAGATCAATCATCCATAGTAAGCAATCAAAGAATTGTGAGAAGAAAATTGAGTTTGATTGATCCCCTGCTGAGCAATCTCATTTGTATAATGTTTGCAGATGGTACATGTGATTTGAATTCATTTCATTCACTTCTCGCAGATTATCAAACAATAATGCGTAGATACCGAGGAGAGGTGGATGCAGTGATCACTAGTGCACAAGCACTCGATGAAACTACATTCAATTCTATTTTGATGGCGTTGGAGAATGCAAATCCAGAGAAGAAAATTACTTTGAACCAAGCAATTGATACTGGAGTTCAAGCTGGATTTATTGTCAAAGCTGGATTGCAGAAATTTGATTTTTCATTGGCCTCGCATTTGCAGCAGGCACTATGACAGGCACTATGAATTTTTATAAAATTATAAAATAAAACGGTTTTGAACCTTGATTTAAAAATGAATTGCT
TTCATTCCACATGCATTCATACATGCTTCTTTAAATGCTTCGCTAAGCGTTGGATGTGCATGACATGTGCGAGCTAAATCTTCAGCAGATGCTCCATATTCAATACCAATCACACCCTCTGCAATCATTTCTCCAGCAGCAGATCCAATGATATGTACACCCAACAATCTGTCTGTTTCTTTATCTGATAATATTTTCACAAAACCTTCCGTTTCTCCATTTGTCCTCGCTCGAGAATTTGCTGCAAAAGGAAATTTACCCACCACATATGAAATATTACTCTCTTTCAACTGTTCTTCAGTTTTCCCAACACACGCAATCTCTGGTGAAGTATATATCACAGATGGTATGCTATTATAATTTACATGTCCAGACTTTCCAGCAATGACTTCAGCCACTGCTATTCCTTCTTCCTCTGCTTTGTGTGCAAGCATTGGTCCTTCAATGCAATCACCGATAGCATAAATATTGTTATGAGTGATACTCCTCCAATCCCCATGAACTCCATCACGTATTTTTATTCTTCCTTTGTCTGTCTCCATACGAATGGACTCTGTATCTAAGCCAAGATTGCTAGTGTTTGGTCTACGCCCAATTGCAATGAGTAATTTTTCACATTCATACACTTTATCATCAGTGCATGAAACTTCAATCATATCTGATGTTGTTTTCACTGATGATAAACCAGTGGATAGTTTGATATCTATTCCTTGTTTTTTCAACATCTTCTGAAATGCATTTGCAACTTCTAAATCTGTGCCAGGTAATATCCTATCTAGAAATTCGATTACAGTTACTTTTGTTCCGAGTCTGCTCCAAACACTTCCCAATTCAAGTCCTATGACTCCAGCACCAACCACAATCATAGATTTTGGTATTTCAGAAAATGAGAGAGCTCCAGTGGAACTGACAATATTTTGTTCATTCACTTCTACTCCAGGTATATCTGTACTCACACTTCCACTCGCAATTACAATATTCTTGGCAGTCACTATTTTTTCACCAGAATCAGAATTTATTGAGATTTCTGTTGGACTCTTCAATTGTGCCCATCCTTTTTCATAATGTACCTTATTTTTTGAGAATAAGCCTTCAATGCCTTTGGTGAGACCAGAAACTGCATTTTCTTTGTATTTCATGAATTGATTGAAATCGAATTTCACATCTGCAATAATTCCTCGATTGCTCAAATCATGCAATGATTGATGATAGAGATGGCTATTGTGCAATAATGCTTTTGAAGGAATACAACCTACGTTCAGACATGTGCCTCCTAATGATCCACGTCCTTCGACACATGCTGTATTAAGGCCCAATTGAGCTGCTTTGATTGCACAACAATATCCTCCAGGACCACCGCCAATTACCACTACATCGTATGCTGATGTTGAATTTCTGTTCTGTCGAATACATAGAAAACTCCCACTCTGTATATTTGTGCTATGTTGCAGTCTTTGACCAACGCGTTTTACAGCTAGCGCTAACATTGAATTTAAGTCACGAATTAATTTTATTAATTAGATC
>NODE_257_length_3425_cov_46.478969_g206_i0
GTGCTCTTCCGATCTTTTTTTTTTTTTCTGTCGACCATTCGTTCGTTTTCATGTCGTTCTTGTGTCTTCTTGGTTTCTCTTGCTTACTCTTCCACTTCACACAAAGTTCCAATAGCCACGACTCACATGGATTTACCCAAACGAATAAGTTCCAACCAAATATAATCTTCATGTTTTGCGATGATTGTGGCTTCAATGATTTTGGTTTCAATCATGAATCAAATGTCCAAACACCATTCATCGACACTCTAGTAAAAAATGAAGGTTTGATAATCAATACACATTATGTCCATAATTTATGTTCCCCAACCAGAGCAGCATTTCTTAGCGGTAGATATGCACATAAACTTGGATTACAAGTTGGAATGCTCACTGAAAATACAGACTATGCATTAACCAGACAAGTATCTCTACTTTCAAATGAATTTCAATCTCAAGGATATGCTACTCACGCAATTGGAAAATGGCATTTAGGCTACCAAACATGGGAATACACCCCAACCTACCGTGGCTTCGACACTTTCGCAGGTTTTTACAACGCAAAATCTGAATATTTCACCCATAAATTCAAAAATTCCCAAAACATGGATTACTACGACTTAAGATTGAATGAAGAACCAGTAAGAGATGCAACAAACATTTATGGTACTCAATGGGAACAAGAACAAGCAATATCAATTCTAAAAACAAAACATAAATTAGATGAACAACCATTTTTCATGTATCTTGCATGGCAAGCAGCACATGAACCATCAGAAGCACCAATGAAATATCAAGACATATATTGTGTCAGTGACACATGTGATGATAGTGATATACATAAAGCACAACTCACATCATTAGATGATAATATACAAACAATTGTAACTTACCTCAAAGAGAATGATTTATGGTCCAGAACATTACTTGTATTCTCTGGTGATAATGGAGGTGCAGTTGGATTTGGTGATAACTTCCCATTACGTTCATATAAATTCTCACCATTTGAAGGTGGAGTACGTGTACCTGCATTTGTTACAGGTGGTTTTCTTAATCCAATGCGCTATGGAGAACATGTGGATGAATTTGCTGTTCATGTCACTGATTGGTATCCAACATTATTGTCTGCTGCTGGTCTTTCAATAACACATGCAAAATCACTTGCATATCATTCTAGTACGTATGATGAAAATACATTGGAATTGATTGATTTTGATATACCTTTGGATGGCTATGATATTTGGCAATTTATTCAATATGGAACATATTCAAAAGATGGTAAAATTGCAGATTTCTTCATACGTAACAATGATGATGGTAAAGGAAGCAGTAGAAATTTGGGGAATACTTTGAGTCATGGACGTGAGATTATATTGAATATTAATAATATGAATTGTAAGTGGGATTCATGTGGATCTATGATTATTGGTGGGAGATGGAAATATGTGAGAGGTGGGAATATGTGTGGTTCTATTGTGGATTGTAATGCATGGATGGAAACAAGTGATACTAATATTCTGAAGTGTGAGTCGTATTCTGATATTAGTGGTAAAAGTGTTGATGAAGGTGGTATTAGTGCTATTGACTGTATTGAGACTGAGGATGGATGTTTGTTTGATATCATTGGTGATCCATGTGAATATTTTAATTTGGGTGAACGCAATCCAAATATTGTGAATGTGGCGAAGAAGAGATTGCAAGCTGTTTATGATGGAGATGCTATTGCTCCATTGGATTTAATGGGGAAGTTGCGTGGGGATGTTATTGATCCTGCTTTGCATGGGGATGTTAGTGATTTTTGGGGTCCTTTTCAGTTGTTTGAGGATGTGCAGTTTGAAAAGCTAATATTTCGTGATTTTACATTGCTTTATGAGGGGAGGAAAGTAGAGGTTGGGGATTATGAGGATGATGGTTTGATTGTTGATGTTGTGAGTGGGGAAGAGTCTGGATATCATAGTAAGGGAATGTTTGAGTTTGATGATGATGATATATTTGCAATTATTATCATTGGAGTGGTGTCG
ATATTGTTGTTGATGTTGTGTGCTGGTATTTCGTATTTTTGGAGAAAAGTGGATCGGAAATCGGAAGTAATGGCAGAAATGCAGCCCTTACTTGTTTTGTAGCAGGATGAGTGAACCTAATGAACGACGTTGAATTCTATTGACTGTTTCATCATGCGTTTTGCTCGTATTTTTGCCTTTGTGTCGCGTACTACTCGCGTTTTATTTAGAAAATCAAGTTCTTCCTCTACCACAGAACTGAAGAAAACCAAATTAAATGCTTTCCACATCGCAAATAACGCAAAATTAGTAGATTTCGGAGGATTTTCAATGCCAGTTCAATACAAAAATTTATCCATAACAGATTCCACTCTCCACACACGCAAATCATGCTCATTATTCGATGTATCACACATGGGACAATTGAAATTCTACGGAAAAGATCGCATTCGATTCTTAGAATCAATATTAGTGTCCAATGTATCTCCAATAAAAACAAATCAATTGAAATATTCATTAATGCTCAACAAAAATGGTGGAATCATTGATGATTTAGTAATTGCCAATTGCGATCAATCTAAAAACGATAATCCCCATCATTATATGGTTATAAATGCTGGACGTATCCCAGAAGATTTACAACATATTGATCATCAACTCTCTCAATTCAATGGTGATTGTAATTATACATTCATGGATAATCAATCATTGATTGCATTACAAGGGCCAAAAGCAGTTAATGTTCTGCAAAGATTAATAACTACTGATTTTGATTTCAATGGTTTGAAATATTACAATATGAGTGATATGAATATCAGTGGTATTCCAATTCAAGCATCGCGCAGTGGATATACTGGTGAAGATGGATTTGAGATTTCTGTGAGCAGTGATCAGATTGTTGAATTAGCTGAGGTTTTGTTGAATGAAAGTGAAGTTGAACTGGCGGGTTTGGGAGCGAGAGATGCTTTGAGATTGGAAGCTGGAATGTGTTTGTATGGAAATGATTTGAATGAGAATATAACTCCAAATCAAGCATGTTTGTTGTGGACAATGAGTAAGAAGAGAAGAATGGATGGAGGGTTTCTTGGGTTTGAAGCTGTTCGACATCAAATTGTGAATGGAGTTAAGATACAGAGGGTTGGGTTGATTGGGGAAAAAGGACTTACGCCGAGAGGTCATCAGAGAGTGGTGAATGAGAGTGGTGTGGATGTTGGGGAGGTTACATCTGGTACTTTTTCTCCGTGTTTGCAATTGCCGATTGCATTGGGGTATGTTGGTAGTGATTGGTGTGAGATTGGTACACGTTTGGAAGTTATAATTAGGGAAGGTAAGAAAATAAAGGTAGAAGTTTGTAAATTACCTTTTGTAGAAACGTCTTATTATAAGTAAACATTTGTAAGGGACACAAGGAACATAATTTTTAATTTATTTGACGATTTAAGGGGC
>NODE_284_length_3345_cov_73.038532_g225_i0
ACGTCAGTTCTTGTGCCTTACAAAAAATAAAGACACAATAGATTTAAAAATAAAGGGTGAAGCAAGAAAAAAAAATATAAAATAAAAATCAAAAAAGTATGGAAAAAAACTGTTTATAGCACAGATAATTGCAAAAAGAGAAGTAAAATACATATTACACACACAAACAAGAAATATATTCACCATCCAACAAACTTATACAGATTCTGAATTAGATGGTGAACTATCAGAACTTGTTCCATGAGCCAATTGCAATTGCGACATTGGCAAATCATCTGAATTTAAAACAGAGATAATATCGAAATGTGTACTATACTCATCACTTTCAATATCTCTTACCACCAACACATAATGTAATGATGTCTCTTGTACTGCTAAATACGTACCAGCAAATTCATTGCTCTCAAGTGTACCATATTTACTGTTATATCTAAATAATGAATATTTACTACCACTTCCATCATTCACATCTACACTCCCACCCTTATTGTACACACGTAAGTATCGACCAGTTTTATTATTTCTAAATTTTACTAAATCTCCATATATTTCTAAAATCCATTGTGAATTAATTTTCTCATATCCATTACCACGTGCATTAACCTCTTCATCATCCGATGCCACTCTCAAATGCTTACCAAATGCATGCTGTAACAGCACAACGCTATAATTCTTCAAAACACTCTCTCCATTCATTATCTTCCCATGTCTTACATCAGATATCATATGTTCATCATTATAACTTCTATTACGTCTATTATTATTATTCCTATTACCATTCATCCTCAATACTTCCATATCATCCTCATATTCATCATCATAATCACTTCCACCAGCACTACTAAAATTATTTACATTTATATTTGCAGAAAAGATAACTTTCGACCATGAATTCTTTCTCTTCACTCGTTTCTCAGCAAGTGTCAAAGGTTTAGGTTTCTTTCCTCTCAACTTCTCTTTTGCATGCTTTTGAAGTACAATTTCATATGGATCACGATTGTTAACCTCCGCATCCGATATCGCTTTTGCAATCGCATTCTCCTTTCTATCTTTCTTTGATAAATATTGCTGCATCTTCTTCCAATATGCACTATCCTTACTACGATATATCTTCTCCACTGAACATGAATTCTGTTTTAATGCTTGATTTTTATGATCACTATTGCGATAAAAAGCTTCACACAATTTGGGTAAGAACGCTTTTCTATCATAATCCACCAACCGTTGCATACTCTGTTCTTTCCATGCAGCAATTGCACGTTCAGCTACCGGTTGTCTGGTACTCAACATACAATCTGTAAGTTTTGTGGCAGCCGCCACCAATATCTCTTTATAGTTCTCGTACACAAATCCATGCTGGTGAGTGATCAATACATTGATGATATTCACTACTTCCATAATGAATATCTCTTCCTTTTGCGGTGATTGCTGTGGCCAGAAACGAAGCAGACCACCTAAGATTACTGAACCACCATTGCCATCCTTAGCAACATAATTCACACAACATTGTGTCAACTGTTCCCAGAACTTCTTCAATCGACGACATTTGTGCAATGGCACTAAAACATTTCGCACAAATGATTGCCAACTTTCTTTTACTGGTACAGTCAAACCTGGGATTATTGCACATATGATTTGCAATAATTCAATGATACCATTGACATGTTCAAAGTCTGAGGTATATATCATACGATAGAAGTATCCACACATGATTTGTATGATATATGGACGTAGTTTTAGACAACGACCATATATTTTATGAAGTATCATCATTAGATATGCACGTTCGCGGTGATCTTCAGATGCGAAGAGTTCTACTAAGCATTTGAGGAATTTACCAGTCATGTATTTCTCCATCATTTGGGCAGTGACCATTGGAGTGTTAATCACTCGCCAGGTTAGATCGTAGACTAATTTTAAGTGACGCCAGCTGGGATCTTCAAAGTTCGACTCTTTGTCGAATACTTCATCGTTGAAGAAGGAGAGAGCGGGTGGGCGAT
CTTGATATGGAAGAGGACGAAATAGATTGGCACCTACGGTTTCTAAGCATTTTTCGTAGAGGTTCTCTTTGAACCATACATGTCTTGCTATGAATTCACTCAATTCTAAGAGTAATTTCTCTTTTTTTATTATTAATTCATCCATTTCTGGTGTTTGAAAAAAGAAATCACATCGTGCTTGGCAATAATACAACTTAAGTACAAACATATCTTCTCTTTCATCTTCTGGATAATCAGCAATGGGTGATAGATTACTGTAATACAATTCTTGAGCAATACCACCTAATTTTCTGCTTTGCGCGAGATATATCTCTAAAACTCGATTGGTGGATGAATATGAACGAGAGTCTTCTGGTGAAGGTTTAGGTGTAGACGATGATGAATTCTGCTGTCTTGGCTCTACTTTTCTATCATCAGAGTCCTCATCACCTGTACCACTTTCATACAAAGATACCCAACCTTTGAACATCAATTTCTCACTCATACGAAGTTTTTTTATTCGTTTTATTTGTGGTTTGGGTGTTTTTGGCTTCACTTCAATGAAGAATGGATCATCTCGATTGGTATTACTTTTATCTGGTTTTTTTATTGATGGATCCACTGGTGCTGGAATAATTCTTTTGAAAGGTAACATTACTCCATGATTATCTTCAGCATCAAAATAGAATTTCTTTTTTACTGTTCCATTGTTTTTACCTTTTGGTGCATCCAATTCTACACCAAACCATACACCCTTATCAAAATGTGTGGGTCCAACATATTTGATAATACCAGGTGTCTTCTTCTTTACATATATAACTCTATCACCAATGGTAAGATTAAATTTCTTACCACGATGGAGTACACGTTCAACTCTCTGTTTTGGTACCCATATGGTTTTATTTCTGGGCGATTGATTATCAAATACATTTTCACTGCTTAGATGAACATCCATGACCACACGTACCCATACTCCACGTTTTTCTGGTTTAGCACCAATTGAAGTTACTTTTCCACTTCTTTTGTTTGCTAATAAGATTTTATCTCCAACATATAATTGTAGATCATCATTTTGATAGCTTATGTACTTCTTTTTATTTTTATGACCAGTGCTTTTGGTTCTATATTTTTTTGGATGTTGAGGTGAACGAGCAGTGCCATTGTGGCTGTTGTTATCTGTGTTAGTATCTATATTTTCAGGTTCTTCTTCATTATCGACATCTGGCATTGTTGTTGAATCACCTCTTTTTAGAGATGCTAATAATTCGTCATCATTGCGTTTCTTTTTCTTAAACCAACCAGACATTTTCTATGTTTATGATTTTTACTTTATTTCCTACGTTTTTTACGGTTCGCTTGAGCAAAA
>NODE_285_length_3344_cov_82.848558_g226_i0
TCAATTAAGACCTGTTGTGCTTCAAAAATCAAATCAAATCACAAAACCAAACCACAAAAACCACAAACATAAAATCATAGATGAGTTTCATAGCTGCCACAGAACAATACTACACCCACTACGAACAAGAAGGTCCTCTAAAACGCACTGAAGACATCCGCGATCACAAATGGCTCTTCTCCAGACACGACCTCTGTTCACTCATGGCCAATTTCCAAGAAAAACGCTTAGCACTCTCCACACTCGCCCAAATGGGCGGTAGCCAAGGCTTATGCTACGCCCTACGCAGTCACTCACGCACTGGCCTCGGCATCGATGAAATAGAAGACAACTCAGAAATCAATGCATTAGACATCAGACGCAGTAAATTCGGCATAAATGAATTGCCCCCACATAAATCTGATCCATATTACAAACTATGCTATGATGAATTGCAAGACCCTATGCTCTGTGTGTTAGTGGTCGCAGGTGTCATCTCTCTAGTCGTTGGTGCAGCATTGCATGCAGCGGATGGTGGATATATTGAAGGATTAGCAATACTCATTGCAGTAGTTATTGTAGTAAATGTTGGAGCCATAAACAATTGGCAAAAAGAGAAACAATTCCGAAAAATGGATGAAGAGAATAAAAAGAAGAATACAATAGTAATGAGATCTACAGAAATGGAGATTCCATGGAGTGAAGTGGTAGTTGGAGATCTAGTTATACTTAGAAATGGTTTTACTGTTCCAGCTGATGGTGTATTTGTATTAGGTACTGAAAATCTGCATACGGAGGAAAGTTCACTCACTGGTGAATCGCGTGAATTATCGAAGAATAGCGAGAATCCATTGTTGATGAAAGGTACGAATGTTGTGGAAGGTGAAGGACTTATGCTGGTGGTTACTGTTGGTCCATACACAGAATGGGGAAAACTTATGCTCGGATTGCAAGAGGAAAGAAAAGATACTCCATTGCAAGAGAAATTGGATCGTTTGGCTGGGTTGATTGGCTATGGCGGTGGAGCAGTAGCAATTCTTTTGTTCATAATATTGACCATCAACTGGGGAATCAATGGTGGACATGATGCAGATATAAATATATTGAATTTCTTGATCATTGCAATAACTATTGTAGTGGTTGCTGTTCCAGAAGGTTTACCATTGGCAGTTACCATCTCACTTGCATACTCAATGAAGAAAATGTTGTTGGACAATAATTTTGTACGTCATTTGAAAGCATGTGAGACAATGGGTAATGCAACAACCATATGTTCAGATAAGACTGGAACATTGACTACCAATCGAATGTCTGTTCAGCAAGTGTTTATGTATGGCAGACGATTCATTCGTAGTGAAATATGTGATACTCCAGAGATTGCTGCAAATGCTTTGACACCAGTTGTGCATCAATTGCTGATGAATTGCATATGCACTAATACCAAATCATTTCAGGAAGAGCCTAAAACTGTAGATGAAAGAGCAGCCATCGATGCTGGGAAACGTAAAAAAAGACTCACTGGCGGTAACCAAACGGATTGTGCAATGTTGCAGTTTGCAATTGATTTGGGTGCACATGATTACAAAGAACGTCGTAGAAACAGCCCAGTTACTAAAATGTTCCCTTTCAATAGCAAAGTAAAACGTAGTTCAGTGTTAGTGAGAGATAAGAATCGATATATAATGTATACCAAAGGCGCAGCTGAAGTAACACTTGAGATATGTACACATTATATGTCCAACAATGGAGAAAGTGTACGTATGTCCAACGAAGATAAAGCAAAAGTCTTAAAAGCTATGAATCTGATGACAAAACGCGGTTTACGCTGTCTCGGCACTTGTTACAAAACATTTGACAAATCTGAAATACCTTTTTCTTCCATAAGTTTGAATATCGCAGAAGAAGATTGCGGAATATTGTTTGAGAATATGATATGGATAGCAGTGATGGCCATACAAGATCCAGTACGCGATGAAGTACCAGATGCAGTTCTAACATGTCAGAGAGCAGGTATTGTAGTA
CGTATGGTAACTGGTGATCATTTGGAGACTGCCAAACATATTGCAAAAGAATGTCATATCCTCACATGTGCTGACCATGTGTGTATGACTGGAGAACGTTTTCGTTCTTTGACAGATGATGAAAAATTTGATCTTCTGCCAAGATTGAGAGTGTTGGCTCGTTCTAAACCTAAAGATAAGGAACAATTGGTGAAATGGTACAAAGAGAACAACAATGACATCGTAGCTACCACCGGTGATGGTGCAAATGATGCATTGGCATTGAAAGAAGCGAACATTGGACTGTCAATGGGTATACAAGGGACAGATGTTGCAAAAGAAGCATCAGATATTATTATTATGGATGACAATTTTGCATCTATAGTGCAGACAGTGATGTGGGGCAGATCTGTTTATGATAATATAAGAAAGTTTGTACAGTTTCAATTGACTGTGAATGTGGTTGCTTTGACATTGTCTTTGATTGCGGCATTTTGGACAGAGTTTGCTAATCCTTTGACAGCTGTGCAGTTGTTGTGGGTGAATTTGATTATGGATACAATGGCTGCGTTGGCATTGGCTACGGAGGACCCTACTCCAAAACTGTTGGACAGACATCCTTTTACACCTGATTCGAATTTGATCACTCAGATTTTGTGGAGGTTTGTGTTTGGACATTCTTTGTATCAATTGGTACTGCTTTTGATGACAATGTTTGTAGCGGATGAGTGGTTGGGTATTAGAGATATGGAGAAAGGGGAGGAACAGAATCGGAGACATTTGACTGTGATCTTCAATACTTTTGTGTGGATGCAGATTTTTAATGAGTTCAATGCACGTAAGGTGAATAATGAGTGGAATATTTTTGAACATTTGTTCGATAATCTGTACTTTTGGTTTATTATGGGAGTGACTGTGCTTTTGCAGATATTTATGATTGAGTTTTTTGGGGATTTTGCGTCAACAGAGGGTTTGAATGGAAAGGAGTGGGGATATTGTTTGGCATTGGGTGCAGGTTCTTTGCTGTGGCATCAGTTGGTGAGATTGGTCCCAGTGGATTTCAATGATGGCATTAAAATTGTGGATAGTGATGTGCTTTTTAAGACTGAGGTTGAATTTGAACCAGGGTATGTTGCTCCGAATGCGGAGGTCATTGATGATGAGAAGGATGATAGTACTTTGACTGCTGATAATATGGTACATGTTGCAAGTCATAGTACACAGCAGCATCTGTGATTTTTTTTTTGTGTTTTGTGCATAACATCAATGAAAACTATTTTTTGGGTATTTACTTTTTTTTAGGTGTATGTTCGTTGTTGGTTTGTAGTTGTACATATTTGCGGTTTATTTTTATATGAGTGTAGT
>NODE_294_length_3304_cov_64.247619_g234_i0
GAAAGGATTAGAATGGGCGATTTTTTATCTCATGTTCCATGTTCGATGAATAATCAGGAATTGATAGTGCATATGATTGAGAATTATTTGGAAGAGAGGGAGAGTCAAAGTAAAAATCACGCGGCATTTTATGAGAATGGAAATATTCCAAATTATATCCAGCAATTATTTAACGAGTGCACTTCTGACAGATATGCATTTCTTCTCAATTCATATGCAGTCAAGCATTCAATCACTAATGAACAATTGCTGAAATATTTGGATGGAAGTGAGAATGGAAAGAATTCTCCTTTTTTGAGTGTTTTTGGGAGGATTGCTGAAGCACATGTGTATGAATGGTTGGTGACTGGAGATGAATACGTTTCATATTTTTCAGATTCTCATCCATGGGTAGTAAATCAGCAATCATATCGAATAAATCTCGCAAAAGAATCTGAGGATGAGCCATTGTATCTAAAGTTCCATTGGGAATTAAACAATGATAAAAGTGATGGTTTATTATGGTGCTTATTGGTCTGTGATCAATTGCCAATGCAAGTGGAAGAAGTGAGAATGAATTTAGGGATATATATTCCAGAAACGAAATCTGAAACGAATGAGGGTTCTTTTTTGAATCGCCAACAGTTTAGAGGCAATCAATTGCACAGCATTGTAGAAAGCAAGAAGTTATTGATGGAGAATAATCCGCAAGTAAAGAAACCTGAATTTAATCTCAAATATATTTTCAGATGCTGAATGTCAAAAATAAATAGTTTCACTATGAAATATGTACACATACAAAACAAAAACAAAAACAGTCAAAATGGTAAAAAATAACAAAACAGTCAAAATGGTATATAAAAATACAATAAATATGGCCAACCCTCCCCAAATCAATCACTTTAAACTTCTCAAAATTGTCTCTCAAAATATTTCCCAAAATCATGCTTCGGATCAATAGGCTTAGCATCAAGCGCATCATTCGGATCAATCTGAATCTTCGCAAAATTCCCCAAATAAGTCGAACTCTTAACCTTATTAACAATAGGTGCAGGCATAGTCCCATCATTCAACTTAGCCCACGAAAAACTAGAATACCACGGATGCTTACGAATATTATTAGCACCACCTCTCATAACTCCCAACCTCCGAGTAGGCTTATTATGCAACAAACCTTTGATCAAATCACGCACTTCAGCACTGAAATATCTCGGAAAACGTATTCTCCCACGTATAATCTTACGATATGTCTCAATTGGATCATCAGCCACAAAAGGTGGAAAACTAGCCAACATCTCATATATCAATATCCCAAGCGTCCACCAATCAACACCTTTTCCATGTCCTTGACCAGTGACAATCTCAGGACACAAATAATCTGGTGTTCCACACAAAGTAAATGTCTTACCTGTCAAAAATTTAGCAAAACCAAAATCAGTAACTTTTAAGTATCCATCATTGTCCAACACCAGATTCTCTGGTTTCAAATCACGATAAATGATATTCATACTGTGCATGTAATCAAATGCTTCAATGACACAACCAGCATAAAAACGAGATGCTGGTTCATTGAAGTATCTGCGAGAACGTAGGATAGTAAATAATTCACCACCCAAACATACATCCAATAAAAAGTATACACGTAGTGGGTCATTGTAAGTGCAGTGTAAGTTCACTAAGAACTTGTTTTTCATAACATCCATCACTTGTTTTTCACTTACGATGTGTTTTTGCAGTTCAAGTTCTATCACTTGGAACTTTTTTATGGATTTTAAGGCGTATGATTTTTTTGTATGTGGATCGACCACTAAAGTTACTAAACCAAAACCACCTTTGCCAAGCACTCCAACAGTCTGCAACTCATGCAAATCACAAATCTTTTTAGGTTTCGTAGCTGCGTTTTTATTTTGCTCTGCAGCTTTCGATCTTATTTGCTTTTTCTTCTCACGAACAGCATCTTCAGCTATCATTTTTGCAGAACGACTGTACTCTGATATCTGATCATCCACTAATTCAATGAC
TGGTCCAAGCAAATCAAAGAAATGTTGACTGTCCATTTCTAAGCAGGTTAAGTCAGTTTTTGCACTGATTGTTGCTGCACGTGCTTGTTTTGTACGCAAAGCACGTTCACCAAAGAATTGTCCTTTAGTTAATATACCTTTTTCACCATTAACTTTTTTCCATTCTGCTGAACCTTTATATATAACATAAAATCGTTCACCAACTTCCCCCTGCCTAAAAACCACAGTTTTTGCTGAATATACCTTCTCTTCCAGCGACGAACCCAAATCCATAATCTCTGATCTCAGCAATGGCTTAAACAACGAAACAGTAGGCAAAAAAGCCATTAATTCTTCATCTTTCTGTTTACTGACAACAATAACCTCATATCTAAACATCTGTCGTTCAATCACCCATAAACGACCACCTTTTTTAGATACACATTTCACAGTTGCAGCTCTGGGTGCATCATTAATTAATGCAAGTTCACCAAAACACCCACCTCTTTTGTATGAATGTACTTTACATTTATCTTTTAATACATTATAATCTCCACTCTCAACCACATAAAAAGTCGTTGCTTTTAAGTCTCCTTGATTGATAATACATTCATTGTAATCAACAGTAATCATTCTCATACGTGATAATATTCGTCTCTTCATTGCTTTATCAAAATTCTGAAATAACAAATTATCAGCTACAGATTTCATCAACCATTTCATAGCACTATCACTCACATTCGACAAAGGTGCTGCAGCATATTTACTACTACCACTACCACTGCTGGTATGAGCATTATTATTATTGATATTACCACTGCCACCACTCTTGTACTCATCGCGTAATGCACTCAAATTTATTGGAGTAATAAAAGCATTTCGTTTTGCTTCACGTTTGGCAAATCTCACGGAAGAACTCGATTGTATAACCTTTTGAAACAATTTCTGGTTGCATACCAGACATATAACTTTAGAACATGCTTGTAAGCTGGCATTTCTTTTGGCATTTTGGAGTAATGCTTGTTCACCTGCGTAGTCACCTGTTTCTAGTGTTGCTACCACTTTGTTGTCTGTTGATAGGACGTTGACTATGCCTTCTACTATGATGTAGAATGAATCACCAGTCTCGCCTTCTTTCATTATATATTCACCTTTAGCGAACTGTTGGGTTACTAAACCATTTGAGAGGTTCTTACGTTCGATTGGTGATAAACCAGAGAGTAAAGGAACTGATGCTAGAATGGTGTTTATTACGGATTGTCTAGTTTTAGAGTACTTTGAACTGGTCTTTGTTTTATTTGTAGTGCCAGACATTATTTTTTTAT
>NODE_373_length_3084_cov_119.730807_g297_i0
TTTGCTTATTTTTTTCTTATTCCTTGGTTCTTCTTGGTTTTGTGTTTGTTTAATGACAACAACAACACCTGCAGTAACGATAACAAACGTTGATCGGAACAACCCGTATGCATCTGCTTCTTTGTATGTGGGTGATTTGGCAGCAGATGTGACTGAAGCCACTCTCTTCGAATTATTCAACGCAGTTGGTCCAGTTGCCAGCATACGTGTGTGCAGAGATGCAACCACACGTCGTTCCTTGGGTTATGCTTATGTAAATTTCCATTCAGTGCACGATGCAGAAGTGTCATTGGATATCATGAATTTTACAAACATTCGTGGTCGCCACTGTCGAATTATGTGGAGTCAGAGGGACCCTCGCTTACGCAAATCTGGCAAAGGCAACATTTTTGTCAAGAATTTGCATGAGTCCATTGACAACAAAACACTTTATGATACGTTCTCTGTGTTTGGAAGTATTCTCTCCTGCAAAGTAGTGGTTGATAGAGATTCAGGACTTTCTCGCGGTTATGGATATGTACACTACGTGGATGATAAGTCAGCAGCTAAAGCTATCGAAGGAGTAAATGGAATGAAAATCAATCAATGCCAAGTGCATGCTGAGCTTTTTAAACCGCGCGAAGAGAGAATGAAAGACCCGAAGTATGAGTTCACTAATATTTATGTGAAATACATTCCAAGTGGTGTGAATGAGAAAAAGTTGGTGGAGCTTTTCCAGCGCGAAGCGGAAGAAGTTTGTAATAAGTATGATTTTTGGTACAGAGAGTATGGAATATCTGCTTGTTTTAATTTCAAGTCTACGGCTGGTGCTCGTAGAGCAATCAGAGAAATGAATGGCAAGTTTTTGCATGATTTCAAAGACATTGATCAATTGTTTGATGAGAAGGAGGCAGTAGCAGTGGTTGAAGATAAAGTTGCAGATGAAGCAGTTGTCAATGGTAATGAAGAGAAAGAGAACAGTGGGCAAGCTGAAGAATCTGTTGCCGGCGGTGATGAGAATAAAAATGCGAAGGCAGAAGAAGAAGAAAAAGATTCTGAAGCAGATATAATTACCACTTCTGCTGCCACTACCACTGGTAATTCAACTGCTATCGCTGCTGTTGCTTCAGAAGATAGCAAAGCAGATGATAGCAAAGATGACAGTAAATCGCGTGAAGATCGTCTTCCAGTGCCAACCACAAATGGTGCTTTGATGCGTGTGCAACAGCGTGGATTGTATGTTGCGCGTGCACAAAAAGGAAATGAGCGCAAAGAGTTTTTAAATCGCATGGCTCGAAGTGTAACTCTCAATGGAAGACGAATTGGAATTCCTGGGGCGAATTTGTATGTAAAGAATTTGAGTCCAGAGGTGAATGATGACAAATTACGTGAGATGTTCGCTATTTTTGGTACAATTACATCTGCAGTAGTGATGACAGAGAAAGATAGTAAGAAATCGAGAGGTTTTGGTTTTGTAGCATATCAAAAGAAAGAGAGTGCTGCTCGTGCAATTCATGAAATGATGAATAGTTTGCACAATGGTAAACCTTTGTATGTGTCGCGTGCTCAAAGCAAACAATTTCGCCAACAGTTTATAGCGAAACAACTTCGCCAAAGAGGTAATTTCAGTGGTAGAGGTCGTGGTGGATACCGTGGACGTGGAAGAGGTAATTACCGTGGTCGAGGTAGAGGTCGAGGAAGTGGAGGTTCATACAGTGGAGGATATCGTTCATTTCGCGGTGGATATCGCGGAGGATACAATAATTACCGAGGTAGAGGCAGAGGACGTGGACGTGGACGATCCAATTTCCCACCAACTTCGGTCTACAGTGGATATCCTCCTCAACAGCCTACCCCTTACGCAGGTGTATATGGAGGTGGACAAGCATATCCAGCATCGAGTCCATACGCTCGACAGTATTCTGGTCAATATCCGAATTATGCTCAGGCATCTCGTGTACCATATGGTTATCCTGGTCAAAGTGCAGCTAATGCACAAATGGCGAGAACTGCGCAACCAT
CAGTGCCTATGAATTATTCTGCACAGGGAGTTGTGCCGAATATGAATCGAGCATATATTCCGCAAACAGCACAAGGGTACATTCAACAACCGTATGTGAATCCGCAGCTTCAATTGAGACAAACGCAGAATATTCCGTCTCAGCCCGCAAGTGCTGCATTAGCGGTAGCTCAACCGCAGCAAATTCAATCTTTGCCAGCTGGACAGCAATCTGGTTCAATGATGTATGGAACATCATCTGTTCCTCGTCCACCGTACAGTGTGAATCAAATTGTGCATCAAGTGCCAATGCCATCACAGCCATTGGCACAACCACAGACATCTGTGAGTCAATTGAGTGCAAACAAGGATTCTGCTCCAGTCTTAGAGAACCATCCATTGACCAGTGAAATGTTGAAGGAAGCAAAGCCGCCAGAGCGCAAACGATTGATTGGTGAGAGATTGTTTCCAAAAATTCAAGTGGTTGAGCCTCGATTGGCAGGGAAGATTACAGGAATGTTGTTAGAGATGGATAATACTGAACTGTTGGTACTGTTGTCAGACCAGGCTGCGTTGATGAGTAAGATTAATGAGGCATTGGCTGTGTTGAAGGATCATCAACAGAAGCAGTCTCAGCGCAATCCTGAATCATCCAAGAATCAGTCATCCCAGGCAAACAAAGCAGGTTCGCAGTCAAATCAAGCAAGCTCTTCTGCTGCACAAGCGAATCAAACTAGTGTTGGACAACATGTTGCGCAACCGAGATCTGCTGCCAATCCATAAACAATTGTATGATGCTCGATGGGTGTTCAATTGCCTCATCCATCTCCATTGCATTCACTATTCTTATTATTTTTAGTTCTAATTTTTATTTAATTTTGCCATACACAAAAAAAAATAAAAAAAATAAAAAAAAAACTGATTTTCTATTGAAGTGTATGGTGTTTGACCAAGTTTTGTGTTTAAATTGTTTCTTTTTTCGATTTTATTTATTTTTTGCCTTTTGTTTAGTTTATATCACGGATATATATATACAATATTATGTTTCTAGAATGTGTTTATTACTATTTCCGAAACCATTGTCCACTTTGAAAAATTGAAAAAAG

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

1
PTL1_gene_families Normal file
View File

@ -0,0 +1 @@

View File

@ -1,12 +1,12 @@
# Last updated Jan 2024
# Authors: Auden Cote-L'Heureux and Mario Ceron-Romero
# This script chooses orthologs to concatenate OGs. This can be done as part of an end-to-end EukPhylo run,
# This script chooses orthologs to concatenate OGs. This can be done as part of an end-to-end PhyloToL run,
# or by inputting already complete alignments and gene trees and running only the concatenation step.
# Use the --concatenate flag to run this step, and optionally use the argument --concat_target_taxa to input
# a file containing a list of taxon codes to be included in the concatenated alignment. If a GF has more
# than one sequence from a taxon, a representative ortholog must be chosen to include in the concatenated alignment.
# To do this, for each taxon EukPhylo keeps only the sequences falling in the monophyletic clade in the tree
# To do this, for each taxon PhyloToL keeps only the sequences falling in the monophyletic clade in the tree
# that contains the greatest number of species of the taxons minor clade (or major clade, if the target taxon list
# uses major-clade codes). If multiple sequences from the taxon fall into this largest clade, then the sequence
# with the highest score (defined as length times k-mer coverage for transcriptomic data with k-mer coverage
@ -118,15 +118,17 @@ def remove_paralogs(params):
#Getting a clean list of all target taxa
if os.path.isfile(params.concat_target_taxa):
try:
target_codes = [l.strip() for l in open(params.concat_target_taxa).readlines() if l.strip() != '']
except AttributeError:
print('\n\nError: invalid "concat_target_taxa" argument. This must be a comma-separated list of any number of digits/characters to describe focal taxa (e.g. Sr_ci_S OR Am_tu), or a file with the extension .txt containing a list of complete or partial taxon codes. All sequences containing the complete/partial code will be identified as belonging to target taxa.\n\n')
if type(params.concat_target_taxa) is list:
target_codes = [code.strip() for code in params.concat_target_taxa if code.strip() != '']
elif params.concat_target_taxa != None:
target_codes = [params.concat_target_taxa]
if os.path.isfile(params.concat_target_taxa):
try:
target_codes = [l.strip() for l in open(params.concat_target_taxa).readlines() if l.strip() != '']
except AttributeError:
print('\n\nError: invalid "concat_target_taxa" argument. This must be a comma-separated list of any number of digits/characters to describe focal taxa (e.g. Sr_ci_S OR Am_tu), or a file with the extension .txt containing a list of complete or partial taxon codes. All sequences containing the complete/partial code will be identified as belonging to target taxa.\n\n')
else:
target_codes = [leaf.name[:10] for leaf in tree]
print('\nERROR: missing --concat_target_taxa argument. When concatenating, you need to give the taxonomic group (sequence prefix), groups, or a file containing a list of groups (multiple prefixes) for which to select sequences to construct a concatenated alignment\n')
exit()
monophyletic_clades = { }

View File

@ -1,4 +1,4 @@
# Last updated Jan 2025
# Last updated Jan 2024
# Authors: Auden Cote-L'Heureux, Mario Ceron-Romero.
# This script contains the entirety of the contamination loop, an iterative tool to assess
@ -323,45 +323,15 @@ def cl_mafft(params):
if file.split('.')[-1] in ('fasta', 'fas', 'faa'):
os.system('mafft ' + params.output + '/Output/Pre-Guidance/' + file + ' > ' + params.output + '/Output/NotGapTrimmed/' + file)
os.system('Scripts/trimal-trimAl/source/trimal -in ' + params.output + '/Output/NotGapTrimmed/' + file + ' -out ' + params.output + '/Output/Guidance/' + file.split('.')[0] + '.95gapTrimmed.fasta' + ' -gapthreshold ' + str(params.trimal_cutoff) + ' -fasta')
os.system('Scripts/trimal-trimAl/source/trimal -in ' + params.output + '/Output/NotGapTrimmed/' + file + ' -out ' + params.output + '/Output/Guidance/' + file.split('.')[0] + '.95gapTrimmed.fasta' + ' -gapthreshold 0.05 -fasta')
#Utility function to run FastTree in between iterations (if this is the chosen tree-building method)
def cl_fasttree(params):
    """Build a FastTree tree for each alignment in the Guidance output folder.

    Every file in ``<params.output>/Output/Guidance`` whose extension is
    ``fasta``, ``fas`` or ``faa`` is passed to the ``FastTree`` executable, and
    FastTree's standard output is redirected to
    ``<params.output>/Output/Trees/<name>.FastTree.tre``, where ``<name>`` is
    the file name up to its first ``.``.

    Args:
        params: Parsed pipeline parameters; only ``params.output`` is read.

    Returns:
        None. The exit status of FastTree is not checked.
    """
    # Local import so this block does not depend on the file's import header.
    import shlex

    guidance_dir = params.output + '/Output/Guidance'
    trees_dir = params.output + '/Output/Trees'

    for file in os.listdir(guidance_dir):
        if file.split('.')[-1] in ('fasta', 'fas', 'faa'):
            alignment_path = guidance_dir + '/' + file
            tree_path = trees_dir + '/' + file.split('.')[0] + '.FastTree.tre'
            # Quote both paths: unquoted, a space or shell metacharacter in the
            # output folder or file name would split the argument and send the
            # redirect to the wrong file.
            os.system('FastTree ' + shlex.quote(alignment_path) + ' > ' + shlex.quote(tree_path))
#Utility function to run IQ-Tree in between iterations (if this is the chosen tree-building method).
#For every file in <params.output>/Output/Guidance with the extension fasta, fas or faa, this calls
#iqtree2 (-m LG+G -T 10) with its outputs prefixed into a per-file folder under Output/Intermediate/IQTree,
#and then copies the resulting .treefile, if one was written, into Output/Trees with the suffix .IQTree.tree.
#Only params.output is read from params.
#NOTE(review): indentation is not visible in this view, so it is unclear whether the final 'rm -r' runs
#once per file or once after the loop -- confirm against the original file before restructuring.
def cl_iqtree(params):
for file in os.listdir(params.output + '/Output/Guidance'):
if file.split('.')[-1] in ('fasta', 'fas', 'faa'):
#Create the shared IQTree intermediate folder the first time it is needed
if not os.path.isdir(params.output + '/Output/Intermediate/IQTree'):
os.mkdir(params.output + '/Output/Intermediate/IQTree')
#Per-file output folder, named after the file name up to its first '.' with any '_preguidance' suffix dropped
tax_iqtree_outdir = params.output + '/Output/Intermediate/IQTree/' + file.split('.')[0].split('_preguidance')[0]
#os.mkdir raises if this folder already exists
os.mkdir(tax_iqtree_outdir)
os.system('iqtree2 -s ' + params.output + '/Output/Guidance/' + file + ' -m LG+G -T 10 --prefix ' + tax_iqtree_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '.IQTree')
#Copy over the final output (skipped when iqtree2 did not write a .treefile)
if os.path.isfile(tax_iqtree_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '.IQTree.treefile'):
os.system('cp ' + tax_iqtree_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '.IQTree.treefile ' + params.output + '/Output/Trees/' + file.split('.')[0].split('_preguidance')[0] + '.IQTree.tree')
#Empty the IQTree intermediate folder
os.system('rm -r ' + params.output + '/Output/Intermediate/IQTree/*')
#Utility function to run IQ-Tree with the --fast option in between iterations (if this is the chosen tree-building method).
#For every file in <params.output>/Output/Guidance with the extension fasta, fas or faa, this calls
#iqtree2 (-m LG+G -T 10 --fast) with its outputs prefixed into a per-file folder under Output/Intermediate/IQTree,
#and then copies the resulting .treefile, if one was written, into Output/Trees with the suffix .IQTree.tree.
#Only params.output is read from params. Apart from the --fast flag the command matches the one in cl_iqtree.
#NOTE(review): indentation is not visible in this view, so it is unclear whether the final 'rm -r' runs
#once per file or once after the loop -- confirm against the original file before restructuring.
def cl_iqtree_fast(params):
for file in os.listdir(params.output + '/Output/Guidance'):
if file.split('.')[-1] in ('fasta', 'fas', 'faa'):
#Create the shared IQTree intermediate folder the first time it is needed
if not os.path.isdir(params.output + '/Output/Intermediate/IQTree'):
os.mkdir(params.output + '/Output/Intermediate/IQTree')
#Per-file output folder, named after the file name up to its first '.' with any '_preguidance' suffix dropped
tax_iqtree_outdir = params.output + '/Output/Intermediate/IQTree/' + file.split('.')[0].split('_preguidance')[0]
#os.mkdir raises if this folder already exists
os.mkdir(tax_iqtree_outdir)
os.system('iqtree2 -s ' + params.output + '/Output/Guidance/' + file + ' -m LG+G -T 10 --fast --prefix ' + tax_iqtree_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '.IQTree')
#Copy over the final output (skipped when iqtree2 did not write a .treefile)
if os.path.isfile(tax_iqtree_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '.IQTree.treefile'):
os.system('cp ' + tax_iqtree_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '.IQTree.treefile ' + params.output + '/Output/Trees/' + file.split('.')[0].split('_preguidance')[0] + '.IQTree.tree')
#Empty the IQTree intermediate folder
os.system('rm -r ' + params.output + '/Output/Intermediate/IQTree/*')
#Wrapper script to manage parameters and iteration
def run(params):
@ -470,15 +440,11 @@ def run(params):
if params.cl_tree_method == 'fasttree':
cl_fasttree(params)
elif params.cl_tree_method == 'iqtree':
cl_iqtree(params)
os.system('rm -r ' + params.output + '/Output/Intermediate/IQTree/*')
elif params.cl_tree_method == 'iqtree_fast':
cl_iqtree_fast(params)
os.system('rm -r ' + params.output + '/Output/Intermediate/IQTree/*')
elif params.cl_tree_method == 'raxml':
os.system('rm -r ' + params.output + '/Output/Intermediate/RAxML/*')
else:
if 'iqtree' in params.cl_tree_method:
os.system('rm -r ' + params.output + '/Output/Intermediate/IQTree/*')
elif params.cl_tree_method == 'raxml':
os.system('rm -r ' + params.output + '/Output/Intermediate/RAxML/*')
trees.run(params)

View File

@ -1,12 +1,12 @@
# Last updated Jun 02 2025
# Last updated Apr 2 2024
# Authors: Auden Cote-L'Heureux and Mario Ceron-Romero
# This script runs Guidance in an iterative fashion for both MSA construction
# and more rigorous homology assessment than what is offered in EukPhylo part 1.
# and more rigorous homology assessment than what is offered in PhyloToL 6 part 1.
# Guidance runs until the input number of iterations (--guidance_iters, default = 5)
# has been reached, or until there are no sequences below the sequence score cutoff.
# All sequences below the score cutoff (--seq_cutoff, default = 0.3) are removed at
# each iteration. By default, EukPhylo does not remove residues that fall below the
# each iteration. By default, PhyloToL does not remove residues that fall below the
# given residue cutoff (--res_cutoff) and columns that fall below the given column
# cutoff (--col_cutoff, defaults are 0), though this can be turned on by adjusting
# these parameters. Outputs at this point are found in the “Guidance_NotGapTrimmed”
@ -14,14 +14,9 @@
# that are at least 95% gaps (or --gap_trim_cutoff) generating files in the “Guidance”
# output folder.
# Users should note that there are two versions of Guidance. This script, by default, uses
# the newest version (v2.1). Users who wish to use the older version of Guidance will have
# to make a small change in guidance.py (look for a comment in the script with the phrase
# "UNCOMMENT THE FOLLOWING LINE IF USING v2.0.2"). See the Wiki for more information here.
# This step is either intended to be run starting with --start = unaligned (but not raw)
# inputs, meaning one amino acid alignment per OG. It can also be run directly after the
# preguidance step. The run() function is called in two places: in eukphylo.py generally,
# preguidance step. The run() function is called in two places: in phylotol.py generally,
# and in contamination.py if the contamination loop is using Guidance as the re-alignment
# method.
@ -29,7 +24,7 @@
import os, sys, re
from Bio import SeqIO
#Called in eukphylo.py and contamination.py
#Called in phylotol.py and contamination.py
def run(params):
if params.start == 'raw' or params.start == 'unaligned':
@ -58,25 +53,6 @@ def run(params):
guidance_removed_file = open(params.output + '/Output/GuidanceRemovedSeqs.txt', 'w')
guidance_removed_file.write('Sequence\tScore\n')
too_many_seqs = False
#For each unaligned AA fasta file
for file in [f for f in os.listdir(guidance_input) if f.endswith('.fa') or f.endswith('.faa') or f.endswith('.fasta')]:
nseqs = len([rec for rec in SeqIO.parse(guidance_input + '/' + file, 'fasta')])
if nseqs > 2000:
too_many_seqs = True
#Print if OG has > 2000 seqs
guidance_log = open(params.output + '/Output/GuidanceLog.txt', 'w')
guidance_log.write(file + ' has more than 2000 seqs.\nStopping run')
print(file + 'has more than 2000 seqs')
print('Do you want to run this?')
print('Stopping run.')
break
if too_many_seqs and not params.allow_large_files:
return False
#For each unaligned AA fasta file
for file in [f for f in os.listdir(guidance_input) if f.endswith('.fa') or f.endswith('.faa') or f.endswith('.fasta')]:
tax_guidance_outdir = params.output + '/Output/Intermediate/Guidance/Output/' + file.split('.')[0].split('_preguidance')[0]
@ -101,17 +77,8 @@ def run(params):
else:
mafft_alg = 'auto'
#For Guidance v2.1 (2025 version) on the grid ... COMMENT OUT THE FOLLOWING LINE IF USING v2.0.2
os.system('python ' + params.guidance_path + '/script/guidance_main.py --seqFile ' + guidance_input + '/' + file + ' --msaProgram MAFFT --seqType aa --outDir ' + tax_guidance_outdir + ' --seqCutoff ' + str(params.seq_cutoff) + ' --colCutoff ' + str(params.col_cutoff) + " --outOrder as_input --bootstraps 10 --MSA_Param '\\--" + mafft_alg + " --maxiterate 1000 --thread " + str(params.guidance_threads) + " --bl 62 --anysymbol' > " + params.output + '/Output/Intermediate/Guidance/Output/' + file[:10] + '/log.txt')
#For Guidance v2.0.2 (original version in PhyloToL 6). UNCOMMENT THE FOLLOWING LINE IF USING v2.0.2
#os.system('Scripts/guidance.v2.02/www/Guidance/guidance.pl --seqFile ' + guidance_input + '/' + file + ' --msaProgram MAFFT --seqType aa --outDir ' + tax_guidance_outdir + ' --seqCutoff ' + str(params.seq_cutoff) + ' --colCutoff ' + str(params.col_cutoff) + " --outOrder as_input --bootstraps 10 --MSA_Param '\\--" + mafft_alg + " --maxiterate 1000 --thread " + str(params.guidance_threads) + " --bl 62 --anysymbol' > " + params.output + '/Output/Intermediate/Guidance/Output/' + file[:10] + '/log.txt')
#For UMass Unity users, use the following line and comment out the others:
#os.system('python3 /work/pi_lkatz_smith_edu/Guidance/guidance_Linux/script/guidance_main.py --seqFile ' + guidance_input + '/' + file + ' --msaProgram MAFFT --seqType aa --outDir ' + tax_guidance_outdir + ' --seqCutoff ' + str(params.seq_cutoff) + ' --colCutoff ' + str(params.col_cutoff) + " --outOrder as_input --bootstraps 10 --MSA_Param '\\--" + mafft_alg + " --maxiterate 1000 --thread " + str(params.guidance_threads) + " --bl 62 --anysymbol' > " + params.output + '/Output/Intermediate/Guidance/Output/' + file[:10] + '/log.txt')
#For Smith College Grid users, use the following line and comment out the others:
#os.system('python /gridapps/software/Guidance_mid/2.1b-foss-2023a/bin/script/guidance_main.py --seqFile ' + guidance_input + '/' + file + ' --msaProgram MAFFT --seqType aa --outDir ' + tax_guidance_outdir + ' --seqCutoff ' + str(params.seq_cutoff) + ' --colCutoff ' + str(params.col_cutoff) + " --outOrder as_input --bootstraps 10 --MSA_Param '\\--" + mafft_alg + " --maxiterate 1000 --thread " + str(params.guidance_threads) + " --bl 62 --anysymbol' > " + params.output + '/Output/Intermediate/Guidance/Output/' + file[:10] + '/log.txt')
#Running Guidance (one per OG per iteration)
os.system('Scripts/guidance.v2.02/www/Guidance/guidance.pl --seqFile ' + guidance_input + '/' + file + ' --msaProgram MAFFT --seqType aa --outDir ' + tax_guidance_outdir + ' --seqCutoff ' + str(params.seq_cutoff) + ' --colCutoff ' + str(params.col_cutoff) + " --outOrder as_input --bootstraps 10 --MSA_Param '\\--" + mafft_alg + " --maxiterate 1000 --thread " + str(params.guidance_threads) + " --bl 62 --anysymbol' > " + params.output + '/Output/Intermediate/Guidance/Output/' + file[:10] + '/log.txt')
#Checking for a sequence score file; if not available, Guidance failed.
if os.path.isfile(tax_guidance_outdir + '/MSA.MAFFT.Guidance2_res_pair_seq.scr_with_Names'):
@ -183,10 +150,10 @@ def run(params):
os.system('mafft ' + tax_guidance_outdir + '/postGuidance_preTrimAl_unaligned.fasta > ' + tax_guidance_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '.postGuidance_preTrimAl_aligned.fasta')
#Gap trimming
os.system('Scripts/trimal-trimAl/source/trimal -in ' + tax_guidance_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '.postGuidance_preTrimAl_aligned.fasta -out ' + tax_guidance_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '.70gapTrimmed.fasta -gapthreshold ' + str(params.trimal_cutoff) + ' -fasta')
os.system('Scripts/trimal-trimAl/source/trimal -in ' + tax_guidance_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '.postGuidance_preTrimAl_aligned.fasta -out ' + tax_guidance_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '.95gapTrimmed.fasta -gapthreshold 0.05 -fasta')
#Copying over final alignments (pre and post gap trimming) into output folder.
os.system('cp ' + tax_guidance_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '.70gapTrimmed.fasta ' + params.output + '/Output/Guidance/' + file.split('.')[0].split('_preguidance')[0] + '.70gapTrimmed.fasta')
os.system('cp ' + tax_guidance_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '.95gapTrimmed.fasta ' + params.output + '/Output/Guidance/' + file.split('.')[0].split('_preguidance')[0] + '.95gapTrimmed.fasta')
os.system('cp ' + tax_guidance_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '.postGuidance_preTrimAl_aligned.fasta ' + params.output + '/Output/NotGapTrimmed/' + file.split('.')[0].split('_preguidance')[0] + '.postGuidance_preTrimAl_aligned.fasta')
#Removing intermediate files if not --keep_temp
@ -201,8 +168,6 @@ def run(params):
os.system('mv ' + tax_guidance_outdir + '/' + gdir_file + ' ' + tax_guidance_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '_' + gdir_file)
guidance_removed_file.close()
return True

View File

@ -2,7 +2,7 @@
# Author: Auden Cote-L'Heureux
# This script is what users should call when running any or all components of
# EukPhylo part 2. It briefly determines which parts of the pipeline should be
# PhyloToL 6 part 2. It briefly determines which parts of the pipeline should be
# run (pre-Guidance, Guidance, tree building, contamination loop, and/or
# concatenation) based on the --start and --end parameters, and then runs all
# of these components. Each component is actually run by the run() function in

View File

@ -2,7 +2,7 @@
# Authors: Auden Cote-L'Heureux, Mario Ceron-Romero, Godwin Ani
# This script is only run when --start = unaligned. This typically means that a user
# is inputting ReadyToGo files as output by EukPhylo part 1. The script contains two optional
# is inputting ReadyToGo files as output by PhyloToL 6 part 1. The script contains two optional
# filters. One filter aims to remove sequences outside silent-site GC content ranges set by
# the user, and relies on the output of the utility script GC_Identifier_v1.0.py. See the manual
# for details on using this filter. Sequence filtration by composition is set using the --og_prefix
@ -27,7 +27,7 @@
import os, sys, re
from Bio import SeqIO
#This function is called ONLY in eukphylo.py.
#This function is called ONLY in phylotol.py.
def run(params):
#Reading in the list of gene families to use (--gf_list)

View File

@ -2,7 +2,7 @@
# Authors: Auden Cote-L'Heureux and Mario Ceron-Romero
# This is a relatively simple script that only runs trees, using either IQ-Tree
# or RAxML. The run() function is called in two places: both in eukphylo.py, and
# or RAxML. The run() function is called in two places: both in phylotol.py, and
# in contamination.py, where it is used to re-build trees. When starting at this
# step, users must input one aligned amino acid fasta file per OG. Otherwise, if
# starting at the pre-Guidance or Guidance steps, this step will be run if --end = trees.
@ -12,7 +12,7 @@ import os, sys, re
from Bio import SeqIO
from color import color
#Called in eukphylo.py and contamination.py
#Called in phylotol.py and contamination.py
def run(params):
#Checking whether aligned files were input, or it should just start with the Guidance outputs from the previous step.
@ -34,7 +34,7 @@ def run(params):
for file in [f for f in os.listdir(guidance_path) if f.endswith('.fa') or f.endswith('.faa') or f.endswith('.fasta') or f.endswith('.fas') or f.endswith('.aln')]:
#Run IQ-Tree
if params.tree_method == 'iqtree' or params.tree_method == 'iqtree_fast':
if params.tree_method == 'iqtree':
#Make intermediate folders
if not os.path.isdir(params.output + '/Output/Intermediate/IQTree'):
os.mkdir(params.output + '/Output/Intermediate/IQTree')
@ -42,21 +42,10 @@ def run(params):
tax_iqtree_outdir = params.output + '/Output/Intermediate/IQTree/' + file.split('.')[0].split('_preguidance')[0]
os.mkdir(tax_iqtree_outdir)
#Examples on how to run IQ-Tree
#Comment out the lines that do not fit your system
#Run IQ-Tree on the Smith College grid
if params.tree_method == 'iqtree':
os.system('iqtree2 -s ' + guidance_path + '/' + file + ' -m LG+G -T 10 --prefix ' + tax_iqtree_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '.IQTree')
elif params.tree_method == 'iqtree_fast':
os.system('iqtree2 -s ' + guidance_path + '/' + file + ' -m LG+G -T 10 --fast --prefix ' + tax_iqtree_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '.IQTree')
#Run IQ-Tree in HPC Unity Cluster
#if params.tree_method == 'iqtree':
#os.system('iqtree2 -s ' + guidance_path + '/' + file + ' -m LG+G --prefix ' + tax_iqtree_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '.IQTree')
#elif params.tree_method == 'iqtree_fast':
#os.system('iqtree2 -s ' + guidance_path + '/' + file + ' -m LG+G --fast --prefix ' + tax_iqtree_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '.IQTree')
# Copy over the final output
#Run IQ-Tree
os.system('iqtree2 -s ' + guidance_path + '/' + file + ' -m LG+G -T 10 --prefix ' + tax_iqtree_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '.IQTree')
#Copy over the final output
if os.path.isfile(tax_iqtree_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '.IQTree.treefile'):
os.system('cp ' + tax_iqtree_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '.IQTree.treefile ' + params.output + '/Output/Trees/' + file.split('.')[0].split('_preguidance')[0] + '.IQTree.tree')
#color(params.output + '/Output/Trees/' + file.split('.')[0].split('_preguidance')[0] + '.IQTree.tree')

View File

@ -2,33 +2,33 @@
# Author: Auden Cote-L'Heureux
# This script is a general utility script that does two main things. First, it has
# a function to read in all EukPhylo parameters, which is called in eukphylo.py.
# It also has a function that checks for and cleans up existing EukPhylo part 2
# a function to read in all PhyloToL parameters, which is called in phylotol.py.
# It also has a function that checks for and cleans up existing PhyloToL part 2
# output files from previous runs, and creates a new, empty Output folder structure
# for the new run. This function is also called only in eukphylo.py.
# for the new run. This function is also called only in phylotol.py.
#Dependencies
import os, sys, re
import argparse
import shutil
#Reading in all parameters. This function is only called once, in eukphylo.py
#Reading in all parameters. This function is only called once, in phylotol.py
def get_params():
parser = argparse.ArgumentParser(
prog = 'EukPhylo v1.0',
description = "Updated January, 2022 by Auden Cote-L'Heureux. Link to GitHub: https://github.com/Katzlab/EukPhylo"
prog = 'PhyloToL v6.0',
description = "Updated January, 2022 by Auden Cote-L'Heureux. Link to GitHub: https://github.com/AudenCote/PhyloToL_v6.0"
)
common = parser.add_argument_group('Commonly adjusted parameters')
common.add_argument('--start', default = 'raw', choices = {'raw', 'unaligned', 'aligned', 'trees'}, help = 'Stage at which to start running EukPhylo.')
common.add_argument('--end', default = 'trees', choices = {'unaligned', 'aligned', 'trees'}, help = 'Stage until which to run EukPhylo. Options are "unaligned" (which will run up to but not including guidance), "aligned" (which will run up to but not including RAxML), and "trees" which will run through RAxML')
common.add_argument('--start', default = 'raw', choices = {'raw', 'unaligned', 'aligned', 'trees'}, help = 'Stage at which to start running PhyloToL.')
common.add_argument('--end', default = 'trees', choices = {'unaligned', 'aligned', 'trees'}, help = 'Stage until which to run PhyloToL. Options are "unaligned" (which will run up to but not including guidance), "aligned" (which will run up to but not including RAxML), and "trees" which will run through RAxML')
common.add_argument('--gf_list', default = None, help = 'Path to the file with the GFs of interest. Only required if starting from the raw dataset.')
common.add_argument('--taxon_list', default = None, help = 'Path to the file with the taxa (10-digit codes) to include in the output.')
common.add_argument('--data', help = 'Path to the input dataset. The format of this varies depending on your --start parameter. If you are running the contamination loop starting with trees, this folder must include both trees AND a fasta file for each tree (with identical file names other than the extension) that includes an amino-acid sequence for each tip of the tree (with the sequence names matching exactly the tip names).')
common.add_argument('--output', default = './', help = 'Directory where the output folder should be created. If not given, the folder will be created in the parent directory of the folder containing the scripts.')
common.add_argument('--force', action = 'store_true', help = 'Overwrite all existing files in the "Output" folder.')
common.add_argument('--tree_method', default = 'iqtree_fast', choices = {'iqtree', 'iqtree_fast', 'raxml', 'all'}, help = 'Program to use for tree-building')
common.add_argument('--tree_method', default = 'iqtree', choices = {'iqtree', 'raxml', 'all'}, help = 'Program to use for tree-building')
common.add_argument('--blacklist', type = str, help = 'A text file with a list of sequence names not to consider')
common.add_argument('--og_identifier', default = 'OG', choices = {'OG','OG6','OGA','OGG'}, help = 'Program to use for selecting seq by GC width')
common.add_argument('--sim_taxa', default = None, help = 'Path to the file with the taxa (10-digit codes) to apply the similarity filter on.')
@ -39,18 +39,15 @@ def get_params():
core.add_argument('--similarity_filter', action = 'store_true', help = 'Run the similarity filter in pre-Guidance')
core.add_argument('--sim_cutoff', default = 1, type = float, help = 'Sequences from the same taxa that are assigned to the same OG are removed if they are more similar than this cutoff')
core.add_argument('--guidance_iters', default = 5, type = int, help = 'Number of Guidance iterations for sequence removal')
core.add_argument('--guidance_path', help = 'Path to the downloaded Guidance folder (probably called guidance_Linux or guidance_MacOS-arm64, this folder should contain a folder called "script" which contains the guidance_main.py script). You can download this folder from this link: https://github.com/XseniaP/Guidance_mid/tree/main')
core.add_argument('--seq_cutoff', default = 0.3, type = float, help = 'During guidance, taxa are removed if their score is below this cutoff')
core.add_argument('--col_cutoff', default = 0.0, type = float, help = 'During guidance, columns are removed if their score is below this cutoff')
core.add_argument('--res_cutoff', default = 0.0, type = float, help = 'During guidance, residues are removed if their score is below this cutoff')
core.add_argument('--guidance_threads', default = 20, type = int, help = 'Number of threads to allocate to Guidance')
core.add_argument('--trimal_cutoff', default = 0.3, type = float, help = 'Gap masking threshold for TrimAl. The maximum proportion of sequences without gaps for a site to be removed (i.e. to remove sites with 70% or more gaps, set this parameter to 0.3).')
core.add_argument('--allow_large_files', action = 'store_true', help = 'Allow files with more than 2,000 sequences to run through Guidance.')
CL = parser.add_argument_group('Contamination loop parameters')
CL.add_argument('--contamination_loop', default = None, choices = {'seq', 'clade'}, help = 'Remove sequences by looking at the sisters of each sequence in a rules file or by picking the best clades')
CL.add_argument('--contamination_loop', default = None, choices = {'seq', 'clade', 'both'}, help = 'Remove sequences by looking at the sisters of each sequence in a rules file or by picking the best clades')
CL.add_argument('--nloops', default = 10, type = int, help = 'The maximum number of contamination-removal loops')
CL.add_argument('--cl_tree_method', default = 'iqtree_fast', choices = {'iqtree', 'raxml', 'fasttree', 'iqtree_fast'}, help = 'Tree-building method to use in each contamination loop iteration.')
CL.add_argument('--cl_tree_method', default = 'fasttree', choices = {'iqtree', 'raxml', 'fasttree', 'iqtree_fast'}, help = 'Tree-building method to use in each contamination loop iteration.')
CL.add_argument('--cl_alignment_method', default = 'mafft_only', choices = {'mafft_only', 'guidance'}, help = 'Alignment method to use in each contamination loop iteration.')
CL.add_argument('--cl_exclude_taxa', type = str, default = None, help = 'Path to a file containing taxon names present in input MSA/tree files but which should be removed in the first iteration of the contamination loop.')
@ -68,7 +65,7 @@ def get_params():
other = parser.add_argument_group('Other arguments')
other.add_argument('--concatenate', action = 'store_true', help = 'Remove paralogs and generate an alignment for concatenation')
other.add_argument('--concat_target_taxa', type = str, default = None, help = 'The taxonomic group (sequence prefix), groups, or a file containing a list of groups (multiple prefixes) for which to select sequences to construct a concatenated alignment')
other.add_argument('--concat_target_taxa', nargs = '+', default = None, help = 'The taxonomic group (sequence prefix), groups, or a file containing a list of groups (multiple prefixes) for which to select sequences to construct a concatenated alignment')
other.add_argument('--tree_font_size', default = 12, help = "Change this if you're not quite happy with the font size in the output trees. If you want smaller font in your trees, you can lower this value; and if you want larger font in your trees, you can raise this value. Some common values are 8, 10, and 12. Size 16 font is pretty big, and size 4 font is probably too small for most purposes. Iconoclasts use size 9, 11, or 13 font.")
other.add_argument('--keep_temp', action = 'store_true', help = "Use this to keep ALL Guidance intermediate files")
other.add_argument('--keep_iter', '-z', action = 'store_true', help = 'Keep all Guidance iterations (beware this will be very large)')
@ -77,7 +74,7 @@ def get_params():
return parser.parse_args()
#Cleaning up existing output and creating a new output folder structure. This function is only called once, in eukphylo.py
#Cleaning up existing output and creating a new output folder structure. This function is only called once, in phylotol.py
def clean_up(params):
#If an output folder doesn't exist, create one.
@ -125,7 +122,7 @@ def clean_up(params):
if params.end == 'trees' or params.contamination_loop != None:
os.mkdir(params.output + '/Output/Trees')
os.mkdir(params.output + '/Output/ColoredTrees')
if params.start == 'trees' and params.contamination_loop == None:
if params.start == 'trees':
copy_input('Trees')

View File

@ -1,70 +0,0 @@
#!/bin/bash
## Last updated Jan 2025 by Auden Cote-L'Heureux; modified Sept. 2025 by Adri K. Grow
## This shell script is used for running EukPhylo part 2, and includes a general setup for use on an HPC that uses
## the Slurm workload manager. It also includes several example run commands, which correspond to examples explained in more detail in the
## EukPhylo Wiki (https://github.com/Katzlab/EukPhylo/wiki/EukPhylo-Part-2:-MSAs,-trees,-and-contamination-loop).
## These run commands can also be copied and run in the terminal / command line separately, without a shell script.
## For the contamination loop, we recommend iterating the sister/subsisters loop multiple times, as branches will shift. In contrast, we recommend running clade grabbing only once.
## SLURM-SPECIFIC SETUP BELOW
############### FOR UMASS UNITY HPC ############### (DELETE section if not applicable):
#SBATCH --job-name=EukPhylo
#SBATCH -n 10 # Number of Cores per Task
#SBATCH --mem=125G # Requested Memory
#SBATCH -p cpu # Partition
#SBATCH -q long # long QOS
#SBATCH -t 334:00:00 # Job time limit
#SBATCH --output=Run_EP.%A_%a.out # Stdout (%j expands to jobId)
#SBATCH --mail-type=ALL
#SBATCH --mail-user=email@email.edu
#SBATCH --array=1-600%50
module purge #Cleans up any loaded modules
module load conda/latest
module load mafft/7.505
module load diamond/2.1.7
conda activate /work/pi_lkatz_smith_edu/Conda_PTL6p2/envs/PTL/
############### FOR SMITH GRID HPC ############### (DELETE section if not applicable):
#SBATCH --job-name=EukPhylo # Job name
#SBATCH --output=Run_EukPhylo.%j.out # Stdout (%j expands to jobId)
#SBATCH --nodes=1
#SBATCH --ntasks=10 ## On the Smith College HPC (Grid), we have to change this to be double the number of task/batches you want to launch
#SBATCH --mail-type=ALL
#SBATCH --mail-user=email@email.edu ##add your email address for job updates
#Load required modules
module purge # Cleans up any loaded modules
module use /gridapps/modules/all # make sure module locations is loaded
module load slurm
module load ETE/3.1.3-foss-2024a
module load Biopython/1.79-gfbf-2023a
module load DIAMOND/2.1.8-GCC-12.3.0
module load MAFFT/7.526-GCC-13.3.0-with-extensions
module load RAxML-NG/1.2.2-GCC-13.2.0
module load IQ-TREE/2.3.6-gompi-2023a
module load tqdm/4.66.1-GCCcore-12.3.0
module load Python/3.12.3-GCCcore-13.3.0
module load Guidance_mid/2.1b-foss-2023a #Smith College HPC specific
export PATH=$PATH:/beegfs/fast/katzlab/grid_phylotol_setup/programs/standard-RAxML-master #Smith College HPC specific #export PATH=$PATH:/Path/To/Executable/Files
## PROVIDE YOUR PARENT PATH
parent='/Your/Home/Folder/' # The folder where you are running EukPhylo (this should contain the Scripts and input data folders)
## EXAMPLE RUN COMMANDS BELOW
# A simple run of part 2, starting from ReadyToGo files and running through tree building
srun --exact -n 1 -D ${parent} python3 ${parent}Scripts/eukphylo.py --start raw --end trees --gf_list ${parent}listofOGs.txt --taxon_list ${parent}taxon_list.txt --data ${parent}Input_folder --output ${parent}Output_folder > Output.out
# Another example starting from ReadyToGo files and running through tree building, with the commonly used similarity filter cutoff, blacklist, and "sim_taxa_list" arguments (see Wiki)
srun --exact -n 1 -D ${parent} python3 ${parent}Scripts/eukphylo.py --start raw --end trees --gf_list ${parent}listofOGs.txt --taxon_list ${parent}taxon_list.txt --data ${parent}Input_folder --output ${parent}Output_folder --similarity_filter --blacklist ${parent}Blacklist.txt --sim_cutoff 0.99 --sim_taxa sim_taxa_list.txt > Output.out
# An example of running just the concatenation step of part 2, starting from trees
srun --exact -n 1 -D ${parent} python3 ${parent}Scripts/eukphylo.py --start trees --concatenate --concat_target_taxa Sr_rh --data ${parent}Output > log.out
# See the Wiki (https://github.com/Katzlab/EukPhylo/wiki/EukPhylo-Part-2:-MSAs,-trees,-and-contamination-loop) for more details!

33
PTL2/run_phylotol.sh Normal file
View File

@ -0,0 +1,33 @@
#!/bin/bash
#SBATCH --job-name=meta033 ##change this to a shortened name of your project
#SBATCH --output=Run_phylotol.%j.out # Stdout (%j expands to jobId)
#SBATCH --nodes=1
#SBATCH --ntasks=10 ##change this to be double the number of task/batches you want to launch
#SBATCH --mail-type=ALL
#SBATCH --mail-user=youremail@smith.edu ##add your email address

# Start from a clean module environment, then load everything the pipeline calls.
module purge #Cleans up any loaded modules
module use /gridapps/modules/all #make sure module locations is loaded
module load slurm
module load ETE
module load Biopython/1.79-foss-2021b
module load DIAMOND/2.0.13-GCC-11.2.0
module load MAFFT
module load BioPerl
module load RAxML
module load IQ-TREE/2.1.2-gompi-2021b
module load tqdm/4.64.1-GCCcore-12.2.0
module load Python/3.9.6-GCCcore-11.2.0
export PATH=$PATH:/beegfs/fast/katzlab/grid_phylotol_setup/programs/standard-RAxML-master

parent='/beegfs/fast/katzlab/Adri/p2PTL/033_meta/B1_meta_033/' #add your path starting with the name of your folder, should begin with /beegfs/fast/katzlab/

# One srun is launched in the background per batch label; each batch reads
# <label>_listofOGs.txt and writes to Output_folder_<label> / Output_folder_<label>.out.
# If you are running batches, add or remove labels in the list below.
for batch in B1 B2 B3 B4 B5; do
    srun --exact -n 1 -D ${parent} python3 ${parent}Scripts/phylotol.py \
        --similarity_filter --sim_cutoff 0.95 --sim_taxa sim_taxa.txt \
        --blacklist GuidanceRemovedSeqs_allConservedRuns_ML_nov_dec_2023.txt \
        --start raw --end trees --gf_list ${batch}_listofOGs.txt --taxon_list taxon_list.txt \
        --data OutgroupR2Gs --output ${parent}Output_folder_${batch} > Output_folder_${batch}.out &
done

# Block until every background srun has finished, otherwise the job would end early.
wait

View File

@ -1,4 +1,4 @@
<img src="https://github.com/Katzlab/PhyloToL-6/blob/main/Other/Katzlab.png">
**EukPhylo version 1.0** is an updated version of the PhyloToL pipeline from the Katz Lab (https://www.science.smith.edu/katz-lab/) at Smith College. EukPhylo is a phylogenomic toolkit for processing transcriptomic and genomic data that includes novel phylogeny-informed contamination removal techniques. See our Wiki (https://github.com/Katzlab/EukPhylo/wiki) for more information on installation and usage!
**PhyloToL version 6** is the latest version of the PhyloToL pipeline from the [Katz Lab](https://www.science.smith.edu/katz-lab/) at Smith College. PhyloToL is a phylogenomic toolkit for processing transcriptomic and genomic data that includes novel phylogeny-informed contamination removal techniques. See our [Wiki](https://github.com/Katzlab/PhyloToL-6/wiki) for more information on installation and usage!

View File

@ -1,86 +0,0 @@
'''
Author & Date: Adri K. Grow + ChatGPT, Nov 11th 2024
- Updated 02/13/25 to accept either transcriptome and genome assembled data in command line
Motivation: assess and rename assembled transcript or genome files for use in EukPhylo Part 1
Intention: warn if any 'transcripts.fasta' or 'contigs.fasta' files are missing or empty for an LKH, otherwise rename and copy them with their assigned 10-digit code by LKH
Input:
- a base directory containing subdirectories for each LKH, named either 'WTA_LKH<xxxx>' or 'WGA_LKH<xxxx>', each containing a 'transcripts.fasta' or 'contigs.fasta' file
- a mapping .txt file with LKH#s tab-separated with corresponding 10-digit codes
Output:
- a folder named 'renamed_transcripts|contigs' with assembled files now named by 10-digit codes; e.g. "Sr_rh_Ro04_assembledTranscripts.fasta"
Dependencies: python3
Usage:
- for transcriptomes: python3 ProcessAndRenameAssembledData.py <assembled transcriptomes directory> <mapping_file.txt> transcriptomes
- for genomes: python3 ProcessAndRenameAssembledData.py <assembled genomes directory> <mapping_file.txt> genomes
'''
import os
import shutil
import sys
def read_lkh_mapping(mapping_file):
    """Read the LKH-number to 10-digit-code mapping from a tab-separated file.

    Every non-blank line must hold exactly two tab-separated fields: the LKH
    identifier followed by its 10-digit code. Blank lines (for example a
    trailing empty line at the end of the file) are ignored.

    Args:
        mapping_file: Path to the mapping text file.

    Returns:
        A dict mapping each LKH identifier (str) to its 10-digit code (str).

    Raises:
        ValueError: If a non-blank line does not contain exactly two
            tab-separated fields.
    """
    mapping = {}
    with open(mapping_file, 'r') as handle:
        for line_number, raw_line in enumerate(handle, start=1):
            stripped = raw_line.strip()
            if not stripped:
                # Blank line: nothing to map, and unpacking it would fail.
                continue
            fields = stripped.split('\t')
            if len(fields) != 2:
                raise ValueError(
                    f"Line {line_number} of '{mapping_file}' should contain an LKH number "
                    f"and a 10-digit code separated by a single tab: {stripped!r}"
                )
            lkh_number, code = fields
            mapping[lkh_number] = code
    return mapping
def process_directory(base_dir, mapping, output_dir, data_type):
    """Copy each sample's assembly from base_dir into output_dir under its 10-digit code name.

    Only sub-folders of base_dir whose name starts with the expected prefix
    ('WTA_LKH' for transcriptomes, 'WGA_LKH' otherwise) are considered. A
    sample is skipped, with a message printed to stdout, when its assembly
    file is missing, is empty, or its LKH number has no entry in mapping.

    Args:
        base_dir: Directory holding one sub-folder per sample.
        mapping: dict of LKH identifier -> 10-digit code.
        output_dir: Destination directory; created if it does not exist.
        data_type: "transcriptomes" selects the transcript naming scheme;
            any other value selects the contig (genome) naming scheme.
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Naming conventions depend on whether transcriptomes or genomes are processed.
    if data_type == "transcriptomes":
        folder_prefix = "WTA_LKH"
        fasta_filename = "transcripts.fasta"
        output_suffix = "_assembledTranscripts.fasta"
    else:
        folder_prefix = "WGA_LKH"
        fasta_filename = "contigs.fasta"
        output_suffix = "_assembledContigs.fasta"

    for entry in os.listdir(base_dir):
        sample_dir = os.path.join(base_dir, entry)
        if not os.path.isdir(sample_dir) or not entry.startswith(folder_prefix):
            continue

        # Folder names look like '<prefix>_LKH<xxxx>'; the second '_' field is the LKH number.
        lkh_number = entry.split('_')[1]
        source_fasta = os.path.join(sample_dir, fasta_filename)

        if not os.path.isfile(source_fasta):
            print(f" WARNING: file '{fasta_filename}' is missing in folder {entry}.")
            continue
        if os.path.getsize(source_fasta) == 0:
            print(f" WARNING: file '{fasta_filename}' is empty in folder {entry}.")
            continue
        if lkh_number not in mapping:
            print(f"Notification: No 10-digit code found for LKH number {lkh_number} in folder {entry}.")
            continue

        destination = os.path.join(output_dir, f"{mapping[lkh_number]}{output_suffix}")
        shutil.copy(source_fasta, destination)
def main():
    """Command-line entry point: validate the arguments, then rename and copy the assemblies.

    Expects three arguments: the base directory, the mapping file, and the
    data type ("transcriptomes" or "genomes"). Prints a message and exits
    with status 1 when the arguments are malformed or a path does not exist.
    Renamed files are written to 'renamed_transcripts' or 'renamed_contigs'
    inside the current working directory.
    """
    argv = sys.argv
    if len(argv) != 4 or argv[3] not in ["transcriptomes", "genomes"]:
        print("Usage: python script.py <base_dir> <mapping_file> <transcriptomes|genomes>")
        sys.exit(1)

    base_dir, mapping_file, data_type = argv[1], argv[2], argv[3]

    if not os.path.isdir(base_dir):
        print(f"Error: The directory '{base_dir}' does not exist.")
        sys.exit(1)
    if not os.path.isfile(mapping_file):
        print(f"Error: The file '{mapping_file}' does not exist.")
        sys.exit(1)

    # The output folder name mirrors the kind of assembly being renamed.
    if data_type == "transcriptomes":
        output_folder = "renamed_transcripts"
    else:
        output_folder = "renamed_contigs"
    output_dir = os.path.join(os.getcwd(), output_folder)

    mapping = read_lkh_mapping(mapping_file)
    process_directory(base_dir, mapping, output_dir, data_type)


if __name__ == "__main__":
    main()

View File

@ -1,101 +0,0 @@
#!/usr/bin/env python3
#Author, date: Giulia Magri Ribeiro and Adri K. Grow updated from Xyrus Maurer-Alcala and Ying Yan; June 13 2025
#Motivation: Trim adaptors from reads and quality trimming before Assembly
#Intent: clean up reads
#Dependencies: biopython and bbmap folder
#Inputs: parameters.txt, fastq.gz forward and reverse reads
#Outputs:trimmed reads in ToAssemble folder
#Example: python3 Trim_Reads.py parameter.txt
#Katzlab parameters are 24 for quality trimming and 75 for minimum length as of June 2025
from Bio import SeqIO
import sys,os
import time
#------------------------------ Checks the Input Arguments ------------------------------#
# Exactly one command-line argument (the parameter file) is expected. With no
# argument a short description is printed; with too many, a usage reminder and
# an example parameter file are printed. Either way the script stops.
if len(sys.argv) == 1:
    print('\n\nThis script will remove Adapters, do quality trimming and length trimming on given score and assembly from your raw reads')
    print('\n\nChecking the overall quality and reads size on FastQC is recommended\n\n')
    print('Example Usage:\n\n\t' + 'katzlab$ python3 Trim_Reads.py parameter.txt\n\n')
    print('\t\tQuestions/Comments? Email Giulia (author) at gribeiro@smith.edu\n\n')
    sys.exit()
elif len(sys.argv) != 2:
    print('\n\nDouble check that you have added all the necessary command-line inputs! (see usage below for an example)\n\n')
    print('Example Usage:\n\n\t' + 'katzlab$ python3 Trim_Reads.py parameter.txt\n\n')
    print('Please also check that you have a parameter.txt (tab separated values) file which should contain your current filename, new filename, score of quality trimming, and minimum length (see an example below)\n\n')
    print('parameter.txt example:\n\n\t' + 'XKATZ_20161110_K00134_IL100076423_S41_L005\tLKH001_Spirostomum\t24\t100\n\tXKATZ_20161110_K00134_IL100076416_S17_L005\tLKH002_Loxodes\t28\t100\n')
    sys.exit()
else:
    parameter_file = sys.argv[1]

mailaddress = 'your_email@xxx.edu' # default email

# Trimmed reads are written to ToAssemble/. Create it through the standard
# library rather than by shelling out to `mkdir`.
os.makedirs('ToAssemble', exist_ok=True)
### takes your raw read data and renames the files with your assigned new names and alters the end to either FwdPE or RevPE
def _strip_read_tag(filename, tags):
    """Return the part of filename before the first listed tag it contains, or None if no tag matches."""
    for tag in tags:
        if tag in filename:
            return filename.split(tag)[0]
    return None


def rename(code):
    """Rename raw paired-end read files in the current directory to their assigned names.

    Every '*.fastq.gz' file whose name contains a forward tag ('_FwdPE',
    '_R1', '_FPE') or a reverse tag ('_RevPE', '_R2', '_RPE') is looked up in
    code using the part of its name before the tag. Matching forward reads
    become '<new>_FwdPE.fastq.gz', and a folder named '<new>' is created for
    that sample; matching reverse reads become '<new>_RevPE.fastq.gz'.

    Files are moved with os.replace / os.makedirs rather than through shell
    commands, so names containing spaces or shell metacharacters are handled
    correctly.

    Args:
        code: dict mapping the current file-name prefix to the new sample name.
    """
    forward_tags = ['_FwdPE', '_R1', '_FPE']
    reverse_tags = ['_RevPE', '_R2', '_RPE']

    for filename in os.listdir(os.curdir):
        if not filename.endswith('.fastq.gz'):
            continue

        # Forward read patterns
        cur_name = _strip_read_tag(filename, forward_tags)
        if cur_name is not None and cur_name in code:
            new_name = code[cur_name]
            print(cur_name, new_name)
            os.replace(filename, f'{new_name}_FwdPE.fastq.gz')
            os.makedirs(new_name, exist_ok=True)
            # The file has been moved; it must not also be treated as a reverse read.
            continue

        # Reverse read patterns
        cur_name = _strip_read_tag(filename, reverse_tags)
        if cur_name is not None and cur_name in code:
            new_name = code[cur_name]
            print(cur_name, new_name)
            os.replace(filename, f'{new_name}_RevPE.fastq.gz')
### Uses the adapters.fa file in the bbtools resources folder (and BBDuK) to remove adapter sequences -- update if necessary
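### Uses BBDuK to quality-trim and length-filter reads with the per-sample quality score and minimum length given in the parameter file (Katzlab values as of June 2025: q24 and minlen 75) -- adjust if needed ... flags will be added eventually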
def QualityTrim(qtrim, minlen):
    """Run BBDuk adapter/quality trimming on every forward-read file in the current directory.

    For each file whose name contains 'FwdPE', the sample name (the text
    before '_FwdPE') is used to look up its quality score and minimum length.
    bbduk.sh is then run on the forward file and its 'Rev' counterpart,
    writing trimmed reads to ToAssemble/ and the stats and log files to a
    folder named after the sample.

    Files with no entry in qtrim/minlen are skipped with a warning instead of
    aborting the whole run with a KeyError.

    Args:
        qtrim: dict of sample name -> quality-trimming score (as a string).
        minlen: dict of sample name -> minimum read length (as a string).
    """
    for filename in os.listdir(os.curdir):
        if 'FwdPE' not in filename:
            continue

        new_name = filename.split('_FwdPE')[0]
        if new_name not in qtrim or new_name not in minlen:
            print(f"WARNING: no trimming parameters found for '{new_name}' ({filename}); skipping.")
            continue

        qscore = qtrim[new_name]
        lscore = minlen[new_name]
        qtrimcmd = '_q'+qscore+'_minlen'+lscore

        # The stats file and the redirected log both live in a per-sample folder;
        # the shell redirect fails (and bbduk never starts) if that folder is missing.
        sample_dir = filename.split('_Fwd')[0]
        os.makedirs(sample_dir, exist_ok=True)
        log_file = sample_dir + '/' + sample_dir + qtrimcmd + '_bbduk.log'

        os.system('./bbmap/bbduk.sh -Xmx20g in1=./' + filename + ' in2=./' + filename.replace('Fwd','Rev') + ' out1=ToAssemble/'+filename.replace('FwdPE','FPE'+qtrimcmd) + ' out2=ToAssemble/' + filename.split('Fwd')[0]+'RPE'+qtrimcmd+'.fastq.gz qtrim=rl trimq='+qscore+' minlen='+lscore+' mink=11 k=23 hdist=1 ktrim=r ref=bbmap/resources/adapters.fa stats=' + sample_dir +'/'+ sample_dir + qtrimcmd + '_Stats.txt overwrite=true'+ ' > ' + log_file + ' 2>&1')
### Calls on rnaSPAdes to do the transcriptome assembly on the quality trimmed files.
#def rnaSPAdesAssembly():
# for filename in os.listdir(os.curdir+'/ToAssemble'):
# if 'LKH' in filename:
# if 'FPE_q' in filename:
# os.system('python rnaSPAdes-0.1.1/bin/rnaspades.py -m 26 -k 21,33,55,77 --min-complete-transcript 300 -1 ToAssemble/' + filename + ' -2 ToAssemble/' + filename.replace('FPE','RPE')+' -o ' + filename.split('_FPE')[0] + '/; echo "Finished assembling ' + filename.split('_FPE')[0] + '" | mail -s "Finished Transcriptome Assembly ' + (time.strftime("%d/%m/%y")) + '" ' + mailaddress) > out.txt
def main():
    """Parse the parameter file, then rename and quality-trim the reads.

    Each non-blank line of the parameter file must hold at least four
    tab-separated fields: current file-name prefix, new sample name,
    quality-trimming score, and minimum read length. Blank lines are ignored
    and Windows line endings are stripped.

    Raises:
        ValueError: If a non-blank line has fewer than four tab-separated fields.
    """
    code = {}
    qtrim = {}
    minlen = {}
    # `with` guarantees the parameter file is closed even if parsing fails.
    with open(parameter_file, 'r') as handle:
        for line_number, raw_line in enumerate(handle, start=1):
            line = raw_line.rstrip('\r\n')
            if not line.strip():
                continue
            fields = line.split('\t')
            if len(fields) < 4:
                raise ValueError(
                    f"Line {line_number} of '{parameter_file}' needs four tab-separated values "
                    f"(current name, new name, quality score, minimum length): {line!r}"
                )
            old_name, new_name, quality, min_length = fields[:4]
            code[old_name] = new_name
            qtrim[new_name] = quality
            minlen[new_name] = min_length
    rename(code)
    QualityTrim(qtrim, minlen)
# rnaSPAdesAssembly()
main()

View File

@ -1,18 +0,0 @@
#!/bin/bash
#
#SBATCH --job-name=Gigi_spades
#SBATCH --output=rnaSPAdes_run.%j.out # Stdout (%j expands to jobId)
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --ntasks-per-node=64
#SBATCH --mem=180G
#SBATCH --mail-type=ALL
#SBATCH --mail-user=xxx@xxx.edu

module purge #Cleans up any loaded modules
module load SPAdes

# Assemble each sample in turn from its trimmed forward (FPE) and reverse (RPE)
# reads in ToAssemble/, writing each assembly to Assembled/<sample>.
# Edit the list of sample identifiers below to match your own trimmed read files.
# NOTE(review): "-m 500" is larger than the 180G requested with --mem above -- confirm
# the two are meant to differ.
for sample in SRR26595464 SRR26595465 SRR26595468; do
    rnaspades.py -m 500 -t 50 \
        -1 ToAssemble/${sample}_FPE_q24_minlen75.fastq.gz \
        -2 ToAssemble/${sample}_RPE_q24_minlen75.fastq.gz \
        -o Assembled/${sample}
done

View File

@ -1,12 +1,11 @@
#Author, date: Xyrus (last modified by him Sept 17 2020), most recently updated by Auden on October 17 2024
#Author, date: Xyrus (last modified by him Sept 17 2020), most recently updated by Auden on July 19 2023
#Motivation: Generate lots of codon usage statistics to aid in identifying useful characteristics for de novo ORF calling
#Intent: Summarize nucleotide composition statistics for a fasta file or folder of fasta files
#Dependencies: Python3, numpy, BioPython
#Inputs: Fasta file or folder of fasta files
#Outputs: A fasta file filtered for properly formatted sequences and several spreadsheets summarizing GC3S, ENc, RSCU, etc.
#Outputs: A fasta file filtered for properly formatted sequences and several spreadsheets summarizing GC, ENc, RSCU, etc.
#Example: python3 CUB.py -i seqs.fasta
#Note: Use "python3 CUB.py -i seqs.fasta --require_start --require_stop" to see more conservative estimate
#Note: in this script we use GC3 and GC3S interchangeably, though the abbreviation GC3S is probably more correct
#Note: Use "python3 CUB.py -i seqs.fasta --require_start --require_stop" when running this script on R2G files.
#Dependencies
import os
@ -20,11 +19,11 @@ import argparse
class CalcCUB:
"""
Returns the Effective Number of Codons (ENc) used (observed and expected)
Returns the Effective Number of Codons used (observed and expected)
following the equations originally from Wright 1990.
"""
def expWrightENc(gc3):
# Calculates the expected ENc from a sequence's GC3 (GC3S) under Wright 1990
# Calculates the expected ENc from a sequence's GC3 under Wright 1990
if gc3 > 1:
# If GC3 looks as though it is > 1 (e.g. 100%), converts to a float ≤ 1.
# Calculations expect a value between 0 and 1
@ -33,7 +32,7 @@ class CalcCUB:
return round(exp_enc, 4)
def nullENcGC3():
# Calculates the expected ENc from the null distribution of GC3S
# Calculates the expected ENc from the null distribution of GC3
# values (0, 100% GC)
null = [CalcCUB.expWrightENc(n) for n in np.arange(0,.51,0.01)]
null += null[:-1][::-1]
@ -357,12 +356,6 @@ class GCeval():
return round(GC(''.join([seq[n] for n in
range(2, len(seq)-len(seq[2:]) % 3, 3)])), 4)
def gc3s(cdnTbl):
# This function return the GC content of the third position of a codon excluding Tryp and Met
syn = round(GC(''.join([k[-1]*v[-1] for k, v in cdnTbl.items() if v[0] != 'W' and v[0] != 'M'])), 4)
return syn
def gc3_4F(cdnTbl):
# # This function return the GC content of the third position of four-fold
# # degenerate codons
@ -392,7 +385,7 @@ class SeqInfo(object):
def ENcStats(self):
# Stores the various Effective Number of Codons calculations in the class
self.expENc = CalcCUB.expWrightENc(self.gc3s)
self.expENc = CalcCUB.expWrightENc(self.gc3)
self.obsENc_6F = CalcCUB.calcWrightENc(self.cdnCounts_6F)
self.obsENc_No6F = CalcCUB.calcWrightENc(self.cdnCounts_No6F)
self.SunENc_6F = CalcCUB.SunEq5(self.cdnCounts_6F)
@ -403,7 +396,6 @@ class SeqInfo(object):
for k, v in self.gcFuncs.items():
setattr(self,k,v(self.ntd))
self.gc4F = GCeval.gc3_4F(self.cdnCounts_No6F)
self.gc3s = GCeval.gc3s(self.cdnCounts_No6F)
def RSCUstats(self):
@ -437,23 +429,23 @@ def CalcRefFasta(fasta, gCode):
def WriteWrightOut(seqData, outName, comp):
if comp == False:
with open(outName+'/SpreadSheets/'+outName.split('/')[-1]+'.ENc.Raw.tsv','w+') as w:
w.write('SequenceID\tAmbiguousCodons\tGC-Overall\tGC1\tGC2\tGC3\tGC3S\t'
w.write('SequenceID\tAmbiguousCodons\tGC-Overall\tGC1\tGC2\tGC3\t'
'GC3-Degen\tExpWrightENc\tObsWrightENc_6Fold\tObsWrightENc_No6Fold\t'
'ObsWeightedENc_6Fold\tObsWeightedENc_No6Fold\n')
for k, v in seqData.items():
name = [k]
gcs = [str(v.gcOverall),str(v.gc1),str(v.gc2),str(v.gc3),str(v.gc3s),str(v.gc4F)]
gcs = [str(v.gcOverall),str(v.gc1),str(v.gc2),str(v.gc3),str(v.gc4F)]
ENc = [str(v.expENc),str(v.obsENc_6F),str(v.obsENc_No6F),
str(v.SunENc_6F),str(v.SunENc_No6F)]
w.write('\t'.join(name+[str(v.amb_cdn)]+gcs+ENc)+'\n')
else:
with open(outName+'/SpreadSheets/'+outName.split('/')[-1]+'.CompTrans.ENc.Raw.tsv','w+') as w:
w.write('SequenceID\tAmbiguousCodons\tGC-Overall\tGC1\tGC2\tGC3\tGC3S\t'
w.write('SequenceID\tAmbiguousCodons\tGC-Overall\tGC1\tGC2\tGC3\t'
'GC3-Degen\tExpWrightENc\tObsWrightENc_6Fold\tObsWrightENc_No6Fold\t'
'ObsWeightedENc_6Fold\tObsWeightedENc_No6Fold\n')
for k, v in seqData.items():
name = [k]
gcs = [str(v.gcOverall),str(v.gc1),str(v.gc2),str(v.gc3),str(v.gc3s),str(v.gc4F)]
gcs = [str(v.gcOverall),str(v.gc1),str(v.gc2),str(v.gc3),str(v.gc4F)]
ENc = [str(v.expENc),str(v.obsENc_6F),str(v.obsENc_No6F),
str(v.SunENc_6F),str(v.SunENc_No6F)]
w.write('\t'.join(name+[str(v.amb_cdn)]+gcs+ENc)+'\n')
@ -481,7 +473,7 @@ def getCompFasta(fasta, gCode, require_start, require_stop):
def WriteNullENcOut(outName):
with open(outName+'/SpreadSheets/' + outName.split('/')[-1] + '.ENc.Null.tsv','w+') as w:
w.write('GC3S\tENc\n')
w.write('GC3\tENc\n')
w.write('\n'.join(CalcCUB.nullENcGC3()))
@ -566,14 +558,14 @@ if __name__ == "__main__":
o.write(folder.split('/')[-1] + '\t' + line)
with open('CUBOutput/SpreadSheets/ENc.Raw.tsv', 'w') as o:
o.write('File\tSequenceID\tAmbiguousCodons\tGC-Overall\tGC1\tGC2\tGC3\tGC3S\tGC3-Degen\tExpWrightENc\tObsWrightENc_6Fold\tObsWrightENc_No6Fold\tObsWeightedENc_6Fold\tObsWeightedENc_No6Fold\n')
o.write('File\tSequenceID\tAmbiguousCodons\tGC-Overall\tGC1\tGC2\tGC3\tGC3-Degen\tExpWrightENc\tObsWrightENc_6Fold\tObsWrightENc_No6Fold\tObsWeightedENc_6Fold\tObsWeightedENc_No6Fold\n')
for folder in folders:
for line in open(folder + '/SpreadSheets/' + folder.split('/')[-1] + '.ENc.Raw.tsv'):
if 'SequenceID' not in line:
o.write(folder.split('/')[-1] + '\t' + line)
with open('CUBOutput/SpreadSheets/CompTrans.ENc.Raw.tsv', 'w') as o:
o.write('File\tSequenceID\tAmbiguousCodons\tGC-Overall\tGC1\tGC2\tGC3\tGC3S\tGC3-Degen\tExpWrightENc\tObsWrightENc_6Fold\tObsWrightENc_No6Fold\tObsWeightedENc_6Fold\tObsWeightedENc_No6Fold\n')
o.write('File\tSequenceID\tAmbiguousCodons\tGC-Overall\tGC1\tGC2\tGC3\tGC3-Degen\tExpWrightENc\tObsWrightENc_6Fold\tObsWrightENc_No6Fold\tObsWeightedENc_6Fold\tObsWeightedENc_No6Fold\n')
for folder in folders:
for line in open(folder + '/SpreadSheets/' + folder.split('/')[-1] + '.CompTrans.ENc.Raw.tsv'):
if 'SequenceID' not in line:

View File

@ -1,13 +1,14 @@
'''
#Author, date: Godwin Ani and Laura Katz, Feb 9th 2023
#Modified: Adri Grow, April 6th 2025 to allow clustering at 100% (1.0) and output renamed file(s) with id clustered appended to file name
#Author, date: Godwin Ani and Laura Katz, 9th- Feb - 2023.
#Dependencies: Python3, CD-Hit
#Intent: For clustering nucleotide or amino acid sequences with the CD-Hit program
#Inputs: A folder containing AA or DNA fasta files
#Outputs: A folder of clustered files
#Example: python Cluster.py -t dna -id 0.95 -ov 0.67 -i input_folder_dna -o output_folder_dna
#Intent: For clustering nucleotide or amino acid sequences with the CD-Hit program.
#Inputs: A folder containing amino acid or DNA fasta files.
#Outputs: A folder of clustered files.
#Example: python Cluster.py --type dna --identity 0.95 --overlap 0.67 --input input_folder_dna --output output_folder_dna
'''
import os
import argparse
from tqdm import tqdm
@ -15,10 +16,8 @@ import subprocess
def input_validation(value, error_message):
try:
integer, fractional = value.split('.')
value = float(value)
if value == 1.0:
return value
integer, fractional = str(value).split('.')
if int(integer) == 0 and len(fractional) == 2:
return value
except ValueError:
@ -29,44 +28,39 @@ def input_validation(value, error_message):
def cluster_sequences(program, identity, overlap, input_folder, output_folder):
for file in tqdm(os.listdir(input_folder)):
if file.endswith('.fasta'):
output_name = f"{os.path.splitext(file)[0]}_{int(float(identity) * 100)}clustered.fasta"
subprocess.run([f'{program}', '-i', f'{input_folder}/{file}', '-o', f'{output_folder}/{output_name}', '-c', f'{identity}', '-d', '0', '-aS', f'{overlap}'])
subprocess.run([f'{program}', '-i', f'{input_folder}/{file}', '-o', f'{output_folder}/{file}', '-c', f'{identity}', '-d', '0', '-aS', f'{overlap}'])
for file in os.listdir(output_folder):
if file.endswith('.clstr'):
base_name = os.path.splitext(file)[0] # removes .clstr
if base_name.endswith('.fasta'):
base_name = base_name[:-6] # removes .fasta from end
new_name = f"{base_name}.txt"
os.rename(f'{output_folder}/{file}', f'{output_folder}/{new_name}')
os.rename(f'{output_folder}/{file}', f'{output_folder}/{file.split("FILE")[0]}Clustered.txt')
def main():
parser = argparse.ArgumentParser(description='Cluster amino acid or nucleotide sequences using CD-HIT.')
parser.add_argument('-t', '--type', choices=['aa', 'dna'], required=True, help='Type of sequences (aa for amino acid, dna for nucleotide)')
parser.add_argument('-id','--identity', type=str, required=True, help='Sequence identity threshold (e.g. 1.0, 0.99, 0.95)')
parser.add_argument('-ov', '--overlap', type=str, required=True, help='Sequence alignment overlap value (e.g. 0.67, 0.75)')
parser.add_argument('-i', '--input_files', type=str, required=True, help='Input folder containing sequences in fasta format')
parser.add_argument('-o', '--output', type=str, required=True, help='Output folder for clustered sequences ending with -id value')
parser = argparse.ArgumentParser(description='Cluster amino acid or DNA sequences using CD-HIT.')
parser.add_argument('--type', choices=['aa', 'dna'], required=True, help='Type of sequences (aa for Amino Acids, dna for DNA)')
parser.add_argument('--identity', type=str, required=True, help='Sequence Identity Threshold (e.g., 0.99, 0.95)')
parser.add_argument('--overlap', type=str, required=True, help='Sequence Alignment Overlap Value (e.g., 0.67, 0.75)')
parser.add_argument('--input', type=str, required=True, help='Input folder containing sequences in fasta format')
parser.add_argument('--output', type=str, required=True, help='Output folder for clustered sequences')
args = parser.parse_args()
if not os.path.isdir(args.input_files):
print(f'Error: Input folder "{args.input_files}" does not exist.')
if not os.path.isdir(args.input):
print(f'Error: Input folder "{args.input}" does not exist.')
exit(1)
if not os.path.isdir(args.output):
os.mkdir(args.output)
if args.type == 'aa':
identity = input_validation(args.identity, 'ERROR! Use format 0.## or 1.0 for amino acid sequence identity threshold.')
overlap = input_validation(args.overlap, 'ERROR! Use format 0.## for amino acid sequence alignment overlap value.')
cluster_sequences('cd-hit', identity, overlap, args.input_files, args.output)
identity = input_validation(args.identity, 'ERROR! Use format 0.## for Amino acids sequence identity threshold.')
overlap = input_validation(args.overlap, 'ERROR! Use format 0.## for Amino acids sequence alignment overlap value.')
cluster_sequences('cd-hit', identity, overlap, args.input, args.output)
elif args.type == 'dna':
identity = input_validation(args.identity, 'ERROR! Use format 0.## or 1.0 for nucleotide sequence identity threshold.')
overlap = input_validation(args.overlap, 'ERROR! Use format 0.## for nucleotide sequence alignment overlap value.')
cluster_sequences('cd-hit-est', identity, overlap, args.input_files, args.output)
identity = input_validation(args.identity, 'ERROR! Use format 0.## for DNA sequence identity threshold.')
overlap = input_validation(args.overlap, 'ERROR! Use format 0.## for DNA sequence alignment overlap value.')
cluster_sequences('cd-hit-est', identity, overlap, args.input, args.output)
else:
print('Invalid sequence type. Choose "aa" for amino acids or "dna" for nucleotides.')
print('Invalid sequence type. Choose "aa" for Amino Acids or "dna" for DNA.')
exit(1)
if __name__ == "__main__":

View File

@ -27,7 +27,7 @@ enc_null <- data.frame(read_tsv('ENc.Null.tsv'))
#you need as.numeric to ensure R is reading the variable correctly
gc3_plot <- ggplot(gc3, aes(as.numeric(GC3.Degen), as.numeric(ObsWrightENc_No6Fold)))+
geom_point(size = 0.1)+
geom_line(data = enc_null, aes(GC3S, ENc))+
geom_line(data = enc_null, aes(GC3, ENc))+
theme_classic()+
labs(x = 'GC3 Degen', y = 'ObsWrightENc_No6fold')+
theme(legend.position = 'none')+

View File

@ -1,65 +0,0 @@
#!/bin/bash
## Last updated on Jan 9th 2024 by Auden Cote-L'Heureux
#Intent: Calculate TPM for assembled transcripts
#Dependencies: Salmon (called as salmon-1.9.0_linux_x86_64/bin/salmon, expected in the folder this script is run from)
#Inputs: Must be in a folder along with a folder called 'Transcriptomes', containing assembled transcripts as output by rnaSpades (transcripts.fasta),
## and a folder called 'RawReads' containing the fwd and rev reads prior to assembly, with the same file prefixes as the corresponding assembled transcript files
#Outputs: A folder called 'quants' with one sub-folder per taxon, containing a 'quant' file which has TPM data.
## If running on an HPC, include parameters here! For example, on a Slurm system you might use
#SBATCH --job-name=tpm
#SBATCH --output=Salmon.%j.out # Stdout (%j expands to jobId)
#SBATCH --nodes=1
#SBATCH --ntasks=60
#SBATCH --mem=60G

# Both loops below run from inside a sub-folder (Transcriptomes/ or Indices/),
# so the Salmon binary and all sibling folders are addressed one level up.
SALMON=../salmon-1.9.0_linux_x86_64/bin/salmon

mkdir -p Indices

## First, build transcript indices
cd Transcriptomes || { echo "ERROR: folder 'Transcriptomes' not found" >&2; exit 1; }
for TRANS in *; do
    # The first 10 characters of the file name are used as the taxon code.
    tax=${TRANS:0:10}
    "$SALMON" index -t "$TRANS" -i "../Indices/$tax"
done

## Now calculate TPM
# We are still inside Transcriptomes/ at this point, so Indices/ is a sibling folder.
cd ../Indices || { echo "ERROR: folder 'Indices' not found" >&2; exit 1; }
for TRANS in *; do
    tax=${TRANS:0:10}
    fpe='NA'; rpe='NA'; fpesub="FPE"; rpesub="RPE"

    # Find the forward (FPE) and reverse (RPE) read files whose names start with this taxon code.
    for TRIM in ../RawReads/*; do
        trif=${TRIM##*/}   # file name without the leading ../RawReads/
        if [ "${trif:0:10}" == "$tax" ]; then
            if [[ "$trif" == *"$fpesub"* ]]; then
                fpe=$trif
            fi
            if [[ "$trif" == *"$rpesub"* ]]; then
                rpe=$trif
            fi
        fi
    done

    if [ "$fpe" == 'NA' ]; then
        # Without a forward read file there is nothing to quantify for this taxon.
        echo "WARNING: no FPE read file found in RawReads for $tax; skipping" >&2
        continue
    fi

    if [ "$rpe" != 'NA' ]; then
        # Paired-end reads
        "$SALMON" quant -i "$TRANS" -l A -1 "../RawReads/$fpe" -2 "../RawReads/$rpe" --validateMappings -o "../quants/$tax"
    else
        # Only forward reads were found: quantify as single-end
        "$SALMON" quant -i "$TRANS" -l A -r "../RawReads/$fpe" --validateMappings -o "../quants/$tax"
    fi
done

View File

@ -1,56 +1,51 @@
'''
#Author, date: ?
#Uploaded: updated by Adri Grow, Aug 2025
#Intent: map a group of trimmed reads to a reference
#Dependencies: Python, HISAT2, samtools, (optional: sambamba)
#EDIT LINES: 19 & 36
#Inputs: Folder named 'TrimmedReads' containing the forward and reverse trimmed reads that start with the same unique identifier for each sample/cell
#Outputs: Folders with the names of the unique identifier (e.g. LKHs) containing the bam files
#Usage: python3 ReadMapping.py
#IMPORTANT: Lines 34-42 manipulate the output files in several different ways including converting .sam to .bam, sorting, optional deduplicating, optional quality filtering, and retaining only mapped reads. It is the responsibility of the user to determine exactly which commands are needed for their dataset.
#Uploaded: updated by Adri Grow, 2024 (previous Adri Grow 2023)
#Intent: map a group of trimmed reads to a reference.
#Dependencies: Python3, hisat2, samtools, sambamba
#EDIT LINES: 18 & 32
#Inputs: Folder named 'TrimmedReads' containing all the trimmed reads.
#Outputs: Folders with the names of the LKHs containing the sam/bam files.
#Example: python ReadMapping.py
'''
import os
from Bio import SeqIO
#This first command builds your reference with HISAT
#If you've already done this, DON'T run this command! Instead, comment it out (use a # in front of it)
#It will output several files. Don't worry about them, HISAT will know what to do
os.system("hisat2-build Foram_reference.fasta Foram_Index") #Replace "Foram_reference.fasta" with your reference fasta name, and optionally change "Foram_Index" to your preferred index name
#this first command builds your reference with Hisat.
#If you've already done this, DON'T run this command! Instead, comment it out (use a # in front of it).
#It will output several files. Don't worry about them, Hisat will know what to do.
os.system("hisat2-build Foram_reference.fasta Foram_Index") #change to your reference.fasta and rename the index
folder = os.listdir("TrimmedReads") #Replace "TrimmedReads" with the name of the folder containing your trimmed reads, if different than TrimmedReads
folder.sort() #This sorts the trimmed reads folder so that all the files are passed in order
folder = os.listdir("TrimmedReads") #Insert the name of the folder which has your trimmed reads inside the quotes
folder.sort() #This sorts the folder so that all the LKHs are in order.
for x in folder:
#This is specific to file names that start with a unique 'LKH' identifier, formatted like 'LKH###_FPE.fastq.gz'
if "LKH" in x and "FPE" in x: #Assigning a variable to forward reads. Make sure you have both forward and reverse reads for each cell!
if "LKH" in x and "FPE" in x: #assigning a variable to forward reads. Make sure you have both forward and reverse reads for each cell!
FPE = x
sample_id = FPE.split("_FPE")[0]
if "LKH" in x and "RPE" in x: #Assigning a variable to reverse reads
if "LKH" in x and "RPE" in x: #assigning a variable to reverse reads.
RPE = x
if FPE.split("_FPE")[0] == RPE.split("_RPE")[0]: #Match sample IDs dynamically
#The next few lines are several HISAT commands that will create new files
#If necessary, EDIT the name of the index and the name of the trimmed reads folder in the very next line only
os.system("hisat2 -x Foram_Index -1 TrimmedReads/" +FPE+ " -2 TrimmedReads/" +RPE+ " -S sample.sam") #running HISAT2
os.system("samtools view -bS sample.sam > sample.bam") #converts .sam file to .bam file
os.remove("sample.sam") #remove the .sam file (already converted to .bam, sam files are large and unnecessary to keep)
#os.system("samtools fixmate -O bam sample.bam fixmate_sample.bam") #use this command if you will be using the sambamba markdup command to remove duplicate reads (Katzlab default for transcriptomics and amplicon is to not remove duplicates)
os.system("samtools sort -O bam -o sorted_sample.bam sample.bam") #sorts the .bam file alignments by leftmost coordinates
#os.system("sambamba markdup -r sorted_sample.bam sorted_sample.dedup.bam") #removes duplicate reads - may not be appropriate for your study or protocols, user will need to determine if this is best practice for their study
#os.system("samtools view -h -b -q 40 sorted_sample.dedup.bam > sorted_sample.q40.bam") #only keeps reads with mapping quality ≥ 40, input is the dedup file but can easily be modified to use the sorted .bam file
#os.system("samtools view -h -b -q 20 sorted_sample.dedup.bam > sorted_sample.q20.bam") #only keeps reads with mapping quality ≥ 20, input is the dedup file but can easily be modified to use the sorted .bam file
os.system("samtools view -h -F 4 -b sorted_sample.bam > sorted_mapped_sample.bam") #only keeps mapped reads, using the sorted .bam file as input - this is the Katzlab transcriptomic and amplicon final output that should be used for continued analyses
if(FPE[:7] == RPE[:7]):
#The next few lines are several Hisat commands that will create new files.
#EDIT the name of the index and the name of the trimmed reads folder in the first command below
os.system("hisat2 -x Foram_Index -1 TrimmedReads/" +FPE+ " -2 TrimmedReads/" +RPE+ " -S sample.sam")
os.system("samtools view -bS sample.sam > sample.bam")
os.system("samtools fixmate -O bam sample.bam fixmate_sample.bam")
os.system("samtools sort -O bam -o sorted_sample.bam fixmate_sample.bam")
os.system("sambamba markdup -r sorted_sample.bam sorted_sample.dedup.bam")
os.system("samtools view -h -b -q 40 sorted_sample.dedup.bam > sorted_sample.q40.bam")
os.system("samtools view -h -b -q 20 sorted_sample.dedup.bam > sorted_sample.q20.bam")
os.system("samtools view -h -F 4 -b sorted_sample.dedup.bam > defaultparameters_sample.bam")
if not os.path.isdir(sample_id):
os.mkdir(sample_id) #making folders with the names of the LKHs or unique identifiers
if not os.path.isdir(x[:7]):
os.mkdir(x[0:7]) #making folders with the names of the LKHs
for file in os.listdir('.'): #These lines move the bam files created into the new LKH/unique identifier folders
for file in os.listdir('.'): #These lines move the sam/bam files that Hisat creates into the new LKH folders.
if(file.endswith('.sam') or file.endswith('.bam')):
os.rename(file, f"{sample_id}/{file}")
os.rename(file,x[:7] + '/' + file)
print("~~~~~~~~~~~:>~") #When the snake appears in terminal, the script has finished running for all samples/cells!
print("~~~~~~~~~~~:>~") #When the snake appears, your script has run!

View File

@ -1,10 +1,10 @@
#Author, date: Auden Cote-L'Heureux, last updated Aug 18th 2025 by AKG
#Author, date: Auden Cote-L'Heureux, last updated Apr 1st 2024 by GA
#Motivation: Select robust sequences from trees
#Intent: Select clades of interest from large trees using taxonomic specifications
#Dependencies: Python3, ete3, Biopython
#Inputs: A folder containing: all PTLp2 output trees and all corresponding unaligned .fasta (pre-guidance) files
#Outputs: A folder of grabbed clades and filtered unaligned fasta files
#Example: python3 CladeGrabbing.py --input /Path/To/TreesandPreGuidance --target Sr_rh --min_presence 20
#Example: python CladeGrabbing.py --input /Path/to/trees --target Sr_rh --min_presence 20
#IMPORTANT: key parameters explained in "add_argument" section below
#Dependencies
@ -18,7 +18,7 @@ def get_args():
parser = argparse.ArgumentParser(
prog = 'Clade grabber, Version 2.1',
description = "Updated Aug 1st, 2023 by Auden Cote-L'Heureux, modified by AKG Aug 18th 2025"
description = "Updated Aug 1st, 2023 by Auden Cote-L'Heureux, modified by GA Feb 13th 2024"
)
#add_argument section with parameters explained
parser.add_argument('-i', '--input', type = str, required = True, help = 'Path to a folder containing input trees (which must have the file extension .tre, .tree, .treefile, or .nex)')
@ -28,8 +28,6 @@ def get_args():
parser.add_argument('-nr', '--required_taxa_num', type = int, default = 0, help = 'The number of species belonging to taxa in the --required_taxa list that must be present in the clade. Default is 0.')
parser.add_argument('-o', '--outgroup', type = str, default = '', help = 'A comma-separated list of any number of digits/characters (e.g. Sr_ci_S OR Am_t), or a file with the extension .txt containing a list of complete or partial taxon codes, to describe taxa that will be included as outgroups in the output unaligned fasta files (which will contain only sequences from a single selected clade, and all outgroup sequences in the tree captured by this argument).')
parser.add_argument('-c', '--contaminants', type = float, default = 2, help = 'The number of non-ingroup contaminants allowed in a clade, or if less than 1 the proportion of sequences in a clade that can be non-ingroup (i.e. presumed contaminants). Default is to allow 2 contaminants.')
parser.add_argument('-ft', '--first_target', type=str, default='', help='[Optional] A comma-separated list or .txt file of complete/partial taxon codes for an initial, broad clade search. If provided, the script will first find clades with these taxa before applying the main --target filter.')
parser.add_argument('-fm', '--first_min_presence', type=int, default=0, help='[Optional] Minimum number of sequences from --first_target required in a clade for it to be used in the second-stage search. Ignored if --first_target is not provided.')
return parser.parse_args()
@ -87,155 +85,86 @@ def reroot(tree):
def get_subtrees(args, file):
newick = get_newick(args.input + '/' + file)
tree = ete3.Tree(newick)
newick = get_newick(args.input + '/' + file)
majs = list(dict.fromkeys([leaf.name[:2] for leaf in tree]))
tree = ete3.Tree(newick)
# Only try to reroot trees with more than 2 major clades (original behavior)
if len(majs) > 2:
tree = reroot(tree)
majs = list(dict.fromkeys([leaf.name[:2] for leaf in tree]))
# -------------------------------
# FIRST-STAGE (optional) FILTER
# -------------------------------
def get_outer_leafsets():
"""
Return a list of sets, each set = leaf names of an outer clade
that passes --first_target, --first_min_presence, children_keep,
and contaminants logic (using args.contaminants).
If --first_target is not used, return one set containing ALL leaves.
"""
if not args.first_target or args.first_min_presence == 0:
return [set(leaf.name for leaf in tree)] # no outer filter → whole tree
#Only try to reroot trees with more than 2 major clades. This was added to fix the ETE3 "Cannot set myself as outgroup" error
if len(majs) > 2:
tree = reroot(tree)
# Parse first_target codes
if '.' in args.first_target:
first_target_codes = [l.strip() for l in open(args.first_target, 'r').readlines() if l.strip() != '']
else:
first_target_codes = [code.strip() for code in args.first_target.split(',') if code.strip() != '']
#Getting a clean list of all target taxa
if '.' in args.target:
try:
target_codes = [l.strip() for l in open(args.target, 'r').readlines() if l.strip() != '']
except AttributeError:
print('\n\nError: invalid "target" argument. This must be a comma-separated list of any number of digits/characters to describe focal taxa (e.g. Sr_ci_S OR Am_t), or a file with the extension .txt containing a list of complete or partial taxon codes. All sequences containing the complete/partial code will be identified as belonging to target taxa.\n\n')
else:
target_codes = [code.strip() for code in args.target.split(',') if code.strip() != '']
outer_sets = []
seen_leaves = []
#Getting a clean list of all "at least" taxa
if '.' in args.required_taxa:
try:
required_taxa_codes = [l.strip() for l in open(args.required_taxa, 'r').readlines() if l.strip() != '']
except AttributeError:
print('\n\nError: invalid "required_taxa" argument. This must be a comma-separated list of any number of digits/characters (e.g. Sr_ci_S OR Am_t), or a file with the extension .txt containing a list of complete or partial taxon codes, to describe taxa that MUST be present in a clade for it to be selected (e.g. you may want at least one whole genome).\n\n')
else:
required_taxa_codes = [code.strip() for code in args.required_taxa.split(',') if code.strip() != '']
for node in tree.traverse('levelorder'):
# large enough and not subsumed by already accepted outer node
if len(node) >= args.first_min_presence and len(set(seen_leaves) & set([leaf.name for leaf in node])) == 0:
leaves = [leaf.name for leaf in node]
target_codes = list(dict.fromkeys(target_codes + required_taxa_codes))
# children_keep logic but for first_target
children_keep = 0
for child in node.children:
taken = False
for code in first_target_codes:
for leaf in child:
if leaf.name.startswith(code):
children_keep += 1
taken = True
break
if taken:
break
if children_keep != len(node.children):
continue
#Creating a record of selected subtrees, and all of the leaves in those subtrees
selected_nodes = []; seen_leaves = []
# count first-target hits (use [:10] uniqueness like original)
first_hits = set()
for code in first_target_codes:
for leaf in leaves[::-1]:
if leaf.startswith(code):
first_hits.add(leaf[:10])
leaves.remove(leaf)
#Iterating through all nodes in tree, starting at "root" then working towards leaves
for node in tree.traverse('levelorder'):
#If a node is large enough and is not contained in an already selected clade
# contaminants logic applied to FIRST-STAGE (reuse args.contaminants)
passes_contam = ((args.contaminants < 1 and len(leaves) <= args.contaminants * len(first_hits)) or
(args.contaminants >= 1 and len(leaves) <= args.contaminants))
if len(node) >= args.min_presence and len(list(set(seen_leaves) & set([leaf.name for leaf in node]))) == 0:
leaves = [leaf.name for leaf in node]
if len(first_hits) >= args.first_min_presence and passes_contam:
outer_sets.append(set(leaf.name for leaf in node))
seen_leaves.extend([leaf.name for leaf in node])
#Accounting for cases where e.g. one child is a contaminant, and the other child is a good clade with 1 fewer than the max number of contaminants
children_keep = 0
for child in node.children:
for code in target_codes:
taken = False
for leaf in child:
if leaf.name.startswith(code):
children_keep += 1
taken = True
break
if taken:
break
return outer_sets
if children_keep == len(node.children):
target_leaves = set(); required_taxa_leaves = set()
for code in target_codes:
for leaf in leaves[::-1]:
#print(leaf)
if leaf.startswith(code):
target_leaves.add(leaf[:10])
# Build outer sets; if user supplied first-stage args, we'll restrict inner search to these
using_first = bool(args.first_target) and args.first_min_presence > 0
outer_leafsets = get_outer_leafsets()
for req in required_taxa_codes:
if leaf.startswith(req):
required_taxa_leaves.add(leaf[:10])
break
leaves.remove(leaf)
# --------------------------------
# ORIGINAL INNER FILTER (unchanged)
# --------------------------------
# Getting a clean list of all target taxa
if '.' in args.target:
try:
target_codes = [l.strip() for l in open(args.target, 'r').readlines() if l.strip() != '']
except AttributeError:
print('\n\nError: invalid "target" argument. This must be a comma-separated list of any number of digits/characters to describe focal taxa (e.g. Sr_ci_S OR Am_t), or a file with the extension .txt containing a list of complete or partial taxon codes. All sequences containing the complete/partial code will be identified as belonging to target taxa.\n\n')
else:
target_codes = [code.strip() for code in args.target.split(',') if code.strip() != '']
# Getting a clean list of all "at least" taxa
if '.' in args.required_taxa:
try:
required_taxa_codes = [l.strip() for l in open(args.required_taxa, 'r').readlines() if l.strip() != '']
except AttributeError:
print('\n\nError: invalid "required_taxa" argument. This must be a comma-separated list of any number of digits/characters (e.g. Sr_ci_S OR Am_t), or a file with the extension .txt containing a list of complete or partial taxon codes, to describe taxa that MUST be present in a clade for it to be selected (e.g. you may want at least one whole genome).\n\n')
else:
required_taxa_codes = [code.strip() for code in args.required_taxa.split(',') if code.strip() != '']
target_codes = list(dict.fromkeys(target_codes + required_taxa_codes))
# Creating a record of selected subtrees, and all of the leaves in those subtrees
selected_nodes = []; seen_leaves = []
# Iterating through all nodes in tree, starting at "root" then working towards leaves
for node in tree.traverse('levelorder'):
# If using first-stage filter, only consider nodes fully inside some outer clade
if using_first:
node_leafs = set(leaf.name for leaf in node)
# require subset (node fully contained in an accepted outer clade)
if not any(node_leafs.issubset(S) for S in outer_leafsets):
continue
# If a node is large enough and is not contained in an already selected clade
if len(node) >= args.min_presence and len(list(set(seen_leaves) & set([leaf.name for leaf in node]))) == 0:
leaves = [leaf.name for leaf in node]
# Accounting for cases where e.g. one child is a contaminant, and the other child is a good clade
children_keep = 0
for child in node.children:
for code in target_codes:
taken = False
for leaf in child:
if leaf.name.startswith(code):
children_keep += 1
taken = True
break
if taken:
break
if children_keep == len(node.children):
target_leaves = set(); required_taxa_leaves = set()
for code in target_codes:
for leaf in leaves[::-1]:
if leaf.startswith(code):
target_leaves.add(leaf[:10])
for req in required_taxa_codes:
if leaf.startswith(req):
required_taxa_leaves.add(leaf[:10])
break
leaves.remove(leaf)
# Grab a clade as a subtree if it passes all filters
if len(target_leaves) >= args.min_presence and len(required_taxa_leaves) >= args.required_taxa_num and ((args.contaminants < 1 and len(leaves) <= args.contaminants * len(target_leaves)) or len(leaves) <= args.contaminants):
selected_nodes.append(node)
seen_leaves.extend([leaf.name for leaf in node])
# Write the subtrees to output .tre files
for i, node in enumerate(selected_nodes[::-1]):
with open('Subtrees/' + '.'.join(file.split('.')[:-1]) + '_' + str(i) + '.tre', 'w') as o:
o.write(node.write())
#Grab a clade as a subtree if 1) it has enough target taxa; 2) it has enough "at least" taxa; 3) it does not have too many contaminants
if len(target_leaves) >= args.min_presence and len(required_taxa_leaves) >= args.required_taxa_num and ((args.contaminants < 1 and len(leaves) < args.contaminants * len(target_leaves)) or len(leaves) < args.contaminants):
selected_nodes.append(node)
seen_leaves.extend([leaf.name for leaf in node])
#Write the subtrees to output .tre files
for i, node in enumerate(selected_nodes[::-1]):
with open('Subtrees/' + '.'.join(file.split('.')[:-1]) + '_' + str(i) + '.tre', 'w') as o:
o.write(node.write())
def make_new_unaligned(args):

View File

@ -1,4 +1,4 @@
#Author, date: Auden Cote-L'Heureux, last updated Nov 11th 2024 by Adri Grow
#Author, date: Auden Cote-L'Heureux, last updated Dec 18th 2023
#Motivation: Understand the topology of trees
#Intent: Describe clade sizes for different taxonomic groups
#Dependencies: Python3, ete3
@ -236,10 +236,9 @@ if __name__ == '__main__':
for tree_file in tqdm(os.listdir(args.input)):
if tree_file.split('.')[-1] in ('tre', 'tree', 'treefile', 'nex'):
clades_per_tax, majs_per_clade, mins_per_clade = get_clades(args.input + '/' + tree_file, args)
base_filename = os.path.splitext(tree_file)[0]
clades_per_tax_per_file.update({ base_filename : clades_per_tax })
majs_per_clade_per_file.update({ base_filename : majs_per_clade })
mins_per_clade_per_file.update({ base_filename : mins_per_clade })
clades_per_tax_per_file.update({ tree_file.split('.')[0] : clades_per_tax })
majs_per_clade_per_file.update({ tree_file.split('.')[0] : majs_per_clade })
mins_per_clade_per_file.update({ tree_file.split('.')[0] : mins_per_clade })
write_output(clades_per_tax_per_file, args, majs_per_clade = majs_per_clade_per_file, mins_per_clade = mins_per_clade_per_file)

View File

@ -151,7 +151,7 @@ def reroot(tree):
return best_clade
#Get the biggest clade for each taxonomic group (stops once it finds one)
for taxon in [('Ba'), ('Za'), ('Op'), ('Pl'), ('Am'), ('Ex'), ('Sr')]:
for taxon in [('Ba', 'Za'), ('Op'), ('Pl'), ('Am'), ('Ex'), ('Sr')]:
clade = get_best_clade(taxon)
if len([leaf for leaf in clade if leaf.name[:2] in taxon]) > 3:

View File

@ -1,50 +0,0 @@
''' Author, Date : Godwin Ani, 10 - July - 2024.
Motivation : To make phylogenetic trees more presentable.
Intent : Shorten the tip labels of phylogenetic trees.
Dependencies : Python3, ete3
Inputs : A folder containing trees
Outputs : A folder of trees with shortened tips.
Usage : python3 RenameTips_v1.0.py -i path_to_folder_of_trees
'''
import os, re, sys, argparse, string
import ete3
# Command-line interface: the only option is the folder that holds the input trees.
parser = argparse.ArgumentParser()
# required = True makes argparse exit with a usage message when -i is omitted;
# without it args.input is None and the path concatenation below raises TypeError.
parser.add_argument('-i', '--input', required = True, help = 'Path to a folder containing trees (.tree, .tre, .treefile or .nex)')
args = parser.parse_args()
# Renamed trees are written to a "renamed" subfolder inside the input folder.
os.makedirs(args.input + '/renamed', exist_ok = True)
def get_newick(fname):
    """Return the Newick string found in the tree file *fname*.

    Every line is reduced to its last space-separated token. A token that
    starts with '(' or 'tree1=' is taken as the tree: anything up to and
    including 'tree1=' is dropped, and single quotes and backslashes are
    removed. When several lines qualify the last one wins; when none does,
    an empty string is returned. The line ending of the matching line is
    kept as-is.
    """
    newick = ''
    # Context manager so the file handle is closed as soon as parsing is done
    # (the bare open() in a for-loop left it to the garbage collector).
    with open(fname) as handle:
        for line in handle:
            token = line.split(' ')[-1]
            if token.startswith(('(', 'tree1=')):
                newick = token.split('tree1=')[-1].replace("'", '').replace('\\', '')
    return newick
def tree_formatting_wrapper(file):
    """Shorten the tip labels of the tree in *file* and write the result.

    For every leaf yielded by iterating the tree whose name starts with an
    ASCII letter, the name is cut at the first '_Len', 'Contig_' is replaced
    by 'Ct', and '_XX_0' is removed. The tree is then written (ete3 format 1)
    to '<args.input>/renamed/<file name>.tree'.
    """
    tree = ete3.Tree(get_newick(file))
    letters = tuple(string.ascii_letters)
    for tip in tree:
        # Names that do not begin with a letter are left untouched.
        if not tip.name.startswith(letters):
            continue
        label = str(tip.name).split('_Len')[0]
        label = str(label).replace('Contig_', 'Ct')
        tip.name = str(label).replace('_XX_0', '')
    # The output keeps the full input file name and appends '.tree' to it.
    out_name = file.split('/')[-1] + '.tree'
    tree.write(format=1, outfile=args.input + '/renamed/' + out_name)
# Process every file in the input folder whose final extension marks it as a tree.
for tree in os.listdir(args.input):
    extension = tree.rsplit('.', 1)[-1]
    if extension not in ('tree', 'tre', 'treefile', 'nex'):
        continue
    tree_formatting_wrapper(args.input + '/' + tree)