mirror of
http://43.156.76.180:8026/YuuMJ/EukPhylo.git
synced 2025-12-27 04:50:26 +08:00
Compare commits
166 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1814246c04 | ||
|
|
591e32d5e5 | ||
|
|
fbf51f815a | ||
|
|
ac84339565 | ||
|
|
bdec8612ee | ||
|
|
db49d4965e | ||
|
|
f2d8fd9e6c | ||
|
|
fbaa61f23e | ||
|
|
8222b6404e | ||
|
|
f5ca94b51c | ||
|
|
033ed1237e | ||
|
|
4f400d61c8 | ||
|
|
692eabc6ad | ||
|
|
f17b43ffc9 | ||
|
|
12870b2007 | ||
|
|
10c4dda6b7 | ||
|
|
5bee8e55d2 | ||
|
|
a00e51523f | ||
|
|
cb71db1f72 | ||
|
|
e785532921 | ||
|
|
c042f62249 | ||
|
|
efebf01057 | ||
|
|
0fc18547d6 | ||
|
|
fee7125729 | ||
|
|
c92cfb1b19 | ||
|
|
5fcd3b937e | ||
|
|
20d3926f10 | ||
|
|
873862adbc | ||
|
|
333a7bc063 | ||
|
|
b582321c3a | ||
|
|
7a2320284a | ||
|
|
f53b6926c5 | ||
|
|
399811662c | ||
|
|
a05f94c9fa | ||
|
|
343496b598 | ||
|
|
3f9ef410a7 | ||
|
|
fb5353fe96 | ||
|
|
4e63a165b7 | ||
|
|
99647c87bc | ||
|
|
586a9e0c63 | ||
|
|
aec7f6caf9 | ||
|
|
5486c4289b | ||
|
|
c079b58d87 | ||
|
|
d16560f747 | ||
|
|
0b1feed651 | ||
|
|
d33ac6c2c3 | ||
|
|
ef6b633de7 | ||
|
|
c891188eec | ||
|
|
c767bd9a0a | ||
|
|
c84f3387ff | ||
|
|
3e4f4987a2 | ||
|
|
e7facbf6e7 | ||
|
|
3fe4ee3ae5 | ||
|
|
d81926f179 | ||
|
|
6cb8a7b98a | ||
|
|
fdf525d433 | ||
|
|
8ccde9800a | ||
|
|
d32fbcc4e8 | ||
|
|
9e4d605bff | ||
|
|
d01086e017 | ||
|
|
e0930c5748 | ||
|
|
cc7a66b179 | ||
|
|
16a96f137e | ||
|
|
fa1f50e6f0 | ||
|
|
9239110401 | ||
|
|
dc813c351a | ||
|
|
e452ae0117 | ||
|
|
2651fe7b89 | ||
|
|
c92f490837 | ||
|
|
7074a1e71a | ||
|
|
95b26b1880 | ||
|
|
fba84adb17 | ||
|
|
8c7a1a2eed | ||
|
|
e8aa277e44 | ||
|
|
998ce5bd9c | ||
|
|
4da74765ee | ||
|
|
6a9ae23e8c | ||
|
|
0cfea8840c | ||
|
|
d161957d86 | ||
|
|
4e52430af7 | ||
|
|
fdea233780 | ||
|
|
d7577022c4 | ||
|
|
cf0948023d | ||
|
|
04c41a6f44 | ||
|
|
c39df0e39d | ||
|
|
9b6b28ac03 | ||
|
|
31cb84fa28 | ||
|
|
a2ec0edb37 | ||
|
|
3d8cd2e221 | ||
|
|
e18b3dfe0c | ||
|
|
8487f1d836 | ||
|
|
d1b9a64e60 | ||
|
|
6ff435291c | ||
|
|
030401e9b0 | ||
|
|
e3d215c7e9 | ||
|
|
feab503fb6 | ||
|
|
d4f27a3032 | ||
|
|
1aebef4725 | ||
|
|
dfd826ee94 | ||
|
|
45de3036c9 | ||
|
|
7e9b90c79b | ||
|
|
5284a71ce8 | ||
|
|
4f033e8ab2 | ||
|
|
a559f61567 | ||
|
|
fc190415ee | ||
|
|
743d650b74 | ||
|
|
676a11a287 | ||
|
|
0df7470b92 | ||
|
|
3dc60dcd2e | ||
|
|
8d48e65b7f | ||
|
|
0ac714a63b | ||
|
|
7348c7445f | ||
|
|
3ac37547b7 | ||
|
|
de566bf546 | ||
|
|
db22976d3c | ||
|
|
a9a151eeb3 | ||
|
|
41223ec75c | ||
|
|
97da58aaf0 | ||
|
|
ee7dce1af7 | ||
|
|
b61eb8be1b | ||
|
|
ea96ad0fe4 | ||
|
|
1d7749b3a6 | ||
|
|
375bfef45e | ||
|
|
f5bb221378 | ||
|
|
31d0a8ddb2 | ||
|
|
3491166695 | ||
|
|
cd226beb9c | ||
|
|
10a8c2f78a | ||
|
|
2173eda5a0 | ||
|
|
60b0f9a9b4 | ||
|
|
bd72ed3a43 | ||
|
|
0fbd2ae863 | ||
|
|
19943a13fd | ||
|
|
04fe7029d2 | ||
|
|
fe7ef7b71a | ||
|
|
2ad02897c8 | ||
|
|
eb9e81f4c0 | ||
|
|
4f5bce5963 | ||
|
|
d595a51b4f | ||
|
|
31a9295485 | ||
|
|
733851b957 | ||
|
|
c0dfdb6248 | ||
|
|
baf5fd037d | ||
|
|
1e543b3989 | ||
|
|
a8a2a5ff2d | ||
|
|
1c309f8e10 | ||
|
|
61f808343f | ||
|
|
22eb559650 | ||
|
|
dcf4079b86 | ||
|
|
f2701e7f65 | ||
|
|
a0b45ed2c8 | ||
|
|
fb6e7aa6e7 | ||
|
|
d249add545 | ||
|
|
d57fdad826 | ||
|
|
f18717ac6b | ||
|
|
da6ac878d3 | ||
|
|
eb81b5ebfa | ||
|
|
679f1a10c8 | ||
|
|
85be0f31ca | ||
|
|
4472f44e36 | ||
|
|
e862010cfc | ||
|
|
d7f02a243c | ||
|
|
4b8a3fbe64 | ||
|
|
c4aab9eaef | ||
|
|
7904ec8b35 | ||
|
|
41cc1354b3 |
@ -1,5 +1,5 @@
|
||||
# Last updated Sept 19th 2023
|
||||
# Author: Xyrus Maurer-Alcala
|
||||
# Author: Xyrus Maurer-Alcala and Auden Cote-L'Heureux
|
||||
|
||||
# This script classifies translated CDS into gene families by
|
||||
# similarity-searching using Diamond against a reference database of
|
||||
|
||||
@ -2,12 +2,12 @@
|
||||
# Author: Auden Cote-L'Heureux
|
||||
|
||||
# This script produces both taxon- and sequence-level statistics to describe the ReadyToGo files
|
||||
# output by PhyloToL Part 1, as well as some OG-level information from the Hook (OG reference)
|
||||
# output by EukPhylo Part 1, as well as some OG-level information from the Hook (OG reference)
|
||||
# database. It relies on the utility script CUB.py to calculate composition statistics (GC content,
|
||||
# Effective Number of Codons, etc.). Both sequence level and taxon-level stats are summarized in tab-separated
|
||||
# outputs written to the Output folder. This script requires that the OG reference database is available as an
|
||||
# amino acid fasta file in the Databases/db_OG folder with the same file name as the .dmnd file used in script 4.
|
||||
# This script is intended to be run as part of the PhyloToL 6 Part 1 pipeline using the script wrapper.py.
|
||||
# This script is intended to be run as part of the EukPhylo Part 1 pipeline using the script wrapper.py.
|
||||
|
||||
import os, sys
|
||||
import argparse
|
||||
@ -30,7 +30,7 @@ def get_args():
|
||||
description = "Updated March 31th, 2023 by Auden Cote-L'Heureux"
|
||||
)
|
||||
|
||||
parser.add_argument('-i', '--input', type = str, required = True, help = 'Input path to the "Output" folder produced by PhyloToL Part 1. This folder should contain both the "ReadyToGO" and "Intermediate" folders.')
|
||||
parser.add_argument('-i', '--input', type = str, required = True, help = 'Input path to the "Output" folder produced by EukPhylo Part 1. This folder should contain both the "ReadyToGO" and "Intermediate" folders.')
|
||||
parser.add_argument('-d', '--databases', type = str, default = '../Databases', help = 'Path to databases folder')
|
||||
parser.add_argument('-r', '--r2g_jf', action = 'store_true', help = 'Create ReadyToGo files filtered to only include sequences between the 25th and 75th percentile of silent-site GC content. Please be aware that these are not necessarily the correct or non-contaminant sequences; examine the GC3xENc plots carefully before using these data.')
|
||||
|
||||
|
||||
@ -1,12 +1,15 @@
|
||||
# Last updated Sept 2023
|
||||
# Author: Xyrus Maurer-Alcalá
|
||||
# Author: Xyrus Maurer-Alcalá and Auden Cote-L'Heureux
|
||||
|
||||
# The aim of this script is to generate lots of codon usage statistics to aid in
|
||||
# identifying useful characteristics for de novo ORF calling. It is intended to be
|
||||
# stored in the 'Scripts' folder for the PhyloToL Part 1 pipeline scripts, and is
|
||||
# stored in the 'Scripts' folder for the EukPhylo Part 1 pipeline scripts, and is
|
||||
# called by Script 5b to calculate composition statistics for Part 1 output files.
|
||||
# It should not be run separately.
|
||||
|
||||
# Users should think about including start/stop constraint as default includes all
|
||||
# sequences, which can capture pseudogenes
|
||||
|
||||
# Dependencies:
|
||||
# Python3, numpy, BioPython
|
||||
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
# Last updated Nov 2023
|
||||
# Author: Auden Cote-L'Heureux
|
||||
|
||||
# This script is run as the first step of the PhyloToL 6 Part 1 GENOMES pipeline,
|
||||
# This script is run as the first step of the EukPhylo Part 1 GENOMES pipeline,
|
||||
# before any sequence data are actually processed. It checks to ensure that the input
|
||||
# CDS files and databases are properly located and formatted.
|
||||
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
# Last updated Sept 2023
|
||||
# Author: Auden Cote-L'Heureux
|
||||
|
||||
# This script is a WRAPPER for the PhyloToL Part 1 GENOMES pipeline. Users should
|
||||
# This script is a WRAPPER for the EukPhylo Part 1 GENOMES pipeline. Users should
|
||||
# use this script to run the pipeline, rather than running any of the sub-scripts (number 1a through 5b)
|
||||
# independently. To run an individual step in the pipeline, use --script X where X is the number (1 through 5).
|
||||
# To run multiple sets (usually all of them), use --first_script 1 --last_script 5, or whichever first
|
||||
@ -19,8 +19,8 @@ import CheckSetup
|
||||
def get_args():
|
||||
|
||||
parser = argparse.ArgumentParser(
|
||||
prog = 'PhyloToL v6.0 Part 1 for GenBank Genomes',
|
||||
description = "Updated January 19th, 2023 by Auden Cote-L'Heureux. Link to GitHub: https://github.com/AudenCote/PhyloToL_v6.0"
|
||||
prog = 'EukPhylo Part 1 for GenBank Genomes',
|
||||
description = "Updated January 19th, 2023 by Auden Cote-L'Heureux. Link to GitHub: https://github.com/Katzlab/EukPhylo"
|
||||
)
|
||||
|
||||
parser.add_argument('-s', '--script', default = -1, type = int, choices = { 1, 2, 3, 4, 5 }, help = 'Script to run if you are only running one script')
|
||||
|
||||
@ -1,24 +1,45 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
#SBATCH --job-name=PTL1_genome
|
||||
#SBATCH --output=PTL1.%j.out # Stdout (%j expands to jobId)
|
||||
## Last updated Jan 2025 by Auden Cote-L'Heureux
|
||||
|
||||
## This script is intended to be used to process genomic CDS with EukPhylo part 1 on an HPC that uses the Slurm workload manager.
|
||||
## The first part of the script contains Slurm-specific parameters that should be adjusted by users to fit their resource allocation
|
||||
## needs and restrictions, followed by some example commands taken from the GitHub Wiki, more detail for which can be found
|
||||
## here: https://github.com/Katzlab/EukPhylo/wiki/EukPhylo-Part-1:-GF-assignment
|
||||
|
||||
## Slurm specific code
|
||||
|
||||
#SBATCH --job-name=EukPhylo
|
||||
#SBATCH --output=EukPhylo.%j.out # Stdout (%j expands to jobId)
|
||||
#SBATCH --nodes=1
|
||||
#SBATCH --ntasks=1
|
||||
#SBATCH --ntasks-per-node=64 ##change to number of srun when running multiple instances
|
||||
#SBATCH --ntasks-per-node=64 # #change to double number of srun when running multiple instances
|
||||
#SBATCH --mem=160G
|
||||
#SBATCH --mail-type=ALL
|
||||
#SBATCH --mail-user=YOUREMAIL@smith.edu
|
||||
#SBATCH --mail-user=email@email.edu
|
||||
|
||||
module purge #Cleans up any loaded modules
|
||||
module use /gridapps/modules/all #make sure module locations is loaded
|
||||
|
||||
#Unity server
|
||||
module use /gridapps/modules/all
|
||||
module load conda/latest
|
||||
module load uri/main
|
||||
module load diamond/2.1.7
|
||||
module load VSEARCH/2.22.1-GCC-11.3.0
|
||||
conda activate /work/pi_lkatz_smith_edu/Conda_PTL6p1
|
||||
|
||||
#Grid server
|
||||
module use /gridapps/modules/all
|
||||
module load slurm
|
||||
module load tqdm
|
||||
module load Biopython/1.75-foss-2019b-Python-3.7.4
|
||||
module load BLAST+/2.9.0-gompi-2019b
|
||||
module load DIAMOND/0.9.30-GCC-8.3.0
|
||||
module load tqdm/4.66.1-GCCcore-12.3.0
|
||||
module load Biopython/1.79-gfbf-2023a
|
||||
module load BLAST+/2.14.1-gompi-2023a
|
||||
module load DIAMOND/2.1.8-GCC-12.3.0
|
||||
module load VSEARCH/2.25.0-GCC-12.3.0
|
||||
|
||||
path='/beegfs/fast/katzlab/PTL1/Genomes/'
|
||||
parent='/Your/Home/Folder/'
|
||||
|
||||
## Example run command
|
||||
|
||||
# Start at script 1 and go through script 5 (the final script) using the Universal genetic code
|
||||
srun -D ${parent}Scripts python3 ${parent}Scripts/wrapper.py -1 1 -2 5 --cds ${parent}Input -o ${parent}Output --genetic_code Universal --databases ${parent}Databases > log.out
|
||||
|
||||
srun -D ${path}Scripts python3 ${path}Scripts/wrapper.py -1 1 -2 5 --cds ${path}PTL1GenomesBatches/PTL1GenomesBatch2 -o ${path}Output/PTL1Genomes_OutputBatch2 --genetic_code Universal --databases ${path}Databases &
|
||||
wait
|
||||
|
||||
@ -3,7 +3,7 @@
|
||||
|
||||
# This script is intended to remove transcripts below or above a given
|
||||
# size range from a transcriptome assembly. It should be run as part
|
||||
# of Part 1 of the PhyloToL version 6 pipeline, using the script wrapper.py.
|
||||
# of Part 1 of the EukPhylo pipeline, using the script wrapper.py.
|
||||
|
||||
# Prior to running this script, ensure that you have assembled your
|
||||
# transcriptome and renamed the assembled transcripts in the format of
|
||||
|
||||
@ -5,7 +5,7 @@
|
||||
# by removing sequences with low coverage relative to other
|
||||
# very similar sequences from samples sequenced on the same
|
||||
# plate. This script is optional, but to be run as part of the
|
||||
# PhyloToL version 6 Part 1 pipeline using the script wrapper.py
|
||||
# EukPhylo Part 1 pipeline using the script wrapper.py
|
||||
|
||||
# The specifics of parameters are described below and include removing seqs 1/10
|
||||
# the coverage of the most highly expressed, and keeping all seqs with coverage >50.
|
||||
|
||||
@ -5,12 +5,12 @@
|
||||
# all length-filtered assembled transcripts against a reference database. It then
|
||||
# writes these sequences into a separate file, removing them from the remainder
|
||||
# of the sequences that will go forwards for gene family assignment. This script
|
||||
# should be in Part 1 of the PhyloToL version 6 pipeline using the script wrapper.py.
|
||||
# should be in Part 1 of the EukPhylo pipeline using the script wrapper.py.
|
||||
|
||||
# You must run Script 1a before this step. Optionally, you may also have run Script 1b.
|
||||
# Before running this script, ensure that you have a properly formatted rRNA reference
|
||||
# BLAST database in the Databases/db_BvsE/SSULSUdb folder; it is relatively narrow in scope
|
||||
# and could be easily replaced
|
||||
# and could be easily replaced or updated to better capture a user's target taxa
|
||||
|
||||
#Dependencies
|
||||
import argparse, os, sys
|
||||
|
||||
@ -9,7 +9,7 @@
|
||||
# to a prokaryotic sequence, it is labeled with an "E"; if its best hit to a prokaryotic
|
||||
# sequence has an e-value >1000 times that of its best hit to a eukaryotic sequence, it is
|
||||
# labeled with a "P". Anything else gets a "U". This script should be run as part of the
|
||||
# PhyloToL version 6 Part 1 pipeline using the script wrapper.py.
|
||||
# EukPhylo Part 1 pipeline using the script wrapper.py.
|
||||
|
||||
# Prior to running this script, ensure that you have run scripts 1a (and optionally
|
||||
# script 1b) and 2a, and that your prokaryote and reference databases (or the default
|
||||
|
||||
@ -6,7 +6,7 @@
|
||||
# gene families. We provide the Hook database on the GitHub, but this
|
||||
# may be replaced with a custom reference database by REPLACING the
|
||||
# .dmnd and .fasta files in the Databases/db_OG folder. This script
|
||||
# is intended to be run as part of the PhyloToL 6 Part 1 pipeline using
|
||||
# is intended to be run as part of the EukPhylo Part 1 pipeline using
|
||||
# the script wrapper.py.
|
||||
|
||||
|
||||
|
||||
@ -7,7 +7,7 @@
|
||||
# frequencies in all reading frames; it then reports these frequencies in a spreadsheet
|
||||
# (gcodes_output.tsv) for the user to inspect in deciding which genetic codes to use,
|
||||
# if unsure. This step can be skipped if genetic codes were input from the beginning. This
|
||||
# script should be run through the PhyloToL 6 Part 1 pipeline using the script wrapper.py.
|
||||
# script should be run through the EukPhylo Part 1 pipeline using the script wrapper.py.
|
||||
|
||||
#----------------------------------------- NOTES -----------------------------------------#
|
||||
#
|
||||
|
||||
@ -4,7 +4,7 @@
|
||||
# This script is intended to translate nucleotide sequences. It does this using
|
||||
# the gcode_output.tsv file output by script 4 and containing in-frame stop codon
|
||||
# frequencies. The user can use this stop codon information to fill in the last
|
||||
# column in this file with the genetic code for each taxon. If the user input a
|
||||
# column in this file with the genetic code for each taxon, as outlined in the Wiki on Github. If the user input a
|
||||
# genetic code or list of genetic codes to script 1, then the gcode_output.tsv will
|
||||
# be filled automatically. Sequences are translated using the Diamond BLASTp results
|
||||
# from OG assignment as a starting point for determining coding sequence boundaries.
|
||||
@ -14,7 +14,7 @@
|
||||
# of transcriptomic data, poor genetic code assignment or low-quality/partial data can
|
||||
# interfere with this process).
|
||||
|
||||
# This script is intended to be run using the wrapper.py as part of the PhyloToL 6 Part 1
|
||||
# This script is intended to be run using the wrapper.py as part of the EukPhylo Part 1
|
||||
# pipeline. It requires that the setup of the 'Output' folder be that as output by script 4
|
||||
# of this pipeline.
|
||||
|
||||
|
||||
@ -5,8 +5,9 @@
|
||||
# First, all sequences shorter than 33% or longer than 150% the average length of sequences
|
||||
# from the same OG in the Hook database are removed. Then, for each transcriptomic sample,
|
||||
# all sequences within an OG are compared at the nucleotide level to the sequence with the
|
||||
# highest “score” (defined as k-mer coverage multiplied by length). The script should be run
|
||||
# as part of the PhyloToL 6 Part 1 pipeline using the script wrapper.py. It requires that the
|
||||
# highest “score” (defined as k-mer coverage multiplied by length) using BLAST, and sequences that
|
||||
# are 98% identical to the master sequence are removed. The script should be run
|
||||
# as part of the EukPhylo Part 1 pipeline using the script wrapper.py. It requires that the
|
||||
# structure of the 'Output' folder be as output by script 5, and that the Databases/db_OG folder
|
||||
# contains a .fasta file containing all amino acid sequences in the OG reference database (Hook)
|
||||
# with the same file name (until the extension) as the .dmnd file for the reference database used
|
||||
|
||||
@ -3,8 +3,8 @@
|
||||
|
||||
# This script does not process sequence data in any way. It only renames the outputs of
|
||||
# script 6 to the 10-digit taxon code which prefixes the file names, and then moves output
|
||||
# 'ReadyToGo' files into a separate folder. It is intended to be run as part of the PhyloToL
|
||||
# 6 Part 1 pipeline using the script wrapper.py.
|
||||
# 'ReadyToGo' files into a separate folder. It is intended to be run as part of the EukPhylo
|
||||
# Part 1 pipeline using the script wrapper.py.
|
||||
|
||||
import argparse, os, sys
|
||||
from argparse import RawTextHelpFormatter,SUPPRESS
|
||||
|
||||
@ -2,13 +2,13 @@
|
||||
# Author: Auden Cote-L'Heureux
|
||||
|
||||
# This script produces both taxon- and sequence-level statistics to describe the ReadyToGo files
|
||||
# output by PhyloToL Part 1, as well as some OG-level information from the Hook (OG reference)
|
||||
# output by EukPhylo Part 1, as well as some OG-level information from the Hook (OG reference)
|
||||
# database and the original input assembled transcripts. It relies on the utility script CUB.py
|
||||
# to calculate composition statistics (GC content, Effective Number of Codons, etc.). Both sequence
|
||||
# level and taxon-level stats are summarized in tab-separated outputs written to the Output folder.
|
||||
# This script requires that the OG reference database is available as an amino acid fasta file
|
||||
# in the Databases/db_OG folder with the same file name as the .dmnd file used in script 3. This script
|
||||
# is intended to be run as part of the PhyloToL 6 Part 1 pipeline using the script wrapper.py.
|
||||
# is intended to be run as part of the EukPhylo Part 1 pipeline using the script wrapper.py.
|
||||
|
||||
import os, sys
|
||||
import argparse
|
||||
@ -31,7 +31,7 @@ def get_args():
|
||||
description = "Updated March 31th, 2023 by Auden Cote-L'Heureux"
|
||||
)
|
||||
|
||||
parser.add_argument('-i', '--input', type = str, required = True, help = 'Input path to the "Output" folder produced by PhyloToL Part 1. This folder should contain both the "ReadyToGO" and "Intermediate" folders.')
|
||||
parser.add_argument('-i', '--input', type = str, required = True, help = 'Input path to the "Output" folder produced by EukPhylo Part 1. This folder should contain both the "ReadyToGO" and "Intermediate" folders.')
|
||||
parser.add_argument('-d', '--databases', type = str, default = '../Databases', help = 'Path to databases folder')
|
||||
parser.add_argument('-r', '--r2g_jf', action = 'store_true', help = 'Create ReadyToGo files filtered to only include sequences between the 25th and 75th percentile of silent-site GC content. Please be aware that these are not necessarily the correct or non-contaminant sequences; examine the GC3xENc plots carefully before using these data.')
|
||||
|
||||
|
||||
@ -3,10 +3,12 @@
|
||||
|
||||
# The aim of this script is to generate lots of codon usage statistics to aid in
|
||||
# identifying useful characteristics for de novo ORF calling. It is intended to be
|
||||
# stored in the 'Scripts' folder for the PhyloToL Part 1 pipeline scripts, and is
|
||||
# stored in the 'Scripts' folder for the EukPhylo Part 1 pipeline scripts, and is
|
||||
# called by Script 7b to calculate composition statistics for Part 1 output files.
|
||||
# It should not be run separately.
|
||||
|
||||
# Users should think about including start/stop constraint as default includes all
|
||||
# sequences, which can capture pseudogenes
|
||||
|
||||
# Dependencies:
|
||||
# Python3, numpy, BioPython
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
# Last updated Nov 2023
|
||||
# Author: Auden Cote-L'Heureux
|
||||
|
||||
# This script is run as the first step of the PhyloToL 6 Part 1 TRANSCRIPTOMES pipeline,
|
||||
# This script is run as the first step of the EukPhylo Part 1 TRANSCRIPTOMES pipeline,
|
||||
# before any sequence data are actually processed. It checks to ensure that the input
|
||||
# assembled transcripts files, databases, genetic codes, and conspecific names files (the latter
|
||||
# used only with cross-plate contamination, script 1b) are properly located and formatted.
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
# Last updated Sept 2023
|
||||
# Author: Auden Cote-L'Heureux
|
||||
|
||||
# This script is a WRAPPER for the PhyloToL Part 1 TRANSCRIPTOMES pipeline. Users should
|
||||
# This script is a WRAPPER for the EukPhylo Part 1 TRANSCRIPTOMES pipeline. Users should
|
||||
# use this script to run the pipeline, rather than running any of the sub-scripts (number 1a through 7b)
|
||||
# independently. To run an individual step in the pipeline, use --script X where X is the number (1 through 7).
|
||||
# To run multiple sets (usually all of them), use --first_script 1 --last_script 7, or whichever first
|
||||
@ -21,8 +21,8 @@ import CheckSetup
|
||||
def get_args():
|
||||
|
||||
parser = argparse.ArgumentParser(
|
||||
prog = 'PhyloToL v6.0 Part 1 for Transcriptomes',
|
||||
description = "Updated September 29th, 2023 by Auden Cote-L'Heureux. Link to GitHub: https://github.com/AudenCote/PhyloToL_v6.0"
|
||||
prog = 'EukPhylo Part 1 for Transcriptomes',
|
||||
description = "Updated September 29th, 2023 by Auden Cote-L'Heureux. Link to GitHub: https://github.com/Katzlab/EukPhylo"
|
||||
)
|
||||
|
||||
parser.add_argument('-s', '--script', default = -1, type = int, choices = { 1, 2, 3, 4, 5, 6, 7 }, help = 'Script to run if you are only running one script')
|
||||
@ -92,7 +92,7 @@ def script_four(args):
|
||||
if os.path.exists(args.databases + '/Taxa_with_few_sequences.txt'):
|
||||
with open(args.databases + '/Taxa_with_few_sequences.txt', 'r') as f:
|
||||
content = f.read()
|
||||
print(f'These samples do not run through PTL6p1, perhaps because they has no good hits to the hook. We suggest you remove them and restart.')
|
||||
print(f'These samples did not run through EukPhylo part1 because they have no good hits to the hook database or the Diamond sequence aligner ran out of memory. We suggest you remove them and restart.')
|
||||
print(content)
|
||||
print('Stopping Run.')
|
||||
os.remove(args.databases + '/Taxa_with_few_sequences.txt')
|
||||
|
||||
@ -1,29 +1,57 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
#SBATCH --job-name=PTL1_GBF
|
||||
#SBATCH --output=PTL1.%j.out # Stdout (%j expands to jobId)
|
||||
## Last updated Jan 2025 by Auden Cote-L'Heureux
|
||||
|
||||
## This script is intended to be used to process assembled transcripts with EukPhylo part 1 on an HPC that uses the Slurm workload manager.
|
||||
## The first part of the script contains Slurm-specific parameters that should be adjusted by users to fit their resource allocation
|
||||
## needs and restrictions, followed by some example commands taken from the GitHub Wiki, more detail for which can be found
|
||||
## here: https://github.com/Katzlab/EukPhylo/wiki/EukPhylo-Part-1:-GF-assignment
|
||||
|
||||
## SLURM-SPECIFIC SETUP BELOW
|
||||
|
||||
############### FOR SMITH GRID HPC ############### (DELETE section if not applicable):
|
||||
## Slurm specific code
|
||||
#SBATCH --job-name=EukPhylo
|
||||
#SBATCH --output=EukPhylo.%j.out # Stdout (%j expands to jobId)
|
||||
#SBATCH --nodes=1
|
||||
#SBATCH --ntasks=1
|
||||
#SBATCH --ntasks-per-node=64 ##change to number of srun when running multiple instances
|
||||
#SBATCH --ntasks-per-node=1 ##change to number of srun when running multiple instances
|
||||
#SBATCH --mem=160G
|
||||
#SBATCH --mail-type=ALL
|
||||
#SBATCH --mail-user=YOUREMAIL@smith.edu
|
||||
|
||||
#SBATCH --mail-user=email@xxx.edu ##add your email address for job updates
|
||||
module purge #Cleans up any loaded modules
|
||||
module use /gridapps/modules/all #make sure module locations is loaded
|
||||
|
||||
module use /gridapps/modules/all
|
||||
module load slurm
|
||||
module load tqdm
|
||||
module load Biopython/1.75-foss-2019b-Python-3.7.4
|
||||
module load BLAST+/2.9.0-gompi-2019b
|
||||
module load DIAMOND/0.9.30-GCC-8.3.0
|
||||
module load VSEARCH/2.21.1-GCC-10.3.0
|
||||
module load tqdm/4.66.1-GCCcore-12.3.0
|
||||
module load Biopython/1.79-gfbf-2023a
|
||||
module load BLAST+/2.14.1-gompi-2023a
|
||||
module load DIAMOND/2.1.8-GCC-12.3.0
|
||||
module load VSEARCH/2.25.0-GCC-12.3.0
|
||||
|
||||
parent='/beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams/'
|
||||
############### FOR UMASS UNITY HPC ############### (DELETE section if not applicable):
|
||||
## Slurm specific code
|
||||
#SBATCH --job-name=EukPhylo
|
||||
#SBATCH --output=EukPhylo.%j.out # Stdout (%j expands to jobId)
|
||||
#SBATCH --nodes=1
|
||||
#SBATCH --ntasks=1
|
||||
#SBATCH --ntasks-per-node=64
|
||||
#SBATCH --mem=40G
|
||||
#SBATCH --mail-type=ALL
|
||||
#SBATCH --mail-user=email@xxx.edu
|
||||
module purge #Cleans up any loaded modules
|
||||
module use /gridapps/modules/all
|
||||
module load conda/latest
|
||||
module load uri/main
|
||||
module load diamond/2.1.7
|
||||
module load VSEARCH/2.22.1-GCC-11.3.0
|
||||
conda activate /work/pi_lkatz_smith_edu/Conda_PTL6p1
|
||||
|
||||
srun -D ${parent}Scripts python3 ${parent}Scripts/wrapper.py -1 1 -2 7 -x --assembled_transcripts ${parent}AssembledTranscripts -o ${parent} -n ${parent}Conspecific.txt --genetic_code Universal &
|
||||
#srun -D ${parent}HQ/Scripts python3 ${parent}HQ/Scripts/wrapper.py -1 2 -2 7 -x --assembled_transcripts ${parent}Plate4/Assembled_Transcripts -o ${parent}Plate4 -n ${parent}Plate4/Conspecific.txt --genetic_code ${parent}Plate4/Gcodes.txt &
|
||||
#srun -D ${parent}HQ/Scripts python3 ${parent}HQ/Scripts/wrapper.py -1 2 -2 7 -x --assembled_transcripts ${parent}Plate7/Assembled_Transcripts -o ${parent}Plate7 -n ${parent}Plate7/Conspecific.txt --genetic_code ${parent}Plate7/Gcodes.txt &
|
||||
#srun -D ${parent}HQ/Scripts python3 ${parent}HQ/Scripts/wrapper.py -1 1 -2 7 -x --assembled_transcripts ${parent}Plate11/Assembled_Transcripts -o ${parent}Plate11 -n ${parent}Plate11/Conspecific.txt --genetic_code ${parent}Plate11/Gcodes.txt &
|
||||
#srun -D ${parent}HQ/Scripts python3 ${parent}HQ/Scripts/wrapper.py -1 2 -2 7 -x --assembled_transcripts ${parent}Plate18/Assembled_Transcripts -o ${parent}Plate18 -n ${parent}Plate18/Conspecific.txt --genetic_code ${parent}Plate18/Gcodes.txt &
|
||||
wait
|
||||
## PROVIDE YOUR PARENT PATH
|
||||
parent='/Your/Home/Folder/'
|
||||
|
||||
## EXAMPLE RUN COMMANDS BELOW
|
||||
|
||||
# A simple run that goes from script 1 to script 7 (the last script) using the Universal genetic code
|
||||
srun -D ${parent}Scripts python3 ${parent}Scripts/wrapper.py --first_script 1 --last_script 7 --assembled_transcripts ${parent}AssembledTranscripts -o ${parent}Out --genetic_code ${parent}Gcode.txt --databases ${parent}Databases > log.out
|
||||
|
||||
# Including the cross-plate contamination step, using conspecific names
|
||||
srun -D ${parent} python3 ${parent}Scripts/wrapper.py --first_script 1 --last_script 7 --assembled_transcripts ${parent}AssembledTranscripts --output . --genetic_code ${parent}Gcode.txt --databases ${parent}Databases --xplate_contam --conspecific_names ${parent}Conspecific.txt > log.out
|
||||
|
||||
293959
PTL1/Transcriptomes/TestData/Am_tu_He24_assembledTranscripts.fasta
Normal file
293959
PTL1/Transcriptomes/TestData/Am_tu_He24_assembledTranscripts.fasta
Normal file
File diff suppressed because it is too large
Load Diff
268771
PTL1/Transcriptomes/TestData/Sr_ci_Hx49_assembledTranscripts.fasta
Normal file
268771
PTL1/Transcriptomes/TestData/Sr_ci_Hx49_assembledTranscripts.fasta
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,89 +0,0 @@
|
||||
>NODE_2069_length_1109_cov_20.350386_g2025_i0
|
||||
GTATCAACGCAGAGTACGGGGGATGAAGATCCAGGTAATGTAGATTATCGTAGTATAGGTGGTTTAAATGATCAAATACGTGAAATACGTGAATCAATTGAATTACCATTAACTAATCCAGAATTATTTAAACGTGTTGGTATAAAAGCACCAAAAGGTGTATTATTATATGGACCACCAGGTACTGGTAAAACATTATTAGCACGTTGTATGGCAAATACTATGGATTGTAGATTTTTAAAAGTTGTTGCTAGTGGTATAGTTGATAAATATATTGGTGAATCAGCACGTATTATAAGAGAAATGTTTGGTTATGCAAGAGAAAATGCACCATGTATTATATTTATGGATGAAATTGATGCTATTGGTGGTAAAAGATTTAGTCAAGGTACATCTGCTGATAGAGAAATACAACGTACATTAATGGAATTATTAAATCAAATTGATGGATTTGAAGAATTAGGACGTGTTAAAATTATTATGGCTACAAATAGACCTGATGTATTAGATCCTGCATTATTACGTCCAGGTAGATTAGATAGAAAAATTGAAATACCATTACCAAATGAATCAGCAAGAATTGATATATTAAAAATTCATGCTTCAAAATTAACTAAATCTGAAAATATTGATTATGAAGCTATTGTTAAACTATCAGATGGTTTTAATGGTGCTGATATACGTAATATTTGTACTGAAGCTGGTATGATGGCTATAAGAGCTGATAGAAATTTTGTAAATCAAGAAGATTTTATGAAAGCTGTAAGAAAATTAAAAGATGCTAAAAAATTAGAATCAAAATTAGAATATAAAAAATTGTAAACATTTCTAATTGTGGTAATATAATTAACGTTTTCAAAATTAATTTTCCAATATTATTTAATTCACTTTTGTCTTAATATAAATTGTGGGGTTTTTTTATAAAAACAAAATTATAAATTTTACGCAAAATTTAAACATTTAAAATTAATTAATTTTCATCGTCCTGATCAACAAAATCAATAAGTTTACTTTGTTGTTTAGCTCGTATTTTTTCCATTTCAATTGCTGATGTATTACGTTCTTGTTTTTGTGATGAATCTTCTTGTCTTCTTTTAGCTCTATCATAG
|
||||
|
||||
>NODE_2305_length_1021_cov_17.683544_g2261_i0
|
||||
GGTATCAACGCAGAGTACGGGCATTTGAAGAAGAACAAAAACATTATTTAGCTAGTTTAGAAAATAATGGTCTAACAACACAGTTTAATCCTTTAATTTATTGCAAATTTCCATCTAAAGATATTACAAAAAATTTACAAGATCCTAGAAAATTATTTGTTGAATCAATGAAAAATTATTTGAATAATAATAATAAATTAATAGGTGCATTAGCTGGTACTATTGGCCAAAAATATATACCTGTATTTAATTCATATTCAAAAAAATTTAATATACAATTTTCACTTCCAAAAATGTAGTTAACATATACAATCTAATTTTGTAAGGTTTATAAAAGTTTAAATAATTTAATTAATTAAATTTGTTTGTTTTGTTATTTGTGTAATTGTTGTATTAATGCTTTGTTTTTATTAAAATATATTTAAATCACATGAAAGGAAAAATATTAAAATTATGAACAAATTATTTAAGACTTTATTATATAAATTAACGTACATTAAAACACATATATATTAATTTAAATTTAATTTAAATATGATGACCATCCATTAAAGAATCATCATATGTATTATATCTTACAAATGGTGGTATACCTGTTGTATATATATCAGTTGCTAATTCTTCAGAAGTTAACATTCCATCTTTTTCCGCATCACTTGATTCTTTATCCACTAATTTTCTTATTTTTGTTTCCATATTTTTTAATTCTTTCTCATCAAGCCAATTTTGTTCTAATATCATATTTCTTATATATCGTAAAGGATCAAATTGTGTTCTGTATTCTTGAACTTCTTCATTATTTCTATAGGATATACCTGGATCAGACATACTATGACCATGATATCTATAAGTCATAGCTTCCATAACAATAGGACCTTTTCCTGATCGACACCAATCTGCTGCAAAACGAGTAGCCTCACGAACACATAAAACATTCATTCCATCTACTTGAATACCAGGAATATAATCTCCTCTTGTATAAAATTCAGTACTAGCGGATGCTCTTTCAATTGAAGTACCC
|
||||
|
||||
>NODE_2310_length_1019_cov_82.463002_g2266_i0
|
||||
ACAGATTATAGATTAAGACGTAGACTAGTTCAACAAGATAAAAGAAAATATAATGCTCCAAAATGGAGATTAGTTGTAAGAATTACAAATAGAGATGTTATTGCGCAAATAGCATGTGCTAAAGTGCAAGGTGATCATATATTATGTGCTGCATATGCACATGAATTAAAAAGATATGGATTAAAAGTAGGTTTAACAAATTATGCAGCTTGTTATGCTACTGGTTTATTATTAGCAAGACGTTTATTAACAAAATTAGGTTTAGATAAATATTATCCAGGTAAAAAATCAATAGATGGTAAAAGATTTATTAGTAGAGTAACAAATGTAAAATGGCAACCAGATCAAAAAGTTTATAGACCTTTTAAATGTATATTAGATAAAGGTTTAGCACGTGGTACATCAGGTGCAAGAATATTTGGTGTAATGAAAGGTGCTATTGATGGTGGTTTAGATGTACCACACAGTGTTAATCGTTTTCCTGGATTTAAAAAAGGAGAAGACGGAGCAAAAGATGAATACAATGCAGAACGATTATCAGAACGTATTTATGGTCAACATGTTGCACAAAAAATGAGACATTTACAAGAAAATGATCCAGATAAATATAGTAGACATTTTAGTCAATATATTAAAGAAGGTATTGAACCTGATAATTTAGAAGAAATGTATGAAAAAGTTCATCGTGCTATACGTAAAGATCCAAGTGCACCACAAAAAGTTAAAAAAGATTATAATAATATGGATGTTAGAAAAAAACAAGCTAGATGGACACAAGAACAAAAAACTGATCAAAAAGTTCAGAAAATTGCTGATTTAAGATTAAAATTTAGTAAACAAGATAAAAAGAAAAATAATAATATGGATGTTGATGAATAATTTAAATATATGTTTGTTTGCTTGTTAATTGTTGGAAATCTTTTGCAGTATATTCTGATCAATTACCAATCTATTACGCACAAAATAGTCATGGTCCCATATTTCATTGACAATTTATCTATCGACCGACAGAACATGCA
|
||||
|
||||
>NODE_2554_length_947_cov_14.820366_g2508_i0
|
||||
GGTATCAACGCAGAGTACGGGAATGACAAATACAAGTGGTAAATGGGATCAAGTATCAGGCATTTTACGAGATGTAAATGTCGATATGGCAATGGAACAGGCTCAAACAAATTTATTGTACAAAAAATTGTTGGAAGATATGAGAGAACGAACAAAAGAAAGAGAATGGGATGATCCAAATCGTGTAACTATAACATCTGGTGAAAAAAACAATTCAAATAAAAATAATAACGATGATGAAGATTCTGATTCTGAATTTGACGATTTTTTCGATGATCCTGAAATGGATAAATTACATCAACAACGACTTGAACAATTAAAACAACAATACAAAGAAAAAAAAAAAAAATTAGAAAAGGGTCATGGTCATTATGATGAAATGAATGAACGAGATATGTTAAAAATGGCATGTGATACAGATTATGTAGTTGTTAATTTTTATAATGATGAATTTGAAAGATGTAAAATAATTGATAAACATTTTCGTATATTAGCACAAAAACATATTGAAGCACGTTTTGTTCGTGTTAATGTAAAAAAATCACCATTTATAGTATCAAGATGGAAAATTAAAACATTACCAACAATAGGAGTTATTATTAATGGTTTATTTGCTGATAAAATAATAGGATTTGCAGATTTTGGAAATAAAGATGATTTTCCAACTGCTGCACTTGCTAAAAGATTAATAAAAACTGGTGTATTAAAAAATTTACAAACTGGAAAACGTAAATTAAGAAATAAAAGAACAACTGCATTAACAGATATTGAATAAACATATCAACGTATTAACATTGTAATCAAATTTATAATAATGTTAATATTTAACCAATTTTTGTGATCATTTTGTTGTTGTTTTTTTTTAATTTTGAAAAAAAATATTGAAATTTTTTCAAATTGTGATTGTGTAAACTGTTTTTAAATATCGAGATATATCATAACAAAGG
|
||||
|
||||
>NODE_2573_length_940_cov_56.809689_g2527_i0
|
||||
GAGTACGGGCAGAAGAAAACGTAAAGCAAATTGTTTAAAGAAAAAATTAAAATTAAGCATACCAGCACCTGAAGGTTTAAAAAAAGCCTGGGAAAGAGAAAACCGTATTAAAAAATTAAGATCAGAAAGAGCACAAAAAATATTAAAAAATTTACCAAACAAAAGAGATAAACAAGCAAAACGTATTGAGAATTATGAAGCAAAATATTCAAATATAGTACGTAGAAGACAAGAAAATGAGTTAAAAGCACGATTAAATGGAAATTTTTATAAACCAGAAGATTCACGAGTAATGGTAGTAGTAAGAATTAGAGGTATAAATCGTGTATCACCTAAAGCAAGAAAAGTATTACAATTATTTCGTTTATTACAAATTCATAATGCAGTATTTGTTAGAGTAAATAAAGCAACAATATCTATGTTAAAATTAATTCAACCTTATGTAGCTTATGGTTATCCATCAGTAAATAGTATAAGAGCATTAATTTATAAAAGAGGATATGCTAAAATAAGACATAGACCAGGTAGTATATCAAGAATACCAATTATGTCAAATAAATTAATTGAAAAACATTTAGGTAAATATGGTATTGAAACTATTGAAGATATGGTATTTCAAATATATACTGGTGGTAAATTCTTTCGTCAAGCATCTAACTTTTTATGGCCTTTTAAATTAAATTGTCCAAAAGGTGGTTATAGAGGTAGAAAACGAAGACATTTTAACGAAGGTGGTACTTATGGAAATTGGGAAAATAATATTGGTAATTTAGTCAAAAGAATGATTTAAATTATACTTGCGTTTGTTGTTATTAATTCAAATTTTAATTCAGTTTGATTATATTTCGAATTCGCATTGTTTACAATTATTGCGAATCATCGATATACTATTAAATGAAAATTTGTCTTCTAGATTGACAATTCTTGATAAAATTAATTC
|
||||
|
||||
>NODE_2620_length_930_cov_17.252042_g2574_i0
|
||||
AAAAAAAAAAAAATCAATTGAAAAGTTTTACTTTTAAAGTTTCACACAACACGCACATAAAAAACAAAGTTAAAGAGAAGTAATATTAGCAACACCAGGTAATACTTTACCTTCTAATAATTCTAATGAAGCTCCACCACCAGTAGAAACATGACTAACTTTATCTTTTAATCCAGCTTTTTTAATAGCAGATGCACTATCACCACCACCAATAATAGTAACAGTACCATTTTGAGTAGCTAATGCAAATGCATTACATAAAGCAGTAGTACCTTTACTTGTTTTTTCCCATTCAAATACACCAGGTGGTCCATTCCATACAATAGTTTTAGCACTATTTATACTATTTTCAATTAATCTAATGGTTTCAGGTCCAATATCTAATCCCATCCAATTATCAGGTATACCATCTTTAGCAGTACATGTTTTTATTTGACAATTTGGATCAAATTTATCACCAATTACAAAATCAACTGGTAATATTATTTTAACATTATTTTTTTTAGCTTTTTCTACTAAATCAGATACTGTTTTTGCACCTTCTTCATCATATAATGATGTACCAATTGACATATTATTTATTTGTTTTAAAAATGTATATGCCATACCACCTGCTATAACCATTTCATTTACACGATCTAATAATGAACTAATTACTTTTATTTTATCTCTTACTTTTGCACCACCTAATATTGCTAAATATGGTTTTTTTGGTACTGCTAATGCTTGATTAAAATAATTTAATTCTTTTTCTACTAATAAACCACTTACACGTAATGGTACATCAACACCAACCATACTACTATGACCACGATGACATGTACCAAATGCATCATTAACATATAAATCTGATATTGATGTCATTACACTTCTTAATGAAGATATTTCTTCTTTTGATGCCCCGTACTCTGCGTTGATACCCTGTCTCTT
|
||||
|
||||
>NODE_2637_length_926_cov_4.390387_g2591_i0
|
||||
GGTATCAACGCAGAGTACGGGTGCATCCATGGTGTAATAAAAATATATCTATATGGAAAAAATATAATAAATATAATAGTAATTCATTTACACAAATAAATTTTAATCCTTCGGTTCCTCAAAGGGTATTATTATATAATAATTATCATTTAGTTTTATTAATTTTAAATAAAAATTTACAAAAACATAATTTTGATTTGTTTGATATTAATATAAATCATTCAAATAAACGAAGAAAATTAAATAATAATAATATTAGTCAATATAATAATAAAAAACACAACAATACAAACAAAAATAATTATAAAATCAATGAATTAAAAGTAATAAAAAAATATCAAAATATTTTATTATCAAGTTTTATAAATTGTGATGAGTTATTACTAATTGAAATGAAATGGGATAATATTACTCAAAAATTAAAAAATCCAATATATTCAAAAAAATATGGTATTTAATTAATCGACATATTGAAATTTAGTTATTTTTGTATGTGTTTTATTGATTTTGTATTTAATTTGATAATTTTTTAGCTAAATTGATATTAAAATTACAAACAATCTATACTTCTATATAAGATTTATTACTAACATTTCCACTACTAATTAAAGATTTTGGTCTTTTATCACAATAAACACGTCGACAATTATACATTAATCCTTGATCGTATGGATTTACATTTTGTGAATAAGTATTTTTAACATTTTCATTTGTAGTTTGATTTATAATAATTAAATGTAAATGATAAAATGCTAATGAAGATACACTAAAAAATATTAATATACCGAATACAAATAAACCAATAGCAAATGGATTATCTTCTATGCTTGAAAATAATTCATCTGACCAACTACGATGTTTATTATTGTTATCATGATTATTATTAAATGAATTTGCTTTTTCAATTGCTTTTCGATAAATAACCC
|
||||
|
||||
>NODE_2714_length_902_cov_47.945718_g2668_i0
|
||||
AGAGACAGGGTATCAACGCAGAGTACGGGGAATTATTAAAAACAGAAAATTGTGAATTATTATTTACAGATGATGCAATTGATAAAATAGCAGAAATAGCAGTTAGATGTAATGAAAATATTGAAAATATAGGTGCTAGAAGATTAGTTACAGTATTAGAAAAAGTTATGGAAGAAATTAATGTTAATGCTTCAGATGAAACTTCTAATAAATATGTTATTGATGTACCATATGTAGAAAAACAAGTTGAGGATATATTTAAAGGAGATGATTTACAAAAATGGATTCTTTAAATGTAAATTATTCAATAAATTCATTTTTTGATTCACAAGTAATGTACTTCAATTAAACCAGTTAGCTATTCACAACTGTATACGTTAATTTCATGTACTGAATTTCTGCAACTATGAATTATTTAATTAAACAATATCCCTCAATTGAACATTAAATGATTTATTTACAAATTGAACTATATCGACGTCATTCATCAGTATTTTGAGTTGTATTATCTTCCTCAACAACTTCATTGTCTTTCTTAGTGTTGTCTTCATTTACTTCATCATTATTATTACTGATTTGTGTCGTATTATCATTGTTAGTCTCAGTTTCAGAATTGTTGTTATTTTGTTTTGATTGATTTTCATTATCATTTTGAGTTTCTGTTTTATCTTCTTTTTCTTCGTTGTCTTGTTCATCACCACAAAAATATTGGTTCATTTTTTTTAATGTGTATTCTCTCAAAACTAAATTATCTTCCTCTTTATCACCAAATCCACTAGCTGTACCATTAAATTCTAAAATATATTCTTCATCTTTGGTTGTTACTAAAACATCTACTGTAACAATATCCATACCACCTGCAAATTTAGCACATTCATCTGCCCAGTACTCTGCGTTGATAC
|
||||
|
||||
>NODE_2774_length_888_cov_5.245399_g2728_i0
|
||||
ATTTTCATCCAAAACACACAACATCCAAAATTTACAAAATTGAATTTCGAATAATAATTTACAATTTAGCTTGTTTAGCAGCCAGATTATACATAATTTCTTCACTACCAGCTGCAATAGCACTTGCTCTAACACCTCTATAACCTTCTTCAACTCTACCAGCTCTACCACCTCTAACATAACTTCTACCACCAAATATTTGTGAACAGTCAATTAATATTTTTTGATATGATTTTGTAATATGTACTTTCATTAATCCTACATTACGTGGTATACTTTTATCTTTACGTCCATATTGATCAAATGATAAATCGTATGCCATTTTTTCTAAAAATAATTGACAAGTAATAGCTAATCTTGAAATATCAGCAATTTTATGTTTAATTACTTGATGATCCATTAAACGTTTACCAAATGTTTTTCTTTCTTTTGCCCATAATATTGTATCTTCTATTGCTGCTCTCATACCTGCTACTGCATCTGCACATACTACAAATCTTTCAAAATTAAAATTATACATTAATGGTTTAAATCCTTTATTTTCTGTACCAATGAGATTTTCAACAGGTACTTTCACATTTACGAATGTAACAGATGCTGTATCATTTAAATCAGCTCCTTGCATATTTAATTTTGATGTATATACACCTTTACAACGTGGTATTAACATTAATGATACTTGACCTGGTCGATCACCTGTTTTACATAATGTTGTAAAATAATCTGCTCGGGCGCCACCTGTTATCCAATATTTTGAACCATTTACTACGTAATATTTTCCATCTTCAGATTTAACAGCAGTAGTTTTAATTCTAGCAACATCACTACCACCAGTCATTTCACTAATAGCTAAAGATATTAATTTATCCCCGTACTCTGCGTTGATAC
|
||||
|
||||
>NODE_3004_length_845_cov_42.306995_g2957_i0
|
||||
GTATCAACGCAGAGTACGGGGTAACAAAAATGGATTTCGCCTCCCCAAAAGGTGGTTTATCCCGTGTAGATGGTTCAAGTGTAACAGATTTCGCCAAAGATGAAGAATGTGTTCAATTTACCAAAGATGTTTTGAAGAATCGTGACAATAAAGACAATCCAGGTGAAATAGATACAATCGCATTATCCAAAATAAATAGTAAAGATTACAATGTAATATTTTTTGCAGGTGGTCATGGTACAATGTGGGATTTTCGTGATAATAAAGATGTAAATAGATTAGCAAAAGAAATATATGAAAATGGTGGTATTGTTAGTGCTGTATGTCATGGTCCATGTGCATTATTAGGTATTAAATTATCTAATGGTGAATATTTAATTAAAGATAAATTAGTATGTGGTTTTACCAATGATGAAGAAGAAGCTGTTAATTTAACTAAAGTTATGCCATTTTTATTAGAAACGGAAATGAAAAAAATTGATGGAAAATTTGTTTCAAGAAAAAAACTGGTCTTGTTGTGCTGTATTAGATAGAAGAGTTGTTACTGGACAAAATCCTGCTAGTGCTGGTAGATGTGCAGAATTAATTTTATCTTGTTTTAAACCGGATGAATTAATTGAAGAAGAAGAAGAACAACCAAAACAAAATATGATGTTTGATGATGATGAATAAAGTTGTGTTTAACTTGAAGTCAGATTTGTAAATAGCTAAATTAGTGACTGTTTCTTGTAGCATTATTTTTAAACTTCGACTCAGAAAATGAGCAACATATTTTTGACCTTGAAATGTTTTGGTTTTGTTTGTTCACTAACTTTACTTAATGCACTTTGTGACAGCTGTCTCTT
|
||||
|
||||
>NODE_3014_length_842_cov_25.146944_g2967_i0
|
||||
GTATCAACGCAGAGTACGGGGGAAGAGGAAGAGGTAGAGGTGGTAAATCATTAGAACAATGGCAACCAATTACTAAATTAGGTAGATTAGTTAAAGATAGTAAAATAAATACTTTTGAAGAAATATTTTTACATAGTATTAAAATTAAAGAACCTGAAATTGTTGATTTCTTGCTTGAAAAACTTGGATACGAATTGAAAGATGAAATAATGAAAATAAAACCAGTCCAAAAACAAACAACAGCAGGTCAACGTACACGTTTTAAAGCGTGGGTAGCAGTAGGTGATAGTAAAGGTCATATAGGATTAGGTCAAAAATGTGCAAGTGAAGTAGGTATAGCAATTCGTGGAGCACATATATTAGCAAAATTATCATTAGTACCAATACGTCGTGGTTATTATTTATCAAAATTACGTGATCCTCATACAGTACCTGGTAAATTAACAGGACAATGTGGTAGTGCTAGAGTAAGATTAATACCTGCACCTCGTGGTACTGGGTTAGTTGCTGCTGGTGTATGTAAAAAAATGTTAGGTATGAGTGGTATTGAAGATATTTATGTATCTGCAAGAGGTCAAACTAGAACTACTGGTAATTTTATTACTGCACTTTTCCTTGCATTAAGAAAAACTTATAAATTTATTACTCCTGATTTATGGGCACCATTTGCATTAAGAGATAATCCATTAGATAAATATCAAATTAAACAAGAAGATGATCAAAGAAGTTAAAATTAATATCATTATGACAATTTTGAAAAAATATTATCAATTGATAAAATTGTTGGTTTTTTTTGTAACAAGTTGAAGTTTTTGCATCATATGTTTTGCATGTTATGTCTC
|
||||
|
||||
>NODE_3028_length_840_cov_13.483703_g2981_i0
|
||||
GAGACAGCTCGAATGCGCACTGAATTAAGAACGCAAAATAAGGTTTAATTCACATTTACACAATTTTGAATGACAATAACAGACTAAGATAGTGTATTTTCCAAAGTGCCATCGTGGTTGTTACAGTACTTGCAGTATAAAAGAATTAAACAGACACGTTAACATTTGCACAAAATACAACTTTCTCCCATTTTTCGTTTAATTCTAAATCATAAAATATATCATTACCGCTAATATTGCAATCAAAACAATGATCAAACAAATTTTTTTTCCTTCAGAATGACCCATTAATTTATCTAATTTTTTGGTAAGAGAGTTTATTTTACCCATTGCTGTGTCCATTTCAGTATCAACTTCGTCCAACAAAATTAATTGATCTTCAAGTTCTGCTTTAATATTGTTTCCATGAACACCTAATCGTCGCAATGTACCCAACATATCATCTAATACTAAGTCTTGTTCGTCCCTTTGTATTTGTTTTTGGGTTTGTGTATCCTCAATATAATTTTGATTTATTTCTTCTACCAATCTGTTCTTAGGTTCTGTGTTCTGTTTTTCTGATTTTATACGGTCTTGTTTTAGTTTTTCTCTAGTTCGATCAGATTGCATATTATGACGACATTCATCAATATATTTTTGAGTATTGTTAAGAAATTCTTCTCGAGATTCTAATTCATATTTACTTATATGTGGATAATTTTCAGGATTATTACGCACAGAATCGACTGTAGATTTACGAATTTCTTTTAATGCATGTTTAATATCTTTATATTTTCCTCTTAAATTATCGGTTAATTCTTTAAATCGTTTATTTTTTACCCCGTACTCTGCGTTGATACC
|
||||
|
||||
>NODE_3108_length_826_cov_13.136786_g3061_i0
|
||||
GAAAGAAATGTGAACTTTCAATTTTTGTTTGAGAATTTCAACAAAAATTTTTGCAATTTTAAACACAACAAGCAGTACTAAAAAGAAAAATAATACGAATTCTAATAGTAATAAATTTAATTTTTTTTTCTTTGTGTTTTTGTTTTTTGTTTTTGTTGATTTCTTTTTTTACTACCTTTATTTTTCTTTTTTGAACCTTTTGATTTTTTTTTACTACCTTTATTACTTTTTTTATTATCATCAACATCCATTACATTTTCACCTCTTTCTCTTCTTTGTTTTCTTTCAATTTGTTTACGTTTTTGTTCTTTAATTCTATCTTTTTTAGTAGTACCAAAAAATTCTTTTTTTTCAACAGCAGTTTGAAATCTACCAACACCCATTTTACTCGATGTATCAATAAATTTAAGATTAATTTCTTCAGATGCACCTGAACCACTTGGTAATACAACAGGTTTTCTTAAAGTAATTGGTCTTTTTCTTGGACCAGGACAAGTACCTTTTATCATAATATAATCTTGTTTAACAATACCATAATTAACAAAACCACCAAGTGGAGTTATATTTTTGGATGTTAAATCATTTTCAGTTGATGCATTAAATGATGTTGGTTTACCATTTTCATCATATTCAATTGCTTTACCAATTCTATAAATTTTTTTATGAATTTCTGTTCTATGATGATAACCTTTTTGACCTTCTCTAGCAATTTGATAACCAACACGTGCTGGATGCCATGAACCAATACATGCTACTTTTCTTAAACCTCTATGTGTTTTTCTAGGTAATCTAGTTACACCCCCGTACTCTGCGTTGATACTGTCTC
|
||||
|
||||
>NODE_3121_length_824_cov_4.390146_g3074_i0
|
||||
GACAGGTATCAACGCAGAGTACGGGGGTATATTTAATTTTGAAGGTGGTTGTTATGCTAAAACATCTAAATTATCATTAGATACTGAACCTGAAATTTATCGAGCTGTAAAATTTAACGCATTGATGGAAAATGTTTGGATATCACCATACAGTCATGATATTGATTATTTTAATTTATCAATAACAGAAAATGGTCGTGTATCATATCCAATTGAACATATAGATAATCGTGAAGATTCATTAGCAGGTGGACATCCAGAATATATAATATTTTTATGTTGTGATGCATTTGGTGTATTACCACCAATAGCAAAATTAAATGCAGGTCAAAGTATGTATCATTTTATAAGTGGTTATACAGCTAAAGTAGCAGGTACTGAAAGAGGTATTAAAGAACCACAAGCAACATTTAGTCCATGCTATGGTGCTGCTTTCTTAACATTACATCCAATGGAATATGCAAGATTAATGAAAAAAAAATTAGAAAATCATAATGTTGATTGCTATTTAGTAAATACTGGTTGGACTGGTGGTCCATATGGTGTTGGTGAAAGAATGAGTATTAAAACTACTAGAAATTGTATTAATGCTATATTTAATGGTGCTATCAAAAAATCGAGATTTAGAGAAGATAATTTATTTAAATTTAGTGTACCTGAAAATATACCAAATGTTGATAGTAATTTATTAAATCCTAGAAATACATGGAGTGATAAAAATGCATATGATGAAGCTGCTTTAGATTTAGCGGATAGATTTGCTGTAAATATTGCACAATATACGGATGATGTTAATGAATATGAAGGATGTGGACCTATTGGAC
|
||||
|
||||
>NODE_3128_length_822_cov_12.560748_g3081_i0
|
||||
ATCATGTTGAATTAAGTCTAACAAATGTTCTGTAGACCATAATTCAACATGATCACTGAATCCAAATGAAGTAAATAAACAAAAAACAAGCAAAACACTAGACATTTTATTACTCTACTTTAGTTTTAAATTAATTTGGAATTGTCCAATTAATTTCATCTTTTCCTAATTTTTTCAATAATGAATTTGTTTTTGAATATGGCTTACTTCCATAAAAACCTCTTGAAGCAGACAAACCTGATGGATGCGCAGATTTAACTATTTTATGCAAATTAGTATTTATAATTAATTCTTTTTTTTGTGCTTGTTTACCCCATAATATAAATACAACACCATCTTTTTTTTTATTTGATATTGTTTTAATAACACTATCTGTAAATTCTAACCAACCAAATTTCTTATGGCTATTTGCTTTGTGAGCTTCTACAGTTAATGCAGTGTTTAATAATAATACACCTTGTTCTGCCCATGTAACTAAATTACCATGATTAGGCCTTTTAAATGATTTACCTAAATCACTTTCTGCTTCTTTATACATATTTCTTAATGATGATGGAACTTTAATACCTTTTGGTACACTAAAACATAAACCTTCTGCTTGTCCATCATCATGATATGGATCTTGACCTACTATAACAACTTTTAACTTTTCCCATGTACATAATTCAAATGCTCGATATACTTGATGTTTTGGTGGAAATACCTCTATTTTCGGATCTGATTCTACATTTTTTAAATTTTTAATTAATTTTAAAAAATATGGTTTTTTAAATTCATTTTGCAACATTTCTTTCCATCCCCCGTACTCTGCGTTGATACCAC
|
||||
|
||||
>NODE_3147_length_818_cov_35.135570_g3100_i0
|
||||
TTCAATGAATTGAAACTAAACCACAAAAAAAATCAAAAAACAATAAAATTGCACCCCAATAGACAAGACTTTAAGAGTCAAGAAAAAATTCAAAAAGAAACATGTGCAACAAACAACAAACTAATTAACAAAAATGAACTTACAAAAAAAAAAAAAATAAAATAATTTAATTTTCACCACATTTTGAAATATTTAATTACCACCACGTAATCTTAATACTAAATGTAAAGTACTTTCTTTTTGAATATTATAATCACTCAAAGTACGATTATCTTCTAATTGTTTACCTGCAAATATTAATCTTTGCTGTTCTGGTGGTATTCCTTCTTTGTCTTGAATTTTTGCTTTGACATTTTGAATTGTATCATTAGCTTCAACATCTAATGTAATTGTTTTACCTGTTAATGTTTTTACAAATATTTGCATTGCACCACCACGTAATCTTAATACTAAATGTAATGTACTTTCTTTTTGGATATTGTAATCACTTAAAGTACGATTGTCTTCTAGTTGCTTTCCTGCAAATATTAAACGTTGTTGTTCAGGTGGTATACCTTCTTTATCTTGAATTTTAGCTTTAACATTTTGAATAGTATCATTAGCTTCAACATCTAAAGTGATAGTTTTACCTGTTAATGTTTTCACAAATATTTGCATAGCACCACCACGTAATCTTAATACCAAATGTAATGTTGACTCTTTTTGAATATTATAATCACTTAATGTACGATTATCTTCTAATTGTTTTCCTGCAAATATTAAACGTTGTTGTTCAGGTGGTATACCTTCTTTATCTTGAATTTTAGCTTTAACATTTT
|
||||
|
||||
>NODE_3153_length_817_cov_5.100806_g3106_i0
|
||||
CCACAAATTCTAAAAATAAAAAATATGTTTCCTTGATTCTTGTTTTATTTTTATACAAGATCAGGTACTGCTTCATCATCTTCTTCATCTTCACCATCTTGAACATTATTATCTAAATTAGCTAAATCTTTATTTAACATTTCATTAATAAAATCTTTTTCTTCATCTTTTTCATTAATAATATCTCCACGTGCTTGTAAATTATTCAAAATATTATCACTTGCTTCATTATTTTCTTCAGATTTAACAGGATATTTTTGTCTTAATTCTTCTCTTCTTTTTTCAATTTTTTCTAATTCTTCTTGTCTTTGTTTAGTAATATTTTTCATACATTTATCTAACATATTTTGTGATCTTCAGCATTTGGTGTTAATCTTAATACTTTTTGAAATGCAGTTGCTGCATCTTTATATTTATTTAATTTCATTAATACCATACCACGTAAATGATGTCCTTTGCCATAATTTTCTAATGGATCTAATTCACGAATTTTTTGACAATCTTCTAATGCTTTTTCGTATTGATTATTTTGATAAAACATTAATACTCTATTGCTATATAATATAATATTTGTAGGATGTTTTTCAATTGCTTTTGATACTTTTTCTATTGCTGATGTGTAATTTTTTTCATTAAAATCTTTATTTGCATCTGTTTTTATTGTTTGTGCTTCTGACCAAAGAACATTTAATTCATCATTAGTTCTAAATTCAAGTGGAGTACTTTTTCTTAATTTATTTAATTCTAGTTTAATTTGTTGATATGTCCAATTTTCACATTTTTTATTTCCTATTCTAATTAATTGACTACCAATAGG
|
||||
|
||||
>NODE_3211_length_808_cov_24.370068_g2924_i1
|
||||
GATAGAACATTATCAGATTATAATATTTAAAAAGAAAGTACATTACATTTAGTTTTAAGATTACGTGGTGGTGCTATGCAAATATTTGTAAAAACATTAACAGGTAAAACCATTACTTTAGATGTAGAAGCTAATGATACTATTCAAAATGTCAAAGCTAAAATCCAAGATAAAGAAGGTATACCACCAGAACAACAACGTTTAATATTTGCTGGTAAACAATTAGAAGATAACAGAACATTAAGTGATTACAACATTCAAAAAGAAAGCACATTACATTTGGTATTACGTTTAAGAGGTGGTGCAATGCAAATATTTGTTAAAACGTTAACTGGTAAAACTATTACATTAGATGTTGAAGCAAACGATACAATTCAAAACGTAAAAGCAAAAATTCAAGATAAAGAAGGAATACCACCAGAACAACAACGTTTAATATTTGCTGGTAAACAATTAGAAGATAATCGTACTTTAAGTGATTACAATATTCAAAAAGAAAGTACATTACATTTAGTATTAAGATTAAGAGGTGGAGCAATGCAAATATTCGTAAAAACATTAACTGGTAAAACAATAACATTAGATGTTGAAGCAAATGATACAATTCAAAATGTAAAAGCAAAAATTCAAGATAAAGAAGGAATACCACCAGAACAACAAAGATTAATATTTGCAGGTAAACAATTAGAAGATAATCGTACTTTGAGTGATTATAATATTCAAAAAGAAAGTACTTTACATTTAGTATTAAGATTACGTGGTGGTAATTAAGCTAGAAACCAAATCAATAAATCACTCAAATATTTTC
|
||||
|
||||
>NODE_3261_length_799_cov_53.852617_g3212_i0
|
||||
GTTCTAAACCTCGTGGACCTGGTATGGAAAGATCAGATTTATTACGTGATAATGGAAATATATTTGAAAAATTAGGAAAAGTAATGAATACAAAAGCAGCAAGATATTGTAGAACTACAGTAGTAGGTAATCCATGTAATACAAATTGTTTAATATTAGCAAGCAATTGTCCTGATATTGATCGTAATAATTTTACTGCTATGACTAGATTAGATCATGATAGAGGTTTATCATTAATATCAAGTAAAGTATGTTTACCTGTAAATGAAATAAATTATTTTAGTATTTGGGGAAATCATAGTGCAAGTTTATTTCCTGATTTATCAAATACATTAATACATGGTGTTGAATGGGACGAATTAATTGGTAAATATAAATCTGATAGATTTTTTAGAAATGAATTTATTCCAAGAGTACAACAGCGTGGTGCAACTATTATAGATGTTAGAGGATCATCTAGTGCTGCTAGTGCTGGAAGTGCATGTTTAGCTCATACAAGAGATTGGATATTTGGTACACCACAACCTGATTGGACATCTATGGCAATATTTAGTAATGGCGAATATAATGTACCAAATAATTTAGTATTTTCTTTTCCTGTATGGTGTAAAAATGGTTATTATCAAGTAGCAAGTACACCATATCAAATAAATGCATTTCAACAATATTGGATTGAGAAAAATATTCAAGAATTGAAAGATGAGCGAGATATGGTTTCTAATTTTGTTAGATAATTAATTCTAATGTAAAAATTTTAGAAAACCTTTTTGACTTTTTGTGTTTTTTTTGAGTAATTTAT
|
||||
|
||||
>NODE_3269_length_798_cov_24.885517_g3220_i0
|
||||
ATATAAAAGCGCAAAGATTCCCAAGCCAGGAGAAAAAAAAAAAGAAAACACAAGCAACAAAACAAAAAAACGAATGAAGTAGAAAACTAAAAATAAACTATCACAAAAATATAACGCAAAATGATAAGATTGTATTCAATGAATAATGATGTTTAAAAGATTTTTTATGACTGTGTTTAATTGATATTACTATTTCATAAATTAGTTCTTATGAACTACTGATACAACCTTACCAAGCATAATTAATGAATTACTATCCATAGCAGCAACTCTACCTAATGGTTTACAATCATCAAATGCACATACAACAAATGGCATTTTAGGTTTAAATACAACTTCTGCTTGATCACCAGCTTCAATATATGGTGGCATTTCAACTTTTTGATTATTTGTTGATTTACCACTTTTCCATTTTATTTCAACCATTTGACATGGTGCTTTAGCAGTTCTAATATGTATACTTGGTGTAAAACCACCTTTATATTCATCATTTTGTTTTTTAGCACATTTTAATTGTCCAGGATGATCTTGTACAAATACTAATGCTGTAAATGTATCAGCTTGTTTTGGTGGTTTAGGATCATCTTCATCATCATTACACATAACATCTCCTACTTTTGGCATATTTTCTTTTTTTAAACCTTTGACATTTACACCTACATTATCACCATGGACTGCTTTTTTTACTGTTTTATGATGCATTTCTATGCTGAATGCTTTACCTTTTACACCAGATGGATAAAATCTTACATTTACATCTGGTACTAATTTACCTTGTTCAATACGTCCCCCGTACTC
|
||||
|
||||
>NODE_3271_length_798_cov_13.121379_g3222_i0
|
||||
TAATTTTTGTGTGCTTTATCGAGTACAAAAAAGTTTAACGTTAACACTAACACTAGAACACTAAATCAACGAACAATTATAATATGACGTGCATGACTGTACGTTAATATCAATAGTCTTAGTAAATATTATATAATATGTGAATTTGAATACACAGGTTAATCTAACTCAACAGAAAAAGCAATAATAAATATAATATAAGCAGCACATAATATATATCCATGAAATAATTTTAACGTTAATGATGATTTATACAATAAAAAACATAATACTATTAATAAAACACCTAATGCCAATAAAAATAATTCAAATCCAGTATCTGATTCAACATCTAATGTTTTAAAACCTATCATAACTGATTTTAAAAAAAATGATAATCCAACACATATACAAATATCAAATACATTTGAACCTAATGCATTTGATACTGCCATAGTACCTTTACCTTGTTTTGCAACTAATATACTACTAAAACAATCTGGTAAAGAAGAACCAATTGCTAATAATGTTAATCCCATAACATCTGCATTTAATTTAACACAATTACCGATTTTATTTGCACAATCAACTGCTAAAAATGTTAAAAAACCCATCCATACTATTGATGCTATAAATACTGCTGATAATTTACAATATGTTATTATTTCTGATATTGGACGATCATTTGCATCATGTGGATCATGTTGTACTGCTTGAGAATTTGATGATTTATTTATACTTGTATGATAACCAGGTGAATTTAAATCTGGTATTGTATAACTAAATATAATTCGAAATGGTATTAACAATATTTTAAGT
|
||||
|
||||
>NODE_3272_length_798_cov_8.382069_g3223_i0
|
||||
AAAAAACAATACAAAATGAAATAATATCTCAAATATCCAAATCTGAATCATCTTCTGATTCTTCTTGTTTTATTCTTTTTTTTTCTTGTTTTAATTTTTTTTCAAGACCAAGAAAAGTTTCTTCAAAAAATCCGTATTCATACAATTTTTCTTCTAATTTAATTGTTTCTAATTTACCATCTGGTGCAACCCAATCTAATCCAACTAATTTTCTATCAACTTTAAATTTTTTGGAACAAACTAATGTTGGTAACATCCATATATTTAATTTTTCCATTAAATAACTTGCATTTTTTGCTTCAATTTCTACAAATTTACATTCAATATGTTTTCGAGCCAATAATGTTAAATGTTCCCTTAATGTATTACACCATTTATTTGTTTTATTAAAAAAATGACATATGACATATTCTGATGTTTTAACTTCATTAAAAAATTCACGTTGATCTGGTAATAACACCATACGTCCATGATTTGACGACATCCATTTTTTTCCCCGTACTCTGCGTTGATACNNNNNNNNNNGTTTTAGTTGCTCTAAACGTTTTTTCCTTAGTTCTTGAACATCTTCTTTTGAAAGATTATTTAATCGTTCAATTTCTTCATCAACTTCTCGTTCTTTGTCATTTAAAACTTTTTGCATTGTTGTAGTGACTTTGTCACGAACTAATTGTTCCATTTGCCATTTTTTGTATTGGTTTTGAAGGTCTTGTTGACTTTGTTTTGACATTGCTCACCACACAAGTTTTAAAAAGTTTGTTTTGTACCCGTACTCTGCGTTGATACCACCTGTCTCTT
|
||||
|
||||
>NODE_3363_length_786_cov_44.642356_g3313_i0
|
||||
TCTATATAAAGCTAAACCGATCATTTGTATAGATAAAAAAATGTCCACAAACACAACAAACAAACACAGATGACTCACACCATTAGTACATATGATCAATGACAACCATGACATGCAAAATAGATCCATATGTACACTGAAAATTAATATTAAATTATAAATTTACTTATAAAAATTGTCTTAAAATGTTTTCTAATTTAACGATATCATCAGCAAATCTACGAATACCTTCAGCTAATTTATCATTTGCCATAGGATCTCTACACATACCATATCTAAAATTAGGTTCTGTAACATTTAATTGTTGTTTATAATCACTTTTTTGACTTGGTTCTAATACTCTTGTTATTTGATCATTACTATTAGTTAATTGTTCTAAAAATTTAGGACCAATTGTTAATCTATCACATCCAGCTAATGCTAATATTTGTTCTTTATTTCTAAAAGATGCACCCATTACAATTGTTTTAAAACCAAATGTTTTATAATATCTATAAATTTCTAATACATTTTTAGGACCTGGATCATCTTTAATATCAAAACCGTCAACACCTTGATTTTTTTTATGCCAATCTGTTATTCTTCCAACAAAAGGTGAAATTAAGTAAGCACTACCAATTTTAGCAGCAGCTGCTGCTTGCCATATATTAAATAATAATGTCATATTACAATTAATATTAAATGCATGTAATCTTTGACATGCTTGAATACCTTCCCAAGTTGATGCTATTTTAATAAGTATTCTATTCTTCGCATCTTTAATACCCCCGTACTCTGCGTTGATAC
|
||||
|
||||
>NODE_3371_length_785_cov_53.504213_g3321_i0
|
||||
GAGTACGGGGTGGTCAAAGATGTACATCATTAAGAAGATTATTTTTACATGAATCAATATATGATGAATTTTTAAATAAATTAATGAATAAATATAAAACAATTAAAATAGGTAATCCATTAGAAAATGATACATTATGTGGTCCAATGATAAATAAAGAGGCTGTACAAGATTATGTGAATGGTATAAATTTAATTAAAAAATCATCAAAATCTAAAATATTATGTGGAGGTAATGTTTTAGATAATATGAAAGGTAATTTTGTTGAACCAACTATTGTTCAAACAGAACATACTGAACCATTTGTAAATGAAGAATTATTTGCACCTGTATTATATGTTATGAAATTTAAAACATATGATGAGGTAGTAAAAATGCATAATAGTGTAATACATGGATTAAGTAGTTCATTATTTACTAAATCACATACTAATATATTTAAATGGTTAGGACCAACTGGTAGTGATTGTGGTATTGTTAATGTTAATATTGGTACAAGTGGTGCTGAAATTGGTGGTGCTTTTGGAGGAAATAAATATACTGGAAATGGAAGAGAAAGTGGATCAGATTCTTGGAAACAGTATTGTAGACAAAGCACATGTACCATTAATTATTCAGATGATTTACCGTTAGCACAAGGTATCAATTTTGGCTCTGATGAATAAATTAAGTCTTATTTGTTTGTGAGTGTGTTAATTGCTCACACGTCGTTGTTTTGTTTGTTGAGTGAAATAAATAATGCTTATTTAAAAATTAAAATTTAAAATCAAGTTTTGTGTATGTTT
|
||||
|
||||
>NODE_3405_length_781_cov_5.103107_g3355_i0
|
||||
CCAAAAAGGGGAGTATAGCGACCCTTTGCGGCTAGTATGAATAATAAAACTTAATTATTTTGGTCAACAGGATTTCTACAAATAGGACAAATATGATTTCTTTGCAACCATGTATCAATTTCTTTAGTATGAAATATATGTAAACATGGTAATCTTCTAATTTCATCACCTTCTTTAAATTTTTCTAAACAAATACAACATTTTGAATTTTCATCATTTTGATTATTATTATTATTATTTTTTTTATTATCATCAATTGATGTTGTTTTTTTTTCATGATATTTATCAGTGGGTAATCTTTCAATATCTTGTTGATTAGCACCTCTAGGTGGATTTGGAAATTGTCTTAATAATTCTTCATATGTCATATTATGTATATCTCTATTTAATGAATTCATACCATAAATATCATTACCCATTAAAGCTCTTAAATTTGGTAATCTTGACAATATATTATATGGATTATCAACGTTTGCATTATCAAAATCATCGTTACTGCCACTAATATCACGCATTAAAAATGAATTCGTACCTAAATCAAATGTATTACTATCCATATTATCACTATTATTATTATTATTTCTGCTGTATGTATTTCTAATTGTTGTAAAATTCAATCCGGTGCCATTACTATTATTTCCTGTTGAAGAATAATTATATGAAAATTGTGTACCATCATCATCAGCCCAACTACGTGATTCATCATTATTGTTGTGCATATTTTGTAATCCAAGCAAATTAAAAGGATCTTCACGATCCCCGTACTCTGCGTTGATACCAC
|
||||
|
||||
>NODE_3438_length_776_cov_5.551920_g3388_i0
|
||||
ATCATATATTCTTGTTTTACCTAATTGAATACTTACAACTGCATTTATATGTTCTTCTGCACTTTTAATCAATGGTACATCTTGAATATGTCGTTGTTTTACTAATAATTCTATTTCTGTACAACCTAAAATACAACATTGTGCGCCTTTTATTTTACACAATTCATTTCTTATTACATTAATAAAAAAATTTCGTGATTTTTGTTCAAATTTATTAAAACTTAATTCTTTTTCAATTATTCGCTCCATTTCTATCTGATCAGATTCATTTGATGGTATAACTACTTTTAATCCATGTTGACGTAAACGTGCTTTTAAGTAATCTTGTTGTAAAGTAAAACGAGTGCCAACAAGTCCAACTGTATTAAATCCTTTACTTATAATTGATTTTGCACAACAATCTGCTATATGCAATATTGGAAAATGTGGCAATATTTTAGTCAAATAAGGTACTATCATATGACCAGTATTGGATGCAATAACAAGAAAATCTGCACCTGCATTTCGAACACGTCTAGCAGCATCAGATAATAAACCAACTACTAAATCCATTCGATCTGCTGTCAAGTAAGCAACGTATTCTTCTAAATTTACAGAATACATTACCATTTTGCTTGTATTCCCAGCAAATTTCCATTTCGAACCACTTTGAATTTTTTCATTAATTTGTATGTAATAATCCGCTCCACTAATATGTGATATACCAGTAACAATACCAACAGTTGGCTGAGTAATTACAGTATTCATTTCAACTTGCCCGTACTCTGCGTTGATAC
|
||||
|
||||
>NODE_3456_length_773_cov_10.982857_g3406_i0
|
||||
AGAGACAGAGTAAACACAAATATTTAATTTTGGGTGAAATTTAAATAATTGTCAAAAACAAACAACAAAATATCCCATCCATATACAAAACACGAAAAACGATAAATATTTCATAATGAGATGAAAATATTTAAAAAAAATCAACAATGATGTAACATTTACTCCAAAGAATATTAAGAATAAATATTCCCTTTACGTAACAATATATCGTATGAATATAATATTACAAAATACTAATTTTCATTATTATTCATAACAATATCATCATCTTCCACACTATTATTATTATTATTATTATTATCTGTAGTTTTATCATTTTTATTATCTTCAGTCTCGTTTTCTTTACTTTCTGTTTGATAATTTTGATTATTATTTTCACTATTAGTATTACTATCAGCATTTTCATCAGTTTCATCCCATTTTATAGAAAATCTAGTAACTCTAGGTTTATCAGCTAATACAGTACGTGTAATTTTATTAGGATCTATATTTAAATTTTTTTTTTTATTTGTATTTTCATTTTTATTATTATTATTAGTATTTTCTTCTTTATTATTTTTATTATCGTTAATATTATCATCATTTTTATATTCATTGGATACATAATATCCAATACGTATAAATTCTTCTTCATTATATGAACAAGATAATAAAACTACAGTAACATCCAATAAATCTTGTGGTGATATTAATTTTGAATTTGGTGCTGGTGCTTCAAATACAAATCGATTTTTACCTAAAGATATTGGTCCCCCGTACTCTGCGTTGATACC
|
||||
|
||||
>NODE_3462_length_772_cov_13.801144_g3412_i0
|
||||
AAGAGACAGGTATCAACGCAGAGTACGGGGTACCACAAATTGAAGTTACATTTGATTTAGATGCTAATGGGATTTTATCTGTTTCTGCTAAAGATAAAAAAAATGAAAGTAATAGTAAAAAAATTACAATTGATCAACAAAAAGGTAGATTAAGTGAAGAAGAAATTAAAAAAATAGTTGAAGAAGCAGAAAAATATAAATCTGAAGATGAAGAATTAAAGAAAAAAATAACAGCTAAAAATGATTTAGAATCATTTGCATATCAAATGAGAAATACATTAGATGATGGTAAATTTAAAGATGTAATTAAAAAAGAAGATAAAGAAAAAGTAGAAAAAGCAGTCAAAGAAGTAATTGAATGGGTTGATCAAAATCCAAATGCAGAATTAGATGAATTAGAAGCTAAAAAAAAAGAATTAGAAGATTTATGGAAACCAATTATTATGGAAGCTTATAAATCTACTGGTGGTCAACCTGGACAAGGAGGTATGCCTAATATGGGTGGTATGGGTGGTATGCCTAATATGGGAAATTTTCAGCAACCAACAAATACTCAATCAAATAAAGGACCTGAAATCGATGATGTTGATTAAATTACTATTAATTCATTGATTTATTAAAATACATAATAAAATATTAAATTAAAATATTTTTTTAAAATTATAAATAAGTTTTTGAATTTGGGATTGTGATCTGATTTCTTTTCCAAATTTAAATTTTATATGTTTTGTTTTAATTTGTTTTCATTTTGGTTTTATTGTCGATCTGGCTT
|
||||
|
||||
>NODE_3477_length_770_cov_21.413199_g3427_i0
|
||||
GTGGTATCAACGCAGAGTACGGGGGTAAATGTAAAAGCAGTATTACGAGAATGTGGTGGTAGATCAAGACTAATTGGACCAAATTGTCCAGGAATAATAAAACCAAATGAATGTAAAATAGGTATTATGCCAGGTCATATACATATGCCAGGTAAAATAGGTATTGTTAGTAGAAGTGGTACATTAACATATGAAGCAGTTAATCAAACAACTGGTGTAGGATTAGGTCAATCAACTGTTGTAGGTATTGGAGGTGATCCATTTAATGGTACTAATTTTATAGATGTATTACAAAAATTTAAAGATGATCCTGAAACTATTGGTATTATTATGATAGGAGAAATAGGTGGTGGTGAAGAAGAACGAGCTGCAGAATGGATTAAACAAAATAATTTAACTGAAACTAAACCTATGGTTGGTTTTATATGTGGTGTAACTGCACCTCCAGGAAGACGTATGGGACATGCTGGTGCAATTGTATCAGGAGGTAAAGGTGATGCTAAATCAAAAATGGAAGCTTTAAGATCTGCTGGTGTTGTAGTAAGTGATTCACCTACTATTATGGGTAAAACTATGTTGCGTGTAATGCAAGAAAGAGGATTACACTAAAAATCATCAATTACACCTTATTTATAATTCATATACAATTCAATTACAGTTTGTGTTTTATCAATTTGACTTGTTTGTTTAACTATTTGTTTAAAAAATTAAAAAATTTAATTTTTTTCTTTTTTTGTTTTCACAACAATAAAACGGATAACATATAATAT
|
||||
|
||||
>NODE_3481_length_769_cov_29.005747_g3431_i0
|
||||
GAGTACGGGGGACGTATTGAACAAGGTAAATTAGTACCAGATGTAAATGTAAGATTTTATCCATCTGGTGTAAAAGGTAAAGCATTCAGCATAGAAATGCATCATAAAACAGTAAAAAAAGCAGTCCATGGTGATAATGTAGGTGTAAATGTCAAAGGTTTAAAAAAAGAAAATATGCCAAAAGTAGGAGATGTTATGTGTAATGATGATGAAGATGATCCTAAACCACCAAAACAAGCTGATACATTTACAGCATTAGTATTTGTACAAGATCATCCTGGACAATTAAAATGTGCTAAAAAACAAAATGATGAATATAAAGGTGGTTTTACACCTAGTATTCATATTAGAACTGCTAAAGCACCATGTCAAATGATTGAAATTAAATGGAAATCAGGTAAATCAACTAATAATCAAAAAGTTGAAATGCCACCTTATATTGAAGCTGGTGATCAAGCTGAAGTTGTATTTAAACCTAAAATGCCATTTGTTGTATGTGCTTTTGATGATTGTAAACCATTAGGTAGAGTTGCTGCTATGGATAGTAATTCATTGATTATGTTGGGGAAAGTTGTATCAGTAGTTCATAAAAATTAGATAAAAATTATTAATTTTACAAAGTGAGAAAAAAGGAGCATTTTTTACGTTTTTTTACGTTTTCTTTAATTCTATGTGGTTTGTAACTTTGTTTGTTTGTGTGGGTATGATAAATTGAAATTTTTAATTCTTTTTTTAGCGAAAATTTTTTGCCGGTTCCTAAGCCTCAATG
|
||||
@ -1,50 +0,0 @@
|
||||
>NODE_96_length_4637_cov_31.218614_g87_i0
|
||||
CTTGGATAAAATGGTTATTCTTTTTCCGAATTGTGAGATGATAGAAATCAAAGGATTGACGTTGACTGAGGTTACATTTGAAAATCTTGTTTTATGTGTGGATAATATAAGTATGGAAGTGATTGCTGCACAAGATTTCAATATTTGTACTAAATCATCACAGGCTGGGAAAAAACCTCGTCTTAAGATGGTAAGCAAATTACAAAGGATTTTAATCACCGAACCAGGCATAGAAGTGGATGACATTATGGATGAACTTAATACTATTCGTGAACGTTTCGTAGATTTACATTGGAATATTAAATATTTAGAGAGTTATCATTCTGCAGCATCAATAATTATCACCAGAGATAATGAATATTTTAAAGAACGTGTTCAACAAAAAAAGAAGGAAATTAAGAAACGAATTAAAGCCATCTCTAGTTCAAAGCCACATACGCCACTCAATGAAGCATCAGACAATATATCATCCGATGTACAATCTTCTGCAGTAGCAGGAGAAGAATCTGCTGCTGGTGTTTTAAAAAATGCTGCAGAAAAAGTGGTTGAGGTGAAGAGTGATGATGATGAAAAAAAAGTTGGGGAAGAAGATTTATCTGAAGATGATAATGATGCACAAGATGAAAATGCTTTTATAGAATTTGAAGAAGCAGATTTTGCAAATGATATGCTTGATAATAATGATGACATAAAAGAAGATATGGATGAGGATGATATTGATTGGGATGATTGTCCAGAATATCTACAATTACAAAGTTTTGGATTTGATCGACTATGGGTGAAATGTTCACTCACTTTATACAATAGAAAATTGCAGGATTGTTTGGAATATTTAATATCAGCAGAACATGAAAATGCTAAAAATGCGTATGATGCCATATTAGCTAATGATCCAAATGCATTCACCAATGATAATATTTATAATCCTTTGTGGAAATGTGTAATGTGTGGTTTTAAGAATCGCGGAGCTCAACTCACATGTTTATTATGTTCACTTGGACAAAGACCAGCACCAACCGTTCCTAATACTATGGATTTATTGTTGGATGAAGATGAAGTACTTCCAGCAGTGGTTGAATTAGATTTAGGTCCATGGAATTGTCCATTGTGTTCTTTTGAGAATGAGAATTCTAAATTGGCATTTTGTGATATGTGTCAAAAAGGTAAGAATTCTGTGCAATGGGAACAATTGATATCTGGTGAATCACGCATTGTAAAACGATGTAATTTTAAATCAACAGATGATAGAAATGGTGTAGTATTTTGGATTGGTACACTTTGTGGTCAATCAAAATGGAAAAACCCAGGATTATTCAATCGTATAAGATGTGAAGCATCAACGCTCACTCCCGATTCACATCCAGTGACATCAACACTCACTGATCGAGTAAGTGTACGATGTGTAACAACTGCTACTCCAGGTTCATGGATGATGATTGATTTTGTACATTTTAAAATACGTCCAACACATTATAGTTTGAGACATTATATCTCATGGAGAAGTGAAGCAATGCGTAATTGGGTGTTAGAAGGATCGAACACTCCTTCCAACAATAATAATAATAATTATAATAAAGAATGGATTCAATTGCGTGAACATAAAAAGGATGCATCATTGTTTGGTAAGGGATCAATTTCTACCTGGAAATTGAATGTCACTGATAATGCATATCGTTTTTTTCGTATCAGGCAAACTGGTCTCAATTCTAACAATCATCAATATTTAGCTTGTTCTGGTTTTGAAATGTATGGTATTGTATTTCCTGTTAATGTTTTGCAACCTTTGGTTGAGATTAGAGTTTTAATGCTAAGTGTTGAAGAACCAATTGAAGATATAGATCATGATGAAAGATTATTGCAGAGTATTAAAAAACAACAGATAAATCATTTTATGTCAAATTTGGATTCTAATCTTGATGGACATAAGTTTAATTATAAATCAGATTTCGATGAGAATGGTGTTTTATTTTTTTTAGGTACAGAATATGGAACAGCACCAT
GGAATAATCCAGCATTATCTGGTGTAGTGACTGTTACATCATCTGGGATGGATCCGGAGAGTGTGGCAGCAACTGCTATTTGTGGTAGGAGTGCTGAACGTTGTGTGTGTTTGCCAGTTGATGACAATTGGATAAAGGTTGATTTGAATAATTTCTTCATTCAAATCAGCCATTATACTTTGAGACATTATACTTCAGCACATGAGGCGTTGCGTAATTGGTATTTTGAGGCAAGTGATAATGCTCAGAATTGGACTGTGTTGAAAACGCATAGAAATGATGAGGGTTTGAAGATGGAGAGTGGTAGTACTAAAACGTGGAAATTAATTGGAGAGCGAAGGAGATTTCGTGCTTTTCGTATTCGACAATATTCTAGAAATTCTAATGGGCATATGTGGTTGGCTTGTAGTGGATTGGAATTGTATGGAATTATGTTTTTGCAAAAGTAGGTAAAGGTTGGTAAATGAGAGTAAAATGAAATCATTGAAAACTCAGATGTTTATAAAGGATGTCTGTCTCATAGAAGTGTTGACATTGTTCGCAACAATGTATCTCCTGAATATCAAATACTACAAGTTCTTTCACATGTAAACTTATTAGTTCTATTATTTTATTATTTTATTATATTTCTTGATCTGTTTGTCTCTCGCAACCACAAAAAATTACAAACAATTTCAAACTAGCCTTCCCTTCTATGATCATATTTACTAAAATACATACTGCTGTTGTTGCGCTTTCTGCTGTGACTTAATTGGCAATGAACTTGGATGATTCCACGATGTAGTTCCAGATGTATGATCCACATAATACACAGTACCATTCTCATCATATGATCTCTCCCATCCCGGAGGCAATGGTGCATTCGCATACTGATCATTTGCAACCACTGCAGGTGCAGTAGACATTTGATACACACTCTGTTGACCCAGCTGTTGATCAGCTTCATTGTACGCAAATTGTTTCACATTATATTGTGCTTGAGATTTAGGCCGTGGTTGAATATTATGTGCACGTACATATGATAGAAACTGATCAGGAATCTCAATCAATGTTTCTCTACTCAAATCAGATAACTTATGATACTTATTCATAGGCACAAATTGAACTATATCACGTTTGGCCACTTGTCCTCTGCTATTCATCAATCCATATTGATCACCATCCAATTCATCCATTGCACTAAAATCTGCATTACCAACACCAACAATAATTACAGATATTGGTAAATTCTCATTCGACATTGCAACAATTTCATTGCGAGTTAATTTCATATCATTTATAATACCATCAGTAATAATCAATAATATAAAATATTCCAAATTGCCAGCAGCAGCAGCCGCAATCCCATGAGCTTTTTTCAAAATCGGTGCAAATAATGTTGGACCACTTAAAGCAAAAGAACCACCTCTGATACTCTGTAAATATGTTTGTTCAATACCATATATATCGTAAACTTCAGGATCATGAGAATTAAAATTCAAATTGAAATCATGAAAAACACCATTGTATGTAGTATTCCCAATTCTAATATTACTGAAATGTGCGCCAAATCCCCAAACTGGAAATTTTTGGTCAGAATCGTACACTTTAATAATATTACCAATGGTTCGAATGGCATTCTGATATGGTGATGGATTTTGACCAAAAATATAATGTAAACTTTGATAATCTTTTGGGTGTCCATTAGAGCCAGTGAAATCAATTGCAACCATCAAACTCATATCCAAATCACCATTCATATAATCTAAAAACGAATTCACTGTTTGTGATTCACACGAAACAACATTTAATGTACCATACGTCTTCTTCTTCTTCTGCACTCGTTTCAATGGCATATTTTTCGGCATATTTTTCAATTCGTACAAATTAGTATCAATTTGGCCAATAAAATCATCACTGCCATTGCTATCCCAATCATACACTTTAATGCGTATTGGTCGATATTCATCATTGTTGCATAATCGCTGACTCTCAATTTTGAATGGTTTCCAAACTGGATT
CAACGTGCGTTTGATAAAATTTTCACGATCTCCATAAACGGTAATTTCCTTGCCATCTTCTCTGGTTCTACAGAGAAGAAAATATGGGTCAGCTTTTCCAAACAAACCATCCATTTTTGGTAAATTTTTTGCACTGAATTGCATGGTAATTAGAGTGTTTCCTTGTTCGTTGATTTGTTCTAGTGTTGCTATCACGTGTGAGAAACGTTTGGTTTTTTTATTTTTCAAATTCTTTCCTTTGTACATTAGTTTTTTTGCCATTCGCGAACCAGATGAATGAACCAATTCGCCCAAAACCATAGAACATGATCCAAGAATATCATGATCTTTCAATCGTTTGGAACCTTTCTTATCTTCGTCATAGCAGTCAAATCTAAGAATCTGTTCTTCTTCAAAATAGTAATCCATACGAAATTGAGTGCTAAAATTTGGATTGTGATTATCGTATATTACTTCTGTTCGACCAACTTCTACAAATGATTTATTTTTGCTGCCTTTTACGTACAGTATTACAAATGGGTCTGATTTGCTGTGTACGTCTTTTTTTATCAAATCACGACAGCTGACGAAGATTTCAACACTTTCTTTTAAAATTGGTGCTAAAACGTTGCTATTGTTGTTGTTGTTGTAGTTAACG
|
||||
|
||||
>NODE_109_length_4370_cov_38.616755_g82_i2
|
||||
AGCAATGATGATTGGTCTAAAATATACCCAGGTGTTATTGGTATTGCAGCAGAAACCATGGGTTGGGAAGGAAATAATGCTAGAAATCCTATGCTCGTAAAATTATTCTTGAAAATGATAACTATTGCAATGACCAAAGCCAAAGCTTCTAAAAAAGATACAGGTATGTTTACCATCTATTTATCATGGGCTATGCAGAAATTGCAAGAAAAGAAATTCAATGCAATGGCTGAAGAATGTATAATGGCGACTTGCTTTCAGTATGGACCTAAATTAGTGTTCACTGCAATGGAGAAATTTATTACAGAGCCAGAAAATGCACAAAATGATCCATTTAAGAATGAGAGAGCATTTGGACCAGTGATTAAATTTATGCATCGTATGGTCAAAGAATTTGGTGTGGATAATTGTTATCCATTGCGTATGCTACGTTTGACACATGCGTTGTCTGCCAAATGCAGACAGAAAGATTCTAAAGAAGCATGTTATAATGTGATCACTGAAATGTATCAGCAATTGGGTAAAAATATGAAAGGTGTATGTCTTGGACCATTGGGTAAAGCGCAAGTGAAAGTGTTGACCAAAAGATTTGATGCTATAAAAAATGCAGGAACATTCATGCAATTACGCTGTACACGAACAGAGAAAACACCTGCAGCCAAAGGAGGACAAACAGAAGAATATGAAGAAGAAGTAATTGAGTGGGTTGAAGATACATCAGCAGAAGTGGCAGCAGCACAGGCAGCACAAGCTGAAGTAAAAGCAGCAGAACCAGCTCAAGCTGCTGTTGTTGCGGTTGTTGCGGTTGCTGCTGCACCAGAAGAACCTGAAGTAAAGGAGGAAATAAAAGAACCAGAGAAGTCTCCTACTCCTCCACCACCGGATGGACCAATAAAAAGTACTGCCAAACAAAAAAGTAAACGTTTGAAGAAAGCGAAGGCGAGATACAAACAATTTGAATGGAAGCCTTGGAGTAAAAATGAGATAAAAGAATATGATGCATATTTGTTTCCTAAAAAAGAAGAGGATGAGCCATACAGAGATGAACAGTTTGGTAAGGATTTCAATATCAAGATTGGTTCTTTGAAGAAATGGAAAAAATGCCTTGAGCCAATTGAGATATGGATGGATACAAATTGGAAACAATTCTATTGTGCATCAGATTTGGTTATGTATTGGTCGACATACATTTTTGAGCTTAATCCACAGCCTAATATTGGTAAAGCATTTATTCCTTTGTGGAAGAAGTATTGTGAGCTGTTGCAGGAACATTTGCAGATTATGGATGAAGCAGAAGGAAAATACGTGTTAGCCTTGGTAATAGAGAAAGGTTTCGTTCATCGAGGATTGATTGAAAGTGTCCATGATTTCTTGGATGTTTTTGAAGTAGTTTTCAATCCTGCTCTTTGTATGAAAACATATTTGACCATGGCTGAAAAAAGTAAATCAAATGCAGTAAAATTGCATTGCATGTCACGGTTAGGAATATGCATAGAACGCTATGGTTTGGTGAAATGCGAATCAAAGAAGAATAAATTGAAATCTAGCAAAGCATTCAGAGATGCTAAAGACTTTTCTCGTCAATTTATAAAAATGTATCCCAAATTGATATCAGATAAAAAAACAGCAGTGCAAACTAAAAAGATATTGAAGTATGTGTATGAAGTGATTGGTGATAAAGCATTTTGGAGTGCAATGAAAATCAAAGACAAAACTAAAGACAAAGAAGAAATGACTGATCTCATATCAGATGCAGATAAATTCAAGAAAGGAAAATTTCGTAAACCAGAAGAACATTGGGTGGGAGATATGATCAAAGCTGCCAATAAAAGAAAAGCAAGAGAAGCTGTATTAGGTGGTGGAGCAGGTGGAATGCCAATGGGTGGTCCAGGAATGGGTGGAGGTGAACATGGATTGCTTGATCCAACGCAATATGCACCAAAAGGACCTCTTCCTGCCTTAGTTCTCAATAATTTCACACCAATGCATTTCACTGCACA
ACAATTGTTAGCTTTGCGATATGGTCCAGTTCCACATCTGTGGGAGATAAGTAATATTCGTGATATGGTTGGTAGATTACCAGCAGCTTTCAGCATGGATTGGGAAGAATTCGAACAAAAAAAACAAACAGTTGATACTAAAGCTACCATGCCTTACCTTAGAATTGGGAAAGTGCGAGTGGATGTAGAAACAGGCAAAGAATATCCAGAAATAGTACAAAATGGTAAAATACAATTGTTTGCAAGTGTTCCACCAACAGATGATAGAGCACATCAATTGTTGGTAGAAGAGAAAGAGGCTGCATATATGAATAGATTGAGAAACGATGATAATCATATTCAAGCTATTCGCCAGATTGAAGCAGAAAATTCAGAATATTTTGATCCAGCCAATGATGGTTCATCATGGTTGGCATTGTATGATGTCAATGGTTTTGATGAAACTGAACGTCAAATACAGATGATAGAAATGAGACAGATATATGGGGTAAAGTCATTGCAGACATGGTTGCATACTGGTAAGAATGTTAATGTTATTGAACAACGAGTGGAGAAAATTATTGATAAATTATTGCCGTGGTTTAAAAGTTTGATCAATACTAAAACTGCAATGAGTGATCATCGTAGATATGAGCATGTGTATCAAATATTGCAATTGCTGAAATTTGTTTGCAAGTTTAAAGCGCTTCGATCATTGAATATATCGGTGTTGAATGCGTTTTTTGAGACTTTGTTGAATGCTATCACATATGATAATATGCCTGCGCTTTTCAATCAACATCATAAGCGAGTGCTTGGTGAAATGAATTCAGTTTTATTGACATCTATAAATTACACACCGCCAGTGGAAACATTATGTATTTTAATACAATTGCTATCAAAATGTGATCCAACCAATGATAGTGAATCAAAAAAGTTGTATGCAGAAGCTATTTCTAAATTAATTTACAGACTGTTGAATAAATTCGATCGAGCAATTATGAAAACAGATGAAAGTAAAATAAAAATTCTACAAGAAATTCATCAATTTTTTTCTTTGATATCGTATGAAACATGGCAAATGGCTGATAGTTCGAGGCGTTTTCCATTGAAGATTATTCAAATGGTGATTGCTAGAATTGTGTATTATTCTGGACATGAAACTGCTCAATTGTTGGAAGAGTCAATTCAGCAAATGCAAATCACTCAGAATATGAATGGAGGGCCCACTCCACTCAGAGATACATATGCTAAACGGTTGATATTGAATTTTATTCAATCATGTGAGAATCGAAGATATCAGCAGCAAAATCCGGGGAAAATGAAGAAAGTGCCGAAACCGCATTCGCATTATGATGATATGGAACGAAGGCCAGACAATATTTACCAGGCACAATCTGAAATGCCTTTTGCTCAAGGAAACAGAGGAGGTAGTGTTAGTACGAGTCATCAGCAGGAAGCACCTTCGCAGATTATGCGTGCTAAGAAAGAGATTTCAGTGCAAGCGCAATCGTATGCAAAATCTAATGCTATTGATTTGAATTTCTCGATATGGGCTGGAATGACTGGAAGAGATCGAAGGCAAAACAATTTTGGTGTTTATTAAATGTGAGTTGATAGGAAAAATATATTTATATTGTTTGTGTTGCATCTTGCATGCGTAGTTTTATGTTTTCTGTGCGTTTTTTTTGTTTTCAATCATGAGTTCAAGTAATATCACTGTGCATGTAAATACAGCCACTTTTGCCACGATTTATAAACACTTGCTCCATCAGCATCATTACTACCGGTATGCATATCAATCTCAGAAGTTTCTACGACTAAATTACAAGATTGCTGCTTCTGCTTTTGATCATGCTTGTAATGACTAAAAAATAGATTCATAGTATTTTCATATATAATATCAGACAAGTGATTAATATCCTCTCCTTTGATTGCTGCAATCACTTCCAAAACTTGAATAATATGACATGGCTCTGTTCTTCCTTTCTTCACTTGCAAATAATCTGATGTTTTCTTAT
ATTTCTTTATCTCATTCTTGAAGTGTGTTTGCTGAACATAAGAATATCCAGCAAACGATTTTTTAATTCCACAAAAAGGACAGTCAGTTTCAATCAACAATCGATCATTTGGTATGTATCGAATCATATCCAAATTATCATTGGTCTTCAATGAACATCCATTTATGCCAATGTACATTCCTTCAAATTTCAAAAACTCTTTCATATCTTCAATAGTATCAGTAAAAGAATGCACGACAGCAGTTTTCCATCGATGTCGATTTTTAGACAGGAGTTGAATGAGATCTTTAGTGGATCGTCGGTTGTGTAAAAATATTGGTAAATAATTGCTGGCTTCACACAAATCGAAATGTTTTTCAAAATGTTTGAG
|
||||
|
||||
>NODE_156_length_3937_cov_77.750514_g133_i0
|
||||
TGTTCGGTTGCTGGTATCTCTGATCTTGCTCGCTATGAAGAGCTTACTGAATCATTAATTTTGATGAGAATTGATGAAGGATTACAACATTCTTTATGGGAGATATCATCCGGTATTTTCAATTTAGGTAATATTGAGTTCACTCGCGAAGGCGATGGTTTTGCAGCAATAAACAAAAAATGCCCAAAATTCATCCATGCAGTTGCTGAACTCTGGGGAGTCAAAGATAGTATGATAAATGATCGTTTACTTACTAAGAATATGAGAGTAATGAAGAAGACTATAACCCAACGGATAACTTACGAAAACTCTATTACAAATCGTGATAGTATTGCTAAAGGTATTTATGAGAATATTTTCTTATGGTTGGCGGAGAGAATTAATGCTGAGTTGTATCAAACAGAAGAGGATGTGAAATCTATTCTGTTCATTGGTATTTTAGATGTTTTTGGTTTTGAGAATTTTTATATTAATTCTTTAGAGCAGTTTTGTATTAATTTTACAAATGAGAAATTGCAGCAGTTCTTTAATTATCATATTATTAAATCTGAGCAGGAGGAGTATATTAAGGAGAGTGTGTTTTGGACACCGTTGTCTGTGCCAGATAATATTAATTATGTACATATGGTTGAGAATAAGGATCATGGGTTTTTTGCGTTGTTGGATAGTGCATGTAAAGCACCGAAGCCTTCGGTTGAGGCTTTTATGCAGGAGTTGTTTAAAAAGAATGGGAAGAATCCGTGTTTGGCTACGATTAATGCTCCTGGGGTTGGGATGTTTAGAGGTGGACCAAAGAATGCTAAAAAAAAAAAAAAAGGAAAAGGACGTTCAAGAGGAATATTCACTGGTTTTAGTATTTTACATTTTGCAGATCATGTTGGATATGATGCTAAACATTTCTTAACCAAAAACATGGAATCTGTCCATGCAGATACTGCTAAAATGATGGCCAAATCAAGTAAAGCTTTAACCGCTCAAATCGGTGGTCCAGTGAGTGGCAGTAAGAAATCCAGAAAGAAAAAATCTGTAACTTCCGTATTCTTCAGTGGTATAAAAATATTAATGAAAAACTTATCTGCAACCGAACCATACTTCGTAAGATGTGTGAATCCAAACAAACAGAAATCCAGTAAAGTATGGAATGTAAACTTAGTTAAACACCAATTACGCTGTGGAGGTTTGGTGGAAGCACTAAAAGTATTAAAATTAGGATATCCCACACGTGTACCCTACGCAACCTTATTTGATAAATACCATGGAAATGTCACTAACCCATTGATAAAAAACATGGGACCCGAAGCATTTTCAACTGCATTGTTAATTGCTTTTGATGTAAGTGAAAATGATTACGAATTAGGTTTAACCAAAATATTCTTCAAACCTTCGAAAGCAGCAGTTTTAGAGACAATCATGGGTCAAGCTGGTCAACCCTTAAGTAAAGCACAAAATGAAAAGATCACAAAATGGGTTGTACAAAAACGTATCAAACAAATGATGGGTACTTGCAAAGCGTTCTTGGAATACAGAAAACGTGTACGTTTAACACGTGCTGCTCGCAGATGGCAGTATGCTGGTAGAGTTGCTTCTCTTTTGGGTGGTAGTGTACTCAGTCATTTGTATATGGCTCGTGAGATAATACTAAAACGTAAGAGAGAGGAAGCATCCGTGAAAATGCAATCATTCTTTCGTGGTTCATATGAACGTGGAAGATATATCAAACATATTGTAAAAGTAAAGAAAGCTACTAAAATTGTATGGATATCTTATCGTAGATGGCAAGAGAGAGTCAATCTTCAGATATGGTTGGATGTCAAAGTAGTAGAGACACGTAAAAGAAAAGAAGAGGAAGAGAGAATCCGAAAAGAATTAGAGCGTGAAAGAAAGTTGGAAGAGGAGAGATTAGAAAGAGAGAGGATATTGGAAGAGAAGAGACAGGAAGCAGAACGTTTGAGATTGCAAGCGTTGATGGCAGAAGAGGAAAGACAAGCAGAGATGGAGAG
ATTACGTGATGAAGCAGAACAGTTACGTTTGGAAGCAGACAACAGAAAGGTTGAAGCAGAGAAAGAGAGAATACGTTTGGAAGAGGAAAGAAAAGCAGAGATGGAGAGATTACGTTTGGAAGCTGAGAAATTACGTTTGGAAGCAGAGAAGAGAAGACAAGAGGAAGAAGAAGCAAAACGTGAGGAAGAGCGTATACGTAAGGAAGAAGAAGAGAGAATACGTGCAGAGGAGGAAGCTAAGATATTGGAACAAGAACAGGAGGAGGCTCGTAAGAGACAAGAACAGAAGTCTAAGTCAATCAAACAGGAGAAGAAGATTATTGAGTTAGAGAAACGTGAACATAAGAGAAGAAAACGTGAGGCGAGTATAAAATCGAATAAAAAGAAGAGGAGAAAACGTGAGGATGACAGACAAGATGAAGAAGATGCAAGATTTATAGAGAAGAATTTGATTGAGATGGTTAGTGATACGGATGATGAGTCTGCTAGCAGTGAGACTGACAGTGAAAGTGATGGACCAATTATGAGTGTGAAAGAGTTGTTGAAGAATTTCGATAAAATTGCAAGTACTGGTCAATTGTTTTTGAAGTATACTGGGAAGAGAAGACGTAAACCGCAGGATAGAATTGTGAAAGTGTCGTTTGATAATAATTATAAACCTAAGCAGATCTCATGGGGTAGTGGTTCGAGACATATTGATTTTAGTGATATATTGTATATTGCGCAAGGACATTGGACACCAGTGTTTCAGGCTAGAACAGAGTCTTTGGATGCGAAGTTGTGTTTTAGTGTGGTTGGTAAACAACAGATATTGGATGTGCAAGCACAGACTAAAGATATGTGTGAGTTGTGGGTTAAAGGGTTACGCCGTTTGATTGGACAAACAGATGAACAAGCACTTAAGTTGTGCAAACAGAATTTGGAGAGTGGGAATTTGCCTGGTTTTAAGAGTAAAGATCAGAATGAACAAAAACGTGCAGAAAAGGAGCATAAGAGACGAACGAAATCGTTGATGCTTTTACAGCAGGATTTGTTTGTGATGACAACTACCACTGTTTTTCGTAATTTGGATGAAGAACGCATATGGGATATTGATCAACAAGTGCGTGAACAGTTCAATGCAAAAGTGTTGTATGAGCAAGCGTTGAGAGAAGATATTCCATGGCGACAGTGGAATCACTGGATACGTGAGAAGATTGTTACTTATTTGCGAACGAACAATCGAATTGCAGCTCCACAGCCAATGTATGGTCAACAACAGTTTGGTGGACAACCGCAGTATGGACAGGCACAGCAAATGTATGGTCAACAGCAGTTTCAACAGCCAGTACAACAGTTACAGTATCAGCAGCAGTATAATCAGTATCAACCGCAACAACAGCAGGTGTATGGTGGTGGTCAGCAGTATGGACAGCCACAACAAATGTATGGTCAACAGCAAGCAGCACGGAGTAGTGCTTACAGTCAGGGTGGTGGATATGGTGGAGGAATGATGCAACAGGCACAGCAACAGCAACAACTGCAGCAACAACAGCAACAACAGCAACAGCAGCAGCAACGTCAACAGATGCTGAGACAGCGGCAAAGTAGTCAGGTATCTTTACCGCAGATGAATACTGGGAATTTTAATATGCCTGCTCAACAGCAGACGGATAATTTTGGATATGGTCAACAGCAGCAGCAGGTGCCAATGCAACAACAGCAGATGCCGAATATGGGAGGAGGAGGAGGAGCTGCGTATGGTGGAGGTGGTGGTGGTGGTGGTGGAGGAGGAGAGAATGATGAAAATTGCACATTGATGTGAAAACGATGAAAATCATATCTGAAATTTTTGTGTTCATATAGAAAATGATTGAATTTTGTGCTTCTTATGCTTGAAAAGTACATTTTTTTTATTGGTAATGTTATTTTATTATTTTTTTTTTATTACTTATCCG
|
||||
|
||||
>NODE_162_length_3902_cov_67.743317_g126_i1
|
||||
ATGACTTCCACTTTAGTAACCTTTGAGCCCTGTTCTTGCCCTTCAATCACCACTTTGTTCAAATCACAGCACAAAATGCCGTCGGATATTTCTCAGTCATATGAAAATAAATATTTTATATTGATGTTTCATCACAAATGTCTATTTATTTGTATTCTTATTTCCACTCGCTTTGGATGAACTCTTACCACTGGTGTTCTTACTACTCTTCTTAATGAATGGTGTCATAGGATCATAATTGCACACAGCAGTCGACAAAGAAATATTAAATGCACTATTCTTCACAATCTGTGATGCATGTCCAACACATTGCTCAATAGCAATATCACATCCCATATCCATAGCCTCCTGCAATGGATAACCACTTGACTTACGACTCATATCACCTGAATTGAATTTCTTATTCATAGTCAAAATACGCTTTGGTAATTCATCACGTATTTCAGGCTCCAACAATGCAGAAGATGACAACTGCCGTATCCACAAATGCTTACGCGACATCATTTCCAAATTATGTCTCACCAACGAACCCATAAAAAAAGAATGAGACAATACGTAATATCTGTTGTAAGCTGCTCCCCATCTCTTGGATAATATCGATTGATCTGCTACTTTCATATCTGGACCAGTGTTCTTTGGTGGTTCATAATCTGGTGGATAACACATGAATGTCAATAATTCCACCTTAGATTTAGCCAAACATCGAAGAGGATCAACCACAATACCTGCCCATTTTCTGAATTGCATCTGCCACAATAGCTGACAACTTACATCTATTGCACTGAAAAAGGAATTTGAATCTACACCCACTTCAAATGGATGTGAATGATACCAACCAATAAATCTATCTTTTCTTAGCATTTCAATTCTATCTTGCATACGAGTCATGAAACCTAATACTTTTTCATCATCTGCTACCACTCTTGTTTCTGATCCTTCTACAGGTAATTCAACGCAATCCATAATGATGATAGTTGCATGGTCAATTCTACCCACTAACAATCCCATGATTTCTATGGGTAATCCACCTTTTTTTCTACCTTGATTTACACCTTGTATGGCATGTTTTACCATTTTTGTAGCTGCTAAAACGTTGATTCTACACTTAGTAAAATATCGAGGATTTTTATTCCATGGGGCTTCTTTGTTCAATTGCGCGATTTTCTTATCGTCATAGGTGTATACACTATCTTGTATTGGAATTGCAAATGTTGCTGGTTTTTTTGAACCAGGGTCAGTCTTACGTTCAGAACGCGATGAATTATTTCCTCTGCGCATGATCAAAGTAGTTTTCAAAAAAAAGTAAAATGTAGGCAGAGGACTTGAACAAATACAATCAATCAATCAATCAATCTTTCATAAAAATTCAATAATTTTTCTTCTTTCTTCCCTTGCTATGAATATACCACAATAAAGTTTGAGTATAAATACGAAATTCAAACGTGCCAACTTCCTCATCTCTATTCTTCAAAGTTTTCGACAACAAATCAGGCAACACATTCCAATGTTTCTGATGCTGCTTCAAATCATGTTCACTCCATTTCATCAAATGCTTATAACTCCCCATAAAAAAATCATCATCAGCTTCATCATAGTCATAAACCTTGATATTAATCTCCAATCTCCCACCAGTATTGCCAACAATCCCCCGAGAAATACCACTGTTATCTTTCACAGACAACCCACTATCTCCATATAACTCATCATTCACTCTTTTATTAAAACCTGGTGAACCTGCCCTTGCCCGCTTACTCCCATGCGTCTTCATTTGCATATCAATCAAATCAGGAATATCTTCAACCGGTATTATAAACAACAGTGGTTGTCTCCACTCAGGATTGAGATTCTTTATAATAGTCTCTGTCCGTTTTACTTTCCCAGCAACCGAAACACTAGTGTATGGATCACTTGTACCTGTACTCTCATCATTCTCATCTTCAAAATCATGTGCAGGTAAATTCATTGCACGTTCAACAGCTATAACAATAGGCACTCCTCGA
CCTTCACACATACGCAAAGCATCCTCATGTCTGAATTGTGTCACCAAAGTAGTAGTATAAAACTCCCAAGTGACATCATCGCACACTTTGGCTGCAATCATGAATGTCAACAGTTTGGATTGTTTGGATTTCAATGCATACATCACTGGACTATCATAATCTACCACTGGTAATCTCTCCACAGATTTACCCACCTTCCATAGTAAATCAGTTAAGTATAACTCATTTGTGGTATCATCTGCGATATCTATGAATATCTCTGCCACATCTCTGGCAGATACTACTCCATCATCACTTTGACGAAGGGCAGCACACAATTGAAGAAATTCACGGAAATCGAATGTACGACTTTTATCATCATCGAATGCAAGGAAGAGTTCTTTTATTTCGTTATGGGAACGACTGAAGATTACAGATACCAAACCTTTTGGTGAATCTGCTAACATTTGACCTAAGAAACGACATTCATGAGTGGTCAAATTCTTCAATGAATATGCACTTAATTTTATTGTATGTAGTGTACATTCTGGAGATATGAGTGCTTTAGATACTAATCCAACACTATGTGAATCAAATCCAAAGTCAGTCCAATTCACTGCTGACCGTTTACCTCTCTCAATGGATGGTAAAAGTGGTACAAAATTGAATGAACCTTGCAATACTACTGTCACATATTCATAAAAATGAAGACCTGATTGCTTACTCTCTATTTTAAGTACAACTGTCCATAGAATTAATACTAAAAAAGATGTAATTAGTGATAAAAGAACTGCACCTAAGTCAACATTACTTGGATTTGAACGAGAGAAACGTGCAAAGATATATAATTGTAATACAGTTTGACATATGGTTTCAGCAACCACTTCAGAAACTCTACGCAATTTTTTATACCCTCTTTCAGCATGTGATTCTTCACGCAATCTTTTCTTAGTTATACACAAATAAATTGGTTTGATAATAAAATCTTCAAACAAGAGATAGATATCAATAAATATGAGTATAAGAACACCGATGGGTGCAATTGCAAATAAAATTACAAAGAGTTGTATTTTCTTATCTGGACTTCCAGTCACTTGTTTACTGACAATCTCCTCTGATGAACTTGCGTCATCTTCTTTGTATTTATCATTGCTATCATAATCATAAAATTCTTCTTTTTGTTCGTCAGTTTCACCATCAACAGTGGTGGATACTTTCAATTCTGATTCTGTGGATGATTCCATTGCTAATTTTTGTAGATATCCAAATGCAGCAGCCCATGCAATATAATATGGTGCCAATATAAACACACAACTCAACATAAAAAGCCAATTTTCTTTAGCATCAGATAATGTCTGAGCAACACGAATGTCAGTGATGAGATCTGCAAATATGACACATTCTGAGAAGAAATGTAAGAGTAATATCCCACTGACCACATATATCGGATGTTTTCTTACTTTGACAGTCATCGCTCGTATAAAAGTAATATTATGCAAGAATCCCCATACGCCGGATAACTCTTTTGGTTCAGGAATGTTCACCAATCTCTTCAGTGTTTCTGGTGTTACTTCGGATAGTGTTCCATCGGTTGTGCATGATTTTCTTATCCATCCTTGAACTGGAAGTACTACCTCTAAATATAAACCATTGTCTGCATTTGTTTTACCTGTGGTCACTAACACTTCATACTGCTGAACTAATTTGCATATTTTATAGTCTGCATTTGGTTCGGGATGGATGGGGAAACGCCAATCTTCGTTTACCATATACATTTGACCTTTTTTTAAATCTTTTGGTTTCAAATATGATTGGTCTAGAGTTGGGCCAGAAACTTCAAACGTTATTTCGTTGGCATTTAGAAACTGCTCTGCTGCCTGTTGTACTATAATAGAAGTCATGGTTTGTTTTTGGAATGTAGGA
|
||||
|
||||
>NODE_166_length_3897_cov_25.502339_g139_i0
|
||||
TCACTTAATCATGTCTGGTAAAAAACTATTCAACTTCAAAAAACGTCGCGGCATTTCAACCGACGAATCCCCCAAACAACAAAAACGTAAACACAGCTCCGGCCAAAAACCACGCTCCCAAACCTCATGGGGCCTAATGTCCCTAGCAACCACGCGATCCAACGGCATAAAAAAAACCAGCGCAGTTCCCTCATCAATAATGACAAACGTCCATCGCATGATAAAAACCTCCTCAGCAACCAACACTTCAATAACCCCAACCCCTTTGCACAACCGCAGTATCCACAATTACAATATCAACACTCCCTTGGGTTACTCATCATCAGTATTGAATGCACGCTTAAGATCTCAACACTCCTCACACGCAAACAACAACAACAACAACCCATTGGATAACATCATCAACCAATTGCACAGTCCACGTCGTTCATTACTACCCACACATATGACTCGACCAATGACAGACCGTTCAAAAACCACTTCCAATAAACCTCAACAAAAAATCATCCATAATACAATGTACTCCGTATTCAGCAACGCAGGCATGGAATCATTGATGAATAAATTGCAGGGACAAACCAAAGATACCTTCAGCAGCAATACAAACATCCATCAAACTCCTCGTTTACAACCATATAACAATTTATCCAAGAGTAGTAACAGTCCAACTGCATTTTCATTGCTCAATGGTGGAGGATATGGTCAACGTTCAAGAAATTCAAGTTTGAATGTTGTCAATGGTGAGACATGTACTGTATCCATTGAAACTATGGTTGATAATGTAAGTAATAAATCTGTGAATAGCAATACTAGTGCACCACCCAATAGTTTTATGTTACCTTCGAATGTCATGAAAGCTATGACAGATGATGATGTAAAAGCATGGGAGCAAGAGTTGATTGAATGGGAAGTGAAAGGTGAATGGATTGAATGTAGAGAGACATTGACTGGGAGAGTGATGTGGTATAATTCTTTATCAAATCGTATTATATTTGATTCACCTCCCGATGGCGTACAACCATTACAACATACAATACATACCAAAAGTTTAGGTCCAAAATCGGATGATTACTTTCTAAGTCGACCATTGACACCATACGAAAACGAAGATGTAGTTCAAGGTTTACTAAAATTCGATGATAGCACATGGGACTTCACCAATGATTTGGTAGATGTATATCGTACACGTGCAGCCATGCGAAATCATAAAAAAATGCTACCTAAACCTCAGCAATATTGTTTTCCAGAGTATTTTTCAATGTCTCCATTGCCTGAGTTGCCTGGTGGTCGATTCTTAGCTAAGATACGTTTACCAGAGGAGTTTTCTGCACATCGTAGATACTCAAGTGTGCAAATAAAAGTGGATTACACTGCAGCATCAGATGCTATCAAACAGGGTGTGGATAAATTGGATGAACCATTCAACACTCGCAGACAGGATTACATTCTGAAAGTTGTGGGACAAGAGGCGTATATGTATGGTAGACGTAAGATTATTGATTATGAAGCAGTAAGAGATGCAGTTCGCAATGAGGATGATGTTGAATTTGTACTTATTCAAAGATCAGATTTCAAAGAAAAAGTTGCGGAAGCAAAACAACATCAGTTGGAATATGCTAAGCTTTTTTCTACTGCATATCCTGCGAATTTGGTGGAGTCAGCGAAGGAAACATTGAAATATGATTTCAATAATCTCAGGGATAATTTACCGAATATGAATTCGAAGAAACAACGGTCAATGATAGATTTTGAACCACAGGATTATATATCTTTGTATGATTGCGACTGGCATTATAGAATAAAAATTGAAGGGTTGACTAATGCTACATCTTTGCCGAGATTTGATGATCAGTCAATGAAATCTATTTATGTGGTAGCTGAGTTGTGGATGGGTGATCTTATGTTCGATCATGCGACACTTATGACACGTAATTCATATCCATCGACGAATATTCGTTGGGGACAATGGCTTTCATCTCGTAATCAAACATTTGCGCAGA
TACCGAGGGAATCTGTGCTGTGTTTTATGGTTATGGGTATCAAAGAAGGCGATAATAAACCTCAATGTTTGAGTTATTGTAGATTACCTTTGATTGATCATCGAAATTGTTTGCGTTCTGGTAAATATTTACTTAATATGTGGCAGATTCCAGTGTTCAAAATGGTCAAAGATGGTCCTAAAACTGATCCATATTTAGATCGTCCATTCAGATATCGTGGATGTATGCGTGATAGAAATATGAAAGCAATGGCAGGTAATGATGATGAAGATTATGAACAATGTCAATTGCTCATAGAATTTGATGAGTTTGCATTCGATGTCGTTGCACCCAAGTATTTACCAAAAAAAGATTATAGCGAAGTTGATGTTGGTGGTAAATTGAATCATACACAATTGACCAAACAACAAAAGAGTTCCATACATACAATAATAAATAAAACACCTTTGGAAGTGCTGGAACAAAAAGATAAACATCTTATTTGGCAATCACGTGATTTACTATGGCATGATCCAAGCGCTTTGCCAGCTTTCTTACGTTCTGTGAATTGGACCAATTTGTGTCATATCTCAGAAACACATAAGTATTTGGATTTGTGGGCAAGTCCCAAAAGACCTGAGAACGCTATAGAATTCTTAGATTATAGGTTTGCAGATACGAGAGTGAGAGAGAAAGCTTTGGAATGGTTGGAAGATTTGCATGATGCAGATCTACAAAAATATCTGTTGCAATTGGTTCAGTGTTTGAAATATGAACCTCAGAATGATAGTGCACTCTCTCGATTTCTTATACGTCGTGGACTTAAGAGTCCATATCAGATTGGACACTTTTTATTCTGGCATTTGAAGGCTGAGTATCATAAGGAACAGTATACAGAGAAGTTTGGTTTACTTATGGAAGAATATCTGTTGCATGCTGGAGTACATACAGAACAATTGTTTGTTCAGCATGCATTGCTAAAACGTTTGGAATTGATTGCAGAGAAGATACAACAGGCAAAGAGAAGTATGAGTAGTGATCAATGTAAGAGATTATTTCGTAAGGAATTGTATGCACTGAACAAAGATTTACCAGATATGCCCATACAAATACCATTGAATCCGAAGTGGAGTGCTAAAAAGATAATTATTGATGAATGTCGATATATGAGCAGTAAAAAAGTACCTCTTTGGTTAGTGTTTGAAAATGGTGATGAATATGCTCCACCGATAAAAATTATGTTCAAATCTGGAGATGATTTGAGACAGGATATGCTGACGTTGCAGATTATAACTATTATGGATCGTTTGTGGTTGGACAATAAGTTAGATCTGCATTTGAAGCCGTATTCGGTGATGGCTACGGGTGTCAATCGCCATAATGAGGGAGTTGGTATGCTTGAAATGGTATTGCAATCGTGTACGGTCAATACTATAAATGTTGAATATGGTGGGGCATTCAATGAGAAGACGATTGATTCTTTTTTGCGGAAGTATAATACGTATGATCAGTCATTGAATAAAGCACGAGAGACATTTGCGCGTTCTTGTGCTGGATATTGTGTGGCTACTTGGGTTTTGGGCATTGGTGATCGACATTCGGATAATTATATGGTCACTCAGAATGGGCAATTCTTCCATATTGATTTTGGACACTTTTTGGGGAATTTTAAGTCGAAGTTTGGTTTTAGACGGGAACGGTCACCGTTTGTTTTTACTCCGCAGATGAAGTTTGCCATTGATTCTGGTTTGAGAAAAAATAAATTGTATTATGATTTTCTGGGATGGTGTTCGGAGAGTTATAATGTGTTGCGAGTGAGGAGTCGTTTGTTGTTGGTGTTGTTTTCGTTGATGGTGGCTGCTGAAATGCCGGAGTTGATGAGAGAGTCGGATATTGGGTATTTTCGGCAGATGCTGAA
|
||||
|
||||
>NODE_170_length_3872_cov_43.923097_g143_i0
|
||||
TGTGCTCTTCCGATCTGACTGATTCTTCAACTAGACTCGTAAACTCGGAAACCCATCAAACCCTAGAAAAACAGCCATTCTTAAGAAATATGGATACCGACGACGAAGATGAAGCCCTAGCAAATGCCCCCAAATACGAAAAGAAATTCCTAAACATTGGTGGAATGACCTGCGGTGCCTGCTCTTCAGCAGTAACCAAAATAATCATCGAACAAGAAGGCGTTCAATCTGCAACAGTCTCACTAATGATGCAAAGAGCAGAAGTAATTTTCGATCCACTCATCATTGACCTCCCTCAAATCATTGAAGAAATCGAAGACACAGGTTTTGATGCCAGCGAACTGAAATTATCCACAACCAATTCCAATCAATTCATGATACATATATTATACCCATTAGCATCCACCACTGAAATACAAGTTACTGATATATTACTAACCATAGATGGCGTACTAACTGTAAAAACAGTTGATGAAGGAATTGAGAATCTTGGTCTCATTGATAGTGATTCTATGCGTTCATTATCAATCATAGATGGTGGTGGAATTGGCGCTGATGATTTATATATCTCTAAATATGGTAGCAATGGTGATCGTACATTGGAAAACGCATCAATGTATGTGAACATAAAATTCGATCCAAAAATCACTGGTATGAGAAGTATTACAGAGTGGATCAATTCTGCAGTCAATACTGCCTTCAGATGTAGGATATTGTTTGACAGTAGTGATATTTCACAGCGTAAAAAAAATATTCAACGTGGAAGAGAAGAAGAGATACATAAATGGAGATCACTCTTAAAATTCAGTGCATGTTTTGCTATTCCAGCATTTGTTCTCGCAATGGTATTCCCAATATTCCCAGCATTTCGCAATGCATTTGATACAAAAATACTTCCAGGATGTCTTTTACGCGATGCAATATTATTCTCTTTAGCTACACCCATACAATTTGGTCCACCTGGATTGCTATTCTACAGAGGTGCACATAAATCTTTGAGAGCAGGTGTAGCCAATATGGATGTTCTGGTTGCTTTAGCTACTACCATATCATATATATTTTCATGTTTCAGTATACTTTTATGTATTATCAATGATCATTCATCTGCAGATGAAGAGACAACATTTGAGACATCTGCTTTGTTGATTACAGTTATAATATTAGGTAAGTATATGGAGACTTTAGCAAAAGGTAAGACATCACAAGCTTTAGATAAATTGATGAATTTAGCGCCATCTACAGCGAGATTGGTTGATAATTGGAATGATGATGAAAATGAAGAGAAAATTCATCATCAAGAAGAAGAAAAGTCATCGATGACAGATACTGGCATATTTGCAGCAGAAAAAGTGAATGAAGAGCAACGTGCTTTACACATACGTGAGATTGATGCACGTTTAATACAATTGGGTGATATTGTACAAGTACAACGTGCAATGAAAGTCCCCTGTGATGGCATAATTGTTGAAGGTGTCTCCAGTATCGATGAAAGCTTAATTACAGGTGAGTCATGTCCAGTAAATAAAGAGATCGGTGACCAAGTCATTGGTGCTACAGTAAACATAAGTAATACCATATACTTTCGTGTCAATAAAATTGGAAGTGAAACTGTACTCTCAAAGATTATAACCTTAGTTGAGAATGCACAATCTTCTAAAGCACCCATCCAAAAAACAGCAGATTTGGTGGCCAGTAAGTTTGTACCTGCAGTGGTAGTTATAGCCATATTAGTATTTCTTGGATGGTTTTTTGCTTTGGAATATGGTTTGGCAGATATGCGTTCTTTGTTTGATGTTGCAATGAGAACAAGTGCAGCAGTGTTCTATGCAGTCATATTTGCAGTCACTGTATTAGTAATCTCATGTCCATGTGCATTGGGTTTAGCAACACCTACAGCAGTGATGGTGGCCACTGGAAAAGCAGCAGAGTTAGGTATACTTTTTAAAGGAGGTGAACCATTGGAGATTGCAGGTACTACCAATTGTTTGGTGTTTGATAAA
ACTGGAACTTTGACTGAAGGTAAAATGCAAGTGGTAAATATCATACGTTTGACAGATGGTATGCTGTATAATGTAATACAACAGCCACAGTCATACAATAGTAGTACTTCAGATCATGTGAAAAACAGACATGATTTTTGGAATTTTGTGTACGGTGCTGAATCTCAGAGTGAACATGCAATTGCAACTGCAGTTTGCAAATTCATTGAAGGTAAACCAAATGTTTCTAAATACGTAACTGCTGACAGTAAAACTAACACTATAGATATGAGTATCATAGATAAAAAAAGTAAAACAACAGCATATGAGAGAATAAGTACTGATGATAGCACTATAGATGTTGGTGAACAACAAGAGATAAGTTATAATGCAGATAGTTTTTATAGCATAAGAGATAATGAGTTAGGGAATACATTTGAATTCAAGCAATGGACAGCCAATGAGTTCAATGCAAAAACAGGCAAAGGAGTCAATGCTGTTTATGAATTACCCACTTATTGTCAAATGTTCATTGGCAACATAAAATACATGAAAGAGAACAATATCAGTGCTATATTGATGTCAAAGTTCTTGACCAATGATAGCGTTGACAATGATGTATACTCTGAAGTAAAAAACAGAGCAGATGATGAAGAACAAAAACATGCATACGTAAGTGTTGAAGAAGAGAATGCATATACCATAGTAATGGATAAAAGCAATGAACTAAAACGCAAAGGATATACAGTAATATTTGTAGCATGCAACAAACATTTGACTGCAGTTTTGTCCATTGCAGATAAATTGAAATCCGATTCGTATGCAGTCATACAATATTTGCAGCATGAGATGCATATACCATGTTATATGATCACAGGTGATAATGAATTGACTGCATATGCAATTGGTGATATGTTAGGTATAGATAGAGATCATATCGTTGCTGGTGCACAACCGGAAGATAAACAGAAGATTGTGAAATTACTGCAGCGTACGCGTGCAATACATGTGAGAGAGGGCAGAAAAAATTTCATCACTGCTTTCTTTTCTGGTGGTAGTGGTGGTAGTAAAAAAAGTGTGGAAGCAGTTGAATTTAGTTTGATACAGAATGGAAGTGGTGATGGATATGTTAGCAGTCGTGGTAGCACTAGCAGTAGTGTAAAAATTGATGAAGTGAGAAATATAGTGACATTTGTTGGAGATGGCATCAATGATTCACCATCTTTGGCACAAGCGGATGTTGGAGTTGCAATTGGTGCAGGTACTGATGTTGCTATTGCTTCAGCATCTGTTGTTTTGATGAATGATGGGTTGAGTGATGTTTTGAATGCAATTGATTTATCTAAGGCTACACTTATGCGAATAAAATCTAATTTTGTATGGGCTTTGTTGTATAATACATTGATGATACCGTTTGCTGCGGGACTGTTTTATCCATGGTTGCATTGGGCATTGCCGCCTTTTATGGCTGGTATTTTGATGTGTTTGTCGTCGATCAGTGTTGTGTGTAATTCTATGTTTTTGCGGTTGTACAAACCACCGGATTGGAGGAAGTCGTTTGATGAGAAAGAGAGGAAGAGGAGAAAGATGGGGATTAAGAAATATGGGGATAATCGTAGTCGGGGAAGTCGAAAGAGTAGTTATAAAGTGGAGAGTAAAGCATCGATTAATTTGTTGGAGGAAGATTGAATTTATTGTTTGTTTTTGTGTGTGGAATATTTAGGTGAACTTCTTGATGTGTAGTGAATGAACTGGGTATTAAACATATTTATTTACATTGTACTGTAGTCTGACGTACACTTGTAGTGACAGGGTAGCTTGTTTTTGTATGGAGCCAGTCTCAGTCAGACATCTACTACTTAAATTAAAGTTTTCCACTGAAAAAAAAAAAAA
|
||||
|
||||
>NODE_181_length_3796_cov_18.176675_g149_i1
|
||||
ATAGTACACATGTAAAACCAATATCAACTGCAAATGATATACAATCACAAACTGTTTCTATTCATACATCATCCAAAGATAAAAGTACAACAAATGAAACAAAAATTGAAGAAGAATTGAATACAACTGTGATTGTTGATGATAGCGAAGAAGAAATTGAATATCATATAAGAATATTTTGGGCTGGCGATAGTCGAGCAATACTTATTCGTCATTCATCAAATGTGGAATCAAAAGAAATTCAAAATTTACCATACACAATAAAAAATATTTTACAAAAGAGAGATATCCCCAATGAAAATACAAATCAATCACAGACATATTTCTTTGAAAGTAATACTAATCAATCATCAACGCATACCACCCAACCCAAACAACCCAAACATCAATCACCTCCAACTCTCCCACATTTCATTGATTTAACCATAGACCATTCACCAAAATGCAAATCAGAATACGATCGCGTTCTAGCTGCCCATGGCCAAATAATATCCAACCGCGTTGATGGTAAACTAGCTCTAAGCCGTGCATTCGGTGATAAATCAATGAAAAACAATCCCAACCTATCATTCAACAAACAACGAGTTATCTCTGTATGCGATGTACAAACAATAACTGCCAAATCCAATGATTACTTATTCTTATTCTGTGATGGATTGGTTGAACAATGGAAACATCCCCAACTAATATCTCATCTGTCCACAAATGTTTCTCATTTCAATGACAATGTGTATGCATTGGGTGATACATTCGATGATATCATTGATGCTGGATCAAAAGATAATATGTCTGCAATGCTAATACAATTTACAAATGGTGAACAATATGGAGATATTATGGAAAATATATCAAAACCTAAGACATTTTTACCTGGACCTTTGTATTGTTCGAGACATGATCGTAGATTTGTTACATGTTATATGGAAAATGCAAAGAGATTTGGACATAAGGATAGACCATATCTAAGACGAGCTGCATACAAATCAGATATAAAGTATTTACAGAAGTATGGGATTAAGCATTTGTTGTATGAGCAGAGATGTAATCGTACATGTAAGGATATTATTTCTGATATTCGTAGAGGTATTCATGAGATTGATAAAAGAAGGGAATTGGAGATGAAGAAAATGGAGAAAAAGAAGAATGAGAAACAGGAAAACGTTTTATGTACAAAGAATACAGAAACAATAAATATTTCTAATATAAATACAGAAACGATAGATAAAGAGATTGTTACTGCTGCTGATGATATTGTTATTGGTGATGGTATTGTGGATGAATTGAGTGCAAATGAAATACCTTTAACTCAGGGAAGTGTTGTAAATACAATTGATAGTCAATTGAGTGAAGAAGTTGGAGATGAAGGACCATTGAATATTTTCAGTGATGATGATGAGGATGATGATGAGGTTAGTACTGAGATTATTGATAGTAGTGATTATGATAGTAGTTGTAATTTATTGAGGTATACATCATCGAATATCAGTAGTATTGAATCTCTTTCTCCTGTGAAAGAATCTGTTTCCCCTGTGAAACAATTGCATGAGAATACGGAGTTGAATTTGGAGGATGATGGAATTGAAGATGAAATGAAGATGGATGCTTTGGAAACAGAGGTGGAAGTTGAGGTTGAGGTTGAGGTTGAAGTTGAAGAAGAAGTGGAAGTTGAAGAAGAAGAAGAGATAAATAGCAGTAGTAGTATATCTCCATTCTATAGTGCAAGTGCAATGGTGAAAAGTGCTTTTATATCTGTGAGTGAATTTGCAAGTAATGTGATTAAGAAAGTTATACCGGGTGTTTCTGGGGATAGTATGAGTAGAAAGAGATCATTATCTGAAATATTATCTTCTTCGATTGATTTTACGGATGAAATGTTGCCATCAGCATCGAAAATAATGAATAATACAAATACTATGGAAGATAATGATGGTGGAGTTAATTCAAATGATGCAGAATTATTGAGTCCAGCAACAAAGAAAAGGAAACTTAATAAAATGCAA
TAGAAATATATTTATACTGTTGAAAGATTACAAATTATTAATGTTGCCCTTGAAGTGTAAGTTTTTTATTTAAATGTTTTGTCATTATGACTCTCATTTGACAACTGCTGTTTTTTTCTCTACAATAAACCAAATCAATTGAAATTGTTTTACAAAATGAATTTTTCAAAATCTTCCCCTCTAAATATTTTCCCCAAATGTTCCTCAATATAAACCTCATCAATAACATAAGCCTTCCCTTCCCCCTCTGCAGCTTCAATATTAATATCCTCCATAATCTTCTCAATAACAGTAACCAACCTCCTAGCCCCAATATTCTCCACATTCATATTACAATCAACCGCTGTATCCGCAATACAATTGATTGCACCCTCAGTAAACTCCAACTTACACCCTTCAGTCTCCAACATCTTCACCTGCTGTGAAATCAAATTATACTCCGTTGCAGTCAAAATCCTACACAAATCATCCTTCGTCAACGGTTGCAACTCAACCCTCACCGGCAAACGTCCCTGAAACTCTGGCATCAAATCTGACGGCTTAGAATCATGAAAAGCACCTGCACATATGAACAATATATGATCAGTGTTGATTTTACCATAATTTTGTACAGTGATTGATGTACCCTCAATCAAAGGCAATAAATCTCTTTGTACACCTTCCTGAGATACACGTCTTTCACTTCCACCTCTCATACTATCTCTATTCCCACATATTTTATCTATCTCATCAATGAATACAATTCCATCATTCTCTGCCAATTCTATTGCACTTGCAGCTAAATCTGTGTCAGATAAAAAACTCTTCAATTCTTGTTCCATCAAAATTTGACGAGCTCTCTTTATTGTATGTGACTTCTTTTCATATTTATGTTCACTGTTACTGCCACCTCCACTTCCTGGAGGACCAGCGAACACTAAAAAAGGACTGCGTAGACCACCTCTCAGAAATGAAGATATATCAGATTCACGTTGAACATCTATTTCAACAGTTACAGTGTCCAACAAACCATTGTCCAATTGTGATTGCCAATATTTCTGTTCTGTATCTTGCAATTTACCCATAAGTGAACGGAGGAGTATTTTATCCACGGCTGTTTTTAGTTCTTCACGCATTTCTTTCTCTTTTCTGGATTTTACGAGAATTGCTGCATGTGCTACTAAGTCTTCGATGATAGTGTTCACATCTTTGCCAACATATCCAACTTCGGTGAATTTGGTGGCTTCAACTTTGACGAAAGGTGAGTCAGAGAGACGTGCTAGTCTTCGAGCGACTTCTGTTTTTCCACAACCGGTGGGGCCAATCATTAGTATGTTTTTTGGGGTTACTTCGCGTCTGTAGTCTTCTGGTAATTGTTGGCGACGCCAGCGAGCACGCCATGAAATTGCTACTGCTTTTTTGGCGTCGTTTTGGCCAACGACGAATCTGTCCAATTCTGATATCATTGCGTTTGGTGTCATATTATATGGATCATCATTTGCTTTTTTATCGATGTTGGAGGATTTGTTGGTGTCATCTGTGGTAGTGTCTGTTGGGTTGTTGGTGGTGGTAGTGGTGGTGGGAGGATTTGAAGGGGTATTGGTATTGTTGGTTGTGTTGTCAGATGATGTGAGGGAGGGTGGATTGGATTGTAAGTGATTTAAGTATTGTTGATGGAGTATGGATCTTTGGAGGTTGGGATCATTGAATGCGTTGTTGGTGAGTGATGGGGATGGTTGGATGAAATAATTGTATTTGGAATAAGTTAATATATTATGATGATTGATTTGTAGGAGTTTTTTAAAACAAACAAGTCG
|
||||
|
||||
>NODE_194_length_3707_cov_26.335703_g159_i0
|
||||
ATGAGTTTTGTATTGATATCGATGATGCATTAGGTGATTGGCCATCACCATTAGCATGTTTGAATGGACAAAATCCATTCTCACTTGGTTTTGTTCCTCCAATGAATTCCATGGCTGCAGCAATGATGATTCAACATCCATATATGTCAAAAATGTCACATGCTGCTGGTGGTAATTATCATCGTTCTGCAAGAAATTTGCATATACCACATACAGTAAATCGACGAACAGCTGAAGACCATCACAGTAGAAAAGAAACATTCACTTTATCTGACATTGTAAGCATTACTCGATCTCCTAGAAACTGTGAGGAATCAATATATACTTCTGTTTCTGGCACTTACCCACATACTTCTTCTTACCTTAACCCACCTTTTACAACTACTCTAAAAACATCTCAGTCTCTTATGGAACGTATTTCATCACCAGCATATTCTTCCAGACGTAGAGCAAACACTGGTACACAAAAAGCTCAACGATTATCGTTATGGAAGGAACAAATGCTATATGTTTCTGAACGCCATAATAATTCATCATCGCATGGAAGTCATCCACCAGTTAGTAATAGTGCTTTACCAGGTGGTGGCAATGCTTTATGTTCAATCCCATCTTCGCAGTCTGTTGCTACTTCTCGTGAATCATCTACATTTGCTGTTGTGGAACCACGTGAAACTCAACGGCAACATTTGGTTTTAACGCCAAGTAATTCTGCTCCACCTGCGAGTGAATTACCACAGGAATTGAAGCAACCGTTGATGTCAACGTACGCTAGTGCACAGATATACCATTCGAAAACTGTTGTTGAAGTGGAATATAATATTGGTGATCGTGTATTGGTAGATGGTGGTAAGTTAGGTCAAATTAAGTCGATTGGACAGCAACCTCAGTGGGGAGCTGGTACTTTTTATGGTATTAGATTGGCTGGAAAGAATGGTTCATGTGATGGTGCTTGGAAAGGTATTCGTTGCTTCTCTTGTCCACCAGGCTATGGTGTATTCATAAAGAAACAGCGTATTACACAAAAGTTACCAGATGCGAAGTTTGAATATATGGATGAAAATTTCATTGAAGATAACAAACAAAATACACGCGACCGGCAGTTGAAGGAGATCACTAAAAACGATTCTTCTTCTGATGAAGATCCTGGAGAATCAGACATTCCTACCCCAGGTAATTTTCATCTTTTGCAACACTCAACATCCGAAAGAATCCTATCTTTAACACCAAAAACTGATAAGCGTGTAAAACTTTCATTTGATAAAGCAAGTAAATCATCAAAACCACCATCATCAAAACCACCATCATCTAAAAATAAGACAGCATTGACTCGTAAATGGACTGGTGGTGCACCGCGTACTGTTGTTCTACGTAATAAAATTCCACCAATCAAACTGCATGCTGGATACAAAACATGTCAAGAGGGTATGAAATTACGTGAAGAAGAAAAATATAAATTAGCAATTGAGAAATTTAAGATAGGTATTGCATTATTGCAAGTAACTCTATTGGAAATTGATCGTGGATTGGAAAAACGTAAATGGAGAGGAATAATCGAAGAATTTCGTCATAAAATGAGTCGATGCACATTTGAAATACAGAATTGCAAACGTAATCGCTTTCTAAAAAAACGCAATCAAAATATACAAAGAGCAGCTGAACTAAAAGCACAATTGGAATTAGAGCATCAAAGGAAAGCAATGCAAGAAGCATTATCAAATGCTGTTGGTCGTGAACGTAATTGGACAGTAACTCGTGAAGATATGGAAGTGAAATTGCAAGCAAATCGTGCAGAAATTGCAGCCGCAAATAATGCTGATAGTGATGAGGAAGAAAAATCTGGTAAAAAGAGTAGCAATAAGAAATATATGCGTGATCAGACTTTAGCGAAAACTAAACGAGAATATAGATTAAAAAAGAAAGGCAAAGGAAAAAAAGATTGGAGAGATTATCCAGGATATGGTGGAGATATGGGGAAAACCGGAGATGACAGTGGTGGCGAT
GATGATGATGGTGGAAGTGATGAAGAAGATGGTGGAAAGAAAAAAAAGAAATCTAAATTATCCAAACATGATCAAGAATTGCGTTCCAGAATAGAAGGTGATGTAATGACTGAAGCACCAGATGTTTCATTTTCAGATGTGGTTGGTTTAGCAAATGTAAAGTTAGCACTCTACGAAAGTATTATATTACCTTTTTTGCGACCAGATTTGTTTCAATCTATAAAGAAAAGTACACAAGGTATCCTACTCTTTGGTCCACCTGGAAATGGTAAAACTATGATTGCTAAATGCGTAGCAGCCCAATGTGATTGTACATTCTTCAGTATTTCTGCATCTTCCATAACTTCTAAATTTGTTGGTGAAGCAGAACGTATTATGCGTACATTGTTTGATATGGCTCGTCAACGATCACCTTCAATTATATTTATTGATGAAATTGATTCATTATTGAGAGCACGTGGTGGTGCTAATGAAGCTGAATCTTCGCGTAGAGTAAAAACTGAATTTCTCATTCAATTTGATGGTGTAAAAAGTGCTCAGCAGGCAGATACAACAATAACAGTGATTGGTGCTACAAACTTGCCATCACAATTGGATGATGCAGTACTTCGTCGATTTCCAAAACGTATTTTAGTTCCAAATCCGAATTCTGTTGCACGATATGGTTTAGTTCGCTTATTGATGTGTAAGCAAAATCATGCAATAAATGAGAAACAATTTCAAACAATTGCTAGAAAACTGGATGGTTACTCATGTTCAGATATTGCTATGCTTTGTACAGATGCTAAAATGGGTCCAATACGTTCTATAAAAGGTTCTGATATACTTACAACTCGAAAATCTGAAATACCACCAATATCTATGAATCATTTTCAACAGTCAATAAAAAATATACGCTCTTCATTGTCTGAAGAAGCAGTAAATGCATATCGTGCTTGGGATGATGAGTTTGGTTCGAAACTGTTCTTAACTATGGATGTATTGCCAAAAGATATGTTAGCCAAGGAATTGATGCCTGTTGAAGAAGAAATAGCAAACAAGAAACGAAAAATTAAGGAAGAAGTGGAAAGAATTATTGCATTGGCAGATGCACAAAAGGAGAAAAAGATGCTGAAACAAAAGGAAAAACCAAAGTCGACAGTAACAAATAAGAAGACTGTATCATCATCGTTGTCTCAAACACAACCATCTTCTGAATCGAAATTGAAAGCATCAACGAGTGGAAAATCTGCGAAATCTCAGATGCAATCGAATAAGAAAAAGTCTAAGGCACCTGTAGCAACAGCTCCTGCAACAGTGCAGAGTAAAGCAAAGCGTAGTTCAGTAAAAGGGAAGAATAAAACCAATAGTAATATGAGTGATGCAGTGCAGGTAAAGAAGAATAGCAGTAGTGGTCATAATGGTGGTAGCAGTAGAAGAAAGAAAGGAAGTCTTCGTTTGACTGTTGTAACGAACAATAAAGTTGAATCCAGCAAAAGAAGAAAGAGTTCAAGCAGTGCTAAATAGTTGTGTGATTCATATCATGTATAGGTTGGCAATGCTTAATATAAAATCCGAATAGGAATATATCACCGAAATTTTTTATAATATTTTATATTTAGTTGTTGCTTGTGTTGTTAATAGACAAACTCCATGTACCGTAGGACAGGAAAGTTTTAGAACAATGTAGTGATCTGGCAGTGGTGGTGTGTGGAAGTGGTGGCGGTG
|
||||
|
||||
>NODE_200_length_3691_cov_73.986546_g164_i0
|
||||
TTTTTTTTAGTGTACGTTCTACCACAATATGGTAAAGAAAAAGTAAATCAAAAACAAAAAAGAGCAGAAAAAAAATAGAACGTAAAGGTACTAAAATTTAAATATAAATACTGTATATTATGGAGGGAAGTGCAATCAAATGAAAAAACTTCATTCTCGATTGCTTAACTTCCTTCCTATACAATAAAAATGATTTATAAATATATATATATACTAAAAAAAACTGAGCATAATAAACAATAAGTGCTACAAACAAACCATAAATATTGATATTCAAAGAAAAAAACAAAAACCTATTTATTTTTAGATTTCACTGCAAATTGCTTCAATTTAAGCCATCGATTCTCACGTTGTTCTTTTTTTTGAATCCGCAATTGCTCATTTTCATCAAATTTCTCATCCAACGACTGATGTTGTTTTTCAAGTTTCTCAAAAAACTCTGGATCTCTTCTCTTAAAATCAGACATAATTTGCCGATTAATATTTCGAATGGCAGACAACCAATACTTCTCTTTATTCTCCAACAATGCATTATAAATCATTGGCCATATCTTCTCTCGATGTAAATCAACACAAATACGAACTGTCTCATCTCTCCAAACCATAAGACTCCGTTCAGCAACCTGGTGATGAGGACTAGTCATACAATCCACAAATTGTTTCAAAACTGCAATAGTAATTGTAGCAAATGCAGGATCACTTTGTTTTACATGCGGATGACGTATACATACATTCAGAATATGAACAATCTCAGTAATAAACAATTGCTCTTTCAAAGGAGAGAATTTAGGCCAGAACTTAAGTATCCCACCCATAATCACTGGTGCTGAATGTGGATCTTTAGTTACAAACTGTGTACAACATTGCAACAATTGTTCATGAAATTCATCCAAATGGAAAGACTTATGCAATGGTACAATCACATTACGTAATACCACATGGTAATCAGGTTTTACCGGTACATTCAATCCTTGAAATACACTACAGAATATCTCAAGTATCTCTGGTAATCCTTGCCAAGAACTTGTATCGCGAAATGTGGAAGTATAAATATAGCGATAGCAATAATCCGACAAATGTTTTCGAATCGCTTTACGTAATGCCATAAATCTACCATAAATGGCATGTACAATAATTTTCACATACTGTGGTTCTCTATCATCACAAGAACCAAACAAATGAATGAGATTTTCTAAAAATGCTCCTTGCAAATGTTTTTTCATTGTTTTTTTATCAATGTGTGTGTTGGTAACCACATGAAAAGTGAGTTCGTATATCAATTGTAAATGATCCCATGCTGGATCACGAAAGTCCATTTCATCATCATTTGGGTCTGTACGTATCATATTTTCATCTGGTAATGCACGAAACAAATTCTTCCAAATTGTTTCAATGCATTGTTTGAGCAGTTCAATCGAATTCCATTTATTTCCACTCATACACTGTAGTACTTCCAACAATAAGTCTTTCTTGTTATTTATGCGTATCATCTCTTCTTTGGTTGCTTCTTTGAGTTCAACATTAGTGTTGTTATTGTTGTTGTTGTTGAACTCTTCATCGCTGTTTTCATTCTCATTCTCATCCGTTTCATCCTTGCATAATTCAAAATTGCAAAGTACTGAGCAAAGTTCTAATTTCTCTTTCATGAGAAGTTGTCTTTCATTTACATCAGTAACATCTGACAGTGATTTCATTGAATCACCTGCAGATAATAACTCATGTAACAATCGATTGGTTCTCTCTCTTTGTTTCTCTTTGGATGCATTCAATTTCTCAAACATTGGACTATAATCAGGCAATGGAACTGAAGGTTCATTTCCTCGAAGAGAACCTTTGTTTTTACCTTTCTTTTGTTTCTTTCGAAGAACATTTGGCGAAGATGGACACTCACCAGGTAATACACCACCAACAGCAATGATTTGTTTTTGTGAAAGTACTTTAGCTCCACTAACTTTAGCTTCCTTCTTAGCCATCAATAATTCATGTTTCAATTGAGAAAT
CTCATTATCTTTAGCAATAATCTCACGTTCTTTTCTCTGCAAAATAGAATCTTTTTTGGACAATTGAGCTTCCAAACGATTGATTTTACCATTCAACTGTTGCATATCAGCCAACAATTGCGACTTTTCTCCACGTAACTCACTCACAGGTGGATGATTCATTGCAGTTGAAGGACTATGTCCATTTCTATCATGCAATTGTTTGGCCATTTTAAGATCTTTCTCAAGTGCTTCGATTTTATACTGCAACTCTTGCTTATCTTTGCTGTATCTCTCCAATGATTGTTGAACCATAACTTTATCTCTATTGTGTCGATCTAAAGATAACTGTATGTACTTCTGTCCCAATCGAGCATAGCAACTATAGTGAATTTGATGATGCGGTGGAGGAATCGCAGATTTCAATGTCTTAGCATAATTCATCACTTCATTGCACAATATCTTACCCTCTTTGTATTTGTTGCTGTATTCAAGCAATCGAGAGTAGAAATAACAGATATCACCTGCGAATGGATTGAGTTCCAAAGCTCTTTTATAAAATATTTCAGCATTCTCCAATTTCTCAACCAATACCAAACAATTGGCATAATATGTGAGAGCTTCGGTTGATGTTGGATACTTATCAACTGCTCGTTGATAAATATCCAACGCTTCAACAAACATACCTTGCTCTTCAAAAAACACTCCATGTTTTAGTGGTTCAACTACAATGAACTTTCCATAATCATAGAAATGTGGTTTGGATGCATGTGATTTGAGAACATGAAATTCATGTGCATTCAATAGTATATCATTGTCAGATGTATACCCATTGATTGGTCGAGATTTTCTAAATTTCATCTCAAAAATGGTAGATACATACTGTGACATTTCTCTTATCTTAGCAATCAATTCTTCGTAATTGAAATTGGATGATAGATGAATATGAAAAACAGGTGTTGTGGCAGCAGATGATGTTGTTTGCGTACGTCGTCGTTGATCATTGTCGCAATATATGGCTAACTCAATGGTGGTTGATATCATTTTTATTTTTGTTGTACTCGAAGTCGAAGAAGAACCATTTGTATGTGTGTGCAAGGATGAATGTTTACCTCTTTTACGTCTATGACTTATACCATATGATGCACCATCAGAATCAGAAGAAGATGAACGTGCATGAGTGCTGGGGAGTGCAGCAACCTTTTGAATGCTGGAATTGTTGCATTTTAGAAAATATGCATATTTGGAGCTAGTTTTAAAATAGCGAGTACCTTTGAACATTCCATCACTGTCTCCAATTGCTTTGCGTAACTGTACTCCATAATAGACATCTGAGCAACGATTGGGAGTACCAATGTATCTGATTTGACCAACACTCTTACGACTGCCTACGTACAATTTCACCCAATCTGTGATGCCAATTCTGGAGATGGAGTCAGAGGGCATTACTTCTTGTATGCGAGCGAAACGTAAGAAGAGACCACGTTTAGCTTTGCATTTGAAGTATCGAACATTACCTACCACACCATTTGACTTACCTTTTGATTCATTCAATTCTATGCCGTAGTATATGCCTTTTCTTGCTTGGATCTCTCCTACGAATCGGACTACTCCTAATAAGTCTTTTGTTAAACGGACGCGGTCACCGACTGTTACAACGTCGTTCAAGTTTGTAGTATGGCCGTTTGTTCCAGACATGATTTTACACAATA
|
||||
|
||||
>NODE_202_length_3674_cov_52.950345_g165_i0
|
||||
TTTTCTTTAATGTTATATGAATCTTATCTAAGCACTTTCTCGAAAAAAGAAACCAACCACCCCCAAAAGCCCTTGAGCTAAGTGCCCTAATATAAAAAATATTTTCAGTTTTCGTTGCATGCATTTACCACATGTGTAAACGCTGTGCAATAGAATATATAAATATGATATCATCAAATTATAATCGTATTCTTACGTACAGAATACAACCATAACGTAATCAAACAACAAGCAACAACAATAAAGAAAAGCGTCAGCATAAGCATCAACGATTCCCAAGCAGCCATCCGTTGCTCATGCAAAAATACCTTCCGCACATTCTCCTCCATAACTTTCTGTTCTGCATCTGTACGAATAACACCACCACCTCCTTTATCACCGAATTCCCCACCAACCTCATCACCAAAATAATATTTATCAAAATAATGATTCGCCACTTTAGCATCATAGATATCTTGCATTTCAGATAATCTCTGATTCCTAACACTATCCATCTCCAACTCATCATCAACACCATCACCATCACCAAAACCATATTCATCATCAACAAATCCACCTTCAAAACCATAATTATCATCACCCACATACCCACCATCATCAATTCCATCAACCATCATATATTCCTCAATATCAGCAGCATCTGCATTCATCCCATTACCATTCTCATGTTTATCCACATAATCACTCAACCACTCAGCAGTCTTCTCCTTATCATTATTCAAAAATTTATTGTTCAACATAATCTTCTCTAAAATTGACCGCACAACCCGTACCTTACTATGTATAAGTTTCCTCTTTCCATCATTATCAAAATAATAATCATTTATCTCACGATATACTTTGATATTAGCAAAAGTAGATGCCATACCCGCCTGTCTCGGTTTACCTCTACTTCTCTTCAACATATTCTGCCATACAATACCACGTCCATAACAACGTCTCAAACTTCCAAACCCATGTCCTTTCACATTACGACGTACTTTTGGAGAGAGTACAAAATTGATTGCACGTTGCAACATATATGGATCACGATATACAGCACCAATTGCACCTAAAATCACTGTCTGCACTTCTGTATTTGCACTACCATATATGTATAACAAATTATTGTAAATCAGTTTAGTATTGCGTGACTTCTATGCTTATTTTCATTCATTTTACTTAAGCCAGCTTCAAATATAGCTTGTAAAACATTAGCATCGATATCTGCTTTGAAGTATCCTTCTTTAGTTACTCCAAACTTATGATTGTTTTTGCGCATTATACTCAACAATAAATTATATCCAGTGTTGATTATTGTTTGATCTTCAAATCGTATGAGTGCAGATAAAATTAAGGGACGAAGTAATATATGATCATTTGATTCGCTATTGCTATCTTTTTTATGTCTATGATATGAGCTACTTCCATCTCCATCCCATTTTCCAATTGCTCTCCATATTTTATGTATGATGTTTTTAGCAAAATAACGAAAGTCTTTTCGTAATAATTGTAGAGTCATATCTGGTGAATGGAAGAGTTCACAGTATATATCATCGATATGGATGAATGATTCAATGATCAAACGCCATAATAAATAATTTATTTTTCCGTGTTTACCATTGTTGATTTTACTGACAATTGATGATACAAATGATATATAATTACCAGCTGATATGTAGTTGAACATTAGTAATGCGAATCTGTCTGCAACAATACTGAATTGGTCCACTTTTGATAAATGGTCGAATGATGAGATAATTAGTTTGAGACTTTTTTTATCATAGTGTGTACGAAATAGAGATTTCATTTCTGGATTTATGATATAATATTCGTTACCTTTGAGGCATTTCTTTCTAGCCATAGTTATCTTAAGGGCAGTAGAATCAAACAATCGCTTATTATCATATGTGCTATAACAATTTCTCACACGCAATGGTATACTCCACAACTGATCCATATTAAGTTTAGTTTGCGAATGTAGATTAGGTATCATCCGATCTTGACTCAAAACCAATGCA
ACTCTACCATCACTCATATCTATCGACACTTTCACTATAGGTAAACCAATATTCAACACCCAATCATTCATCATACCTAAAATCTTCTTACGTGTCATATACTTATGCATATTTCCAATCATCGCATTATCCGCTAATATTTCAAACAAATCATCGGTATCTGAATTAGAATATTGATATTTCTTCAGATATGCTTGCAAACCATTCATAAATCTATAATTTCCAAAATATTCACTCAACATATGCAAAAAACCTGCTCCCTTATGATATGTTATACCATCGAACAAAGAACGAACATCTCTTGGACGAACAGCAACTTCACCATTCTGACTCTCTTTATCAACAACAATAGCATGTGTTTCATGCGAACAATCCAACAGCATAGCACGATGTGTATCATAATAGTATTGATCCCAAAAATTCATCTCTGGATATGCATGTTGTGCACCAATAATCTGGAAATAAGCTGCAAATCCTTCATTCAACCATATATGATTCCACCATTTCATAGTTACCAAATTACCAAACCATTGATGTGCTTCTTCGTGTGCAATAACCATTGCTACTTGCATAACATCCATTAGTGTGGAATGTGCTGGGTCTACCAACAAACGTATATCACGATAAGTTACCATTCCCCAATTCTCCATTGCTCCCATTTTAAAATCGCTTACTGCCAATGAATACATTTTAGGTAGTTTATAAGTCATATGAAACAATTTCTCATAATAAGGCATTATGGATAGAGTTGATTTTAAAGCAAATTGAGCACGAATGTGCTTTTGAATGGGGAAGTATATGCGTTGGGATATATTTCCAGATTTACCAGCAACATATTCATAATCGCCAATGGCAAATGCAACTAAATATGTTGACATTTTAGGGGTGGTGTCGAAACGCATTAATTTACATTTGGAAGTATGACCGACATGAGATCGCCAAAGGCAATCATGTTTATCTGTTTTTTCTTCATATTTGACGAGTGTATTGGATATAACAGTTGCTGGATAAGGTGCATATACATTTAGATTGAATTTCGCTTTGAAATGTGGTTCATCAAAACAAGGAAATAAACGACGAGCATTTGTTGCTTCAAAATGTGTGATTGCATTATGTATCATAACACCATTATGCTCATATGAAGTAATGTACAATCCATTCATTTCTGTCTGCATTTTACCCTTGAATTCCAGATATAAAATGGCTGATATATAATCATTATTTTTCTGTTTTCTAAGAATAGATAGTAATTTAGTGTTAGCTAAATTGAAATCGAATTTGACTGTCTGTGCATGTTTATTGTAATGAGCATTTGTTTTATGGTACACTTTATTATGGTTGTTATATAAGTATAAGCGACATTTTATTACTTTTATATCACGTGCGTTGATGGTAATTTGTAAGTTTTCATTGCTGTTTTGTAAATGATCTTTGGTTATGCGAATACTGATGTATTCGAAGGCTTCGATTGTGAGATCACTAAAAAATGGATGAATAGTTATATTATATTTCACAGGGTATATAGTTTTTGATAAGCGATAGGTTGCATGGGATGATGATAATGAAAATGAGGTTATTGATATTAGAAGAAATACTAGGATTGTGTTGATTAAGTATGACATACAGTGTTGAAAAACTGCAGACGCTTAGAACTTTGTGCTAATTCACTCCATT
|
||||
|
||||
>NODE_226_length_3556_cov_34.562589_g181_i0
|
||||
TTGTCAACACATGGAACATGCACGCTTTACGTGCAGTTCAACTTTTTTGTAGGAAATCTTGTTATTCACATCTTCAATCTGTTGCTATCATGACGTCCATGCCAACCGTTATCAAATCTCCTGCATCATCTTCTTCCTTATCTTCATTTCAATCAATGATGTATTTTTTCCACAACAAAGAACAGCAACCTACCCAATTCATTTCCAATTTGAATTCGATTGAAAACTCATCACAAATGTTGTCAATCGATTGGCAAACATTAAAAATACAAGCACAAAAACAACAAACATCTGAAACAGAATCAGAATCAGAATCAAATTCAGAGACAAGAAATATTTCCGTGATTTCCAATTCCAAACGCGGTCTCAAATGGGACAATGGCTATTTTTATAACAAATTACAAACGAAATGGCTTGGACAAACATTTTCATTTTGCGATACTTTGTCTTCCACCCAAACGTTTCTGAAATCAAATGCTCAACAGCTTCCATGCGGAACTCTCTGCACATGCAATACCCAAACATGCGGTCGAGGAAGAGGAGGAAATGTTTGGGAATCTCCGAAAGGCTGTCTCATGTTTTCATTCACTGCTGATTTGCATTCATCGAAAGCACGTTTTTTGCCAGCCATGCAATATTTGATATCTGTAGCAATAGTGCAATCCATCCTACATTTGACTGATGATCAATTGCAACTGCAGCTCAAATGGCCGAATGACATCTACCATGGAAGCACTGAAAAAATTGGAGGAGTGCTTTGCGAAAGTTTATATTTACAGGAAAGCAAGAAATATCGAGTGATTGCTGGAATTGGATTGAATGTCGATAATGAACAACCAACCACTTGCATTTCTAAAATTATCAATGATTTGAATGGTAGTAGAAATGAAAATGAAACTGGAGAGAAACTGTCGATCATATCGAGCATCAATCGCGAAGATGTTCTCAGTACTTTTTGTAATATTTTTGAAGGCATGTTCGATGATTTTTGTTGTCAAGGATTTTCAAATCATCATCGCCAGCAATATTTGCAATATTGGATGCACTCTGGGCAAACAGTGACTGTCAGAGAATCTGAGGATTCGGTGGTGAGCAAGGAAGTGACCATCACTGGTATCAGCAAATACGGATTGTTATTGGCTCACAATGCTCTCGGGGAAGGATTTGAACTACATCCGAATGGAAACTCATTTGATTTTTTGAAAGGTCTGATTTGTAGAAAAATACAAAATATAAGCGTCAAATATTGATATTTATGTAGTGATTTGTTAGGATGAACTATTACTTTTGTTCGTATGCCGTATTTATATCACAACAATACCGTACACACATTTTCAACAAATAATCTATTTCTATGGTAAATGCTCATCAATATTTTTACTAATACCATTTTTTGGATAGATAGCAATGGTATTCTTCTGCACACGAGTCGTTGGTTCCGACTTAGCATAAGTAATCATCCCCAACAAAGGTTCATACTCTGATGATTCACTTCCCTCCAATAAATTCTTTTTCAATCCTCCATCAAATATAACATCACTGCACTGTTTCTCCGGAAATTCCACAATATATTTACTCTCTCTTTCCATCAAAGTACCATACATACAATACTGATACCACTTAAAAGCCAAACAAAACCCACCAGCAATAACAACAACAGCCACCACAAAAGTCATCCATTTCAACACTTTCTCAAATCTCTTAACACTCACAGGTCCACCAATCCGCTTCACCAACATAGCATTCACCGCTTCATGTGCTATTTTATATTCCATAGCCATTGCATAATACTTAGATTGACTCACATCAATATCTGTATATTTCACATCTGCCGACATCAATGATTTATCAACCAATACTTCCTCAAATTCATATTTAATTCCATTCAATCCATAATTCTTCATCCATGGCATCCAAAACCCTCTAAAACGCAATGGATCACTATTGGTATCAAAAGAATTCTCATATTCTTCAACCAAAGTAACAGTAGAAATATCATC
ATATGCCAACAACATATTATACATATATTGATACAAATTTGAAAATCTTATATCATTGCTGAGATCATAATATTCGCAAGGATCACTTACGATATCAAACAAACATGGTTTATCACTGCATGGAACATCATCTTCACTCACAGCACTCTGTTGAACATCATCTGGAATACTACCACATCCCAGATAATCCATATATCCATCACTGTGTGCATTAATTGCGAAATCACTCGTCCAAAATGCATATGCACTATTAACATTTGAAGCACCAATGATTTTATATCTTCCATATCGATATGCACCACATACCTTACCATCCTTACATTCATATGAATCAATATTATACAGAAATTCTCTGTTCTCCAATGATAACTCATTATCAATTGTCCCATATTGCAAAGCATTCCACATATTTTTACCATCGAATACACGTCGATCACTGAATATCTCATCATCTGCCAAACCAATAGCTGATAGAATGGTAGGATACCAATCTGTGACATGCATAACACTATCAAACACTTTACCTTTTCTATCATCATTCAAATATCCACCAGCAATAAATGCAGGTGTACGTACTCCACCTTCATACAAATATGCTTTCCCACCGCGCAATGGATAATTATCACCAGATAATATGAATGCACCATTGTCTGATATACCAATCAACAATGTATTCGACCATACATTTGATAAGGATTTAAGATGGTCTATCATTTGTTTGAGAGCATGATCAGCACATGTGGTCACTGATTGTGCAGCTATTCTCAAATCACATGAGTTGATTGCTGTGAGTAATGCTGGGAAGCCATATTGTTTGAGACAGGTGGTTGATTGTAATTTTTCCATGTATGGTTGTATGCATTCTTCTGTTGTTTCTAAAGGGAAATGGGAAGCATGCATAGCTAAGTAAATGAAGAATGGTTTGTTGGTGGATGCAAATGCAGTGGTTATTTGTATGAATTGTTCTGTTTCCCATATACTTAGATATTTATTGTTGTTTTCAATGAAATGATCATTTATTCGTAGATCATAGCCATTGTATATTTTATTATCAACAGTGAGAGAGTAATCATGTGAGTAGTAATGTAACATGGGTGATAATGAACCATGGAATGCATCAAAGCCTCGATGTAAAGGACATAAATCTGGTCGATGGAAACCTAAGTGCCATTTTCCAACTGCTGCTGTTTTGTACTCATTCTCTTGCATGTCATTTGAGATCATTTTCTCAAATACTGATAAACCAAATGGGTAGCCATTCTCGATTACAGATGATTGGAGACCGAAACGTATTGGGTAACGACCTGTGACTAATGCTGAACGAGTGATGGAACATAAAGGGTATACGTAATGGGAATCTATGGTTAGAGCTTCGTTTTTTATGAAATCTGTGAGGAATGGCATTTGGGTGTCTGTGTGTTTGTGCCATGGAACGTCGTTGAATCCCCAATCATCTGTTACTAGTAAAATTACATTGGGTTTGGTATTTTTATCTTTTCCATGTACTTGGAGAGCTAGAAAAAATAAGCAGAGACCTAGAAAAGTAGATGAATGGT
|
||||
|
||||
>NODE_238_length_3524_cov_80.640288_g191_i0
|
||||
CAACAATTTAAAAACAGACATGTACGCGTCTGTTAAAAAATTAATTTTCAAAAGGATCACTCATCGTTCATTTTCTTCTCGTGCGTTTGATGAATGGAACCCACGAATCCACTCTCTATCTGCAAAATATCCAGGGTTGTTCAATCGTCAATCATTTGTCGATGATCAAATAGAACTTCTTAAATCGGCGGAATCTTCCTTGCCTGGAATTGCAGAAGAATACGAGAAAGGAAGGAAAATGGTAAATAATATGATAGGGGATGAATTGTTTCCAAATGTGAATGAGGTTCAACCGCAGAATGAACCAATATTCTCCAATACGTATTTTGAACAAAATTCCACTGAATTTGAACAAAAGCTGAGCACAGGTGAATCAATAATTGCCAAATTGCTTGCTTTGCAGCAATCGAGTGCATCATCGGTTGAAGAATCAAATCTGCACATGATTGAAGTCTCAAACTTTGCCGATGAGATTGAAGAATGGCGTTTATCATTGCTCAGCAATGGAACATTCAATGATACACTTGGGTTTCCAATTTCCGATAAAAAGTTGGATGCATGGGTAACCAATGAATACTACCATGCACCAGATTATGTTCGAGAACAAGGGATGATAACATATGAAGAACATAAGAAGTTAGAGAATCAATGGATGCTCGAGGTTGATGAACGATTGGTAATTAAGTTGAAGAAAGATTGTTCAACCAAAGGAATTGATTTTAATGAAGATGCTTGGGTGAGATTTTGTGGAATAGTGAATGATATCTTCCATCCTCATAACATCGATCATTATACTCTCTCAAATGCAAGTAAATCTTTATTTCGAGCAGCGCTTTGGCAATCTGTCAATAATGATGGTAGTGATCGACCAACAATGGTTGAAGAAAGTATTCTTCAATTGAAAGAGATATCGGAAGATATGCGCGATAATGATTGTCAAATTGTTCGACGGTGCTATGAACAGAATATTCAAATGAAGTATGCATCATTGGTGGCATTGCAATTGCAAATCACTGATCCATTGTTTGCTGCCAAATTATGTGATGAAGTACGGTTGACTCTGGAAAAGGTTGCTTTTGATAGAGCAGTATGTGATGTTATGCGTGATTTGGAGATACCCAGTATTGAAGCAGAGAAGAGAGCAAATGAAGAAGAATTACGAGAGACAATGTCAACAAATCCCGTGAATTTACTGCGCAATATCAAACGATTAGAAAATGAAGCACAAAAGTTCAATATTGAAGTACCAGATTTAGATTTTGTGTTAGATTCTATTTTGCCAATAAAATTATGGGGCAATGATTTCTCTCGAGTGAAGACTGCCATTCTAGCATATGCTACAGGAGATACATCCGTGAAATTGTTGGACATTCATTCTGCGATTGAATCTGTTCTCATTTCTTCGAATTTGGTGCAGTCCATTGAGAATGATGGAGGATTGAATGATGAGGAAATTACAAAATTCAGTGTTGAATTTCATAATGCACGTGCATCAAATCCAGAATGGTGGAATTTGAAGCAGTTTTTGGTGAATCATTGCAATCAAAAACGAGATCAATCATCCATAGTAAGCAATCAAAGAATTGTGAGAAGAAAATTGAGTTTGATTGATCCCCTGCTGAGCAATCTCATTTGTATAATGTTTGCAGATGGTACATGTGATTTGAATTCATTTCATTCACTTCTCGCAGATTATCAAACAATAATGCGTAGATACCGAGGAGAGGTGGATGCAGTGATCACTAGTGCACAAGCACTCGATGAAACTACATTCAATTCTATTTTGATGGCGTTGGAGAATGCAAATCCAGAGAAGAAAATTACTTTGAACCAAGCAATTGATACTGGAGTTCAAGCTGGATTTATTGTCAAAGCTGGATTGCAGAAATTTGATTTTTCATTGGCCTCGCATTTGCAGCAGGCACTATGACAGGCACTATGAATTTTTATAAAATTATAAAATAAAACGGTTTTGAACCTTGATTTAAAAATGAATTGCT
TTCATTCCACATGCATTCATACATGCTTCTTTAAATGCTTCGCTAAGCGTTGGATGTGCATGACATGTGCGAGCTAAATCTTCAGCAGATGCTCCATATTCAATACCAATCACACCCTCTGCAATCATTTCTCCAGCAGCAGATCCAATGATATGTACACCCAACAATCTGTCTGTTTCTTTATCTGATAATATTTTCACAAAACCTTCCGTTTCTCCATTTGTCCTCGCTCGAGAATTTGCTGCAAAAGGAAATTTACCCACCACATATGAAATATTACTCTCTTTCAACTGTTCTTCAGTTTTCCCAACACACGCAATCTCTGGTGAAGTATATATCACAGATGGTATGCTATTATAATTTACATGTCCAGACTTTCCAGCAATGACTTCAGCCACTGCTATTCCTTCTTCCTCTGCTTTGTGTGCAAGCATTGGTCCTTCAATGCAATCACCGATAGCATAAATATTGTTATGAGTGATACTCCTCCAATCCCCATGAACTCCATCACGTATTTTTATTCTTCCTTTGTCTGTCTCCATACGAATGGACTCTGTATCTAAGCCAAGATTGCTAGTGTTTGGTCTACGCCCAATTGCAATGAGTAATTTTTCACATTCATACACTTTATCATCAGTGCATGAAACTTCAATCATATCTGATGTTGTTTTCACTGATGATAAACCAGTGGATAGTTTGATATCTATTCCTTGTTTTTTCAACATCTTCTGAAATGCATTTGCAACTTCTAAATCTGTGCCAGGTAATATCCTATCTAGAAATTCGATTACAGTTACTTTTGTTCCGAGTCTGCTCCAAACACTTCCCAATTCAAGTCCTATGACTCCAGCACCAACCACAATCATAGATTTTGGTATTTCAGAAAATGAGAGAGCTCCAGTGGAACTGACAATATTTTGTTCATTCACTTCTACTCCAGGTATATCTGTACTCACACTTCCACTCGCAATTACAATATTCTTGGCAGTCACTATTTTTTCACCAGAATCAGAATTTATTGAGATTTCTGTTGGACTCTTCAATTGTGCCCATCCTTTTTCATAATGTACCTTATTTTTTGAGAATAAGCCTTCAATGCCTTTGGTGAGACCAGAAACTGCATTTTCTTTGTATTTCATGAATTGATTGAAATCGAATTTCACATCTGCAATAATTCCTCGATTGCTCAAATCATGCAATGATTGATGATAGAGATGGCTATTGTGCAATAATGCTTTTGAAGGAATACAACCTACGTTCAGACATGTGCCTCCTAATGATCCACGTCCTTCGACACATGCTGTATTAAGGCCCAATTGAGCTGCTTTGATTGCACAACAATATCCTCCAGGACCACCGCCAATTACCACTACATCGTATGCTGATGTTGAATTTCTGTTCTGTCGAATACATAGAAAACTCCCACTCTGTATATTTGTGCTATGTTGCAGTCTTTGACCAACGCGTTTTACAGCTAGCGCTAACATTGAATTTAAGTCACGAATTAATTTTATTAATTAGATC
|
||||
|
||||
>NODE_257_length_3425_cov_46.478969_g206_i0
|
||||
GTGCTCTTCCGATCTTTTTTTTTTTTTCTGTCGACCATTCGTTCGTTTTCATGTCGTTCTTGTGTCTTCTTGGTTTCTCTTGCTTACTCTTCCACTTCACACAAAGTTCCAATAGCCACGACTCACATGGATTTACCCAAACGAATAAGTTCCAACCAAATATAATCTTCATGTTTTGCGATGATTGTGGCTTCAATGATTTTGGTTTCAATCATGAATCAAATGTCCAAACACCATTCATCGACACTCTAGTAAAAAATGAAGGTTTGATAATCAATACACATTATGTCCATAATTTATGTTCCCCAACCAGAGCAGCATTTCTTAGCGGTAGATATGCACATAAACTTGGATTACAAGTTGGAATGCTCACTGAAAATACAGACTATGCATTAACCAGACAAGTATCTCTACTTTCAAATGAATTTCAATCTCAAGGATATGCTACTCACGCAATTGGAAAATGGCATTTAGGCTACCAAACATGGGAATACACCCCAACCTACCGTGGCTTCGACACTTTCGCAGGTTTTTACAACGCAAAATCTGAATATTTCACCCATAAATTCAAAAATTCCCAAAACATGGATTACTACGACTTAAGATTGAATGAAGAACCAGTAAGAGATGCAACAAACATTTATGGTACTCAATGGGAACAAGAACAAGCAATATCAATTCTAAAAACAAAACATAAATTAGATGAACAACCATTTTTCATGTATCTTGCATGGCAAGCAGCACATGAACCATCAGAAGCACCAATGAAATATCAAGACATATATTGTGTCAGTGACACATGTGATGATAGTGATATACATAAAGCACAACTCACATCATTAGATGATAATATACAAACAATTGTAACTTACCTCAAAGAGAATGATTTATGGTCCAGAACATTACTTGTATTCTCTGGTGATAATGGAGGTGCAGTTGGATTTGGTGATAACTTCCCATTACGTTCATATAAATTCTCACCATTTGAAGGTGGAGTACGTGTACCTGCATTTGTTACAGGTGGTTTTCTTAATCCAATGCGCTATGGAGAACATGTGGATGAATTTGCTGTTCATGTCACTGATTGGTATCCAACATTATTGTCTGCTGCTGGTCTTTCAATAACACATGCAAAATCACTTGCATATCATTCTAGTACGTATGATGAAAATACATTGGAATTGATTGATTTTGATATACCTTTGGATGGCTATGATATTTGGCAATTTATTCAATATGGAACATATTCAAAAGATGGTAAAATTGCAGATTTCTTCATACGTAACAATGATGATGGTAAAGGAAGCAGTAGAAATTTGGGGAATACTTTGAGTCATGGACGTGAGATTATATTGAATATTAATAATATGAATTGTAAGTGGGATTCATGTGGATCTATGATTATTGGTGGGAGATGGAAATATGTGAGAGGTGGGAATATGTGTGGTTCTATTGTGGATTGTAATGCATGGATGGAAACAAGTGATACTAATATTCTGAAGTGTGAGTCGTATTCTGATATTAGTGGTAAAAGTGTTGATGAAGGTGGTATTAGTGCTATTGACTGTATTGAGACTGAGGATGGATGTTTGTTTGATATCATTGGTGATCCATGTGAATATTTTAATTTGGGTGAACGCAATCCAAATATTGTGAATGTGGCGAAGAAGAGATTGCAAGCTGTTTATGATGGAGATGCTATTGCTCCATTGGATTTAATGGGGAAGTTGCGTGGGGATGTTATTGATCCTGCTTTGCATGGGGATGTTAGTGATTTTTGGGGTCCTTTTCAGTTGTTTGAGGATGTGCAGTTTGAAAAGCTAATATTTCGTGATTTTACATTGCTTTATGAGGGGAGGAAAGTAGAGGTTGGGGATTATGAGGATGATGGTTTGATTGTTGATGTTGTGAGTGGGGAAGAGTCTGGATATCATAGTAAGGGAATGTTTGAGTTTGATGATGATGATATATTTGCAATTATTATCATTGGAGTGGTGTCG
ATATTGTTGTTGATGTTGTGTGCTGGTATTTCGTATTTTTGGAGAAAAGTGGATCGGAAATCGGAAGTAATGGCAGAAATGCAGCCCTTACTTGTTTTGTAGCAGGATGAGTGAACCTAATGAACGACGTTGAATTCTATTGACTGTTTCATCATGCGTTTTGCTCGTATTTTTGCCTTTGTGTCGCGTACTACTCGCGTTTTATTTAGAAAATCAAGTTCTTCCTCTACCACAGAACTGAAGAAAACCAAATTAAATGCTTTCCACATCGCAAATAACGCAAAATTAGTAGATTTCGGAGGATTTTCAATGCCAGTTCAATACAAAAATTTATCCATAACAGATTCCACTCTCCACACACGCAAATCATGCTCATTATTCGATGTATCACACATGGGACAATTGAAATTCTACGGAAAAGATCGCATTCGATTCTTAGAATCAATATTAGTGTCCAATGTATCTCCAATAAAAACAAATCAATTGAAATATTCATTAATGCTCAACAAAAATGGTGGAATCATTGATGATTTAGTAATTGCCAATTGCGATCAATCTAAAAACGATAATCCCCATCATTATATGGTTATAAATGCTGGACGTATCCCAGAAGATTTACAACATATTGATCATCAACTCTCTCAATTCAATGGTGATTGTAATTATACATTCATGGATAATCAATCATTGATTGCATTACAAGGGCCAAAAGCAGTTAATGTTCTGCAAAGATTAATAACTACTGATTTTGATTTCAATGGTTTGAAATATTACAATATGAGTGATATGAATATCAGTGGTATTCCAATTCAAGCATCGCGCAGTGGATATACTGGTGAAGATGGATTTGAGATTTCTGTGAGCAGTGATCAGATTGTTGAATTAGCTGAGGTTTTGTTGAATGAAAGTGAAGTTGAACTGGCGGGTTTGGGAGCGAGAGATGCTTTGAGATTGGAAGCTGGAATGTGTTTGTATGGAAATGATTTGAATGAGAATATAACTCCAAATCAAGCATGTTTGTTGTGGACAATGAGTAAGAAGAGAAGAATGGATGGAGGGTTTCTTGGGTTTGAAGCTGTTCGACATCAAATTGTGAATGGAGTTAAGATACAGAGGGTTGGGTTGATTGGGGAAAAAGGACTTACGCCGAGAGGTCATCAGAGAGTGGTGAATGAGAGTGGTGTGGATGTTGGGGAGGTTACATCTGGTACTTTTTCTCCGTGTTTGCAATTGCCGATTGCATTGGGGTATGTTGGTAGTGATTGGTGTGAGATTGGTACACGTTTGGAAGTTATAATTAGGGAAGGTAAGAAAATAAAGGTAGAAGTTTGTAAATTACCTTTTGTAGAAACGTCTTATTATAAGTAAACATTTGTAAGGGACACAAGGAACATAATTTTTAATTTATTTGACGATTTAAGGGGC
|
||||
|
||||
>NODE_284_length_3345_cov_73.038532_g225_i0
|
||||
ACGTCAGTTCTTGTGCCTTACAAAAAATAAAGACACAATAGATTTAAAAATAAAGGGTGAAGCAAGAAAAAAAAATATAAAATAAAAATCAAAAAAGTATGGAAAAAAACTGTTTATAGCACAGATAATTGCAAAAAGAGAAGTAAAATACATATTACACACACAAACAAGAAATATATTCACCATCCAACAAACTTATACAGATTCTGAATTAGATGGTGAACTATCAGAACTTGTTCCATGAGCCAATTGCAATTGCGACATTGGCAAATCATCTGAATTTAAAACAGAGATAATATCGAAATGTGTACTATACTCATCACTTTCAATATCTCTTACCACCAACACATAATGTAATGATGTCTCTTGTACTGCTAAATACGTACCAGCAAATTCATTGCTCTCAAGTGTACCATATTTACTGTTATATCTAAATAATGAATATTTACTACCACTTCCATCATTCACATCTACACTCCCACCCTTATTGTACACACGTAAGTATCGACCAGTTTTATTATTTCTAAATTTTACTAAATCTCCATATATTTCTAAAATCCATTGTGAATTAATTTTCTCATATCCATTACCACGTGCATTAACCTCTTCATCATCCGATGCCACTCTCAAATGCTTACCAAATGCATGCTGTAACAGCACAACGCTATAATTCTTCAAAACACTCTCTCCATTCATTATCTTCCCATGTCTTACATCAGATATCATATGTTCATCATTATAACTTCTATTACGTCTATTATTATTATTCCTATTACCATTCATCCTCAATACTTCCATATCATCCTCATATTCATCATCATAATCACTTCCACCAGCACTACTAAAATTATTTACATTTATATTTGCAGAAAAGATAACTTTCGACCATGAATTCTTTCTCTTCACTCGTTTCTCAGCAAGTGTCAAAGGTTTAGGTTTCTTTCCTCTCAACTTCTCTTTTGCATGCTTTTGAAGTACAATTTCATATGGATCACGATTGTTAACCTCCGCATCCGATATCGCTTTTGCAATCGCATTCTCCTTTCTATCTTTCTTTGATAAATATTGCTGCATCTTCTTCCAATATGCACTATCCTTACTACGATATATCTTCTCCACTGAACATGAATTCTGTTTTAATGCTTGATTTTTATGATCACTATTGCGATAAAAAGCTTCACACAATTTGGGTAAGAACGCTTTTCTATCATAATCCACCAACCGTTGCATACTCTGTTCTTTCCATGCAGCAATTGCACGTTCAGCTACCGGTTGTCTGGTACTCAACATACAATCTGTAAGTTTTGTGGCAGCCGCCACCAATATCTCTTTATAGTTCTCGTACACAAATCCATGCTGGTGAGTGATCAATACATTGATGATATTCACTACTTCCATAATGAATATCTCTTCCTTTTGCGGTGATTGCTGTGGCCAGAAACGAAGCAGACCACCTAAGATTACTGAACCACCATTGCCATCCTTAGCAACATAATTCACACAACATTGTGTCAACTGTTCCCAGAACTTCTTCAATCGACGACATTTGTGCAATGGCACTAAAACATTTCGCACAAATGATTGCCAACTTTCTTTTACTGGTACAGTCAAACCTGGGATTATTGCACATATGATTTGCAATAATTCAATGATACCATTGACATGTTCAAAGTCTGAGGTATATATCATACGATAGAAGTATCCACACATGATTTGTATGATATATGGACGTAGTTTTAGACAACGACCATATATTTTATGAAGTATCATCATTAGATATGCACGTTCGCGGTGATCTTCAGATGCGAAGAGTTCTACTAAGCATTTGAGGAATTTACCAGTCATGTATTTCTCCATCATTTGGGCAGTGACCATTGGAGTGTTAATCACTCGCCAGGTTAGATCGTAGACTAATTTTAAGTGACGCCAGCTGGGATCTTCAAAGTTCGACTCTTTGTCGAATACTTCATCGTTGAAGAAGGAGAGAGCGGGTGGGCGAT
CTTGATATGGAAGAGGACGAAATAGATTGGCACCTACGGTTTCTAAGCATTTTTCGTAGAGGTTCTCTTTGAACCATACATGTCTTGCTATGAATTCACTCAATTCTAAGAGTAATTTCTCTTTTTTTATTATTAATTCATCCATTTCTGGTGTTTGAAAAAAGAAATCACATCGTGCTTGGCAATAATACAACTTAAGTACAAACATATCTTCTCTTTCATCTTCTGGATAATCAGCAATGGGTGATAGATTACTGTAATACAATTCTTGAGCAATACCACCTAATTTTCTGCTTTGCGCGAGATATATCTCTAAAACTCGATTGGTGGATGAATATGAACGAGAGTCTTCTGGTGAAGGTTTAGGTGTAGACGATGATGAATTCTGCTGTCTTGGCTCTACTTTTCTATCATCAGAGTCCTCATCACCTGTACCACTTTCATACAAAGATACCCAACCTTTGAACATCAATTTCTCACTCATACGAAGTTTTTTTATTCGTTTTATTTGTGGTTTGGGTGTTTTTGGCTTCACTTCAATGAAGAATGGATCATCTCGATTGGTATTACTTTTATCTGGTTTTTTTATTGATGGATCCACTGGTGCTGGAATAATTCTTTTGAAAGGTAACATTACTCCATGATTATCTTCAGCATCAAAATAGAATTTCTTTTTTACTGTTCCATTGTTTTTACCTTTTGGTGCATCCAATTCTACACCAAACCATACACCCTTATCAAAATGTGTGGGTCCAACATATTTGATAATACCAGGTGTCTTCTTCTTTACATATATAACTCTATCACCAATGGTAAGATTAAATTTCTTACCACGATGGAGTACACGTTCAACTCTCTGTTTTGGTACCCATATGGTTTTATTTCTGGGCGATTGATTATCAAATACATTTTCACTGCTTAGATGAACATCCATGACCACACGTACCCATACTCCACGTTTTTCTGGTTTAGCACCAATTGAAGTTACTTTTCCACTTCTTTTGTTTGCTAATAAGATTTTATCTCCAACATATAATTGTAGATCATCATTTTGATAGCTTATGTACTTCTTTTTATTTTTATGACCAGTGCTTTTGGTTCTATATTTTTTTGGATGTTGAGGTGAACGAGCAGTGCCATTGTGGCTGTTGTTATCTGTGTTAGTATCTATATTTTCAGGTTCTTCTTCATTATCGACATCTGGCATTGTTGTTGAATCACCTCTTTTTAGAGATGCTAATAATTCGTCATCATTGCGTTTCTTTTTCTTAAACCAACCAGACATTTTCTATGTTTATGATTTTTACTTTATTTCCTACGTTTTTTACGGTTCGCTTGAGCAAAA
|
||||
|
||||
>NODE_285_length_3344_cov_82.848558_g226_i0
|
||||
TCAATTAAGACCTGTTGTGCTTCAAAAATCAAATCAAATCACAAAACCAAACCACAAAAACCACAAACATAAAATCATAGATGAGTTTCATAGCTGCCACAGAACAATACTACACCCACTACGAACAAGAAGGTCCTCTAAAACGCACTGAAGACATCCGCGATCACAAATGGCTCTTCTCCAGACACGACCTCTGTTCACTCATGGCCAATTTCCAAGAAAAACGCTTAGCACTCTCCACACTCGCCCAAATGGGCGGTAGCCAAGGCTTATGCTACGCCCTACGCAGTCACTCACGCACTGGCCTCGGCATCGATGAAATAGAAGACAACTCAGAAATCAATGCATTAGACATCAGACGCAGTAAATTCGGCATAAATGAATTGCCCCCACATAAATCTGATCCATATTACAAACTATGCTATGATGAATTGCAAGACCCTATGCTCTGTGTGTTAGTGGTCGCAGGTGTCATCTCTCTAGTCGTTGGTGCAGCATTGCATGCAGCGGATGGTGGATATATTGAAGGATTAGCAATACTCATTGCAGTAGTTATTGTAGTAAATGTTGGAGCCATAAACAATTGGCAAAAAGAGAAACAATTCCGAAAAATGGATGAAGAGAATAAAAAGAAGAATACAATAGTAATGAGATCTACAGAAATGGAGATTCCATGGAGTGAAGTGGTAGTTGGAGATCTAGTTATACTTAGAAATGGTTTTACTGTTCCAGCTGATGGTGTATTTGTATTAGGTACTGAAAATCTGCATACGGAGGAAAGTTCACTCACTGGTGAATCGCGTGAATTATCGAAGAATAGCGAGAATCCATTGTTGATGAAAGGTACGAATGTTGTGGAAGGTGAAGGACTTATGCTGGTGGTTACTGTTGGTCCATACACAGAATGGGGAAAACTTATGCTCGGATTGCAAGAGGAAAGAAAAGATACTCCATTGCAAGAGAAATTGGATCGTTTGGCTGGGTTGATTGGCTATGGCGGTGGAGCAGTAGCAATTCTTTTGTTCATAATATTGACCATCAACTGGGGAATCAATGGTGGACATGATGCAGATATAAATATATTGAATTTCTTGATCATTGCAATAACTATTGTAGTGGTTGCTGTTCCAGAAGGTTTACCATTGGCAGTTACCATCTCACTTGCATACTCAATGAAGAAAATGTTGTTGGACAATAATTTTGTACGTCATTTGAAAGCATGTGAGACAATGGGTAATGCAACAACCATATGTTCAGATAAGACTGGAACATTGACTACCAATCGAATGTCTGTTCAGCAAGTGTTTATGTATGGCAGACGATTCATTCGTAGTGAAATATGTGATACTCCAGAGATTGCTGCAAATGCTTTGACACCAGTTGTGCATCAATTGCTGATGAATTGCATATGCACTAATACCAAATCATTTCAGGAAGAGCCTAAAACTGTAGATGAAAGAGCAGCCATCGATGCTGGGAAACGTAAAAAAAGACTCACTGGCGGTAACCAAACGGATTGTGCAATGTTGCAGTTTGCAATTGATTTGGGTGCACATGATTACAAAGAACGTCGTAGAAACAGCCCAGTTACTAAAATGTTCCCTTTCAATAGCAAAGTAAAACGTAGTTCAGTGTTAGTGAGAGATAAGAATCGATATATAATGTATACCAAAGGCGCAGCTGAAGTAACACTTGAGATATGTACACATTATATGTCCAACAATGGAGAAAGTGTACGTATGTCCAACGAAGATAAAGCAAAAGTCTTAAAAGCTATGAATCTGATGACAAAACGCGGTTTACGCTGTCTCGGCACTTGTTACAAAACATTTGACAAATCTGAAATACCTTTTTCTTCCATAAGTTTGAATATCGCAGAAGAAGATTGCGGAATATTGTTTGAGAATATGATATGGATAGCAGTGATGGCCATACAAGATCCAGTACGCGATGAAGTACCAGATGCAGTTCTAACATGTCAGAGAGCAGGTATTGTAGTA
CGTATGGTAACTGGTGATCATTTGGAGACTGCCAAACATATTGCAAAAGAATGTCATATCCTCACATGTGCTGACCATGTGTGTATGACTGGAGAACGTTTTCGTTCTTTGACAGATGATGAAAAATTTGATCTTCTGCCAAGATTGAGAGTGTTGGCTCGTTCTAAACCTAAAGATAAGGAACAATTGGTGAAATGGTACAAAGAGAACAACAATGACATCGTAGCTACCACCGGTGATGGTGCAAATGATGCATTGGCATTGAAAGAAGCGAACATTGGACTGTCAATGGGTATACAAGGGACAGATGTTGCAAAAGAAGCATCAGATATTATTATTATGGATGACAATTTTGCATCTATAGTGCAGACAGTGATGTGGGGCAGATCTGTTTATGATAATATAAGAAAGTTTGTACAGTTTCAATTGACTGTGAATGTGGTTGCTTTGACATTGTCTTTGATTGCGGCATTTTGGACAGAGTTTGCTAATCCTTTGACAGCTGTGCAGTTGTTGTGGGTGAATTTGATTATGGATACAATGGCTGCGTTGGCATTGGCTACGGAGGACCCTACTCCAAAACTGTTGGACAGACATCCTTTTACACCTGATTCGAATTTGATCACTCAGATTTTGTGGAGGTTTGTGTTTGGACATTCTTTGTATCAATTGGTACTGCTTTTGATGACAATGTTTGTAGCGGATGAGTGGTTGGGTATTAGAGATATGGAGAAAGGGGAGGAACAGAATCGGAGACATTTGACTGTGATCTTCAATACTTTTGTGTGGATGCAGATTTTTAATGAGTTCAATGCACGTAAGGTGAATAATGAGTGGAATATTTTTGAACATTTGTTCGATAATCTGTACTTTTGGTTTATTATGGGAGTGACTGTGCTTTTGCAGATATTTATGATTGAGTTTTTTGGGGATTTTGCGTCAACAGAGGGTTTGAATGGAAAGGAGTGGGGATATTGTTTGGCATTGGGTGCAGGTTCTTTGCTGTGGCATCAGTTGGTGAGATTGGTCCCAGTGGATTTCAATGATGGCATTAAAATTGTGGATAGTGATGTGCTTTTTAAGACTGAGGTTGAATTTGAACCAGGGTATGTTGCTCCGAATGCGGAGGTCATTGATGATGAGAAGGATGATAGTACTTTGACTGCTGATAATATGGTACATGTTGCAAGTCATAGTACACAGCAGCATCTGTGATTTTTTTTTTGTGTTTTGTGCATAACATCAATGAAAACTATTTTTTGGGTATTTACTTTTTTTTAGGTGTATGTTCGTTGTTGGTTTGTAGTTGTACATATTTGCGGTTTATTTTTATATGAGTGTAGT
|
||||
|
||||
>NODE_294_length_3304_cov_64.247619_g234_i0
|
||||
GAAAGGATTAGAATGGGCGATTTTTTATCTCATGTTCCATGTTCGATGAATAATCAGGAATTGATAGTGCATATGATTGAGAATTATTTGGAAGAGAGGGAGAGTCAAAGTAAAAATCACGCGGCATTTTATGAGAATGGAAATATTCCAAATTATATCCAGCAATTATTTAACGAGTGCACTTCTGACAGATATGCATTTCTTCTCAATTCATATGCAGTCAAGCATTCAATCACTAATGAACAATTGCTGAAATATTTGGATGGAAGTGAGAATGGAAAGAATTCTCCTTTTTTGAGTGTTTTTGGGAGGATTGCTGAAGCACATGTGTATGAATGGTTGGTGACTGGAGATGAATACGTTTCATATTTTTCAGATTCTCATCCATGGGTAGTAAATCAGCAATCATATCGAATAAATCTCGCAAAAGAATCTGAGGATGAGCCATTGTATCTAAAGTTCCATTGGGAATTAAACAATGATAAAAGTGATGGTTTATTATGGTGCTTATTGGTCTGTGATCAATTGCCAATGCAAGTGGAAGAAGTGAGAATGAATTTAGGGATATATATTCCAGAAACGAAATCTGAAACGAATGAGGGTTCTTTTTTGAATCGCCAACAGTTTAGAGGCAATCAATTGCACAGCATTGTAGAAAGCAAGAAGTTATTGATGGAGAATAATCCGCAAGTAAAGAAACCTGAATTTAATCTCAAATATATTTTCAGATGCTGAATGTCAAAAATAAATAGTTTCACTATGAAATATGTACACATACAAAACAAAAACAAAAACAGTCAAAATGGTAAAAAATAACAAAACAGTCAAAATGGTATATAAAAATACAATAAATATGGCCAACCCTCCCCAAATCAATCACTTTAAACTTCTCAAAATTGTCTCTCAAAATATTTCCCAAAATCATGCTTCGGATCAATAGGCTTAGCATCAAGCGCATCATTCGGATCAATCTGAATCTTCGCAAAATTCCCCAAATAAGTCGAACTCTTAACCTTATTAACAATAGGTGCAGGCATAGTCCCATCATTCAACTTAGCCCACGAAAAACTAGAATACCACGGATGCTTACGAATATTATTAGCACCACCTCTCATAACTCCCAACCTCCGAGTAGGCTTATTATGCAACAAACCTTTGATCAAATCACGCACTTCAGCACTGAAATATCTCGGAAAACGTATTCTCCCACGTATAATCTTACGATATGTCTCAATTGGATCATCAGCCACAAAAGGTGGAAAACTAGCCAACATCTCATATATCAATATCCCAAGCGTCCACCAATCAACACCTTTTCCATGTCCTTGACCAGTGACAATCTCAGGACACAAATAATCTGGTGTTCCACACAAAGTAAATGTCTTACCTGTCAAAAATTTAGCAAAACCAAAATCAGTAACTTTTAAGTATCCATCATTGTCCAACACCAGATTCTCTGGTTTCAAATCACGATAAATGATATTCATACTGTGCATGTAATCAAATGCTTCAATGACACAACCAGCATAAAAACGAGATGCTGGTTCATTGAAGTATCTGCGAGAACGTAGGATAGTAAATAATTCACCACCCAAACATACATCCAATAAAAAGTATACACGTAGTGGGTCATTGTAAGTGCAGTGTAAGTTCACTAAGAACTTGTTTTTCATAACATCCATCACTTGTTTTTCACTTACGATGTGTTTTTGCAGTTCAAGTTCTATCACTTGGAACTTTTTTATGGATTTTAAGGCGTATGATTTTTTTGTATGTGGATCGACCACTAAAGTTACTAAACCAAAACCACCTTTGCCAAGCACTCCAACAGTCTGCAACTCATGCAAATCACAAATCTTTTTAGGTTTCGTAGCTGCGTTTTTATTTTGCTCTGCAGCTTTCGATCTTATTTGCTTTTTCTTCTCACGAACAGCATCTTCAGCTATCATTTTTGCAGAACGACTGTACTCTGATATCTGATCATCCACTAATTCAATGAC
TGGTCCAAGCAAATCAAAGAAATGTTGACTGTCCATTTCTAAGCAGGTTAAGTCAGTTTTTGCACTGATTGTTGCTGCACGTGCTTGTTTTGTACGCAAAGCACGTTCACCAAAGAATTGTCCTTTAGTTAATATACCTTTTTCACCATTAACTTTTTTCCATTCTGCTGAACCTTTATATATAACATAAAATCGTTCACCAACTTCCCCCTGCCTAAAAACCACAGTTTTTGCTGAATATACCTTCTCTTCCAGCGACGAACCCAAATCCATAATCTCTGATCTCAGCAATGGCTTAAACAACGAAACAGTAGGCAAAAAAGCCATTAATTCTTCATCTTTCTGTTTACTGACAACAATAACCTCATATCTAAACATCTGTCGTTCAATCACCCATAAACGACCACCTTTTTTAGATACACATTTCACAGTTGCAGCTCTGGGTGCATCATTAATTAATGCAAGTTCACCAAAACACCCACCTCTTTTGTATGAATGTACTTTACATTTATCTTTTAATACATTATAATCTCCACTCTCAACCACATAAAAAGTCGTTGCTTTTAAGTCTCCTTGATTGATAATACATTCATTGTAATCAACAGTAATCATTCTCATACGTGATAATATTCGTCTCTTCATTGCTTTATCAAAATTCTGAAATAACAAATTATCAGCTACAGATTTCATCAACCATTTCATAGCACTATCACTCACATTCGACAAAGGTGCTGCAGCATATTTACTACTACCACTACCACTGCTGGTATGAGCATTATTATTATTGATATTACCACTGCCACCACTCTTGTACTCATCGCGTAATGCACTCAAATTTATTGGAGTAATAAAAGCATTTCGTTTTGCTTCACGTTTGGCAAATCTCACGGAAGAACTCGATTGTATAACCTTTTGAAACAATTTCTGGTTGCATACCAGACATATAACTTTAGAACATGCTTGTAAGCTGGCATTTCTTTTGGCATTTTGGAGTAATGCTTGTTCACCTGCGTAGTCACCTGTTTCTAGTGTTGCTACCACTTTGTTGTCTGTTGATAGGACGTTGACTATGCCTTCTACTATGATGTAGAATGAATCACCAGTCTCGCCTTCTTTCATTATATATTCACCTTTAGCGAACTGTTGGGTTACTAAACCATTTGAGAGGTTCTTACGTTCGATTGGTGATAAACCAGAGAGTAAAGGAACTGATGCTAGAATGGTGTTTATTACGGATTGTCTAGTTTTAGAGTACTTTGAACTGGTCTTTGTTTTATTTGTAGTGCCAGACATTATTTTTTTAT
|
||||
|
||||
>NODE_373_length_3084_cov_119.730807_g297_i0
|
||||
TTTGCTTATTTTTTTCTTATTCCTTGGTTCTTCTTGGTTTTGTGTTTGTTTAATGACAACAACAACACCTGCAGTAACGATAACAAACGTTGATCGGAACAACCCGTATGCATCTGCTTCTTTGTATGTGGGTGATTTGGCAGCAGATGTGACTGAAGCCACTCTCTTCGAATTATTCAACGCAGTTGGTCCAGTTGCCAGCATACGTGTGTGCAGAGATGCAACCACACGTCGTTCCTTGGGTTATGCTTATGTAAATTTCCATTCAGTGCACGATGCAGAAGTGTCATTGGATATCATGAATTTTACAAACATTCGTGGTCGCCACTGTCGAATTATGTGGAGTCAGAGGGACCCTCGCTTACGCAAATCTGGCAAAGGCAACATTTTTGTCAAGAATTTGCATGAGTCCATTGACAACAAAACACTTTATGATACGTTCTCTGTGTTTGGAAGTATTCTCTCCTGCAAAGTAGTGGTTGATAGAGATTCAGGACTTTCTCGCGGTTATGGATATGTACACTACGTGGATGATAAGTCAGCAGCTAAAGCTATCGAAGGAGTAAATGGAATGAAAATCAATCAATGCCAAGTGCATGCTGAGCTTTTTAAACCGCGCGAAGAGAGAATGAAAGACCCGAAGTATGAGTTCACTAATATTTATGTGAAATACATTCCAAGTGGTGTGAATGAGAAAAAGTTGGTGGAGCTTTTCCAGCGCGAAGCGGAAGAAGTTTGTAATAAGTATGATTTTTGGTACAGAGAGTATGGAATATCTGCTTGTTTTAATTTCAAGTCTACGGCTGGTGCTCGTAGAGCAATCAGAGAAATGAATGGCAAGTTTTTGCATGATTTCAAAGACATTGATCAATTGTTTGATGAGAAGGAGGCAGTAGCAGTGGTTGAAGATAAAGTTGCAGATGAAGCAGTTGTCAATGGTAATGAAGAGAAAGAGAACAGTGGGCAAGCTGAAGAATCTGTTGCCGGCGGTGATGAGAATAAAAATGCGAAGGCAGAAGAAGAAGAAAAAGATTCTGAAGCAGATATAATTACCACTTCTGCTGCCACTACCACTGGTAATTCAACTGCTATCGCTGCTGTTGCTTCAGAAGATAGCAAAGCAGATGATAGCAAAGATGACAGTAAATCGCGTGAAGATCGTCTTCCAGTGCCAACCACAAATGGTGCTTTGATGCGTGTGCAACAGCGTGGATTGTATGTTGCGCGTGCACAAAAAGGAAATGAGCGCAAAGAGTTTTTAAATCGCATGGCTCGAAGTGTAACTCTCAATGGAAGACGAATTGGAATTCCTGGGGCGAATTTGTATGTAAAGAATTTGAGTCCAGAGGTGAATGATGACAAATTACGTGAGATGTTCGCTATTTTTGGTACAATTACATCTGCAGTAGTGATGACAGAGAAAGATAGTAAGAAATCGAGAGGTTTTGGTTTTGTAGCATATCAAAAGAAAGAGAGTGCTGCTCGTGCAATTCATGAAATGATGAATAGTTTGCACAATGGTAAACCTTTGTATGTGTCGCGTGCTCAAAGCAAACAATTTCGCCAACAGTTTATAGCGAAACAACTTCGCCAAAGAGGTAATTTCAGTGGTAGAGGTCGTGGTGGATACCGTGGACGTGGAAGAGGTAATTACCGTGGTCGAGGTAGAGGTCGAGGAAGTGGAGGTTCATACAGTGGAGGATATCGTTCATTTCGCGGTGGATATCGCGGAGGATACAATAATTACCGAGGTAGAGGCAGAGGACGTGGACGTGGACGATCCAATTTCCCACCAACTTCGGTCTACAGTGGATATCCTCCTCAACAGCCTACCCCTTACGCAGGTGTATATGGAGGTGGACAAGCATATCCAGCATCGAGTCCATACGCTCGACAGTATTCTGGTCAATATCCGAATTATGCTCAGGCATCTCGTGTACCATATGGTTATCCTGGTCAAAGTGCAGCTAATGCACAAATGGCGAGAACTGCGCAACCAT
CAGTGCCTATGAATTATTCTGCACAGGGAGTTGTGCCGAATATGAATCGAGCATATATTCCGCAAACAGCACAAGGGTACATTCAACAACCGTATGTGAATCCGCAGCTTCAATTGAGACAAACGCAGAATATTCCGTCTCAGCCCGCAAGTGCTGCATTAGCGGTAGCTCAACCGCAGCAAATTCAATCTTTGCCAGCTGGACAGCAATCTGGTTCAATGATGTATGGAACATCATCTGTTCCTCGTCCACCGTACAGTGTGAATCAAATTGTGCATCAAGTGCCAATGCCATCACAGCCATTGGCACAACCACAGACATCTGTGAGTCAATTGAGTGCAAACAAGGATTCTGCTCCAGTCTTAGAGAACCATCCATTGACCAGTGAAATGTTGAAGGAAGCAAAGCCGCCAGAGCGCAAACGATTGATTGGTGAGAGATTGTTTCCAAAAATTCAAGTGGTTGAGCCTCGATTGGCAGGGAAGATTACAGGAATGTTGTTAGAGATGGATAATACTGAACTGTTGGTACTGTTGTCAGACCAGGCTGCGTTGATGAGTAAGATTAATGAGGCATTGGCTGTGTTGAAGGATCATCAACAGAAGCAGTCTCAGCGCAATCCTGAATCATCCAAGAATCAGTCATCCCAGGCAAACAAAGCAGGTTCGCAGTCAAATCAAGCAAGCTCTTCTGCTGCACAAGCGAATCAAACTAGTGTTGGACAACATGTTGCGCAACCGAGATCTGCTGCCAATCCATAAACAATTGTATGATGCTCGATGGGTGTTCAATTGCCTCATCCATCTCCATTGCATTCACTATTCTTATTATTTTTAGTTCTAATTTTTATTTAATTTTGCCATACACAAAAAAAAATAAAAAAAATAAAAAAAAAACTGATTTTCTATTGAAGTGTATGGTGTTTGACCAAGTTTTGTGTTTAAATTGTTTCTTTTTTCGATTTTATTTATTTTTTGCCTTTTGTTTAGTTTATATCACGGATATATATATACAATATTATGTTTCTAGAATGTGTTTATTACTATTTCCGAAACCATTGTCCACTTTGAAAAATTGAAAAAAG
|
||||
File diff suppressed because one or more lines are too long
301439
PTL1/Transcriptomes/TestData/Sr_rh_Ss02_assembledTranscripts.fasta
Normal file
301439
PTL1/Transcriptomes/TestData/Sr_rh_Ss02_assembledTranscripts.fasta
Normal file
File diff suppressed because it is too large
Load Diff
@ -1 +0,0 @@
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -1,12 +1,12 @@
|
||||
# Last updated Jan 2024
|
||||
# Authors: Auden Cote-L'Heureux and Mario Ceron-Romero
|
||||
|
||||
# This script chooses orthologs to concatenate OGs. This can be done as part of an end-to-end PhyloToL run,
|
||||
# This script chooses orthologs to concatenate OGs. This can be done as part of an end-to-end EukPhylo run,
|
||||
# or by inputting already complete alignments and gene trees and running only the concatenation step.
|
||||
# Use the --concatenate flag to run this step, and optionally use the argument --concat_target_taxa to input
|
||||
# a file containing a list of taxon codes to be included in the concatenated alignment. If a GF has more
|
||||
# than one sequence from a taxon, a representative ortholog must be chosen to include in the concatenated alignment.
|
||||
# To do this, for each taxon PhyloToL keeps only the sequences falling in the monophyletic clade in the tree
|
||||
# To do this, for each taxon EukPhylo keeps only the sequences falling in the monophyletic clade in the tree
|
||||
# that contains the greatest number of species of the taxon’s minor clade (or major clade, if the ‘target taxon list’
|
||||
# uses major-clade codes). If multiple sequences from the taxon fall into this largest clade, then the sequence
|
||||
# with the highest ‘score’ (defined as length times k-mer coverage for transcriptomic data with k-mer coverage
|
||||
@ -118,17 +118,15 @@ def remove_paralogs(params):
|
||||
|
||||
#Getting a clean list of all target taxa
|
||||
|
||||
if type(params.concat_target_taxa) is list:
|
||||
target_codes = [code.strip() for code in params.concat_target_taxa if code.strip() != '']
|
||||
elif params.concat_target_taxa != None:
|
||||
if os.path.isfile(params.concat_target_taxa):
|
||||
try:
|
||||
target_codes = [l.strip() for l in open(params.concat_target_taxa).readlines() if l.strip() != '']
|
||||
except AttributeError:
|
||||
print('\n\nError: invalid "concat_target_taxa" argument. This must be a comma-separated list of any number of digits/characters to describe focal taxa (e.g. Sr_ci_S OR Am_tu), or a file with the extension .txt containing a list of complete or partial taxon codes. All sequences containing the complete/partial code will be identified as belonging to target taxa.\n\n')
|
||||
elif params.concat_target_taxa != None:
|
||||
target_codes = [params.concat_target_taxa]
|
||||
else:
|
||||
print('\nERROR: missing --concat_target_taxa argument. When concatenating, you need to give the taxonomic group (sequence prefix), groups, or a file containing a list of groups (multiple prefixes) for which to select sequences to construct a concatenated alignment\n')
|
||||
exit()
|
||||
target_codes = [leaf.name[:10] for leaf in tree]
|
||||
|
||||
monophyletic_clades = { }
|
||||
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
# Last updated Jan 2024
|
||||
# Last updated Jan 2025
|
||||
# Authors: Auden Cote-L'Heureux, Mario Ceron-Romero.
|
||||
|
||||
# This script contains the entirety of the contamination loop, an iterative tool to assess
|
||||
@ -323,15 +323,45 @@ def cl_mafft(params):
|
||||
if file.split('.')[-1] in ('fasta', 'fas', 'faa'):
|
||||
os.system('mafft ' + params.output + '/Output/Pre-Guidance/' + file + ' > ' + params.output + '/Output/NotGapTrimmed/' + file)
|
||||
|
||||
os.system('Scripts/trimal-trimAl/source/trimal -in ' + params.output + '/Output/NotGapTrimmed/' + file + ' -out ' + params.output + '/Output/Guidance/' + file.split('.')[0] + '.95gapTrimmed.fasta' + ' -gapthreshold 0.05 -fasta')
|
||||
os.system('Scripts/trimal-trimAl/source/trimal -in ' + params.output + '/Output/NotGapTrimmed/' + file + ' -out ' + params.output + '/Output/Guidance/' + file.split('.')[0] + '.95gapTrimmed.fasta' + ' -gapthreshold ' + str(params.trimal_cutoff) + ' -fasta')
|
||||
|
||||
#Utility function to run FastTree in between iterations (if this is the chosen tree-building method)
|
||||
def cl_fasttree(params):
|
||||
|
||||
for file in os.listdir(params.output + '/Output/Guidance'):
|
||||
if file.split('.')[-1] in ('fasta', 'fas', 'faa'):
|
||||
os.system('FastTree ' + params.output + '/Output/Guidance/' + file + ' > ' + params.output + '/Output/Trees/' + file.split('.')[0] + '.FastTree.tre')
|
||||
|
||||
|
||||
#Utility function to run Iqtree in between iterations (if this is the chosen tree-building method)
|
||||
def cl_iqtree(params):
|
||||
for file in os.listdir(params.output + '/Output/Guidance'):
|
||||
if file.split('.')[-1] in ('fasta', 'fas', 'faa'):
|
||||
if not os.path.isdir(params.output + '/Output/Intermediate/IQTree'):
|
||||
os.mkdir(params.output + '/Output/Intermediate/IQTree')
|
||||
tax_iqtree_outdir = params.output + '/Output/Intermediate/IQTree/' + file.split('.')[0].split('_preguidance')[0]
|
||||
os.mkdir(tax_iqtree_outdir)
|
||||
os.system('iqtree2 -s ' + params.output + '/Output/Guidance/' + file + ' -m LG+G -T 10 --prefix ' + tax_iqtree_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '.IQTree')
|
||||
#Copy over the final output
|
||||
if os.path.isfile(tax_iqtree_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '.IQTree.treefile'):
|
||||
os.system('cp ' + tax_iqtree_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '.IQTree.treefile ' + params.output + '/Output/Trees/' + file.split('.')[0].split('_preguidance')[0] + '.IQTree.tree')
|
||||
os.system('rm -r ' + params.output + '/Output/Intermediate/IQTree/*')
|
||||
|
||||
|
||||
#Utility function to run Iqtree_fast in between iterations (if this is the chosen tree-building method)
|
||||
def cl_iqtree_fast(params):
|
||||
for file in os.listdir(params.output + '/Output/Guidance'):
|
||||
if file.split('.')[-1] in ('fasta', 'fas', 'faa'):
|
||||
if not os.path.isdir(params.output + '/Output/Intermediate/IQTree'):
|
||||
os.mkdir(params.output + '/Output/Intermediate/IQTree')
|
||||
tax_iqtree_outdir = params.output + '/Output/Intermediate/IQTree/' + file.split('.')[0].split('_preguidance')[0]
|
||||
os.mkdir(tax_iqtree_outdir)
|
||||
os.system('iqtree2 -s ' + params.output + '/Output/Guidance/' + file + ' -m LG+G -T 10 --fast --prefix ' + tax_iqtree_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '.IQTree')
|
||||
#Copy over the final output
|
||||
if os.path.isfile(tax_iqtree_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '.IQTree.treefile'):
|
||||
os.system('cp ' + tax_iqtree_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '.IQTree.treefile ' + params.output + '/Output/Trees/' + file.split('.')[0].split('_preguidance')[0] + '.IQTree.tree')
|
||||
os.system('rm -r ' + params.output + '/Output/Intermediate/IQTree/*')
|
||||
|
||||
|
||||
#Wrapper script to manage parameters and iteration
|
||||
def run(params):
|
||||
|
||||
@ -440,11 +470,15 @@ def run(params):
|
||||
|
||||
if params.cl_tree_method == 'fasttree':
|
||||
cl_fasttree(params)
|
||||
else:
|
||||
if 'iqtree' in params.cl_tree_method:
|
||||
elif params.cl_tree_method == 'iqtree':
|
||||
cl_iqtree(params)
|
||||
os.system('rm -r ' + params.output + '/Output/Intermediate/IQTree/*')
|
||||
elif params.cl_tree_method == 'iqtree_fast':
|
||||
cl_iqtree_fast(params)
|
||||
os.system('rm -r ' + params.output + '/Output/Intermediate/IQTree/*')
|
||||
elif params.cl_tree_method == 'raxml':
|
||||
os.system('rm -r ' + params.output + '/Output/Intermediate/RAxML/*')
|
||||
|
||||
trees.run(params)
|
||||
|
||||
|
||||
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
# Author: Auden Cote-L'Heureux
|
||||
|
||||
# This script is what users should call when running any or all components of
|
||||
# PhyloToL 6 part 2. It briefly determines which parts of the pipeline should be
|
||||
# EukPhylo part 2. It briefly determines which parts of the pipeline should be
|
||||
# run (pre-Guidance, Guidance, tree building, contamination loop, and/or
|
||||
# concatenation) based on the --start and --end parameters, and then runs all
|
||||
# of these components. Each component is actually run by the run() function in
|
||||
@ -1,12 +1,12 @@
|
||||
# Last updated Apr 2 2024
|
||||
# Last updated Jun 02 2025
|
||||
# Authors: Auden Cote-L'Heureux and Mario Ceron-Romero
|
||||
|
||||
# This script runs Guidance in an iterative fashion for both MSA construction
|
||||
# and more rigorous homology assessment than what is offered in PhyloToL 6 part 1.
|
||||
# and more rigorous homology assessment than what is offered in EukPhylo part 1.
|
||||
# Guidance runs until the input number of iterations (--guidance_iters, default = 5)
|
||||
# has been reached, or until there are no sequences below the sequence score cutoff.
|
||||
# All sequences below the score cutoff (--seq_cutoff, default = 0.3) are removed at
|
||||
# each iteration. By default, PhyloToL does not remove residues that fall below the
|
||||
# each iteration. By default, EukPhylo does not remove residues that fall below the
|
||||
# given residue cutoff (--res_cutoff) and columns that fall below the given column
|
||||
# cutoff (--col_cutoff, defaults are 0), though this can be turned on by adjusting
|
||||
# these parameters. Outputs at this point are found in the “Guidance_NotGapTrimmed”
|
||||
@ -14,9 +14,14 @@
|
||||
# that are at least 95% gaps (or --gap_trim_cutoff) generating files in the “Guidance”
|
||||
# output folder.
|
||||
|
||||
# Users should note that there are two versions of Guidance. This script, by default, uses
|
||||
# the newest version (v2.1). Users who wish to use the older version of Guidance will have
|
||||
# to make a small change in guidance.py (look for a comment in the script with the phrase
|
||||
# "UNCOMMENT THE FOLLOWING LINE IF USING v2.0.2"). See the Wiki for more information here.
|
||||
|
||||
# This step is either intended to be run starting with --start = unaligned (but not raw)
|
||||
# inputs, meaning one amino acid alignment per OG. It can also be run directly after the
|
||||
# preguidance step. The run() function is called in two places: in phylotol.py generally,
|
||||
# preguidance step. The run() function is called in two places: in eukphylo.py generally,
|
||||
# and in contamination.py if the contamination loop is using Guidance as the re-alignment
|
||||
# method.
|
||||
|
||||
@ -24,7 +29,7 @@
|
||||
import os, sys, re
|
||||
from Bio import SeqIO
|
||||
|
||||
#Called in phylotol.py and contamination.py
|
||||
#Called in eukphylo.py and contamination.py
|
||||
def run(params):
|
||||
|
||||
if params.start == 'raw' or params.start == 'unaligned':
|
||||
@ -53,6 +58,25 @@ def run(params):
|
||||
guidance_removed_file = open(params.output + '/Output/GuidanceRemovedSeqs.txt', 'w')
|
||||
guidance_removed_file.write('Sequence\tScore\n')
|
||||
|
||||
too_many_seqs = False
|
||||
|
||||
#For each unaligned AA fasta file
|
||||
for file in [f for f in os.listdir(guidance_input) if f.endswith('.fa') or f.endswith('.faa') or f.endswith('.fasta')]:
|
||||
nseqs = len([rec for rec in SeqIO.parse(guidance_input + '/' + file, 'fasta')])
|
||||
|
||||
if nseqs > 2000:
|
||||
too_many_seqs = True
|
||||
#Print if OG has > 2000 seqs
|
||||
guidance_log = open(params.output + '/Output/GuidanceLog.txt', 'w')
|
||||
guidance_log.write(file + ' has more than 2000 seqs.\nStopping run')
|
||||
print(file + 'has more than 2000 seqs')
|
||||
print('Do you want to run this?')
|
||||
print('Stopping run.')
|
||||
break
|
||||
|
||||
if too_many_seqs and not params.allow_large_files:
|
||||
return False
|
||||
|
||||
#For each unaligned AA fasta file
|
||||
for file in [f for f in os.listdir(guidance_input) if f.endswith('.fa') or f.endswith('.faa') or f.endswith('.fasta')]:
|
||||
tax_guidance_outdir = params.output + '/Output/Intermediate/Guidance/Output/' + file.split('.')[0].split('_preguidance')[0]
|
||||
@ -77,8 +101,17 @@ def run(params):
|
||||
else:
|
||||
mafft_alg = 'auto'
|
||||
|
||||
#Running Guidance (one per OG per iteration)
|
||||
os.system('Scripts/guidance.v2.02/www/Guidance/guidance.pl --seqFile ' + guidance_input + '/' + file + ' --msaProgram MAFFT --seqType aa --outDir ' + tax_guidance_outdir + ' --seqCutoff ' + str(params.seq_cutoff) + ' --colCutoff ' + str(params.col_cutoff) + " --outOrder as_input --bootstraps 10 --MSA_Param '\\--" + mafft_alg + " --maxiterate 1000 --thread " + str(params.guidance_threads) + " --bl 62 --anysymbol' > " + params.output + '/Output/Intermediate/Guidance/Output/' + file[:10] + '/log.txt')
|
||||
#For Guidance v2.1 (2025 version) on the grid ... COMMENT OUT THE FOLLOWING LINE IF USING v2.0.2
|
||||
os.system('python ' + params.guidance_path + '/script/guidance_main.py --seqFile ' + guidance_input + '/' + file + ' --msaProgram MAFFT --seqType aa --outDir ' + tax_guidance_outdir + ' --seqCutoff ' + str(params.seq_cutoff) + ' --colCutoff ' + str(params.col_cutoff) + " --outOrder as_input --bootstraps 10 --MSA_Param '\\--" + mafft_alg + " --maxiterate 1000 --thread " + str(params.guidance_threads) + " --bl 62 --anysymbol' > " + params.output + '/Output/Intermediate/Guidance/Output/' + file[:10] + '/log.txt')
|
||||
|
||||
#For Guidance v2.0.2 (original version in PhyloToL 6). UNCOMMENT THE FOLLOWING LINE IF USING v2.0.2
|
||||
#os.system('Scripts/guidance.v2.02/www/Guidance/guidance.pl --seqFile ' + guidance_input + '/' + file + ' --msaProgram MAFFT --seqType aa --outDir ' + tax_guidance_outdir + ' --seqCutoff ' + str(params.seq_cutoff) + ' --colCutoff ' + str(params.col_cutoff) + " --outOrder as_input --bootstraps 10 --MSA_Param '\\--" + mafft_alg + " --maxiterate 1000 --thread " + str(params.guidance_threads) + " --bl 62 --anysymbol' > " + params.output + '/Output/Intermediate/Guidance/Output/' + file[:10] + '/log.txt')
|
||||
|
||||
#For UMass Unity users, use the following line and comment out the others:
|
||||
#os.system('python3 /work/pi_lkatz_smith_edu/Guidance/guidance_Linux/script/guidance_main.py --seqFile ' + guidance_input + '/' + file + ' --msaProgram MAFFT --seqType aa --outDir ' + tax_guidance_outdir + ' --seqCutoff ' + str(params.seq_cutoff) + ' --colCutoff ' + str(params.col_cutoff) + " --outOrder as_input --bootstraps 10 --MSA_Param '\\--" + mafft_alg + " --maxiterate 1000 --thread " + str(params.guidance_threads) + " --bl 62 --anysymbol' > " + params.output + '/Output/Intermediate/Guidance/Output/' + file[:10] + '/log.txt')
|
||||
|
||||
#For Smith College Grid users, use the following line and comment out the others:
|
||||
#os.system('python /gridapps/software/Guidance_mid/2.1b-foss-2023a/bin/script/guidance_main.py --seqFile ' + guidance_input + '/' + file + ' --msaProgram MAFFT --seqType aa --outDir ' + tax_guidance_outdir + ' --seqCutoff ' + str(params.seq_cutoff) + ' --colCutoff ' + str(params.col_cutoff) + " --outOrder as_input --bootstraps 10 --MSA_Param '\\--" + mafft_alg + " --maxiterate 1000 --thread " + str(params.guidance_threads) + " --bl 62 --anysymbol' > " + params.output + '/Output/Intermediate/Guidance/Output/' + file[:10] + '/log.txt')
|
||||
|
||||
#Checking for a sequence score file; if not available, Guidance failed.
|
||||
if os.path.isfile(tax_guidance_outdir + '/MSA.MAFFT.Guidance2_res_pair_seq.scr_with_Names'):
|
||||
@ -150,10 +183,10 @@ def run(params):
|
||||
os.system('mafft ' + tax_guidance_outdir + '/postGuidance_preTrimAl_unaligned.fasta > ' + tax_guidance_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '.postGuidance_preTrimAl_aligned.fasta')
|
||||
|
||||
#Gap trimming
|
||||
os.system('Scripts/trimal-trimAl/source/trimal -in ' + tax_guidance_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '.postGuidance_preTrimAl_aligned.fasta -out ' + tax_guidance_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '.95gapTrimmed.fasta -gapthreshold 0.05 -fasta')
|
||||
os.system('Scripts/trimal-trimAl/source/trimal -in ' + tax_guidance_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '.postGuidance_preTrimAl_aligned.fasta -out ' + tax_guidance_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '.70gapTrimmed.fasta -gapthreshold ' + str(params.trimal_cutoff) + ' -fasta')
|
||||
|
||||
#Copying over final alignments (pre and post gap trimming) into output folder.
|
||||
os.system('cp ' + tax_guidance_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '.95gapTrimmed.fasta ' + params.output + '/Output/Guidance/' + file.split('.')[0].split('_preguidance')[0] + '.95gapTrimmed.fasta')
|
||||
os.system('cp ' + tax_guidance_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '.70gapTrimmed.fasta ' + params.output + '/Output/Guidance/' + file.split('.')[0].split('_preguidance')[0] + '.70gapTrimmed.fasta')
|
||||
os.system('cp ' + tax_guidance_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '.postGuidance_preTrimAl_aligned.fasta ' + params.output + '/Output/NotGapTrimmed/' + file.split('.')[0].split('_preguidance')[0] + '.postGuidance_preTrimAl_aligned.fasta')
|
||||
|
||||
#Removing intermediate files if not --keep_temp
|
||||
@ -168,6 +201,8 @@ def run(params):
|
||||
os.system('mv ' + tax_guidance_outdir + '/' + gdir_file + ' ' + tax_guidance_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '_' + gdir_file)
|
||||
|
||||
guidance_removed_file.close()
|
||||
return True
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
# Authors: Auden Cote-L'Heureux, Mario Ceron-Romero, Godwin Ani
|
||||
|
||||
# This script is only run when --start = unaligned. This typically means that a user
|
||||
# is inputting ReadyToGo files as output by PhyloToL 6 part 1. The script contains two optional
|
||||
# is inputting ReadyToGo files as output by EukPhylo part 1. The script contains two optional
|
||||
# filters. One filter aims to remove sequences outside silent-site GC content ranges set by
|
||||
# the user, and relies on the output of the utility script ‘GC_Identifier_v1.0.py.’ See the manual
|
||||
# for details on using this filter. Sequence filtration by composition is set using the --og_prefix
|
||||
@ -27,7 +27,7 @@
|
||||
import os, sys, re
|
||||
from Bio import SeqIO
|
||||
|
||||
#This function is called ONLY in phylotol.py.
|
||||
#This function is called ONLY in eukphylo.py.
|
||||
def run(params):
|
||||
|
||||
#Reading in the list of gene families to use (--gf_list)
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
# Authors: Auden Cote-L'Heureux and Mario Ceron-Romero
|
||||
|
||||
# This is a relatively simple script that only runs trees, using either IQ-Tree
|
||||
# or RAxML. The run() function is called in two places: both in phylotol.py, and
|
||||
# or RAxML. The run() function is called in two places: both in eukphylo.py, and
|
||||
# in contamination.py, where it is used to re-build trees. When starting at this
|
||||
# step, users must input one aligned amino acid fasta file per OG. Otherwise, if
|
||||
# starting at the pre-Guidance or Guidance steps, this step will be run if --end = trees.
|
||||
@ -12,7 +12,7 @@ import os, sys, re
|
||||
from Bio import SeqIO
|
||||
from color import color
|
||||
|
||||
#Called in phylotol.py and contamination.py
|
||||
#Called in eukphylo.py and contamination.py
|
||||
def run(params):
|
||||
|
||||
#Checking whether aligned files were input, or it should just start with the Guidance outputs from the previous step.
|
||||
@ -34,7 +34,7 @@ def run(params):
|
||||
for file in [f for f in os.listdir(guidance_path) if f.endswith('.fa') or f.endswith('.faa') or f.endswith('.fasta') or f.endswith('.fas') or f.endswith('.aln')]:
|
||||
|
||||
#Run IQ-Tree
|
||||
if params.tree_method == 'iqtree':
|
||||
if params.tree_method == 'iqtree' or params.tree_method == 'iqtree_fast':
|
||||
#Make intermediate folders
|
||||
if not os.path.isdir(params.output + '/Output/Intermediate/IQTree'):
|
||||
os.mkdir(params.output + '/Output/Intermediate/IQTree')
|
||||
@ -42,10 +42,21 @@ def run(params):
|
||||
tax_iqtree_outdir = params.output + '/Output/Intermediate/IQTree/' + file.split('.')[0].split('_preguidance')[0]
|
||||
os.mkdir(tax_iqtree_outdir)
|
||||
|
||||
#Run IQ-Tree
|
||||
#Examples on how to run IQ-Tree
|
||||
#Comment out the lines that do not fit your system
|
||||
#Run IQ-Tree on the Smith College grid
|
||||
if params.tree_method == 'iqtree':
|
||||
os.system('iqtree2 -s ' + guidance_path + '/' + file + ' -m LG+G -T 10 --prefix ' + tax_iqtree_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '.IQTree')
|
||||
elif params.tree_method == 'iqtree_fast':
|
||||
os.system('iqtree2 -s ' + guidance_path + '/' + file + ' -m LG+G -T 10 --fast --prefix ' + tax_iqtree_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '.IQTree')
|
||||
|
||||
#Copy over the final output
|
||||
#Run IQ-Tree in HPC Unity Cluster
|
||||
#if params.tree_method == 'iqtree':
|
||||
#os.system('iqtree2 -s ' + guidance_path + '/' + file + ' -m LG+G --prefix ' + tax_iqtree_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '.IQTree')
|
||||
#elif params.tree_method == 'iqtree_fast':
|
||||
#os.system('iqtree2 -s ' + guidance_path + '/' + file + ' -m LG+G --fast --prefix ' + tax_iqtree_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '.IQTree')
|
||||
|
||||
# Copy over the final output
|
||||
if os.path.isfile(tax_iqtree_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '.IQTree.treefile'):
|
||||
os.system('cp ' + tax_iqtree_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '.IQTree.treefile ' + params.output + '/Output/Trees/' + file.split('.')[0].split('_preguidance')[0] + '.IQTree.tree')
|
||||
#color(params.output + '/Output/Trees/' + file.split('.')[0].split('_preguidance')[0] + '.IQTree.tree')
|
||||
|
||||
@ -2,33 +2,33 @@
|
||||
# Author: Auden Cote-L'Heureux
|
||||
|
||||
# This script is a general utility script that does two main things. First, it has
|
||||
# a function to read in all PhyloToL parameters, which is called in phylotol.py.
|
||||
# It also has a function that checks for and cleans up existing PhyloToL part 2
|
||||
# a function to read in all EukPhylo parameters, which is called in eukphylo.py.
|
||||
# It also has a function that checks for and cleans up existing EukPhylo part 2
|
||||
# output files from previous runs, and creates a new, empty Output folder structure
|
||||
# for the new run. This function is also called only in phylotol.py.
|
||||
# for the new run. This function is also called only in eukphylo.py.
|
||||
|
||||
#Dependencies
|
||||
import os, sys, re
|
||||
import argparse
|
||||
import shutil
|
||||
|
||||
#Reading in all parameters. This function is only called once, in phylotol.py
|
||||
#Reading in all parameters. This function is only called once, in eukphylo.py
|
||||
def get_params():
|
||||
|
||||
parser = argparse.ArgumentParser(
|
||||
prog = 'PhyloToL v6.0',
|
||||
description = "Updated January, 2022 by Auden Cote-L'Heureux. Link to GitHub: https://github.com/AudenCote/PhyloToL_v6.0"
|
||||
prog = 'EukPhylo v1.0',
|
||||
description = "Updated January, 2022 by Auden Cote-L'Heureux. Link to GitHub: https://github.com/Katzlab/EukPhylo"
|
||||
)
|
||||
|
||||
common = parser.add_argument_group('Commonly adjusted parameters')
|
||||
common.add_argument('--start', default = 'raw', choices = {'raw', 'unaligned', 'aligned', 'trees'}, help = 'Stage at which to start running PhyloToL.')
|
||||
common.add_argument('--end', default = 'trees', choices = {'unaligned', 'aligned', 'trees'}, help = 'Stage until which to run PhyloToL. Options are "unaligned" (which will run up to but not including guidance), "aligned" (which will run up to but not including RAxML), and "trees" which will run through RAxML')
|
||||
common.add_argument('--start', default = 'raw', choices = {'raw', 'unaligned', 'aligned', 'trees'}, help = 'Stage at which to start running EukPhylo.')
|
||||
common.add_argument('--end', default = 'trees', choices = {'unaligned', 'aligned', 'trees'}, help = 'Stage until which to run EukPhylo. Options are "unaligned" (which will run up to but not including guidance), "aligned" (which will run up to but not including RAxML), and "trees" which will run through RAxML')
|
||||
common.add_argument('--gf_list', default = None, help = 'Path to the file with the GFs of interest. Only required if starting from the raw dataset.')
|
||||
common.add_argument('--taxon_list', default = None, help = 'Path to the file with the taxa (10-digit codes) to include in the output.')
|
||||
common.add_argument('--data', help = 'Path to the input dataset. The format of this varies depending on your --start parameter. If you are running the contamination loop starting with trees, this folder must include both trees AND a fasta file for each tree (with identical file names other than the extension) that includes an amino-acid sequence for each tip of the tree (with the sequence names matching exactly the tip names).')
|
||||
common.add_argument('--output', default = './', help = 'Directory where the output folder should be created. If not given, the folder will be created in the parent directory of the folder containing the scripts.')
|
||||
common.add_argument('--force', action = 'store_true', help = 'Overwrite all existing files in the "Output" folder.')
|
||||
common.add_argument('--tree_method', default = 'iqtree', choices = {'iqtree', 'raxml', 'all'}, help = 'Program to use for tree-building')
|
||||
common.add_argument('--tree_method', default = 'iqtree_fast', choices = {'iqtree', 'iqtree_fast', 'raxml', 'all'}, help = 'Program to use for tree-building')
|
||||
common.add_argument('--blacklist', type = str, help = 'A text file with a list of sequence names not to consider')
|
||||
common.add_argument('--og_identifier', default = 'OG', choices = {'OG','OG6','OGA','OGG'}, help = 'Program to use for selecting seq by GC width')
|
||||
common.add_argument('--sim_taxa', default = None, help = 'Path to the file with the taxa (10-digit codes) to apply the similarity filter on.')
|
||||
@ -39,15 +39,18 @@ def get_params():
|
||||
core.add_argument('--similarity_filter', action = 'store_true', help = 'Run the similarity filter in pre-Guidance')
|
||||
core.add_argument('--sim_cutoff', default = 1, type = float, help = 'Sequences from the same taxa that are assigned to the same OG are removed if they are more similar than this cutoff')
|
||||
core.add_argument('--guidance_iters', default = 5, type = int, help = 'Number of Guidance iterations for sequence removal')
|
||||
core.add_argument('--guidance_path', help = 'Path to the downloaded Guidance folder (probably called guidance_Linux or guidance_MacOS-arm64, this folder should contain a folder called "script" which contains the guidance_main.py script). You can download this folder from this link: https://github.com/XseniaP/Guidance_mid/tree/main')
|
||||
core.add_argument('--seq_cutoff', default = 0.3, type = float, help = 'During guidance, taxa are removed if their score is below this cutoff')
|
||||
core.add_argument('--col_cutoff', default = 0.0, type = float, help = 'During guidance, columns are removed if their score is below this cutoff')
|
||||
core.add_argument('--res_cutoff', default = 0.0, type = float, help = 'During guidance, residues are removed if their score is below this cutoff')
|
||||
core.add_argument('--guidance_threads', default = 20, type = int, help = 'Number of threads to allocate to Guidance')
|
||||
core.add_argument('--trimal_cutoff', default = 0.3, type = float, help = 'Gap masking threshold for TrimAl. The maximum proportion of sequences without gaps for a site to be removed (i.e. to remove sites with 70% or more gaps, set this parameter to 0.3).')
|
||||
core.add_argument('--allow_large_files', action = 'store_true', help = 'Allow files with more than 2,000 sequences to run through Guidance.')
|
||||
|
||||
CL = parser.add_argument_group('Contamination loop parameters')
|
||||
CL.add_argument('--contamination_loop', default = None, choices = {'seq', 'clade', 'both'}, help = 'Remove sequences by looking at the sisters of each sequence in a rules file or by picking the best clades')
|
||||
CL.add_argument('--contamination_loop', default = None, choices = {'seq', 'clade'}, help = 'Remove sequences by looking at the sisters of each sequence in a rules file or by picking the best clades')
|
||||
CL.add_argument('--nloops', default = 10, type = int, help = 'The maximum number of contamination-removal loops')
|
||||
CL.add_argument('--cl_tree_method', default = 'fasttree', choices = {'iqtree', 'raxml', 'fasttree', 'iqtree_fast'}, help = 'Tree-building method to use in each contamination loop iteration.')
|
||||
CL.add_argument('--cl_tree_method', default = 'iqtree_fast', choices = {'iqtree', 'raxml', 'fasttree', 'iqtree_fast'}, help = 'Tree-building method to use in each contamination loop iteration.')
|
||||
CL.add_argument('--cl_alignment_method', default = 'mafft_only', choices = {'mafft_only', 'guidance'}, help = 'Alignment method to use in each contamination loop iteration.')
|
||||
CL.add_argument('--cl_exclude_taxa', type = str, default = None, help = 'Path to a file containing taxon names present in input MSA/tree files but which should be removed in the first iteration of the contamination loop.')
|
||||
|
||||
@ -65,7 +68,7 @@ def get_params():
|
||||
|
||||
other = parser.add_argument_group('Other arguments')
|
||||
other.add_argument('--concatenate', action = 'store_true', help = 'Remove paralogs and generate an alignment for concatenation')
|
||||
other.add_argument('--concat_target_taxa', nargs = '+', default = None, help = 'The taxonomic group (sequence prefix), groups, or a file containing a list of groups (multiple prefixes) for which to select sequences to construct a concatenated alignment')
|
||||
other.add_argument('--concat_target_taxa', type = str, default = None, help = 'The taxonomic group (sequence prefix), groups, or a file containing a list of groups (multiple prefixes) for which to select sequences to construct a concatenated alignment')
|
||||
other.add_argument('--tree_font_size', default = 12, help = "Change this if you're not quite happy with the font size in the output trees. If you want smaller font in your trees, you can lower this value; and if you want larger font in your trees, you can raise this value. Some common values are 8, 10, and 12. Size 16 font is pretty big, and size 4 font is probably too small for most purposes. Iconoclasts use size 9, 11, or 13 font.")
|
||||
other.add_argument('--keep_temp', action = 'store_true', help = "Use this to keep ALL Guidance intermediate files")
|
||||
other.add_argument('--keep_iter', '-z', action = 'store_true', help = 'Keep all Guidance iterations (beware this will be very large)')
|
||||
@ -74,7 +77,7 @@ def get_params():
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
#Cleaning up existing output and creating a new output folder structure. This function is only called once, in phylotol.py
|
||||
#Cleaning up existing output and creating a new output folder structure. This function is only called once, in eukphylo.py
|
||||
def clean_up(params):
|
||||
|
||||
#If an output folder doesn't exist, create one.
|
||||
@ -122,7 +125,7 @@ def clean_up(params):
|
||||
if params.end == 'trees' or params.contamination_loop != None:
|
||||
os.mkdir(params.output + '/Output/Trees')
|
||||
os.mkdir(params.output + '/Output/ColoredTrees')
|
||||
if params.start == 'trees':
|
||||
if params.start == 'trees' and params.contamination_loop == None:
|
||||
copy_input('Trees')
|
||||
|
||||
|
||||
|
||||
70
PTL2/run_eukphylo.sh
Normal file
70
PTL2/run_eukphylo.sh
Normal file
@ -0,0 +1,70 @@
|
||||
#!/bin/bash
|
||||
## Last updated Jan 2025 by Auden Cote-L'Heureux; modified Sept. 2025 by Adri K. Grow
|
||||
|
||||
## This shell script is used for running EukPhylo part 2, and includes a general setup for use on an HPC that uses
|
||||
## the Slurm workload manager. It also includes several example run commands, which correspond to examples explained in more detail in the
|
||||
## EukPhylo Wiki (https://github.com/Katzlab/EukPhylo/wiki/EukPhylo-Part-2:-MSAs,-trees,-and-contamination-loop).
|
||||
## These run commands can also be copied and run in the terminal / command line separately, without a shell script.
|
||||
## For the contamination loop, we recommend iterating the sister/subsisters loop multiple times, as branches will shift. In contrast, we recommend running clade grabbing only once.
|
||||
|
||||
## SLURM-SPECIFIC SETUP BELOW
|
||||
|
||||
############### FOR UMASS UNITY HPC ############### (DELETE section if not applicable):
|
||||
#SBATCH --job-name=EukPhylo
|
||||
#SBATCH -n 10 # Number of Cores per Task
|
||||
#SBATCH --mem=125G # Requested Memory
|
||||
#SBATCH -p cpu # Partition
|
||||
#SBATCH -q long # long QOS
|
||||
#SBATCH -t 334:00:00 # Job time limit
|
||||
#SBATCH --output=Run_EP.%A_%a.out # Stdout (%A expands to the array job ID, %a to the array task index)
|
||||
#SBATCH --mail-type=ALL
|
||||
#SBATCH --mail-user=email@email.edu
|
||||
#SBATCH --array=1-600%50
|
||||
module purge #Cleans up any loaded modules
|
||||
module load conda/latest
|
||||
module load mafft/7.505
|
||||
module load diamond/2.1.7
|
||||
conda activate /work/pi_lkatz_smith_edu/Conda_PTL6p2/envs/PTL/
|
||||
|
||||
############### FOR SMITH GRID HPC ############### (DELETE section if not applicable):
|
||||
#SBATCH --job-name=EukPhylo # Job name
|
||||
#SBATCH --output=Run_EukPhylo.%j.out # Stdout (%j expands to jobId)
|
||||
#SBATCH --nodes=1
|
||||
#SBATCH --ntasks=10 ## On the Smith College HPC (Grid), we have to change this to be double the number of task/batches you want to launch
|
||||
#SBATCH --mail-type=ALL
|
||||
#SBATCH --mail-user=email@email.edu ##add your email address for job updates
|
||||
#Load required modules
|
||||
module purge # Cleans up any loaded modules
|
||||
module use /gridapps/modules/all # make sure module locations is loaded
|
||||
module load slurm
|
||||
module load ETE/3.1.3-foss-2024a
|
||||
module load Biopython/1.79-gfbf-2023a
|
||||
module load DIAMOND/2.1.8-GCC-12.3.0
|
||||
module load MAFFT/7.526-GCC-13.3.0-with-extensions
|
||||
module load RAxML-NG/1.2.2-GCC-13.2.0
|
||||
module load IQ-TREE/2.3.6-gompi-2023a
|
||||
module load tqdm/4.66.1-GCCcore-12.3.0
|
||||
module load Python/3.12.3-GCCcore-13.3.0
|
||||
module load Guidance_mid/2.1b-foss-2023a #Smith College HPC specific
|
||||
export PATH=$PATH:/beegfs/fast/katzlab/grid_phylotol_setup/programs/standard-RAxML-master #Smith College HPC specific #export PATH=$PATH:/Path/To/Executable/Files
|
||||
|
||||
|
||||
## PROVIDE YOUR PARENT PATH
|
||||
parent='/Your/Home/Folder/' # The folder where you are running EukPhylo (this should contain the Scripts and input data folders)
|
||||
|
||||
## EXAMPLE RUN COMMANDS BELOW
|
||||
|
||||
# A simple run of part 2, starting from ReadyToGo files and running through tree building
|
||||
srun --exact -n 1 -D ${parent} python3 ${parent}Scripts/eukphylo.py --start raw --end trees --gf_list ${parent}listofOGs.txt --taxon_list ${parent}taxon_list.txt --data ${parent}Input_folder --output ${parent}Output_folder > Output.out
|
||||
|
||||
# Another example starting from ReadyToGo files and running through tree building, with the commonly used similarity filter cutoff, blacklist, and "sim_taxa_list" arguments (see Wiki)
|
||||
srun --exact -n 1 -D ${parent} python3 ${parent}Scripts/eukphylo.py --start raw --end trees --gf_list ${parent}listofOGs.txt --taxon_list ${parent}taxon_list.txt --data ${parent}Input_folder --output ${parent}Output_folder --similarity_filter --blacklist ${parent}Blacklist.txt --sim_cutoff 0.99 --sim_taxa sim_taxa_list.txt > Output.out
|
||||
|
||||
# An example of running just the concatenation step of part 2, starting from trees
|
||||
srun --exact -n 1 -D ${parent} python3 ${parent}Scripts/eukphylo.py --start trees --concatenate --concat_target_taxa Sr_rh --data ${parent}Output > log.out
|
||||
|
||||
# See the Wiki (https://github.com/Katzlab/EukPhylo/wiki/EukPhylo-Part-2:-MSAs,-trees,-and-contamination-loop) for more details!
|
||||
|
||||
|
||||
|
||||
|
||||
@ -1,33 +0,0 @@
|
||||
#!/bin/bash
|
||||
#SBATCH --job-name=meta033 ##change this to a shortened name of your project
|
||||
#SBATCH --output=Run_phylotol.%j.out # Stdout (%j expands to jobId)
|
||||
#SBATCH --nodes=1
|
||||
#SBATCH --ntasks=10 ##change this to be double the number of task/batches you want to launch
|
||||
#SBATCH --mail-type=ALL
|
||||
#SBATCH --mail-user=youremail@smith.edu ##add your email address
|
||||
|
||||
module purge #Cleans up any loaded modules
|
||||
|
||||
module use /gridapps/modules/all #make sure module locations is loaded
|
||||
|
||||
module load slurm
|
||||
module load ETE
|
||||
module load Biopython/1.79-foss-2021b
|
||||
module load DIAMOND/2.0.13-GCC-11.2.0
|
||||
module load MAFFT
|
||||
module load BioPerl
|
||||
module load RAxML
|
||||
module load IQ-TREE/2.1.2-gompi-2021b
|
||||
module load tqdm/4.64.1-GCCcore-12.2.0
|
||||
module load Python/3.9.6-GCCcore-11.2.0
|
||||
export PATH=$PATH:/beegfs/fast/katzlab/grid_phylotol_setup/programs/standard-RAxML-master
|
||||
|
||||
parent='/beegfs/fast/katzlab/Adri/p2PTL/033_meta/B1_meta_033/' #add your path starting with the name of your folder, should begin with /beegfs/fast/katzlab/
|
||||
|
||||
#if you are running batches, you need an srun line for each batch!
|
||||
srun --exact -n 1 -D ${parent} python3 ${parent}Scripts/phylotol.py --similarity_filter --sim_cutoff 0.95 --sim_taxa sim_taxa.txt --blacklist GuidanceRemovedSeqs_allConservedRuns_ML_nov_dec_2023.txt --start raw --end trees --gf_list B1_listofOGs.txt --taxon_list taxon_list.txt --data OutgroupR2Gs --output ${parent}Output_folder_B1 > Output_folder_B1.out &
|
||||
srun --exact -n 1 -D ${parent} python3 ${parent}Scripts/phylotol.py --similarity_filter --sim_cutoff 0.95 --sim_taxa sim_taxa.txt --blacklist GuidanceRemovedSeqs_allConservedRuns_ML_nov_dec_2023.txt --start raw --end trees --gf_list B2_listofOGs.txt --taxon_list taxon_list.txt --data OutgroupR2Gs --output ${parent}Output_folder_B2 > Output_folder_B2.out &
|
||||
srun --exact -n 1 -D ${parent} python3 ${parent}Scripts/phylotol.py --similarity_filter --sim_cutoff 0.95 --sim_taxa sim_taxa.txt --blacklist GuidanceRemovedSeqs_allConservedRuns_ML_nov_dec_2023.txt --start raw --end trees --gf_list B3_listofOGs.txt --taxon_list taxon_list.txt --data OutgroupR2Gs --output ${parent}Output_folder_B3 > Output_folder_B3.out &
|
||||
srun --exact -n 1 -D ${parent} python3 ${parent}Scripts/phylotol.py --similarity_filter --sim_cutoff 0.95 --sim_taxa sim_taxa.txt --blacklist GuidanceRemovedSeqs_allConservedRuns_ML_nov_dec_2023.txt --start raw --end trees --gf_list B4_listofOGs.txt --taxon_list taxon_list.txt --data OutgroupR2Gs --output ${parent}Output_folder_B4 > Output_folder_B4.out &
|
||||
srun --exact -n 1 -D ${parent} python3 ${parent}Scripts/phylotol.py --similarity_filter --sim_cutoff 0.95 --sim_taxa sim_taxa.txt --blacklist GuidanceRemovedSeqs_allConservedRuns_ML_nov_dec_2023.txt --start raw --end trees --gf_list B5_listofOGs.txt --taxon_list taxon_list.txt --data OutgroupR2Gs --output ${parent}Output_folder_B5 > Output_folder_B5.out &
|
||||
wait
|
||||
@ -1,4 +1,4 @@
|
||||
|
||||
<img src="https://github.com/Katzlab/PhyloToL-6/blob/main/Other/Katzlab.png">
|
||||
|
||||
**PhyloToL version 6** is the latest version of the PhyloToL pipeline from the [Katz Lab](https://www.science.smith.edu/katz-lab/) at Smith College. PhyloToL is a phylogenomic toolkit for processing transcriptomic and genomic data that includes novel phylogeny-informed contamination removal techniques. See our [Wiki](https://github.com/Katzlab/PhyloToL-6/wiki) for more information on installation and usage!
|
||||
**EukPhylo version 1.0** is an updated version of the PhyloToL pipeline from the Katz Lab (https://www.science.smith.edu/katz-lab/) at Smith College. EukPhylo is a phylogenomic toolkit for processing transcriptomic and genomic data that includes novel phylogeny-informed contamination removal techniques. See our Wiki (https://github.com/Katzlab/EukPhylo/wiki) for more information on installation and usage!
|
||||
|
||||
86
Utilities/For_Assemblies/ProcessAndRenameAssembledData.py
Normal file
86
Utilities/For_Assemblies/ProcessAndRenameAssembledData.py
Normal file
@ -0,0 +1,86 @@
|
||||
'''
|
||||
Author & Date: Adri K. Grow + ChatGPT, Nov 11th 2024
|
||||
- Updated 02/13/25 to accept either transcriptome or genome assembled data on the command line
|
||||
Motivation: assess and rename assembled transcript or genome files for use in EukPhylo Part 1
|
||||
Intention: warn if any 'transcripts.fasta' or 'contigs.fasta' files are missing or empty for an LKH, otherwise rename and copy them with their assigned 10-digit code by LKH
|
||||
Input:
|
||||
- a base directory containing subdirectories for each LKH, named either 'WTA_LKH<xxxx>' or 'WGA_LKH<xxxx>', each containing a 'transcripts.fasta' or 'contigs.fasta' file
|
||||
- a mapping .txt file with LKH#s tab-separated with corresponding 10-digit codes
|
||||
Output:
|
||||
- a folder named 'renamed_transcripts|contigs' with assembled files now named by 10-digit codes; e.g. "Sr_rh_Ro04_assembledTranscripts.fasta"
|
||||
Dependencies: python3
|
||||
Usage:
|
||||
- for transcriptomes: python3 ProcessAndRenameAssembledData.py <assembled transcriptomes directory> <mapping_file.txt> transcriptomes
|
||||
- for genomes: python3 ProcessAndRenameAssembledData.py <assembled genomes directory> <mapping_file.txt> genomes
|
||||
'''
|
||||
|
||||
import os
|
||||
import shutil
|
||||
import sys
|
||||
|
||||
def read_lkh_mapping(mapping_file):
    """Read the LKH-number to 10-digit-code mapping from a tab-separated file.

    Each data line holds an LKH identifier and its code, separated by a tab.
    Blank lines are skipped, whitespace around each field (including a
    Windows '\\r') is removed, and any columns after the second are ignored.

    Args:
        mapping_file: Path to the tab-separated mapping file.

    Returns:
        A dict mapping each LKH identifier to its code.

    Raises:
        ValueError: If a non-blank line lacks an identifier or a code; the
            message names the offending line number.
    """
    mapping = {}
    with open(mapping_file, 'r') as handle:
        for line_number, line in enumerate(handle, start=1):
            # A trailing newline or spacer line is not an error.
            if not line.strip():
                continue
            fields = [field.strip() for field in line.split('\t')]
            if len(fields) < 2 or not fields[0] or not fields[1]:
                raise ValueError(
                    f"Line {line_number} of '{mapping_file}' is not of the form "
                    f"'<LKH number><TAB><10-digit code>': {line.rstrip()!r}"
                )
            mapping[fields[0]] = fields[1]
    return mapping
|
||||
|
||||
def process_directory(base_dir, mapping, output_dir, data_type):
    """Copy each sample's assembly from base_dir into output_dir under its mapped code.

    Only sub-directories of base_dir whose name starts with 'WTA_LKH'
    (data_type == "transcriptomes") or 'WGA_LKH' (any other data_type) are
    considered. A warning is printed when the expected fasta file is missing
    or empty, and a notification when the LKH number has no entry in mapping;
    in all three cases nothing is copied for that folder.

    Args:
        base_dir: Directory holding one sub-directory per sample.
        mapping: Dict from LKH number (second '_'-separated token of the
            folder name) to the code used in the output file name.
        output_dir: Destination directory; created if it does not exist.
        data_type: "transcriptomes" selects transcript naming, anything else
            selects contig naming.
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Choose the naming scheme for this data type once, up front.
    if data_type == "transcriptomes":
        folder_prefix = "WTA_LKH"
        fasta_filename = "transcripts.fasta"
        output_suffix = "_assembledTranscripts.fasta"
    else:
        folder_prefix = "WGA_LKH"
        fasta_filename = "contigs.fasta"
        output_suffix = "_assembledContigs.fasta"

    for entry in os.listdir(base_dir):
        entry_path = os.path.join(base_dir, entry)
        if not (os.path.isdir(entry_path) and entry.startswith(folder_prefix)):
            continue

        sample_id = entry.split('_')[1]  # e.g. 'WTA_LKH1234' -> 'LKH1234'
        source = os.path.join(entry_path, fasta_filename)

        if not os.path.isfile(source):
            print(f" WARNING: file '{fasta_filename}' is missing in folder {entry}.")
            continue

        if os.path.getsize(source) == 0:
            print(f" WARNING: file '{fasta_filename}' is empty in folder {entry}.")
            continue

        if sample_id not in mapping:
            print(f"Notification: No 10-digit code found for LKH number {sample_id} in folder {entry}.")
            continue

        destination = os.path.join(output_dir, f"{mapping[sample_id]}{output_suffix}")
        shutil.copy(source, destination)
|
||||
|
||||
def main():
    """Command-line entry point: validate the arguments, then rename and copy the assemblies.

    Expects exactly three arguments after the script name: the base
    directory, the mapping file, and either "transcriptomes" or "genomes".
    Prints a message and exits with status 1 when the argument list is wrong
    or when the directory or mapping file does not exist. Output goes to
    'renamed_transcripts' or 'renamed_contigs' in the current working
    directory.
    """
    argv = sys.argv
    if len(argv) != 4 or argv[3] not in ["transcriptomes", "genomes"]:
        print("Usage: python script.py <base_dir> <mapping_file> <transcriptomes|genomes>")
        sys.exit(1)

    base_dir, mapping_file, data_type = argv[1], argv[2], argv[3]

    if not os.path.isdir(base_dir):
        print(f"Error: The directory '{base_dir}' does not exist.")
        sys.exit(1)

    if not os.path.isfile(mapping_file):
        print(f"Error: The file '{mapping_file}' does not exist.")
        sys.exit(1)

    if data_type == "transcriptomes":
        output_folder = "renamed_transcripts"
    else:
        output_folder = "renamed_contigs"
    output_dir = os.path.join(os.getcwd(), output_folder)

    lkh_to_code = read_lkh_mapping(mapping_file)
    process_directory(base_dir, lkh_to_code, output_dir, data_type)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
101
Utilities/For_Assemblies/Trim_Reads.py
Normal file
101
Utilities/For_Assemblies/Trim_Reads.py
Normal file
@ -0,0 +1,101 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
#Author, date: Giulia Magri Ribeiro and Adri K. Grow updated from Xyrus Maurer-Alcala and Ying Yan; June 13 2025
|
||||
#Motivation: Trim adaptors from reads and quality trimming before Assembly
|
||||
#Intent: clean up reads
|
||||
#Dependencies: biopython and bbmap folder
|
||||
#Inputs: parameters.txt, fastq.gz forward and reverse reads
|
||||
#Outputs:trimmed reads in ToAssemble folder
|
||||
#Example: python3 Trim_Reads.py parameter.txt
|
||||
#Katzlab parameters are 24 for quality trimming and 75 for minimum length as of June 2025
|
||||
|
||||
|
||||
from Bio import SeqIO
|
||||
import sys,os
|
||||
import time
|
||||
|
||||
#------------------------------ Checks the Input Arguments ------------------------------#
|
||||
|
||||
# No arguments at all: the user is asking what the script does, so show the
# help text and leave with a success status.
if len(sys.argv) == 1:
    print ('\n\nThis script will remove Adapters, do quality trimming and length trimming on given score and assembly from your raw reads')
    print ('\n\nChecking the overall quality and reads size on FastQC is recommended\n\n')
    print ('Example Usage:\n\n\t' + 'katzlab$ python3 Trim_Reads.py parameter.txt\n\n')
    print ('\t\tQuestions/Comments? Email Giulia (author) at gribeiro@smith.edu\n\n')
    sys.exit()

# Wrong number of arguments is a usage error: report it with a non-zero exit
# status so calling shells and job schedulers can detect the failure.
if len(sys.argv) != 2:
    print ('\n\nDouble check that you have added all the necessary command-line inputs! (see usage below for an example)\n\n')
    print ('Example Usage:\n\n\t' + 'katzlab$ python3 Trim_Reads.py parameter.txt\n\n')
    print ('Please also check that you have a parameter.txt (tab separated values) file which should contain your current filename, new filename, score of quality trimming, and minimum length (see an example below)\n\n')
    print ('parameter.txt example:\n\n\t' + 'XKATZ_20161110_K00134_IL100076423_S41_L005\tLKH001_Spirostomum\t24\t100\n\tXKATZ_20161110_K00134_IL100076416_S17_L005\tLKH002_Loxodes\t28\t100\n')
    sys.exit(1)

parameter_file = sys.argv[1]
mailaddress = 'your_email@xxx.edu' # default email

# Fail early with a readable message instead of a traceback once the
# parameter file is opened later in the script.
if not os.path.isfile(parameter_file):
    print (f"\n\nCould not find the parameter file '{parameter_file}'. Please check the path.\n\n")
    sys.exit(1)

# Trimmed reads are written here; create the folder without spawning a shell.
os.makedirs('ToAssemble', exist_ok=True)
|
||||
|
||||
### takes your raw read data and renames the files with your assigned new names and alters the end to either FwdPE or RevPE
|
||||
def _prefix_before_read_tag(filename, tags):
    """Return the part of filename before the first listed tag it contains, or None."""
    for tag in tags:
        if tag in filename:
            return filename.split(tag)[0]
    return None


def rename(code):
    """Rename raw paired-end read files in the current directory to their assigned names.

    Every '*.fastq.gz' file in the current working directory is checked for a
    forward-read tag ('_FwdPE', '_R1', '_FPE') and then for a reverse-read tag
    ('_RevPE', '_R2', '_RPE'). The text before the tag is looked up in code;
    on a hit the file becomes '<new name>_FwdPE.fastq.gz' or
    '<new name>_RevPE.fastq.gz'. For forward reads a directory called
    '<new name>' is created as well.

    Files are moved with os.replace and directories made with os.makedirs
    rather than through a shell, so names containing spaces or shell
    metacharacters are handled safely. Each file is renamed at most once.

    Args:
        code: Dict mapping the current file-name prefix to the new sample name.
    """
    forward_tags = ['_FwdPE', '_R1', '_FPE']
    reverse_tags = ['_RevPE', '_R2', '_RPE']
    # (tags to look for, suffix of the renamed file, whether to create the sample folder)
    read_kinds = (
        (forward_tags, '_FwdPE.fastq.gz', True),
        (reverse_tags, '_RevPE.fastq.gz', False),
    )

    for filename in os.listdir(os.curdir):
        if not filename.endswith('.fastq.gz'):
            continue
        for tags, suffix, make_folder in read_kinds:
            cur_name = _prefix_before_read_tag(filename, tags)
            if cur_name is None or cur_name not in code:
                continue
            new_name = code[cur_name]
            print(cur_name, new_name)
            os.replace(filename, f'{new_name}{suffix}')
            if make_folder:
                os.makedirs(new_name, exist_ok=True)
            # The file no longer exists under its old name; do not try the
            # other read direction on it.
            break
|
||||
|
||||
### Uses the adapters.fa file in the bbtools resources folder (and BBDuK) to remove adapter sequences -- update if necessary
|
||||
### Uses BBDuK to quality trim reads with the per-sample quality score and minimum length given in the parameter file
|
||||
def QualityTrim(qtrim, minlen):
    """Run BBDuk adapter removal and quality trimming on each renamed read pair.

    For every entry in the current directory whose name contains 'FwdPE', the
    sample name (the text before '_FwdPE') is looked up in qtrim and minlen
    and './bbmap/bbduk.sh' is invoked on the forward file and its 'RevPE'
    counterpart. Trimmed reads are written under 'ToAssemble/'; the BBDuk
    stats file and a log of its output go into a folder named after the
    sample, which is created here if needed.

    Files with no entry in the parameter dictionaries are reported and
    skipped instead of aborting the run with a KeyError.

    Args:
        qtrim: Dict mapping sample name to the quality-trim score (a string).
        minlen: Dict mapping sample name to the minimum read length (a string).
    """
    import shlex  # local import: only needed to quote the shell command built below

    for filename in os.listdir(os.curdir):
        if 'FwdPE' not in filename:
            continue

        new_name = filename.split('_FwdPE')[0]
        if new_name not in qtrim or new_name not in minlen:
            print(f"WARNING: no trimming parameters found for '{filename}'; skipping it.")
            continue

        qscore = qtrim[new_name]
        lscore = minlen[new_name]
        qtrimcmd = '_q' + qscore + '_minlen' + lscore

        # Stats and log files live in a per-sample folder; make sure it exists
        # so the output redirection below cannot fail before BBDuk starts.
        sample = filename.split('_Fwd')[0]
        os.makedirs(sample, exist_ok=True)
        log_file = sample + '/' + sample + qtrimcmd + '_bbduk.log'
        stats_file = sample + '/' + sample + qtrimcmd + '_Stats.txt'

        arguments = [
            './bbmap/bbduk.sh',
            '-Xmx20g',
            'in1=./' + filename,
            'in2=./' + filename.replace('Fwd', 'Rev'),
            'out1=ToAssemble/' + filename.replace('FwdPE', 'FPE' + qtrimcmd),
            'out2=ToAssemble/' + filename.split('Fwd')[0] + 'RPE' + qtrimcmd + '.fastq.gz',
            'qtrim=rl',
            'trimq=' + qscore,
            'minlen=' + lscore,
            'mink=11',
            'k=23',
            'hdist=1',
            'ktrim=r',
            'ref=bbmap/resources/adapters.fa',
            'stats=' + stats_file,
            'overwrite=true',
        ]
        # Quote every argument so unusual characters in file names cannot
        # split or alter the command line.
        command = ' '.join(shlex.quote(argument) for argument in arguments)
        os.system(command + ' > ' + shlex.quote(log_file) + ' 2>&1')
|
||||
|
||||
|
||||
### Calls on rnaSPAdes to do the transcriptome assembly on the quality trimmed files.
|
||||
#def rnaSPAdesAssembly():
|
||||
# for filename in os.listdir(os.curdir+'/ToAssemble'):
|
||||
# if 'LKH' in filename:
|
||||
# if 'FPE_q' in filename:
|
||||
# os.system('python rnaSPAdes-0.1.1/bin/rnaspades.py -m 26 -k 21,33,55,77 --min-complete-transcript 300 -1 ToAssemble/' + filename + ' -2 ToAssemble/' + filename.replace('FPE','RPE')+' -o ' + filename.split('_FPE')[0] + '/; echo "Finished assembling ' + filename.split('_FPE')[0] + '" | mail -s "Finished Transcriptome Assembly ' + (time.strftime("%d/%m/%y")) + '" ' + mailaddress) > out.txt
|
||||
|
||||
|
||||
def main():
    """Read the parameter file, then rename and quality-trim the reads.

    Each non-blank line of the parameter file (module-level parameter_file)
    must hold at least four tab-separated fields: current file-name prefix,
    new sample name, quality-trim score, and minimum length. The fields are
    gathered into three dictionaries that are handed to rename() and
    QualityTrim(). Blank lines are ignored and whitespace around each field
    (including a Windows '\\r') is removed. A line with fewer than four
    fields stops the script with an explanatory message and a non-zero exit
    status.
    """
    code = {}
    qtrim = {}
    minlen = {}
    with open(parameter_file, 'r') as handle:
        for line_number, line in enumerate(handle, start=1):
            if not line.strip():
                continue
            fields = [field.strip() for field in line.split('\t')]
            if len(fields) < 4:
                sys.exit(
                    f"Error: line {line_number} of '{parameter_file}' needs 4 tab-separated "
                    f"fields (current name, new name, quality score, minimum length): {line.rstrip()!r}"
                )
            cur_name, new_name, qscore, lscore = fields[:4]
            code[cur_name] = new_name
            qtrim[new_name] = qscore
            minlen[new_name] = lscore
    rename(code)
    QualityTrim(qtrim, minlen)
|
||||
# rnaSPAdesAssembly()
|
||||
main()
|
||||
18
Utilities/For_Assemblies/WTA_rspades.sh
Normal file
18
Utilities/For_Assemblies/WTA_rspades.sh
Normal file
@ -0,0 +1,18 @@
|
||||
#!/bin/bash
#
# Assemble quality-trimmed paired-end reads with rnaSPAdes, one sample after another.
# Inputs:  ToAssemble/<sample>_FPE_q24_minlen75.fastq.gz and the matching _RPE_ file
# Outputs: Assembled/<sample>/
#
#SBATCH --job-name=Gigi_spades
#SBATCH --output=rnaSPAdes_run.%j.out # Stdout (%j expands to jobId)
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=50 # rnaSPAdes is a single multi-threaded process, so request CPUs per task, not extra tasks
#SBATCH --mem=180G
#SBATCH --mail-type=ALL
#SBATCH --mail-user=xxx@xxx.edu

module purge #Cleans up any loaded modules
module load SPAdes

# Use the CPUs Slurm actually granted (falls back to 50 when run outside Slurm).
THREADS=${SLURM_CPUS_PER_TASK:-50}
# rnaSPAdes memory limit in GB; keep it below the --mem request above so SPAdes
# stops itself before Slurm kills the job.
MEM_GB=170

# Add or remove sample prefixes here.
for sample in SRR26595464 SRR26595465 SRR26595468; do
	rnaspades.py -m "${MEM_GB}" -t "${THREADS}" -1 "ToAssemble/${sample}_FPE_q24_minlen75.fastq.gz" -2 "ToAssemble/${sample}_RPE_q24_minlen75.fastq.gz" -o "Assembled/${sample}"
done
|
||||
|
||||
@ -1,11 +1,12 @@
|
||||
#Author, date: Xyrus (last modified by him Sept 17 2020), most recently updated by Auden on July 19 2023
|
||||
#Author, date: Xyrus (last modified by him Sept 17 2020), most recently updated by Auden on October 17 2024
|
||||
#Motivation: Generate lots of codon usage statistics to aid in identifying useful characteristics for de novo ORF calling
|
||||
#Intent: Summarize nucleotide composition statistics for a fasta file or folder of fasta files
|
||||
#Dependencies: Python3, numpy, BioPython
|
||||
#Inputs: Fasta file or folder of fasta files
|
||||
#Outputs: A fasta file filtered for properly formatted sequences and several spreadsheets summarizing GC, ENc, RSCU, etc.
|
||||
#Outputs: A fasta file filtered for properly formatted sequences and several spreadsheets summarizing GC3S, ENc, RSCU, etc.
|
||||
#Example: python3 CUB.py -i seqs.fasta
|
||||
#Note: Use "python3 CUB.py -i seqs.fasta --require_start --require_stop" when using the on R2G files.
|
||||
#Note: Use "python3 CUB.py -i seqs.fasta --require_start --require_stop" to see more conservative estimate
|
||||
#Note: in this script we use GC3 and GC3S interchangeably, though the abbreviation GC3S is probably more correct
|
||||
|
||||
#Dependencies
|
||||
import os
|
||||
@ -19,11 +20,11 @@ import argparse
|
||||
|
||||
class CalcCUB:
|
||||
"""
|
||||
Returns the Effective Number of Codons used (observed and expected)
|
||||
Returns the Effective Number of Codons (ENc) used (observed and expected)
|
||||
following the equations originally from Wright 1990.
|
||||
"""
|
||||
def expWrightENc(gc3):
|
||||
# Calculates the expected ENc from a sequence's GC3 under Wright 1990
|
||||
# Calculates the expected ENc from a sequence's GC3 (GC3S) under Wright 1990
|
||||
if gc3 > 1:
|
||||
# If GC3 looks as though it is > 1 (e.g. 100%), converts to a float ≤ 1.
|
||||
# Calculations expect a value between 0 and 1
|
||||
@ -32,7 +33,7 @@ class CalcCUB:
|
||||
return round(exp_enc, 4)
|
||||
|
||||
def nullENcGC3():
|
||||
# Calculates the expected ENc from the null distribution of GC3
|
||||
# Calculates the expected ENc from the null distribution of GC3S
|
||||
# values (0, 100% GC)
|
||||
null = [CalcCUB.expWrightENc(n) for n in np.arange(0,.51,0.01)]
|
||||
null += null[:-1][::-1]
|
||||
@ -356,6 +357,12 @@ class GCeval():
|
||||
return round(GC(''.join([seq[n] for n in
|
||||
range(2, len(seq)-len(seq[2:]) % 3, 3)])), 4)
|
||||
|
||||
def gc3s(cdnTbl):
    # Returns the GC content (rounded to 4 places) of third codon positions,
    # leaving out codons whose table entry starts with 'W' or 'M'
    # (tryptophan and methionine, per the original note).
    # NOTE(review): assumes each value is a sequence whose first item is the
    # amino-acid letter and whose last item is an integer count - confirm
    # against the code that builds the codon table.
    third_positions = []
    for codon, info in cdnTbl.items():
        if info[0] == 'W' or info[0] == 'M':
            continue
        # Repeat the codon's last base once per counted occurrence.
        third_positions.append(codon[-1] * info[-1])
    return round(GC(''.join(third_positions)), 4)
|
||||
|
||||
def gc3_4F(cdnTbl):
|
||||
# # This function return the GC content of the third position of four-fold
|
||||
# # degenerate codons
|
||||
@ -385,7 +392,7 @@ class SeqInfo(object):
|
||||
|
||||
def ENcStats(self):
|
||||
# Stores the various Effective Number of Codons calculations in the class
|
||||
self.expENc = CalcCUB.expWrightENc(self.gc3)
|
||||
self.expENc = CalcCUB.expWrightENc(self.gc3s)
|
||||
self.obsENc_6F = CalcCUB.calcWrightENc(self.cdnCounts_6F)
|
||||
self.obsENc_No6F = CalcCUB.calcWrightENc(self.cdnCounts_No6F)
|
||||
self.SunENc_6F = CalcCUB.SunEq5(self.cdnCounts_6F)
|
||||
@ -396,6 +403,7 @@ class SeqInfo(object):
|
||||
for k, v in self.gcFuncs.items():
|
||||
setattr(self,k,v(self.ntd))
|
||||
self.gc4F = GCeval.gc3_4F(self.cdnCounts_No6F)
|
||||
self.gc3s = GCeval.gc3s(self.cdnCounts_No6F)
|
||||
|
||||
|
||||
def RSCUstats(self):
|
||||
@ -429,23 +437,23 @@ def CalcRefFasta(fasta, gCode):
|
||||
def WriteWrightOut(seqData, outName, comp):
|
||||
if comp == False:
|
||||
with open(outName+'/SpreadSheets/'+outName.split('/')[-1]+'.ENc.Raw.tsv','w+') as w:
|
||||
w.write('SequenceID\tAmbiguousCodons\tGC-Overall\tGC1\tGC2\tGC3\t'
|
||||
w.write('SequenceID\tAmbiguousCodons\tGC-Overall\tGC1\tGC2\tGC3\tGC3S\t'
|
||||
'GC3-Degen\tExpWrightENc\tObsWrightENc_6Fold\tObsWrightENc_No6Fold\t'
|
||||
'ObsWeightedENc_6Fold\tObsWeightedENc_No6Fold\n')
|
||||
for k, v in seqData.items():
|
||||
name = [k]
|
||||
gcs = [str(v.gcOverall),str(v.gc1),str(v.gc2),str(v.gc3),str(v.gc4F)]
|
||||
gcs = [str(v.gcOverall),str(v.gc1),str(v.gc2),str(v.gc3),str(v.gc3s),str(v.gc4F)]
|
||||
ENc = [str(v.expENc),str(v.obsENc_6F),str(v.obsENc_No6F),
|
||||
str(v.SunENc_6F),str(v.SunENc_No6F)]
|
||||
w.write('\t'.join(name+[str(v.amb_cdn)]+gcs+ENc)+'\n')
|
||||
else:
|
||||
with open(outName+'/SpreadSheets/'+outName.split('/')[-1]+'.CompTrans.ENc.Raw.tsv','w+') as w:
|
||||
w.write('SequenceID\tAmbiguousCodons\tGC-Overall\tGC1\tGC2\tGC3\t'
|
||||
w.write('SequenceID\tAmbiguousCodons\tGC-Overall\tGC1\tGC2\tGC3\tGC3S\t'
|
||||
'GC3-Degen\tExpWrightENc\tObsWrightENc_6Fold\tObsWrightENc_No6Fold\t'
|
||||
'ObsWeightedENc_6Fold\tObsWeightedENc_No6Fold\n')
|
||||
for k, v in seqData.items():
|
||||
name = [k]
|
||||
gcs = [str(v.gcOverall),str(v.gc1),str(v.gc2),str(v.gc3),str(v.gc4F)]
|
||||
gcs = [str(v.gcOverall),str(v.gc1),str(v.gc2),str(v.gc3),str(v.gc3s),str(v.gc4F)]
|
||||
ENc = [str(v.expENc),str(v.obsENc_6F),str(v.obsENc_No6F),
|
||||
str(v.SunENc_6F),str(v.SunENc_No6F)]
|
||||
w.write('\t'.join(name+[str(v.amb_cdn)]+gcs+ENc)+'\n')
|
||||
@ -473,7 +481,7 @@ def getCompFasta(fasta, gCode, require_start, require_stop):
|
||||
|
||||
def WriteNullENcOut(outName):
|
||||
with open(outName+'/SpreadSheets/' + outName.split('/')[-1] + '.ENc.Null.tsv','w+') as w:
|
||||
w.write('GC3\tENc\n')
|
||||
w.write('GC3S\tENc\n')
|
||||
w.write('\n'.join(CalcCUB.nullENcGC3()))
|
||||
|
||||
|
||||
@ -558,14 +566,14 @@ if __name__ == "__main__":
|
||||
o.write(folder.split('/')[-1] + '\t' + line)
|
||||
|
||||
with open('CUBOutput/SpreadSheets/ENc.Raw.tsv', 'w') as o:
|
||||
o.write('File\tSequenceID\tAmbiguousCodons\tGC-Overall\tGC1\tGC2\tGC3\tGC3-Degen\tExpWrightENc\tObsWrightENc_6Fold\tObsWrightENc_No6Fold\tObsWeightedENc_6Fold\tObsWeightedENc_No6Fold\n')
|
||||
o.write('File\tSequenceID\tAmbiguousCodons\tGC-Overall\tGC1\tGC2\tGC3\tGC3S\tGC3-Degen\tExpWrightENc\tObsWrightENc_6Fold\tObsWrightENc_No6Fold\tObsWeightedENc_6Fold\tObsWeightedENc_No6Fold\n')
|
||||
for folder in folders:
|
||||
for line in open(folder + '/SpreadSheets/' + folder.split('/')[-1] + '.ENc.Raw.tsv'):
|
||||
if 'SequenceID' not in line:
|
||||
o.write(folder.split('/')[-1] + '\t' + line)
|
||||
|
||||
with open('CUBOutput/SpreadSheets/CompTrans.ENc.Raw.tsv', 'w') as o:
|
||||
o.write('File\tSequenceID\tAmbiguousCodons\tGC-Overall\tGC1\tGC2\tGC3\tGC3-Degen\tExpWrightENc\tObsWrightENc_6Fold\tObsWrightENc_No6Fold\tObsWeightedENc_6Fold\tObsWeightedENc_No6Fold\n')
|
||||
o.write('File\tSequenceID\tAmbiguousCodons\tGC-Overall\tGC1\tGC2\tGC3\tGC3S\tGC3-Degen\tExpWrightENc\tObsWrightENc_6Fold\tObsWrightENc_No6Fold\tObsWeightedENc_6Fold\tObsWeightedENc_No6Fold\n')
|
||||
for folder in folders:
|
||||
for line in open(folder + '/SpreadSheets/' + folder.split('/')[-1] + '.CompTrans.ENc.Raw.tsv'):
|
||||
if 'SequenceID' not in line:
|
||||
|
||||
@ -1,14 +1,13 @@
|
||||
'''
|
||||
#Author, date: Godwin Ani and Laura Katz, 9th- Feb - 2023.
|
||||
#Author, date: Godwin Ani and Laura Katz, Feb 9th 2023
|
||||
#Modified: Adri Grow, April 6th 2025 to allow clustering at 100% (1.0) and output renamed file(s) with id clustered appended to file name
|
||||
#Dependencies: Python3, CD-Hit
|
||||
#Intent: For clustering nucleotide or amino acid sequences with the CD-Hit program.
|
||||
#Inputs: A folder of containing Amino acid or DNA fasta files.
|
||||
#Outputs: A folder of clustered files.
|
||||
#Example: python Cluster.py --type dna --identity 0.95 --overlap 0.67 --input input_folder_dna --output output_folder_dna
|
||||
#Intent: For clustering nucleotide or amino acid sequences with the CD-Hit program
|
||||
#Inputs: A folder of containing AA or DNA fasta files
|
||||
#Outputs: A folder of clustered files
|
||||
#Example: python Cluster.py -t dna -id 0.95 -ov 0.67 -i input_folder_dna -o output_folder_dna
|
||||
'''
|
||||
|
||||
|
||||
|
||||
import os
|
||||
import argparse
|
||||
from tqdm import tqdm
|
||||
@ -16,8 +15,10 @@ import subprocess
|
||||
|
||||
def input_validation(value, error_message):
|
||||
try:
|
||||
integer, fractional = value.split('.')
|
||||
value = float(value)
|
||||
if value == 1.0:
|
||||
return value
|
||||
integer, fractional = str(value).split('.')
|
||||
if int(integer) == 0 and len(fractional) == 2:
|
||||
return value
|
||||
except ValueError:
|
||||
@ -28,39 +29,44 @@ def input_validation(value, error_message):
|
||||
def cluster_sequences(program, identity, overlap, input_folder, output_folder):
|
||||
for file in tqdm(os.listdir(input_folder)):
|
||||
if file.endswith('.fasta'):
|
||||
subprocess.run([f'{program}', '-i', f'{input_folder}/{file}', '-o', f'{output_folder}/{file}', '-c', f'{identity}', '-d', '0', '-aS', f'{overlap}'])
|
||||
output_name = f"{os.path.splitext(file)[0]}_{int(float(identity) * 100)}clustered.fasta"
|
||||
subprocess.run([f'{program}', '-i', f'{input_folder}/{file}', '-o', f'{output_folder}/{output_name}', '-c', f'{identity}', '-d', '0', '-aS', f'{overlap}'])
|
||||
|
||||
for file in os.listdir(output_folder):
|
||||
if file.endswith('.clstr'):
|
||||
os.rename(f'{output_folder}/{file}', f'{output_folder}/{file.split("FILE")[0]}Clustered.txt')
|
||||
base_name = os.path.splitext(file)[0] # removes .clstr
|
||||
if base_name.endswith('.fasta'):
|
||||
base_name = base_name[:-6] # removes .fasta from end
|
||||
new_name = f"{base_name}.txt"
|
||||
os.rename(f'{output_folder}/{file}', f'{output_folder}/{new_name}')
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Cluster amino acid or DNA sequences using CD-HIT.')
|
||||
parser.add_argument('--type', choices=['aa', 'dna'], required=True, help='Type of sequences (aa for Amino Acids, dna for DNA)')
|
||||
parser.add_argument('--identity', type=str, required=True, help='Sequence Identity Threshold (e.g., 0.99, 0.95)')
|
||||
parser.add_argument('--overlap', type=str, required=True, help='Sequence Alignment Overlap Value (e.g., 0.67, 0.75)')
|
||||
parser.add_argument('--input', type=str, required=True, help='Input folder containing sequences in fasta format')
|
||||
parser.add_argument('--output', type=str, required=True, help='Output folder for clustered sequences')
|
||||
parser = argparse.ArgumentParser(description='Cluster amino acid or nucleotide sequences using CD-HIT.')
|
||||
parser.add_argument('-t', '--type', choices=['aa', 'dna'], required=True, help='Type of sequences (aa for amino acid, dna for nucleotide)')
|
||||
parser.add_argument('-id','--identity', type=str, required=True, help='Sequence identity threshold (e.g. 1.0, 0.99, 0.95)')
|
||||
parser.add_argument('-ov', '--overlap', type=str, required=True, help='Sequence alignment overlap value (e.g. 0.67, 0.75)')
|
||||
parser.add_argument('-i', '--input_files', type=str, required=True, help='Input folder containing sequences in fasta format')
|
||||
parser.add_argument('-o', '--output', type=str, required=True, help='Output folder for clustered sequences ending with -id value')
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
if not os.path.isdir(args.input):
|
||||
print(f'Error: Input folder "{args.input}" does not exist.')
|
||||
if not os.path.isdir(args.input_files):
|
||||
print(f'Error: Input folder "{args.input_files}" does not exist.')
|
||||
exit(1)
|
||||
|
||||
if not os.path.isdir(args.output):
|
||||
os.mkdir(args.output)
|
||||
|
||||
if args.type == 'aa':
|
||||
identity = input_validation(args.identity, 'ERROR! Use format 0.## for Amino acids sequence identity threshold.')
|
||||
overlap = input_validation(args.overlap, 'ERROR! Use format 0.## for Amino acids sequence alignment overlap value.')
|
||||
cluster_sequences('cd-hit', identity, overlap, args.input, args.output)
|
||||
identity = input_validation(args.identity, 'ERROR! Use format 0.## or 1.0 for amino acid sequence identity threshold.')
|
||||
overlap = input_validation(args.overlap, 'ERROR! Use format 0.## for amino acid sequence alignment overlap value.')
|
||||
cluster_sequences('cd-hit', identity, overlap, args.input_files, args.output)
|
||||
elif args.type == 'dna':
|
||||
identity = input_validation(args.identity, 'ERROR! Use format 0.## for DNA sequence identity threshold.')
|
||||
overlap = input_validation(args.overlap, 'ERROR! Use format 0.## for DNA sequence alignment overlap value.')
|
||||
cluster_sequences('cd-hit-est', identity, overlap, args.input, args.output)
|
||||
identity = input_validation(args.identity, 'ERROR! Use format 0.## or 1.0 for nucleotide sequence identity threshold.')
|
||||
overlap = input_validation(args.overlap, 'ERROR! Use format 0.## for nucleotide sequence alignment overlap value.')
|
||||
cluster_sequences('cd-hit-est', identity, overlap, args.input_files, args.output)
|
||||
else:
|
||||
print('Invalid sequence type. Choose "aa" for Amino Acids or "dna" for DNA.')
|
||||
print('Invalid sequence type. Choose "aa" for amino acids or "dna" for nucleotides.')
|
||||
exit(1)
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@ -27,7 +27,7 @@ enc_null <- data.frame(read_tsv('ENc.Null.tsv'))
|
||||
#you need as.numeric to ensure R is reading the variable correctly
|
||||
gc3_plot <- ggplot(gc3, aes(as.numeric(GC3.Degen), as.numeric(ObsWrightENc_No6Fold)))+
|
||||
geom_point(size = 0.1)+
|
||||
geom_line(data = enc_null, aes(GC3, ENc))+
|
||||
geom_line(data = enc_null, aes(GC3S, ENc))+
|
||||
theme_classic()+
|
||||
labs(x = 'GC3 Degen', y = 'ObsWrightENc_No6fold')+
|
||||
theme(legend.position = 'none')+
|
||||
|
||||
65
Utilities/for_fastas/Salmon.sh
Normal file
65
Utilities/for_fastas/Salmon.sh
Normal file
@ -0,0 +1,65 @@
|
||||
#!/bin/bash
|
||||
|
||||
## Last updated on Jan 9th 2024 by Auden Cote-L'Heureux
|
||||
|
||||
#Intent: Calculate TPM for assembled transcripts
|
||||
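#Dependencies: Salmon v1.9.0 (the script calls the binary at salmon-1.9.0_linux_x86_64/bin/salmon, which must sit in the same folder as 'Transcriptomes' and 'RawReads')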
|
||||
#Inputs: Must be in a folder along with a folder called 'Transcriptomes', containing assembled transcripts as output by rnaSpades (transcripts.fasta),
|
||||
## and a folder called 'RawReads' containing the fwd and rev reads prior to assembly, with the same file prefixes as the corresponding assembled transcript files
|
||||
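#Outputs: A folder called 'Indices' with one Salmon index per transcriptome, and a folder called 'quants' with one subfolder per sample (named by the first 10 characters of the file name) containing Salmon's quantification output, which includes the TPM data.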
|
||||
|
||||
## If running on an HPC, include parameters here! For example, on a Slurm system you might use
|
||||
|
||||
|
||||
#SBATCH --job-name=tpm
|
||||
#SBATCH --output=Salmon.%j.out # Stdout (%j expands to jobId)
|
||||
#SBATCH --nodes=1
|
||||
#SBATCH --ntasks=60
|
||||
#SBATCH --mem=60G
|
||||
|
||||
# Create the folder that will hold one Salmon index per transcriptome
# (-p so that re-running the script does not fail if it already exists)
mkdir -p Indices

## First, build transcript indices

cd Transcriptomes || exit 1

for TRANS in *; do
	# The first 10 characters of the file name identify the sample
	tax=${TRANS:0:10}

	./../salmon-1.9.0_linux_x86_64/bin/salmon index -t "$TRANS" -i "../Indices/$tax"
done

## Now calculate TPM

# We are still inside 'Transcriptomes' at this point, so step back up before
# entering 'Indices' (a plain 'cd Indices' would look for Transcriptomes/Indices)
cd ../Indices || exit 1

# Substrings that mark forward and reverse read files
fpesub="FPE"; rpesub="RPE"

for TRANS in *; do
	# Each index folder is named by the 10-character sample prefix
	tax=${TRANS:0:10}
	fpe='NA'; rpe='NA'

	# Find the raw read files whose names start with the same 10-character prefix
	for TRIM in ../RawReads/*; do
		# Strip the leading '../RawReads/' to get the bare file name
		trif=${TRIM##*/}
		if [ "${trif:0:10}" == "$tax" ]; then
			if [[ "$trif" == *"$fpesub"* ]]; then
				fpe=$trif
			fi
			if [[ "$trif" == *"$rpesub"* ]]; then
				rpe=$trif
			fi
		fi
	done

	# Without forward reads there is nothing to quantify for this sample
	if [ "$fpe" == 'NA' ]; then
		echo "Warning: no forward (FPE) reads found in RawReads for $tax; skipping" >&2
		continue
	fi

	if [ "$rpe" != 'NA' ]; then
		# Paired-end: both forward and reverse reads were found
		./../salmon-1.9.0_linux_x86_64/bin/salmon quant -i "$TRANS" -l A -1 "../RawReads/$fpe" -2 "../RawReads/$rpe" --validateMappings -o "../quants/$tax"
	else
		# Single-end: only forward reads were found
		./../salmon-1.9.0_linux_x86_64/bin/salmon quant -i "$TRANS" -l A -r "../RawReads/$fpe" --validateMappings -o "../quants/$tax"
	fi
done
|
||||
@ -1,51 +1,56 @@
|
||||
'''
|
||||
#Author, date: ?
|
||||
#Uploaded: updated by Adri Grow, 2024 (previous Adri Grow 2023)
|
||||
#Intent: map a group of trimmed reads to a reference.
|
||||
#Dependencies: Python3, hisat2, samtools, sambamba
|
||||
#EDIT LINES: 18 & 32
|
||||
#Inputs: Folder named 'TrimmedReads' containing all the trimmed reads.
|
||||
#Outputs: Folders with the names of the LKHs containing the sam/bam files.
|
||||
#Example: python ReadMapping.py
|
||||
#Uploaded: updated by Adri Grow, Aug 2025
|
||||
#Intent: map a group of trimmed reads to a reference
|
||||
#Dependencies: Python, HISAT2, samtools, (optional: sambamba)
|
||||
#EDIT LINES: 19 & 36
|
||||
#Inputs: Folder named 'TrimmedReads' containing the forward and reverse trimmed reads that start with the same unique identifier for each sample/cell
|
||||
#Outputs: Folders with the names of the unique identifier (e.g. LKHs) containing the bam files
|
||||
#Usage: python3 ReadMapping.py
|
||||
#IMPORTANT: Lines 34-42 manipulate the output files in several different ways including converting .sam to .bam, sorting, optional deduplicating, optional quality filtering, and retaining only mapped reads. It is the responsibility of the user to determine exactly which commands are needed for their dataset.
|
||||
'''
|
||||
|
||||
import os
|
||||
from Bio import SeqIO
|
||||
|
||||
#this first command builds your reference with Hisat.
|
||||
#If you've already done this, DON'T run this command! Instead, comment it out (use a # in front of it).
|
||||
#It will output several files. Don't worry about them, Hisat will know what to do.
|
||||
os.system("hisat2-build Foram_reference.fasta Foram_Index") #change to your reference.fasta and rename the index
|
||||
#This first command builds your reference with HISAT
|
||||
#If you've already done this, DON'T run this command! Instead, comment it out (use a # in front of it)
|
||||
#It will output several files. Don't worry about them, HISAT will know what to do
|
||||
os.system("hisat2-build Foram_reference.fasta Foram_Index") #Replace "Foram_reference.fasta" with your reference fasta name, and optionally change "Foram_Index" to your preferred index name
|
||||
|
||||
folder = os.listdir("TrimmedReads") #Insert the name of the folder which has your trimmed reads inside the quotes
|
||||
folder.sort() #This sorts the folder so that all the LKHs are in order.
|
||||
folder = os.listdir("TrimmedReads") #Replace "TrimmedReads" with the name of the folder containing your trimmed reads, if different than TrimmedReads
|
||||
|
||||
folder.sort() #This sorts the trimmed reads folder so that all the files are passed in order
|
||||
|
||||
for x in folder:
|
||||
if "LKH" in x and "FPE" in x: #assigning a variable to forward reads. Make sure you have both forward and reverse reads for each cell!
|
||||
#This is specific for file names starting with 'LKH' unique identifiers formatted similar to 'LKH###_FPE.fastq.gz'
|
||||
if "LKH" in x and "FPE" in x: #Assigning a variable to forward reads. Make sure you have both forward and reverse reads for each cell!
|
||||
FPE = x
|
||||
if "LKH" in x and "RPE" in x: #assigning a variable to reverse reads.
|
||||
sample_id = FPE.split("_FPE")[0]
|
||||
if "LKH" in x and "RPE" in x: #Assigning a variable to reverse reads
|
||||
RPE = x
|
||||
|
||||
if(FPE[:7] == RPE[:7]):
|
||||
#The next few lines are several Hisat commands that will create new files.
|
||||
#EDIT the name of the index and the name of the trimmed reads folder in the first command below
|
||||
os.system("hisat2 -x Foram_Index -1 TrimmedReads/" +FPE+ " -2 TrimmedReads/" +RPE+ " -S sample.sam")
|
||||
os.system("samtools view -bS sample.sam > sample.bam")
|
||||
os.system("samtools fixmate -O bam sample.bam fixmate_sample.bam")
|
||||
os.system("samtools sort -O bam -o sorted_sample.bam fixmate_sample.bam")
|
||||
os.system("sambamba markdup -r sorted_sample.bam sorted_sample.dedup.bam")
|
||||
os.system("samtools view -h -b -q 40 sorted_sample.dedup.bam > sorted_sample.q40.bam")
|
||||
os.system("samtools view -h -b -q 20 sorted_sample.dedup.bam > sorted_sample.q20.bam")
|
||||
os.system("samtools view -h -F 4 -b sorted_sample.dedup.bam > defaultparameters_sample.bam")
|
||||
if FPE.split("_FPE")[0] == RPE.split("_RPE")[0]: #Match sample IDs dynamically
|
||||
#The next few lines are several HISAT commands that will create new files
|
||||
#If necessary, EDIT the name of the index and the name of the trimmed reads folder in the very next line only
|
||||
os.system("hisat2 -x Foram_Index -1 TrimmedReads/" +FPE+ " -2 TrimmedReads/" +RPE+ " -S sample.sam") #running HISAT2
|
||||
os.system("samtools view -bS sample.sam > sample.bam") #converts .sam file to .bam file
|
||||
os.remove("sample.sam") #remove the .sam file (already converted to .bam, sam files are large and unnecessary to keep)
|
||||
#os.system("samtools fixmate -O bam sample.bam fixmate_sample.bam") #use this command if you will be using the sambamba markdup command to remove duplicate reads (Katzlab default for transcriptomics and amplicon is to not remove duplicates)
|
||||
os.system("samtools sort -O bam -o sorted_sample.bam sample.bam") #sorts the .bam file alignments by leftmost coordinates
|
||||
#os.system("sambamba markdup -r sorted_sample.bam sorted_sample.dedup.bam") #removes duplicate reads - may not be appropriate for your study or protocols, user will need to determine if this is best practice for their study
|
||||
#os.system("samtools view -h -b -q 40 sorted_sample.dedup.bam > sorted_sample.q40.bam") #only keeps reads with mapping quality ≥ 40, input is the dedup file but can easily be modified to use the sorted .bam file
|
||||
#os.system("samtools view -h -b -q 20 sorted_sample.dedup.bam > sorted_sample.q20.bam") #only keeps reads with mapping quality ≥ 20, input is the dedup file but can easily be modified to use the sorted .bam file
|
||||
os.system("samtools view -h -F 4 -b sorted_sample.bam > sorted_mapped_sample.bam") #only keeps mapped reads, using the sorted .bam file as input - this is the Katzlab transcriptomic and amplicon final output that should be used for continued analyses
|
||||
|
||||
if not os.path.isdir(x[:7]):
|
||||
os.mkdir(x[0:7]) #making folders with the names of the LKHs
|
||||
if not os.path.isdir(sample_id):
|
||||
os.mkdir(sample_id) #making folders with the names of the LKHs or unique identifiers
|
||||
|
||||
for file in os.listdir('.'): #These lines move the sam/bam files that Hisat creates into the new LKH folders.
|
||||
for file in os.listdir('.'): #These lines move the bam files created into the new LKH/unique identifier folders
|
||||
if(file.endswith('.sam') or file.endswith('.bam')):
|
||||
os.rename(file,x[:7] + '/' + file)
|
||||
os.rename(file, f"{sample_id}/{file}")
|
||||
|
||||
print("~~~~~~~~~~~:>~") #When the snake appears, your script has run!
|
||||
print("~~~~~~~~~~~:>~") #When the snake appears in terminal, the script has finished running for all samples/cells!
|
||||
|
||||
|
||||
|
||||
|
||||
@ -1,10 +1,10 @@
|
||||
#Author, date: Auden Cote-L'Heureux, last updated Apr 1st 2024 by GA
|
||||
#Author, date: Auden Cote-L'Heureux, last updated Aug 18th 2025 by AKG
|
||||
#Motivation: Select robust sequences from trees
|
||||
#Intent: Select clades of interest from large trees using taxonomic specifications
|
||||
#Dependencies: Python3, ete3, Biopython
|
||||
#Inputs: A folder containing: all PTLp2 output trees and all corresponding unaligned .fasta (pre-guidance) files
|
||||
#Outputs: A folder of grabbed clades and filtered unaligned fasta files
|
||||
#Example: python CladeGrabbing.py --input /Path/to/trees --target Sr_rh --min_presence 20
|
||||
#Example: python3 CladeGrabbing.py --input /Path/To/TreesandPreGuidance --target Sr_rh --min_presence 20
|
||||
#IMPORTANT: key parameters explained in "add_argument" section below
|
||||
|
||||
#Dependencies
|
||||
@ -18,7 +18,7 @@ def get_args():
|
||||
|
||||
parser = argparse.ArgumentParser(
|
||||
prog = 'Clade grabber, Version 2.1',
|
||||
description = "Updated Aug 1st, 2023 by Auden Cote-L'Heureux, modified by GA Feb 13th 2024"
|
||||
description = "Updated Aug 1st, 2023 by Auden Cote-L'Heureux, modified by AKG Aug 18th 2025"
|
||||
)
|
||||
#add_argument section with parameters explained
|
||||
parser.add_argument('-i', '--input', type = str, required = True, help = 'Path to a folder containing input trees (which must have the file extension .tre, .tree, .treefile, or .nex)')
|
||||
@ -28,6 +28,8 @@ def get_args():
|
||||
parser.add_argument('-nr', '--required_taxa_num', type = int, default = 0, help = 'The number of species belonging to taxa in the --required_taxa list that must be present in the clade. Default is 0.')
|
||||
parser.add_argument('-o', '--outgroup', type = str, default = '', help = 'A comma-separated list of any number of digits/characters (e.g. Sr_ci_S OR Am_t), or a file with the extension .txt containing a list of complete or partial taxon codes, to describe taxa that will be included as outgroups in the output unaligned fasta files (which will contain only sequences from a single selected clade, and all outgroup sequences in the tree captured by this argument).')
|
||||
parser.add_argument('-c', '--contaminants', type = float, default = 2, help = 'The number of non-ingroup contaminants allowed in a clade, or if less than 1 the proportion of sequences in a clade that can be non-ingroup (i.e. presumed contaminants). Default is to allow 2 contaminants.')
|
||||
parser.add_argument('-ft', '--first_target', type=str, default='', help='[Optional] A comma-separated list or .txt file of complete/partial taxon codes for an initial, broad clade search. If provided, the script will first find clades with these taxa before applying the main --target filter.')
|
||||
parser.add_argument('-fm', '--first_min_presence', type=int, default=0, help='[Optional] Minimum number of sequences from --first_target required in a clade for it to be used in the second-stage search. Ignored if --first_target is not provided.')
|
||||
|
||||
return parser.parse_args()
|
||||
|
||||
@ -86,16 +88,83 @@ def reroot(tree):
|
||||
def get_subtrees(args, file):
|
||||
|
||||
newick = get_newick(args.input + '/' + file)
|
||||
|
||||
tree = ete3.Tree(newick)
|
||||
|
||||
majs = list(dict.fromkeys([leaf.name[:2] for leaf in tree]))
|
||||
|
||||
#Only try to reroot trees with more than 2 major clades. This was added to fix the ETE3 "Cannot set myself as outgroup" error
|
||||
# Only try to reroot trees with more than 2 major clades (original behavior)
|
||||
if len(majs) > 2:
|
||||
tree = reroot(tree)
|
||||
|
||||
#Getting a clean list of all target taxa
|
||||
# -------------------------------
|
||||
# FIRST-STAGE (optional) FILTER
|
||||
# -------------------------------
|
||||
def get_outer_leafsets():
|
||||
"""
|
||||
Return a list of sets, each set = leaf names of an outer clade
|
||||
that passes --first_target, --first_min_presence, children_keep,
|
||||
and contaminants logic (using args.contaminants).
|
||||
If --first_target is not used, return one set containing ALL leaves.
|
||||
"""
|
||||
if not args.first_target or args.first_min_presence == 0:
|
||||
return [set(leaf.name for leaf in tree)] # no outer filter → whole tree
|
||||
|
||||
# Parse first_target codes
|
||||
if '.' in args.first_target:
|
||||
first_target_codes = [l.strip() for l in open(args.first_target, 'r').readlines() if l.strip() != '']
|
||||
else:
|
||||
first_target_codes = [code.strip() for code in args.first_target.split(',') if code.strip() != '']
|
||||
|
||||
outer_sets = []
|
||||
seen_leaves = []
|
||||
|
||||
for node in tree.traverse('levelorder'):
|
||||
# large enough and not subsumed by already accepted outer node
|
||||
if len(node) >= args.first_min_presence and len(set(seen_leaves) & set([leaf.name for leaf in node])) == 0:
|
||||
leaves = [leaf.name for leaf in node]
|
||||
|
||||
# children_keep logic but for first_target
|
||||
children_keep = 0
|
||||
for child in node.children:
|
||||
taken = False
|
||||
for code in first_target_codes:
|
||||
for leaf in child:
|
||||
if leaf.name.startswith(code):
|
||||
children_keep += 1
|
||||
taken = True
|
||||
break
|
||||
if taken:
|
||||
break
|
||||
if children_keep != len(node.children):
|
||||
continue
|
||||
|
||||
# count first-target hits (use [:10] uniqueness like original)
|
||||
first_hits = set()
|
||||
for code in first_target_codes:
|
||||
for leaf in leaves[::-1]:
|
||||
if leaf.startswith(code):
|
||||
first_hits.add(leaf[:10])
|
||||
leaves.remove(leaf)
|
||||
|
||||
# contaminants logic applied to FIRST-STAGE (reuse args.contaminants)
|
||||
passes_contam = ((args.contaminants < 1 and len(leaves) <= args.contaminants * len(first_hits)) or
|
||||
(args.contaminants >= 1 and len(leaves) <= args.contaminants))
|
||||
|
||||
if len(first_hits) >= args.first_min_presence and passes_contam:
|
||||
outer_sets.append(set(leaf.name for leaf in node))
|
||||
seen_leaves.extend([leaf.name for leaf in node])
|
||||
|
||||
return outer_sets
|
||||
|
||||
# Build outer sets; if user supplied first-stage args, we'll restrict inner search to these
|
||||
using_first = bool(args.first_target) and args.first_min_presence > 0
|
||||
outer_leafsets = get_outer_leafsets()
|
||||
|
||||
# --------------------------------
|
||||
# ORIGINAL INNER FILTER (unchanged)
|
||||
# --------------------------------
|
||||
|
||||
# Getting a clean list of all target taxa
|
||||
if '.' in args.target:
|
||||
try:
|
||||
target_codes = [l.strip() for l in open(args.target, 'r').readlines() if l.strip() != '']
|
||||
@ -104,7 +173,7 @@ def get_subtrees(args, file):
|
||||
else:
|
||||
target_codes = [code.strip() for code in args.target.split(',') if code.strip() != '']
|
||||
|
||||
#Getting a clean list of all "at least" taxa
|
||||
# Getting a clean list of all "at least" taxa
|
||||
if '.' in args.required_taxa:
|
||||
try:
|
||||
required_taxa_codes = [l.strip() for l in open(args.required_taxa, 'r').readlines() if l.strip() != '']
|
||||
@ -115,18 +184,23 @@ def get_subtrees(args, file):
|
||||
|
||||
target_codes = list(dict.fromkeys(target_codes + required_taxa_codes))
|
||||
|
||||
|
||||
#Creating a record of selected subtrees, and all of the leaves in those subtrees
|
||||
# Creating a record of selected subtrees, and all of the leaves in those subtrees
|
||||
selected_nodes = []; seen_leaves = []
|
||||
|
||||
#Iterating through all nodes in tree, starting at "root" then working towards leaves
|
||||
# Iterating through all nodes in tree, starting at "root" then working towards leaves
|
||||
for node in tree.traverse('levelorder'):
|
||||
#If a node is large enough and is not contained in an already selected clade
|
||||
# If using first-stage filter, only consider nodes fully inside some outer clade
|
||||
if using_first:
|
||||
node_leafs = set(leaf.name for leaf in node)
|
||||
# require subset (node fully contained in an accepted outer clade)
|
||||
if not any(node_leafs.issubset(S) for S in outer_leafsets):
|
||||
continue
|
||||
|
||||
# If a node is large enough and is not contained in an already selected clade
|
||||
if len(node) >= args.min_presence and len(list(set(seen_leaves) & set([leaf.name for leaf in node]))) == 0:
|
||||
leaves = [leaf.name for leaf in node]
|
||||
|
||||
#Accounting for cases where e.g. one child is a contaminant, and the other child is a good clade with 1 fewer than the max number of contaminants
|
||||
# Accounting for cases where e.g. one child is a contaminant, and the other child is a good clade
|
||||
children_keep = 0
|
||||
for child in node.children:
|
||||
for code in target_codes:
|
||||
@ -144,7 +218,6 @@ def get_subtrees(args, file):
|
||||
|
||||
for code in target_codes:
|
||||
for leaf in leaves[::-1]:
|
||||
#print(leaf)
|
||||
if leaf.startswith(code):
|
||||
target_leaves.add(leaf[:10])
|
||||
|
||||
@ -154,14 +227,12 @@ def get_subtrees(args, file):
|
||||
break
|
||||
leaves.remove(leaf)
|
||||
|
||||
|
||||
|
||||
|
||||
#Grab a clade as a subtree if 1) it has enough target taxa; 2) it has enough "at least" taxa; 3) it does not have too many contaminants
|
||||
if len(target_leaves) >= args.min_presence and len(required_taxa_leaves) >= args.required_taxa_num and ((args.contaminants < 1 and len(leaves) < args.contaminants * len(target_leaves)) or len(leaves) < args.contaminants):
|
||||
# Grab a clade as a subtree if it passes all filters
|
||||
if len(target_leaves) >= args.min_presence and len(required_taxa_leaves) >= args.required_taxa_num and ((args.contaminants < 1 and len(leaves) <= args.contaminants * len(target_leaves)) or len(leaves) <= args.contaminants):
|
||||
selected_nodes.append(node)
|
||||
seen_leaves.extend([leaf.name for leaf in node])
|
||||
#Write the subtrees to output .tre files
|
||||
|
||||
# Write the subtrees to output .tre files
|
||||
for i, node in enumerate(selected_nodes[::-1]):
|
||||
with open('Subtrees/' + '.'.join(file.split('.')[:-1]) + '_' + str(i) + '.tre', 'w') as o:
|
||||
o.write(node.write())
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
#Author, date: Auden Cote-L'Heureux, last updated Dec 18th 2023
|
||||
#Author, date: Auden Cote-L'Heureux, last updated Nov 11th 2024 by Adri Grow
|
||||
#Motivation: Understand the topology of trees
|
||||
#Intent: Describe clade sizes for different taxonomic groups
|
||||
#Dependencies: Python3, ete3
|
||||
@ -236,9 +236,10 @@ if __name__ == '__main__':
|
||||
for tree_file in tqdm(os.listdir(args.input)):
|
||||
if tree_file.split('.')[-1] in ('tre', 'tree', 'treefile', 'nex'):
|
||||
clades_per_tax, majs_per_clade, mins_per_clade = get_clades(args.input + '/' + tree_file, args)
|
||||
clades_per_tax_per_file.update({ tree_file.split('.')[0] : clades_per_tax })
|
||||
majs_per_clade_per_file.update({ tree_file.split('.')[0] : majs_per_clade })
|
||||
mins_per_clade_per_file.update({ tree_file.split('.')[0] : mins_per_clade })
|
||||
base_filename = os.path.splitext(tree_file)[0]
|
||||
clades_per_tax_per_file.update({ base_filename : clades_per_tax })
|
||||
majs_per_clade_per_file.update({ base_filename : majs_per_clade })
|
||||
mins_per_clade_per_file.update({ base_filename : mins_per_clade })
|
||||
|
||||
write_output(clades_per_tax_per_file, args, majs_per_clade = majs_per_clade_per_file, mins_per_clade = mins_per_clade_per_file)
|
||||
|
||||
|
||||
@ -151,7 +151,7 @@ def reroot(tree):
|
||||
return best_clade
|
||||
|
||||
#Get the biggest clade for each taxonomic group (stops once it finds one)
|
||||
for taxon in [('Ba', 'Za'), ('Op'), ('Pl'), ('Am'), ('Ex'), ('Sr')]:
|
||||
for taxon in [('Ba'), ('Za'), ('Op'), ('Pl'), ('Am'), ('Ex'), ('Sr')]:
|
||||
clade = get_best_clade(taxon)
|
||||
|
||||
if len([leaf for leaf in clade if leaf.name[:2] in taxon]) > 3:
|
||||
|
||||
50
Utilities/for_trees/ShortenTips.py
Normal file
50
Utilities/for_trees/ShortenTips.py
Normal file
@ -0,0 +1,50 @@
|
||||
|
||||
''' Author, Date : Godwin Ani, 10 - July - 2024.
|
||||
Motivation : To make phylogenetic trees more presentable.
|
||||
Intent : Shorten the tip labels of phylogenetic trees.
|
||||
Dependencies : Python3, ete3
|
||||
Inputs : A folder containing trees
|
||||
Outputs : A folder of trees with shortened tips.
|
||||
Usage : python3 ShortenTips.py --input path_to_folder_of_trees
|
||||
'''
|
||||
|
||||
|
||||
import os, re, sys, argparse, string
|
||||
import ete3
|
||||
|
||||
|
||||
|
||||
# Command-line interface: a single folder of tree files to process
parser = argparse.ArgumentParser(description = 'Shorten the tip labels of phylogenetic trees.')
# --input is required: every later step builds paths from it, so a missing value
# would otherwise fail with an unhelpful TypeError instead of a usage message
parser.add_argument('-i', '--input', required = True, help = 'Path to a folder containing tree files (.tree, .tre, .treefile, or .nex)')
args = parser.parse_args()

# Shortened trees are written to a "renamed" subfolder inside the input folder
os.makedirs(args.input + '/renamed', exist_ok = True)
|
||||
|
||||
def get_newick(fname):
    """Return the Newick string stored in a tree file.

    Each line is reduced to its last space-separated token. A token that
    starts with '(' (plain Newick) or with 'tree1=' (a tree statement that
    assigns the Newick to 'tree1') is taken as the tree; the 'tree1=' prefix,
    single quotes and backslashes are removed from it. If several lines
    match, the last one wins. The line's trailing newline is kept.

    Parameters:
        fname: path to the tree file.

    Returns:
        The Newick string, or '' if no line matched.
    """
    newick = ''
    # Use a context manager so the file handle is closed once it has been read
    with open(fname) as handle:
        for line in handle:
            token = line.split(' ')[-1]
            if token.startswith(('(', 'tree1=')):
                newick = token.split('tree1=')[-1].replace("'", '').replace('\\', '')
    return newick
|
||||
|
||||
|
||||
# Shorten the tip labels of one tree file and write the result into the
# 'renamed' subfolder of the input folder. `file` is the path to the tree file.
def tree_formatting_wrapper(file):
|
||||
# Read the Newick string from the file and parse it with ete3
newick = get_newick(file)
|
||||
tree = ete3.Tree(newick)
|
||||
# Tuple of all ASCII letters, used below as the set of accepted first characters
any_letter = tuple(string.ascii_letters)
|
||||
for leaf in tree:
|
||||
# Only tips whose name starts with an ASCII letter are shortened
# NOTE(review): indentation is not visible in this view - presumably all three renaming steps below sit under this check; confirm
if leaf.name.startswith(any_letter):
|
||||
# Drop everything from the first '_Len' onwards
leaf.name = str(leaf.name).split('_Len')[0]
|
||||
# Abbreviate 'Contig_' to 'Ct'
leaf.name = str(leaf.name).replace('Contig_', 'Ct')
|
||||
# Remove the '_XX_0' substring
leaf.name = str(leaf.name).replace('_XX_0', '')
|
||||
# The output keeps the original file name (path stripped) with '.tree' appended
tree.write(format=1, outfile=args.input + '/renamed/' +file.split('/')[-1] + '.tree')
|
||||
|
||||
|
||||
# Process every tree file in the input folder, recognised by its file extension
for fname in os.listdir(args.input):
    extension = fname.rsplit('.', 1)[-1]
    if extension not in ('tree', 'tre', 'treefile', 'nex'):
        continue
    tree_formatting_wrapper(args.input + '/' + fname)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user