Compare commits

..

166 Commits
v6.0 ... main

Author SHA1 Message Date
Godwin N. Ani
1814246c04
Update wrapper.py 2025-09-23 16:05:44 -04:00
Godwin N. Ani
591e32d5e5
Update wrapper.py 2025-09-23 16:01:59 -04:00
Adri K. Grow
fbf51f815a
Update run_eukphylo.sh 2025-09-19 12:19:51 -04:00
Adri K. Grow
ac84339565
Update run_eukphylo.sh 2025-09-19 12:19:10 -04:00
Adri K. Grow
bdec8612ee
Update run_eukphylo.sh 2025-09-19 11:46:14 -04:00
Adri K. Grow
db49d4965e
Update run_eukphylo.sh 2025-09-18 15:46:48 -04:00
Adri K. Grow
f2d8fd9e6c
Update wrapper_submit.sh 2025-09-04 11:21:53 -04:00
Adri K. Grow
fbaa61f23e
Update wrapper_submit.sh 2025-09-04 11:21:17 -04:00
Adri K. Grow
8222b6404e
Update ReadMapping.py 2025-08-28 23:26:39 -04:00
Adri K. Grow
f5ca94b51c
Update ReadMapping.py 2025-08-27 13:18:28 -04:00
Godwin N. Ani
033ed1237e
Add files via upload 2025-08-27 10:49:37 -04:00
Adri K. Grow
4f400d61c8
Update wrapper_submit.sh 2025-08-25 17:21:34 -04:00
Adri K. Grow
692eabc6ad
Update run_eukphylo.sh 2025-08-25 17:20:28 -04:00
Adri K. Grow
f17b43ffc9
Update wrapper_submit.sh 2025-08-25 17:20:21 -04:00
Adri K. Grow
12870b2007
Update run_eukphylo.sh 2025-08-25 17:18:13 -04:00
Adri K. Grow
10c4dda6b7
Update run_eukphylo.sh 2025-08-25 17:15:31 -04:00
Godwin N. Ani
5bee8e55d2
Update wrapper_submit.sh 2025-08-25 17:05:56 -04:00
Adri K. Grow
a00e51523f
Update wrapper_submit.sh 2025-08-25 11:49:11 -04:00
Adri K. Grow
cb71db1f72
Update wrapper_submit.sh 2025-08-25 11:47:10 -04:00
Adri K. Grow
e785532921
Update wrapper_submit.sh 2025-08-25 11:42:05 -04:00
Godwin N. Ani
c042f62249
Update wrapper_submit.sh 2025-08-25 10:59:29 -04:00
Godwin N. Ani
efebf01057
Update wrapper_submit.sh 2025-08-25 10:58:25 -04:00
Adri K. Grow
0fc18547d6
Update ReadMapping.py 2025-08-22 13:16:37 -04:00
Adri K. Grow
fee7125729
Update ProcessAndRenameAssembledData.py 2025-08-21 10:40:08 -04:00
Adri K. Grow
c92cfb1b19
Update ReadMapping.py
Making things more universally applicable rather than keeping the hardcoded index range
2025-08-18 14:13:11 -04:00
Adri K. Grow
5fcd3b937e
Update CladeGrabbing.py 2025-08-18 10:56:17 -04:00
Adri K. Grow
20d3926f10
Update CladeGrabbing.py
Adding new optional arguments to support two-step clade filtering within one run of the script.
2025-08-18 10:54:25 -04:00
GiuliaRibeiro
873862adbc
Update guidance.py
fixed 70gapTrimmed line
2025-08-07 09:53:42 -04:00
Godwin N. Ani
333a7bc063
Update run_eukphylo.sh 2025-07-29 17:43:37 -04:00
Godwin N. Ani
b582321c3a
Update guidance.py 2025-07-17 14:47:39 -04:00
Godwin N. Ani
7a2320284a
Update utils.py 2025-07-17 14:46:52 -04:00
Godwin N. Ani
f53b6926c5
Add files via upload 2025-07-17 11:42:28 -04:00
Godwin N. Ani
399811662c
Delete PTL2/Black_and_Grey_list_GuidanceSisSubsisters_RemovedSeq_Conserved_PTLms_June2024_ML (1).txt 2025-07-17 11:42:19 -04:00
Godwin N. Ani
a05f94c9fa
Add files via upload 2025-07-17 11:41:09 -04:00
Godwin N. Ani
343496b598
Delete PTL2/Black_and_Grey_list_GuidanceSisSubsisters_RemovedSeq_Conserved_PTLms_June2024_ML.txt 2025-07-17 11:40:56 -04:00
Godwin N. Ani
3f9ef410a7
Rename Black_and_Grey_list_GuidanceSisSubsisters_RemovedSeq_Conserved_PTLms_June2024_ML (1).txt to Black_and_Grey_list_GuidanceSisSubsisters_RemovedSeq_Conserved_PTLms_June2024_ML.txt 2025-07-17 11:31:34 -04:00
Godwin N. Ani
fb5353fe96
Blacklist_and_greylist 2025-07-17 11:31:09 -04:00
Godwin N. Ani
4e63a165b7
Update contamination.py 2025-07-09 13:59:38 -04:00
Godwin N. Ani
99647c87bc
Update contamination.py 2025-07-09 13:59:28 -04:00
Adri K. Grow
586a9e0c63
Update CladeGrabbing.py 2025-07-08 07:54:58 -04:00
Katzlab
aec7f6caf9
Update ColorByClade.py
Changed to root on bacteria (Ba) and not prokaryotes (Ba, Za).... did not test
2025-07-01 16:18:54 -04:00
Godwin N. Ani
5486c4289b
Add files via upload 2025-07-01 11:11:21 -04:00
Adri K. Grow
c079b58d87
Update wrapper_submit.sh 2025-06-27 13:33:22 -04:00
Adri K. Grow
d16560f747
Update wrapper_submit.sh 2025-06-27 13:29:37 -04:00
Adri K. Grow
0b1feed651
Update ProcessAndRenameAssembledData.py 2025-06-27 12:39:05 -04:00
Adri K. Grow
d33ac6c2c3
Update Trim_Reads.py 2025-06-26 15:55:15 -04:00
Katzlab
ef6b633de7
Update guidance.py
tidying comments
2025-06-18 08:43:19 -04:00
Katzlab
c891188eec
Update guidance.py
updated comments only to clarify versions
2025-06-18 08:37:19 -04:00
Adri K. Grow
c767bd9a0a
Update Trim_Reads.py 2025-06-16 11:25:08 -04:00
Adri K. Grow
c84f3387ff
Update Trim_Reads.py 2025-06-13 12:41:35 -04:00
Adri K. Grow
3e4f4987a2
Update Trim_Reads.py 2025-06-13 12:41:17 -04:00
Katzlab
e7facbf6e7
Update PlotComps.r
Fixed typo by changing

geom_line(data = enc_null, aes(GC3, ENc))

to 

geom_line(data = enc_null, aes(GC3S, ENc))
2025-06-12 04:27:20 -04:00
Katzlab
3fe4ee3ae5
Update CUB.py
Updates to notes only, including clarifying that we use GC3 and GC3S interchangeably .. though GC3S probably most correct
2025-06-12 04:25:47 -04:00
Adri K. Grow
d81926f179
Update wrapper_submit.sh 2025-06-12 00:44:27 -04:00
Adri K. Grow
6cb8a7b98a
Update wrapper_submit.sh 2025-06-11 23:46:58 -04:00
GiuliaRibeiro
fdf525d433
Add files via upload 2025-06-11 10:17:50 -04:00
Katzlab
8ccde9800a
Update wrapper_submit.sh
fixed typo to add backslash at end here
2025-06-02 16:36:02 -04:00
Adri K. Grow
d32fbcc4e8
Update guidance.py
Updated guidance command for local HPC users
2025-06-02 11:23:34 -04:00
GiuliaRibeiro
9e4d605bff
Update trees.py 2025-04-18 13:01:33 -04:00
Auden Cote-L'Heureux
d01086e017
Update headers 2025-04-18 09:09:24 -04:00
Auden Cote-L'Heureux
e0930c5748
Update header for new guidance version 2025-04-18 09:09:03 -04:00
Auden Cote-L'Heureux
cc7a66b179
Update README.md to make links more clear 2025-04-18 08:57:06 -04:00
Auden Cote-L'Heureux
16a96f137e
Update guidance_path param 2025-04-18 08:48:42 -04:00
Auden Cote-L'Heureux
fa1f50e6f0
Update guidance_path param 2025-04-18 08:45:40 -04:00
Auden Cote-L'Heureux
9239110401
Updating guidance_path param 2025-04-18 08:45:23 -04:00
Auden Cote-L'Heureux
dc813c351a
Update utils.py 2025-04-18 08:44:21 -04:00
Auden Cote-L'Heureux
e452ae0117
Making the --concat_target_taxa argument optional 2025-04-15 16:23:19 -04:00
Godwin N. Ani
2651fe7b89
Update utils.py 2025-04-07 14:56:38 -04:00
Godwin N. Ani
c92f490837
Update contamination.py 2025-04-07 12:36:17 -04:00
Godwin N. Ani
7074a1e71a
Update contamination.py 2025-04-07 12:24:56 -04:00
Godwin N. Ani
95b26b1880
Update contamination.py 2025-04-07 12:20:56 -04:00
Godwin N. Ani
fba84adb17
Update contamination.py 2025-04-07 11:54:36 -04:00
Godwin N. Ani
8c7a1a2eed
Update contamination.py 2025-04-07 11:29:15 -04:00
Godwin N. Ani
e8aa277e44
Update contamination.py 2025-04-07 11:20:34 -04:00
Godwin N. Ani
998ce5bd9c
Update contamination.py 2025-04-07 11:06:32 -04:00
Adri K. Grow
4da74765ee
Add files via upload 2025-04-06 23:36:01 -04:00
Adri K. Grow
6a9ae23e8c
Delete Utilities/for_fastas/Cluster.py 2025-04-06 23:35:47 -04:00
Godwin N. Ani
0cfea8840c
Update contamination.py 2025-04-04 15:28:08 -04:00
Auden Cote-L'Heureux
d161957d86
Taking out 'both' option of contamination loop 2025-03-28 15:40:58 -04:00
Godwin N. Ani
4e52430af7
Update guidance.py 2025-03-25 12:35:43 -04:00
Godwin N. Ani
fdea233780
Update guidance.py 2025-03-25 12:16:02 -04:00
Godwin N. Ani
d7577022c4
Update CladeSizes.py 2025-03-24 15:01:48 -04:00
Godwin N. Ani
cf0948023d
Update guidance.py 2025-03-24 10:17:36 -04:00
Godwin N. Ani
04c41a6f44
Update guidance.py 2025-03-24 10:16:53 -04:00
Godwin N. Ani
c39df0e39d
Update utils.py 2025-03-24 10:11:35 -04:00
Godwin N. Ani
9b6b28ac03
Update Cluster.py 2025-03-21 13:48:23 -04:00
Godwin N. Ani
31cb84fa28
Update Cluster.py 2025-03-21 13:45:49 -04:00
Godwin N. Ani
a2ec0edb37
Update Cluster.py 2025-03-21 13:43:08 -04:00
Godwin N. Ani
3d8cd2e221
Update Cluster.py 2025-03-21 13:42:57 -04:00
Godwin Ani
e18b3dfe0c
Update Cluster.py 2025-03-21 13:22:21 -04:00
Auden Cote-L'Heureux
8487f1d836
Updating headers in 6_FilterPartials.py 2025-03-19 09:20:37 -04:00
Adri K. Grow
d1b9a64e60
Update run_eukphylo.sh 2025-03-13 17:23:19 -04:00
Adri K. Grow
6ff435291c
Update wrapper_submit.sh 2025-02-18 16:29:05 -05:00
Adri K. Grow
030401e9b0
Update guidance.py 2025-02-14 13:57:57 -05:00
Adri K. Grow
e3d215c7e9
Update wrapper_submit.sh 2025-02-13 18:22:03 -05:00
Adri K. Grow
feab503fb6
Update run_eukphylo.sh 2025-02-13 18:16:29 -05:00
Adri K. Grow
d4f27a3032
Update run_eukphylo.sh 2025-02-13 18:01:48 -05:00
Adri K. Grow
1aebef4725
Delete Utilities/For_Assemblies/ProcessAndRenamePlateTranscripts.py 2025-02-13 16:30:03 -05:00
Adri K. Grow
dfd826ee94
Add files via upload 2025-02-13 16:29:13 -05:00
Godwin Ani
45de3036c9
Update ShortenTips.py 2025-02-04 18:59:31 -05:00
Godwin Ani
7e9b90c79b
Shorten tree tip names 2025-02-04 18:58:53 -05:00
Auden Cote-L'Heureux
5284a71ce8
Updating headers and example commands in wrapper_submit.sh 2025-01-22 12:56:11 -05:00
Auden Cote-L'Heureux
4f033e8ab2
Updating header and example runs in wrapper_submit.sh 2025-01-22 12:51:10 -05:00
Auden Cote-L'Heureux
a559f61567
Generalizing and adding examples to run_eukphylo.sh 2025-01-22 12:28:04 -05:00
Auden Cote-L'Heureux
fc190415ee
Updating headers in utils.py 2025-01-19 11:09:21 -05:00
Auden Cote-L'Heureux
743d650b74
updating header in trees.py 2025-01-19 11:08:08 -05:00
Auden Cote-L'Heureux
676a11a287
updating header in preguidance.py 2025-01-19 11:07:49 -05:00
Auden Cote-L'Heureux
0df7470b92
updating header in guidance.py 2025-01-19 11:07:25 -05:00
Auden Cote-L'Heureux
3dc60dcd2e
updating header in concatenate.py 2025-01-19 11:06:23 -05:00
Auden Cote-L'Heureux
8d48e65b7f
updating header in wrapper.py 2025-01-19 11:05:28 -05:00
Auden Cote-L'Heureux
0ac714a63b
updating header in CheckSetup.py 2025-01-19 11:04:59 -05:00
Auden Cote-L'Heureux
7348c7445f
updating header in CUB.py 2025-01-19 11:04:41 -05:00
Auden Cote-L'Heureux
3ac37547b7
updating header in 7b_SummaryStats.py 2025-01-19 11:04:02 -05:00
Auden Cote-L'Heureux
de566bf546
updating header in 7a_FinalizeName.py 2025-01-19 11:03:31 -05:00
Auden Cote-L'Heureux
db22976d3c
Updating header in 6_FilterPartials.py 2025-01-19 11:02:43 -05:00
Auden Cote-L'Heureux
a9a151eeb3
updating header in 5_GCodeTranslate.py 2025-01-19 11:02:24 -05:00
Auden Cote-L'Heureux
41223ec75c
updating header in 4_InFrameStopCodonEstimator.py 2025-01-19 11:00:24 -05:00
Auden Cote-L'Heureux
97da58aaf0
updating header in 3_AssignOGs.py 2025-01-19 11:00:00 -05:00
Auden Cote-L'Heureux
ee7dce1af7
updating header in 2b_Identify_Proks.py 2025-01-19 10:59:34 -05:00
Auden Cote-L'Heureux
b61eb8be1b
Updating header in 2a_Identify_rRNA.py 2025-01-19 10:59:10 -05:00
Auden Cote-L'Heureux
ea96ad0fe4
Updating header in 1b_CrossPlateContamination.py 2025-01-19 10:58:09 -05:00
Auden Cote-L'Heureux
1d7749b3a6
Updating header in 1a_TranscriptLengthFilter.py 2025-01-19 10:57:44 -05:00
Auden Cote-L'Heureux
375bfef45e
Updating header wrapper.py 2025-01-19 10:55:37 -05:00
Auden Cote-L'Heureux
f5bb221378
Updating header in CheckSetup.py 2025-01-19 10:54:49 -05:00
Katzlab
31d0a8ddb2
Update CUB.py
headers
2025-01-19 10:54:01 -05:00
Katzlab
3491166695
Update 5b_SummaryStats.py
updated headers
2025-01-19 10:51:39 -05:00
Katzlab
cd226beb9c
Update 4_CountOGsDiamond.py 2025-01-19 10:49:41 -05:00
Auden Cote-L'Heureux
10a8c2f78a
Notes in run_eukphylo.sh, further updates needed 2025-01-19 10:27:34 -05:00
Auden Cote-L'Heureux
2173eda5a0
Update run_eukphylo.sh 2025-01-17 10:34:37 -05:00
Auden Cote-L'Heureux
60b0f9a9b4
Update contamination.py 2025-01-17 10:16:55 -05:00
Auden Cote-L'Heureux
bd72ed3a43
Update and rename phylotol.py to eukphylo.py 2025-01-17 10:16:27 -05:00
Auden Cote-L'Heureux
0fbd2ae863
Rename run_phylotol.sh to run_eukphylo.sh 2025-01-17 10:16:01 -05:00
Auden Cote-L'Heureux
19943a13fd
Delete PTL1_gene_families 2025-01-17 10:03:53 -05:00
Auden Cote-L'Heureux
04fe7029d2
Changing to use EukPhylo 2025-01-17 09:50:54 -05:00
Adri K. Grow
fe7ef7b71a
Update and rename ProcessAndRenamePlateTranscripts_AKG.py to ProcessAndRenamePlateTranscripts.py 2025-01-15 11:25:16 -05:00
Adri K. Grow
2ad02897c8
Add files via upload 2025-01-15 11:22:39 -05:00
Auden Cote-L'Heureux
eb9e81f4c0
adding iqtree-fast 2025-01-12 12:35:53 -05:00
Auden Cote-L'Heureux
4f5bce5963
Update utils.py 2025-01-12 12:35:05 -05:00
Auden Cote-L'Heureux
d595a51b4f
adding 'allow_large_files' flag 2025-01-09 12:20:59 -05:00
Auden Cote-L'Heureux
31a9295485
Adding flag for large files to guidance.py 2025-01-09 12:19:51 -05:00
Auden Cote-L'Heureux
733851b957
Adding trimal cutoff as parameter 2025-01-09 12:11:54 -05:00
Auden Cote-L'Heureux
c0dfdb6248
Adding trimal cutoff as parameter 2025-01-09 12:11:10 -05:00
Auden Cote-L'Heureux
baf5fd037d
Adding TrimAl cutoff as parameter 2025-01-09 12:09:36 -05:00
Auden Cote-L'Heureux
1e543b3989
Adding TPM calculating script (Salmon) 2025-01-09 12:06:10 -05:00
Auden Cote-L'Heureux
a8a2a5ff2d
Moving CountTaxonOccurence.py 2025-01-08 10:38:23 -05:00
Adri K. Grow
1c309f8e10
Update ReadMapping.py 2024-12-06 12:20:50 -05:00
Adri K. Grow
61f808343f
Update run_phylotol.sh 2024-11-26 13:04:53 -05:00
Godwin Ani
22eb559650
Update run_phylotol.sh 2024-11-12 09:01:44 -05:00
Adri K. Grow
dcf4079b86
Update wrapper_submit.sh
Updated modules for current grid use.
2024-11-11 21:19:16 -05:00
Godwin Ani
f2701e7f65
Update run_phylotol.sh 2024-11-11 17:01:32 -05:00
Adri K. Grow
a0b45ed2c8
Update CladeSizes.py
Modified to work with clade grabbed input file names.
2024-11-11 08:59:47 -05:00
Auden Cote-L'Heureux
fb6e7aa6e7
Changing CladeGrabbing.py to use less than or equal to for number of contaminants 2024-11-01 10:34:57 -04:00
Auden Cote-L'Heureux
d249add545
Add files via upload 2024-10-17 16:45:55 -04:00
MCLeleu
d57fdad826
Delete Am_tu_He24_assembledTranscripts.fasta 2024-10-15 11:46:54 -04:00
MCLeleu
f18717ac6b
Add files via upload 2024-10-15 11:46:37 -04:00
MCLeleu
da6ac878d3
Add files via upload 2024-10-15 11:46:12 -04:00
MCLeleu
eb81b5ebfa
Delete PTL1/Transcriptomes/TestData/Sr_rh_Amon_assembledTranscripts.fasta 2024-10-15 11:45:51 -04:00
MCLeleu
679f1a10c8
Delete PTL1/Transcriptomes/TestData/Sr_rh_Aa01_assembledTranscripts.fasta 2024-10-15 11:45:32 -04:00
MCLeleu
85be0f31ca
Delete PTL1/Transcriptomes/TestData/Sr_rh_Emac_assembledTranscripts.fasta 2024-10-15 11:45:21 -04:00
MCLeleu
4472f44e36
Add files via upload 2024-10-15 11:43:54 -04:00
MCLeleu
e862010cfc
Add files via upload 2024-10-15 11:43:13 -04:00
Katzlab
d7f02a243c
Fixing concat_target_taxa file read error in concatenation 2024-08-26 15:56:42 -04:00
Katzlab
4b8a3fbe64
Delete PTL2/Scripts/utils.py 2024-08-26 15:56:15 -04:00
Katzlab
c4aab9eaef
Delete PTL2/Scripts/concatenate.py 2024-08-26 15:56:09 -04:00
Katzlab
7904ec8b35
Fixing concat taxon list read error 2024-08-22 11:59:58 -04:00
Katzlab
41cc1354b3
Fixing taxon list read error in concatenate.py 2024-08-22 11:59:38 -04:00
51 changed files with 932604 additions and 500 deletions

View File

@ -1,5 +1,5 @@
# Last updated Sept 19th 2023
# Author: Xyrus Maurer-Alcala
# Author: Xyrus Maurer-Alcala and Auden Cote-L'Heureux
# This script classifies translated CDS into gene families by
# similarity-searching using Diamond against a reference database of

View File

@ -2,12 +2,12 @@
# Author: Auden Cote-L'Heureux
# This script produces both taxon- and sequence-level statistics to describe the ReadyToGo files
# output by PhyloToL Part 1, as well as some OG-level information from the Hook (OG reference)
# output by EukPhylo Part 1, as well as some OG-level information from the Hook (OG reference)
# database. It relies on the utility script CUB.py to calculate composition statistics (GC content,
# Effective Number of Codons, etc.). Both sequence level and taxon-level stats are summarized in tab-separated
# outputs written to the Output folder. This script requires that the OG reference database is available as an
# amino acid fasta file in the Databases/db_OG folder with the same file name as the .dmnd file used in script 4.
# This script is intended to be run as part of the PhyloToL 6 Part 1 pipeline using the script wrapper.py.
# This script is intended to be run as part of the EukPhylo Part 1 pipeline using the script wrapper.py.
import os, sys
import argparse
@ -30,7 +30,7 @@ def get_args():
description = "Updated March 31th, 2023 by Auden Cote-L'Heureux"
)
parser.add_argument('-i', '--input', type = str, required = True, help = 'Input path to the "Output" folder produced by PhyloToL Part 1. This folder should contain both the "ReadyToGO" and "Intermediate" folders.')
parser.add_argument('-i', '--input', type = str, required = True, help = 'Input path to the "Output" folder produced by EukPhylo Part 1. This folder should contain both the "ReadyToGO" and "Intermediate" folders.')
parser.add_argument('-d', '--databases', type = str, default = '../Databases', help = 'Path to databases folder')
parser.add_argument('-r', '--r2g_jf', action = 'store_true', help = 'Create ReadyToGo files filtered to only include sequences between the 25th and 75th percentile of silent-site GC content. Please be aware that these are not necessarily the correct or non-contaminant sequences; examine the GC3xENc plots carefully before using these data.')

View File

@ -1,12 +1,15 @@
# Last updated Sept 2023
# Author: Xyrus Maurer-Alcalá
# Author: Xyrus Maurer-Alcalá and Auden Cote-L'Heureux
# The aim of this script is to generate lots of codon usage statistics to aid in
# identifying useful characteristics for de novo ORF calling. It is intended to be
# stored in the 'Scripts' folder for the PhyloToL Part 1 pipeline scripts, and is
# stored in the 'Scripts' folder for the EukPhylo Part 1 pipeline scripts, and is
# called by Script 5b to calculate composition statistics for Part 1 output files.
# It should not be run separately.
# Users should consider including a start/stop constraint, because the default includes all
# sequences, which can capture pseudogenes.
# Dependencies:
# Python3, numpy, BioPython

View File

@ -1,7 +1,7 @@
# Last updated Nov 2023
# Author: Auden Cote-L'Heureux
# This script is run as the first step of the PhyloToL 6 Part 1 GENOMES pipeline,
# This script is run as the first step of the EukPhylo Part 1 GENOMES pipeline,
# before any sequence data are actually processed. It checks to ensure that the input
# CDS files and databases are properly located and formatted.

View File

@ -1,7 +1,7 @@
# Last updated Sept 2023
# Author: Auden Cote-L'Heureux
# This script is a WRAPPER for the PhyloToL Part 1 GENOMES pipeline. Users should
# This script is a WRAPPER for the EukPhylo Part 1 GENOMES pipeline. Users should
# use this script to run the pipeline, rather than running any of the sub-scripts (number 1a through 5b)
# independently. To run an individual step in the pipeline, use --script X where X is the number (1 through 5).
# To run multiple sets (usually all of them), use --first script 1 --last_script 5, or whichever first
@ -19,8 +19,8 @@ import CheckSetup
def get_args():
parser = argparse.ArgumentParser(
prog = 'PhyloToL v6.0 Part 1 for GenBank Genomes',
description = "Updated January 19th, 2023 by Auden Cote-L'Heureux. Link to GitHub: https://github.com/AudenCote/PhyloToL_v6.0"
prog = 'EukPhylo Part 1 for GenBank Genomes',
description = "Updated January 19th, 2023 by Auden Cote-L'Heureux. Link to GitHub: https://github.com/Katzlab/EukPhylo"
)
parser.add_argument('-s', '--script', default = -1, type = int, choices = { 1, 2, 3, 4, 5 }, help = 'Script to run if you are only running one script')

View File

@ -1,24 +1,45 @@
#!/bin/bash
#
#SBATCH --job-name=PTL1_genome
#SBATCH --output=PTL1.%j.out # Stdout (%j expands to jobId)
## Last updated Jan 2025 by Auden Cote-L'Heureux
## This script is intended to be used to process genomic CDS with EukPhylo part 1 on an HPC that uses the Slurm workload manager.
## The first part of the script contains Slurm-specific parameters that should be adjusted by users to fit their resource allocation
## needs and restrictions, followed by some example commands taken from the GitHub Wiki, more detail for which can be found
## here: https://github.com/Katzlab/EukPhylo/wiki/EukPhylo-Part-1:-GF-assignment
## Slurm specific code
#SBATCH --job-name=EukPhylo
#SBATCH --output=EukPhylo.%j.out # Stdout (%j expands to jobId)
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --ntasks-per-node=64 ##change to number of srun when running multiple instances
#SBATCH --ntasks-per-node=64 # #change to double number of srun when running multiple instances
#SBATCH --mem=160G
#SBATCH --mail-type=ALL
#SBATCH --mail-user=YOUREMAIL@smith.edu
#SBATCH --mail-user=email@email.edu
module purge #Cleans up any loaded modules
module use /gridapps/modules/all #make sure module locations is loaded
#Unity server
module use /gridapps/modules/all
module load conda/latest
module load uri/main
module load diamond/2.1.7
module load VSEARCH/2.22.1-GCC-11.3.0
conda activate /work/pi_lkatz_smith_edu/Conda_PTL6p1
#Grid server
module use /gridapps/modules/all
module load slurm
module load tqdm
module load Biopython/1.75-foss-2019b-Python-3.7.4
module load BLAST+/2.9.0-gompi-2019b
module load DIAMOND/0.9.30-GCC-8.3.0
module load tqdm/4.66.1-GCCcore-12.3.0
module load Biopython/1.79-gfbf-2023a
module load BLAST+/2.14.1-gompi-2023a
module load DIAMOND/2.1.8-GCC-12.3.0
module load VSEARCH/2.25.0-GCC-12.3.0
path='/beegfs/fast/katzlab/PTL1/Genomes/'
parent='/Your/Home/Folder/'
## Example run command
# Start at script 1 and go through script 5 (the final script) using the Universal genetic code
srun -D ${parent}Scripts python3 ${parent}Scripts/wrapper.py -1 1 -2 5 --cds ${parent}Input -o ${parent}Output --genetic_code Universal --databases ${parent}Databases > log.out
srun -D ${path}Scripts python3 ${path}Scripts/wrapper.py -1 1 -2 5 --cds ${path}PTL1GenomesBatches/PTL1GenomesBatch2 -o ${path}Output/PTL1Genomes_OutputBatch2 --genetic_code Universal --databases ${path}Databases &
wait

View File

@ -3,7 +3,7 @@
# This script is intended to remove transcripts below or above a given
# size range from a transcriptome assembly. It should be run as part
# of Part 1 of the PhyloToL version 6 pipeline, using the script wrapper.py.
# of Part 1 of the EukPhylo pipeline, using the script wrapper.py.
# Prior to running this script, ensure that you have assembled your
# transcriptome and renamed the assembled transcripts in the format of

View File

@ -5,7 +5,7 @@
# by removing sequences with low coverage relative to other
# very similar sequences from samples sequenced on the same
# plate. This script is optional, but to be run as part of the
# PhyloToL version 6 Part 1 pipeline using the script wrapper.py
# EukPhylo Part 1 pipeline using the script wrapper.py
# The specifics of parameters are described below and include removing seqs 1/10
# the coverage of the most highly expressed, and keeping all seqs with coverage >50.

View File

@ -5,12 +5,12 @@
# all length-filtered assembled transcripts against a reference database. It then
# writes these sequences into a separate file, removing them from the remainder
# of the sequences that will go forwards for gene family assignment. This script
# should be in Part 1 of the PhyloToL version 6 pipeline using the script wrapper.py.
# should be in Part 1 of the EukPhylo pipeline using the script wrapper.py.
# You must run Script 1a before this step. Optionally, you may also have run Script 1b.
# Before running this script, ensure that you have a properly formatted rRNA reference
# BLAST database in the Databases/db_BvsE/SSULSUdb folder; it is relatively narrow in scope
# and could be easily replaced
# and could be easily replaced or updated to better capture a user's target taxa
#Dependencies
import argparse, os, sys

View File

@ -9,7 +9,7 @@
# to a prokaryotic sequence, it is labeled with an "E"; if it's best hit to a prokaryotic
# sequence has an e-value >1000 times that of its best hit to a eukaryotic sequence, it is
# labeled with a "P". Anything else gets a "U". This script should be run as part of the
# PhyloToL version 6 Part 1 pipeline using the script wrapper.py.
# EukPhylo Part 1 pipeline using the script wrapper.py.
# Prior to running this script, ensure that you have run scripts 1a (and optionally
# script 1b) and 2a, and that your prokaryote and reference databases (or the default

View File

@ -6,7 +6,7 @@
# gene families. We provide the Hook database on the GitHub, but this
# may be replaced with a custom reference database by REPLACING the
# .dmnd and .fasta files in the Databases/db_OG folder. This script
# is intended to be run as part of the PhyloToL 6 Part 1 pipeline using
# is intended to be run as part of the EukPhylo Part 1 pipeline using
# the script wrapper.py.

View File

@ -7,7 +7,7 @@
# frequencies in all reading frames; it then reports these frequencies in a spreadsheet
# (gcodes_output.tsv) for the user to inspect in deciding which genetic codes to use,
# if unsure. This step can be skipped if genetic codes were input from the beginning. This
# script should be run through the PhyloToL 6 Part 1 pipeline using the script wrapper.py.
# script should be run through the EukPhylo Part 1 pipeline using the script wrapper.py.
#----------------------------------------- NOTES -----------------------------------------#
#

View File

@ -4,7 +4,7 @@
# This script is intended to translate nucleotide sequences. It does this using
# the gcode_output.tsv file output by script 4 and containing in-frame stop codon
# frequencies. The user can use this stop codon information to fill in the last
# column in this file with the genetic code for each taxon. If the user input a
# column in this file with the genetic code for each taxon, as outlined in the Wiki on Github. If the user input a
# genetic code or list of genetic codes to script 1, then the gcode_output.tsv will
# be filled automatically. Sequences are translated using the Diamond BLASTp results
# from OG assignment as a starting point for determining coding sequence boundaries.
@ -14,7 +14,7 @@
# of transcriptomic data, poor genetic code assignment or low-quality/partial data can
# interfere with this process).
# This script is intended to be run using the wrapper.py as part of the PhyloToL 6 Part 1
# This script is intended to be run using the wrapper.py as part of the EukPhylo Part 1
# pipeline. It requires that the setup of the 'Output' folder be that as output by script 4
# of this pipeline.

View File

@ -5,8 +5,9 @@
# First, all sequences shorter than 33% or longer than 150% the average length of sequences
# from the same OG in the Hook database are removed. Then, for each transcriptomic sample,
# all sequences within an OG are compared at the nucleotide level to the sequence with the
# highest “score” (defined as k-mer coverage multiplied by length). The script should be run
# as part of the PhyloToL 6 Part 1 pipeline using the script wrapper.py. It requires that the
# highest “score” (defined as k-mer coverage multiplied by length) using BLAST, and sequences that
# are 98% identical to the master sequence are removed. The script should be run
# as part of the EukPhylo Part 1 pipeline using the script wrapper.py. It requires that the
# structure of the 'Output' folder be as output by script 5, and that the Databases/db_OG folder
# contains a .fasta file containing all amino acid sequences in the OG reference database (Hook)
# with the same file name (until the extension) as the .dmnd file for the reference database used

View File

@ -3,8 +3,8 @@
# This script does not process sequence data in any way. It only renames the outputs of
# script 6 to the 10-digit taxon code which prefixes the file names, and then moves output
# 'ReadyToGo' files into a separate folder. It is intended to be run as part of the PhyloToL
# 6 Part 1 pipeline using the script wrapper.py.
# 'ReadyToGo' files into a separate folder. It is intended to be run as part of the EukPhylo
# Part 1 pipeline using the script wrapper.py.
import argparse, os, sys
from argparse import RawTextHelpFormatter,SUPPRESS

View File

@ -2,13 +2,13 @@
# Author: Auden Cote-L'Heureux
# This script produces both taxon- and sequence-level statistics to describe the ReadyToGo files
# output by PhyloToL Part 1, as well as some OG-level information from the Hook (OG reference)
# output by EukPhylo Part 1, as well as some OG-level information from the Hook (OG reference)
# database and the original input assembled transcripts. It relies on the utility script CUB.py
# to calculate composition statistics (GC content, Effective Number of Codons, etc.). Both sequence
# level and taxon-level stats are summarized in tab-separated outputs written to the Output folder.
# This script requires that the OG reference database is available as an amino acid fasta file
# in the Databases/db_OG folder with the same file name as the .dmnd file used in script 3. This script
# is intended to be run as part of the PhyloToL 6 Part 1 pipeline using the script wrapper.py.
# is intended to be run as part of the EukPhylo Part 1 pipeline using the script wrapper.py.
import os, sys
import argparse
@ -31,7 +31,7 @@ def get_args():
description = "Updated March 31th, 2023 by Auden Cote-L'Heureux"
)
parser.add_argument('-i', '--input', type = str, required = True, help = 'Input path to the "Output" folder produced by PhyloToL Part 1. This folder should contain both the "ReadyToGO" and "Intermediate" folders.')
parser.add_argument('-i', '--input', type = str, required = True, help = 'Input path to the "Output" folder produced by EukPhylo Part 1. This folder should contain both the "ReadyToGO" and "Intermediate" folders.')
parser.add_argument('-d', '--databases', type = str, default = '../Databases', help = 'Path to databases folder')
parser.add_argument('-r', '--r2g_jf', action = 'store_true', help = 'Create ReadyToGo files filtered to only include sequences between the 25th and 75th percentile of silent-site GC content. Please be aware that these are not necessarily the correct or non-contaminant sequences; examine the GC3xENc plots carefully before using these data.')

View File

@ -3,10 +3,12 @@
# The aim of this script is to generate lots of codon usage statistics to aid in
# identifying useful characteristics for de novo ORF calling. It is intended to be
# stored in the 'Scripts' folder for the PhyloToL Part 1 pipeline scripts, and is
# stored in the 'Scripts' folder for the EukPhylo Part 1 pipeline scripts, and is
# called by Script 7b to calculate composition statistics for Part 1 output files.
# It should not be run separately.
# Users should consider including a start/stop constraint, because the default includes all
# sequences, which can capture pseudogenes.
# Dependencies:
# Python3, numpy, BioPython

View File

@ -1,7 +1,7 @@
# Last updated Nov 2023
# Author: Auden Cote-L'Heureux
# This script is run as the first step of the PhyloToL 6 Part 1 TRANSCRIPTOMES pipeline,
# This script is run as the first step of the EukPhylo Part 1 TRANSCRIPTOMES pipeline,
# before any sequence data are actually processed. It checks to ensure that the input
# assembled transcripts files, databases, genetic codes, and conspecific names files (the latter
# used only with cross-plate contamination, script 1b) are properly located and formatted.

View File

@ -1,7 +1,7 @@
# Last updated Sept 2023
# Author: Auden Cote-L'Heureux
# This script is a WRAPPER for the PhyloToL Part 1 TRANSCRIPTOMES pipeline. Users should
# This script is a WRAPPER for the EukPhylo Part 1 TRANSCRIPTOMES pipeline. Users should
# use this script to run the pipeline, rather than running any of the sub-scripts (number 1a through 7b)
# independently. To run an individual step in the pipeline, use --script X where X is the number (1 through 7).
# To run multiple sets (usually all of them), use --first script 1 --last_script 7, or whichever first
@ -21,8 +21,8 @@ import CheckSetup
def get_args():
parser = argparse.ArgumentParser(
prog = 'PhyloToL v6.0 Part 1 for Transcriptomes',
description = "Updated September 29th, 2023 by Auden Cote-L'Heureux. Link to GitHub: https://github.com/AudenCote/PhyloToL_v6.0"
prog = 'EukPhylo Part 1 for Transcriptomes',
description = "Updated September 29th, 2023 by Auden Cote-L'Heureux. Link to GitHub: https://github.com/Katzlab/EukPhylo"
)
parser.add_argument('-s', '--script', default = -1, type = int, choices = { 1, 2, 3, 4, 5, 6, 7 }, help = 'Script to run if you are only running one script')
@ -92,7 +92,7 @@ def script_four(args):
if os.path.exists(args.databases + '/Taxa_with_few_sequences.txt'):
with open(args.databases + '/Taxa_with_few_sequences.txt', 'r') as f:
content = f.read()
print(f'These samples do not run through PTL6p1, perhaps because they has no good hits to the hook. We suggest you remove them and restart.')
print(f'These samples did not run through EukPhylo part1 because they have no good hits to the hook database or the Diamond sequence aligner ran out of memory. We suggest you remove them and restart.')
print(content)
print('Stopping Run.')
os.remove(args.databases + '/Taxa_with_few_sequences.txt')

View File

@ -1,29 +1,57 @@
#!/bin/bash
#
#SBATCH --job-name=PTL1_GBF
#SBATCH --output=PTL1.%j.out # Stdout (%j expands to jobId)
## Last updated Jan 2025 by Auden Cote-L'Heureux
## This script is intended to be used to process genomic CDS with EukPhylo part 1 on an HPC that uses the Slurm workload manager.
## The first part of the script contains Slurm-specific parameters that users should adjust to fit their resource allocation
## needs and restrictions, followed by some example commands taken from the GitHub Wiki, more detail for which can be found
## here: https://github.com/Katzlab/EukPhylo/wiki/EukPhylo-Part-1:-GF-assignment
## SLURM-SPECIFIC SETUP BELOW
############### FOR SMITH GRID HPC ############### (DELETE section if not applicable):
## Slurm specific code
#SBATCH --job-name=EukPhylo
#SBATCH --output=EukPhylo.%j.out # Stdout (%j expands to jobId)
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --ntasks-per-node=64 ##change to number of srun when running multiple instances
#SBATCH --ntasks-per-node=1 ##change to number of srun when running multiple instances
#SBATCH --mem=160G
#SBATCH --mail-type=ALL
#SBATCH --mail-user=YOUREMAIL@smith.edu
#SBATCH --mail-user=email@xxx.edu ##add your email address for job updates
module purge #Cleans up any loaded modules
module use /gridapps/modules/all #make sure module locations is loaded
module use /gridapps/modules/all
module load slurm
module load tqdm
module load Biopython/1.75-foss-2019b-Python-3.7.4
module load BLAST+/2.9.0-gompi-2019b
module load DIAMOND/0.9.30-GCC-8.3.0
module load VSEARCH/2.21.1-GCC-10.3.0
module load tqdm/4.66.1-GCCcore-12.3.0
module load Biopython/1.79-gfbf-2023a
module load BLAST+/2.14.1-gompi-2023a
module load DIAMOND/2.1.8-GCC-12.3.0
module load VSEARCH/2.25.0-GCC-12.3.0
parent='/beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams/'
############### FOR UMASS UNITY HPC ############### (DELETE section if not applicable):
## Slurm specific code
#SBATCH --job-name=EukPhylo
#SBATCH --output=EukPhylo.%j.out # Stdout (%j expands to jobId)
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --ntasks-per-node=64
#SBATCH --mem=40G
#SBATCH --mail-type=ALL
#SBATCH --mail-user=email@xxx.edu
module purge #Cleans up any loaded modules
module use /gridapps/modules/all
module load conda/latest
module load uri/main
module load diamond/2.1.7
module load VSEARCH/2.22.1-GCC-11.3.0
conda activate /work/pi_lkatz_smith_edu/Conda_PTL6p1
srun -D ${parent}Scripts python3 ${parent}Scripts/wrapper.py -1 1 -2 7 -x --assembled_transcripts ${parent}AssembledTranscripts -o ${parent} -n ${parent}Conspecific.txt --genetic_code Universal &
#srun -D ${parent}HQ/Scripts python3 ${parent}HQ/Scripts/wrapper.py -1 2 -2 7 -x --assembled_transcripts ${parent}Plate4/Assembled_Transcripts -o ${parent}Plate4 -n ${parent}Plate4/Conspecific.txt --genetic_code ${parent}Plate4/Gcodes.txt &
#srun -D ${parent}HQ/Scripts python3 ${parent}HQ/Scripts/wrapper.py -1 2 -2 7 -x --assembled_transcripts ${parent}Plate7/Assembled_Transcripts -o ${parent}Plate7 -n ${parent}Plate7/Conspecific.txt --genetic_code ${parent}Plate7/Gcodes.txt &
#srun -D ${parent}HQ/Scripts python3 ${parent}HQ/Scripts/wrapper.py -1 1 -2 7 -x --assembled_transcripts ${parent}Plate11/Assembled_Transcripts -o ${parent}Plate11 -n ${parent}Plate11/Conspecific.txt --genetic_code ${parent}Plate11/Gcodes.txt &
#srun -D ${parent}HQ/Scripts python3 ${parent}HQ/Scripts/wrapper.py -1 2 -2 7 -x --assembled_transcripts ${parent}Plate18/Assembled_Transcripts -o ${parent}Plate18 -n ${parent}Plate18/Conspecific.txt --genetic_code ${parent}Plate18/Gcodes.txt &
wait
## PROVIDE YOUR PARENT PATH
parent='/Your/Home/Folder/'
## EXAMPLE RUN COMMANDS BELOW
# A simple run that goes from script 1 to script 7 (the last script) using the Universal genetic code
srun -D ${parent}Scripts python3 ${parent}Scripts/wrapper.py --first_script 1 --last_script 7 --assembled_transcripts ${parent}AssembledTranscripts -o ${parent}Out --genetic_code ${parent}Gcode.txt --databases ${parent}Databases > log.out
# Including the cross-plate contamination step, using conspecific names
srun -D ${parent} python3 ${parent}Scripts/wrapper.py --first_script 1 --last_script 7 --assembled_transcripts ${parent}AssembledTranscripts --output . --genetic_code ${parent}Gcode.txt --databases ${parent}Databases --xplate_contam --conspecific_names ${parent}Conspecific.txt > log.out

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,89 +0,0 @@
>NODE_2069_length_1109_cov_20.350386_g2025_i0
GTATCAACGCAGAGTACGGGGGATGAAGATCCAGGTAATGTAGATTATCGTAGTATAGGTGGTTTAAATGATCAAATACGTGAAATACGTGAATCAATTGAATTACCATTAACTAATCCAGAATTATTTAAACGTGTTGGTATAAAAGCACCAAAAGGTGTATTATTATATGGACCACCAGGTACTGGTAAAACATTATTAGCACGTTGTATGGCAAATACTATGGATTGTAGATTTTTAAAAGTTGTTGCTAGTGGTATAGTTGATAAATATATTGGTGAATCAGCACGTATTATAAGAGAAATGTTTGGTTATGCAAGAGAAAATGCACCATGTATTATATTTATGGATGAAATTGATGCTATTGGTGGTAAAAGATTTAGTCAAGGTACATCTGCTGATAGAGAAATACAACGTACATTAATGGAATTATTAAATCAAATTGATGGATTTGAAGAATTAGGACGTGTTAAAATTATTATGGCTACAAATAGACCTGATGTATTAGATCCTGCATTATTACGTCCAGGTAGATTAGATAGAAAAATTGAAATACCATTACCAAATGAATCAGCAAGAATTGATATATTAAAAATTCATGCTTCAAAATTAACTAAATCTGAAAATATTGATTATGAAGCTATTGTTAAACTATCAGATGGTTTTAATGGTGCTGATATACGTAATATTTGTACTGAAGCTGGTATGATGGCTATAAGAGCTGATAGAAATTTTGTAAATCAAGAAGATTTTATGAAAGCTGTAAGAAAATTAAAAGATGCTAAAAAATTAGAATCAAAATTAGAATATAAAAAATTGTAAACATTTCTAATTGTGGTAATATAATTAACGTTTTCAAAATTAATTTTCCAATATTATTTAATTCACTTTTGTCTTAATATAAATTGTGGGGTTTTTTTATAAAAACAAAATTATAAATTTTACGCAAAATTTAAACATTTAAAATTAATTAATTTTCATCGTCCTGATCAACAAAATCAATAAGTTTACTTTGTTGTTTAGCTCGTATTTTTTCCATTTCAATTGCTGATGTATTACGTTCTTGTTTTTGTGATGAATCTTCTTGTCTTCTTTTAGCTCTATCATAG
>NODE_2305_length_1021_cov_17.683544_g2261_i0
GGTATCAACGCAGAGTACGGGCATTTGAAGAAGAACAAAAACATTATTTAGCTAGTTTAGAAAATAATGGTCTAACAACACAGTTTAATCCTTTAATTTATTGCAAATTTCCATCTAAAGATATTACAAAAAATTTACAAGATCCTAGAAAATTATTTGTTGAATCAATGAAAAATTATTTGAATAATAATAATAAATTAATAGGTGCATTAGCTGGTACTATTGGCCAAAAATATATACCTGTATTTAATTCATATTCAAAAAAATTTAATATACAATTTTCACTTCCAAAAATGTAGTTAACATATACAATCTAATTTTGTAAGGTTTATAAAAGTTTAAATAATTTAATTAATTAAATTTGTTTGTTTTGTTATTTGTGTAATTGTTGTATTAATGCTTTGTTTTTATTAAAATATATTTAAATCACATGAAAGGAAAAATATTAAAATTATGAACAAATTATTTAAGACTTTATTATATAAATTAACGTACATTAAAACACATATATATTAATTTAAATTTAATTTAAATATGATGACCATCCATTAAAGAATCATCATATGTATTATATCTTACAAATGGTGGTATACCTGTTGTATATATATCAGTTGCTAATTCTTCAGAAGTTAACATTCCATCTTTTTCCGCATCACTTGATTCTTTATCCACTAATTTTCTTATTTTTGTTTCCATATTTTTTAATTCTTTCTCATCAAGCCAATTTTGTTCTAATATCATATTTCTTATATATCGTAAAGGATCAAATTGTGTTCTGTATTCTTGAACTTCTTCATTATTTCTATAGGATATACCTGGATCAGACATACTATGACCATGATATCTATAAGTCATAGCTTCCATAACAATAGGACCTTTTCCTGATCGACACCAATCTGCTGCAAAACGAGTAGCCTCACGAACACATAAAACATTCATTCCATCTACTTGAATACCAGGAATATAATCTCCTCTTGTATAAAATTCAGTACTAGCGGATGCTCTTTCAATTGAAGTACCC
>NODE_2310_length_1019_cov_82.463002_g2266_i0
ACAGATTATAGATTAAGACGTAGACTAGTTCAACAAGATAAAAGAAAATATAATGCTCCAAAATGGAGATTAGTTGTAAGAATTACAAATAGAGATGTTATTGCGCAAATAGCATGTGCTAAAGTGCAAGGTGATCATATATTATGTGCTGCATATGCACATGAATTAAAAAGATATGGATTAAAAGTAGGTTTAACAAATTATGCAGCTTGTTATGCTACTGGTTTATTATTAGCAAGACGTTTATTAACAAAATTAGGTTTAGATAAATATTATCCAGGTAAAAAATCAATAGATGGTAAAAGATTTATTAGTAGAGTAACAAATGTAAAATGGCAACCAGATCAAAAAGTTTATAGACCTTTTAAATGTATATTAGATAAAGGTTTAGCACGTGGTACATCAGGTGCAAGAATATTTGGTGTAATGAAAGGTGCTATTGATGGTGGTTTAGATGTACCACACAGTGTTAATCGTTTTCCTGGATTTAAAAAAGGAGAAGACGGAGCAAAAGATGAATACAATGCAGAACGATTATCAGAACGTATTTATGGTCAACATGTTGCACAAAAAATGAGACATTTACAAGAAAATGATCCAGATAAATATAGTAGACATTTTAGTCAATATATTAAAGAAGGTATTGAACCTGATAATTTAGAAGAAATGTATGAAAAAGTTCATCGTGCTATACGTAAAGATCCAAGTGCACCACAAAAAGTTAAAAAAGATTATAATAATATGGATGTTAGAAAAAAACAAGCTAGATGGACACAAGAACAAAAAACTGATCAAAAAGTTCAGAAAATTGCTGATTTAAGATTAAAATTTAGTAAACAAGATAAAAAGAAAAATAATAATATGGATGTTGATGAATAATTTAAATATATGTTTGTTTGCTTGTTAATTGTTGGAAATCTTTTGCAGTATATTCTGATCAATTACCAATCTATTACGCACAAAATAGTCATGGTCCCATATTTCATTGACAATTTATCTATCGACCGACAGAACATGCA
>NODE_2554_length_947_cov_14.820366_g2508_i0
GGTATCAACGCAGAGTACGGGAATGACAAATACAAGTGGTAAATGGGATCAAGTATCAGGCATTTTACGAGATGTAAATGTCGATATGGCAATGGAACAGGCTCAAACAAATTTATTGTACAAAAAATTGTTGGAAGATATGAGAGAACGAACAAAAGAAAGAGAATGGGATGATCCAAATCGTGTAACTATAACATCTGGTGAAAAAAACAATTCAAATAAAAATAATAACGATGATGAAGATTCTGATTCTGAATTTGACGATTTTTTCGATGATCCTGAAATGGATAAATTACATCAACAACGACTTGAACAATTAAAACAACAATACAAAGAAAAAAAAAAAAAATTAGAAAAGGGTCATGGTCATTATGATGAAATGAATGAACGAGATATGTTAAAAATGGCATGTGATACAGATTATGTAGTTGTTAATTTTTATAATGATGAATTTGAAAGATGTAAAATAATTGATAAACATTTTCGTATATTAGCACAAAAACATATTGAAGCACGTTTTGTTCGTGTTAATGTAAAAAAATCACCATTTATAGTATCAAGATGGAAAATTAAAACATTACCAACAATAGGAGTTATTATTAATGGTTTATTTGCTGATAAAATAATAGGATTTGCAGATTTTGGAAATAAAGATGATTTTCCAACTGCTGCACTTGCTAAAAGATTAATAAAAACTGGTGTATTAAAAAATTTACAAACTGGAAAACGTAAATTAAGAAATAAAAGAACAACTGCATTAACAGATATTGAATAAACATATCAACGTATTAACATTGTAATCAAATTTATAATAATGTTAATATTTAACCAATTTTTGTGATCATTTTGTTGTTGTTTTTTTTTAATTTTGAAAAAAAATATTGAAATTTTTTCAAATTGTGATTGTGTAAACTGTTTTTAAATATCGAGATATATCATAACAAAGG
>NODE_2573_length_940_cov_56.809689_g2527_i0
GAGTACGGGCAGAAGAAAACGTAAAGCAAATTGTTTAAAGAAAAAATTAAAATTAAGCATACCAGCACCTGAAGGTTTAAAAAAAGCCTGGGAAAGAGAAAACCGTATTAAAAAATTAAGATCAGAAAGAGCACAAAAAATATTAAAAAATTTACCAAACAAAAGAGATAAACAAGCAAAACGTATTGAGAATTATGAAGCAAAATATTCAAATATAGTACGTAGAAGACAAGAAAATGAGTTAAAAGCACGATTAAATGGAAATTTTTATAAACCAGAAGATTCACGAGTAATGGTAGTAGTAAGAATTAGAGGTATAAATCGTGTATCACCTAAAGCAAGAAAAGTATTACAATTATTTCGTTTATTACAAATTCATAATGCAGTATTTGTTAGAGTAAATAAAGCAACAATATCTATGTTAAAATTAATTCAACCTTATGTAGCTTATGGTTATCCATCAGTAAATAGTATAAGAGCATTAATTTATAAAAGAGGATATGCTAAAATAAGACATAGACCAGGTAGTATATCAAGAATACCAATTATGTCAAATAAATTAATTGAAAAACATTTAGGTAAATATGGTATTGAAACTATTGAAGATATGGTATTTCAAATATATACTGGTGGTAAATTCTTTCGTCAAGCATCTAACTTTTTATGGCCTTTTAAATTAAATTGTCCAAAAGGTGGTTATAGAGGTAGAAAACGAAGACATTTTAACGAAGGTGGTACTTATGGAAATTGGGAAAATAATATTGGTAATTTAGTCAAAAGAATGATTTAAATTATACTTGCGTTTGTTGTTATTAATTCAAATTTTAATTCAGTTTGATTATATTTCGAATTCGCATTGTTTACAATTATTGCGAATCATCGATATACTATTAAATGAAAATTTGTCTTCTAGATTGACAATTCTTGATAAAATTAATTC
>NODE_2620_length_930_cov_17.252042_g2574_i0
AAAAAAAAAAAAATCAATTGAAAAGTTTTACTTTTAAAGTTTCACACAACACGCACATAAAAAACAAAGTTAAAGAGAAGTAATATTAGCAACACCAGGTAATACTTTACCTTCTAATAATTCTAATGAAGCTCCACCACCAGTAGAAACATGACTAACTTTATCTTTTAATCCAGCTTTTTTAATAGCAGATGCACTATCACCACCACCAATAATAGTAACAGTACCATTTTGAGTAGCTAATGCAAATGCATTACATAAAGCAGTAGTACCTTTACTTGTTTTTTCCCATTCAAATACACCAGGTGGTCCATTCCATACAATAGTTTTAGCACTATTTATACTATTTTCAATTAATCTAATGGTTTCAGGTCCAATATCTAATCCCATCCAATTATCAGGTATACCATCTTTAGCAGTACATGTTTTTATTTGACAATTTGGATCAAATTTATCACCAATTACAAAATCAACTGGTAATATTATTTTAACATTATTTTTTTTAGCTTTTTCTACTAAATCAGATACTGTTTTTGCACCTTCTTCATCATATAATGATGTACCAATTGACATATTATTTATTTGTTTTAAAAATGTATATGCCATACCACCTGCTATAACCATTTCATTTACACGATCTAATAATGAACTAATTACTTTTATTTTATCTCTTACTTTTGCACCACCTAATATTGCTAAATATGGTTTTTTTGGTACTGCTAATGCTTGATTAAAATAATTTAATTCTTTTTCTACTAATAAACCACTTACACGTAATGGTACATCAACACCAACCATACTACTATGACCACGATGACATGTACCAAATGCATCATTAACATATAAATCTGATATTGATGTCATTACACTTCTTAATGAAGATATTTCTTCTTTTGATGCCCCGTACTCTGCGTTGATACCCTGTCTCTT
>NODE_2637_length_926_cov_4.390387_g2591_i0
GGTATCAACGCAGAGTACGGGTGCATCCATGGTGTAATAAAAATATATCTATATGGAAAAAATATAATAAATATAATAGTAATTCATTTACACAAATAAATTTTAATCCTTCGGTTCCTCAAAGGGTATTATTATATAATAATTATCATTTAGTTTTATTAATTTTAAATAAAAATTTACAAAAACATAATTTTGATTTGTTTGATATTAATATAAATCATTCAAATAAACGAAGAAAATTAAATAATAATAATATTAGTCAATATAATAATAAAAAACACAACAATACAAACAAAAATAATTATAAAATCAATGAATTAAAAGTAATAAAAAAATATCAAAATATTTTATTATCAAGTTTTATAAATTGTGATGAGTTATTACTAATTGAAATGAAATGGGATAATATTACTCAAAAATTAAAAAATCCAATATATTCAAAAAAATATGGTATTTAATTAATCGACATATTGAAATTTAGTTATTTTTGTATGTGTTTTATTGATTTTGTATTTAATTTGATAATTTTTTAGCTAAATTGATATTAAAATTACAAACAATCTATACTTCTATATAAGATTTATTACTAACATTTCCACTACTAATTAAAGATTTTGGTCTTTTATCACAATAAACACGTCGACAATTATACATTAATCCTTGATCGTATGGATTTACATTTTGTGAATAAGTATTTTTAACATTTTCATTTGTAGTTTGATTTATAATAATTAAATGTAAATGATAAAATGCTAATGAAGATACACTAAAAAATATTAATATACCGAATACAAATAAACCAATAGCAAATGGATTATCTTCTATGCTTGAAAATAATTCATCTGACCAACTACGATGTTTATTATTGTTATCATGATTATTATTAAATGAATTTGCTTTTTCAATTGCTTTTCGATAAATAACCC
>NODE_2714_length_902_cov_47.945718_g2668_i0
AGAGACAGGGTATCAACGCAGAGTACGGGGAATTATTAAAAACAGAAAATTGTGAATTATTATTTACAGATGATGCAATTGATAAAATAGCAGAAATAGCAGTTAGATGTAATGAAAATATTGAAAATATAGGTGCTAGAAGATTAGTTACAGTATTAGAAAAAGTTATGGAAGAAATTAATGTTAATGCTTCAGATGAAACTTCTAATAAATATGTTATTGATGTACCATATGTAGAAAAACAAGTTGAGGATATATTTAAAGGAGATGATTTACAAAAATGGATTCTTTAAATGTAAATTATTCAATAAATTCATTTTTTGATTCACAAGTAATGTACTTCAATTAAACCAGTTAGCTATTCACAACTGTATACGTTAATTTCATGTACTGAATTTCTGCAACTATGAATTATTTAATTAAACAATATCCCTCAATTGAACATTAAATGATTTATTTACAAATTGAACTATATCGACGTCATTCATCAGTATTTTGAGTTGTATTATCTTCCTCAACAACTTCATTGTCTTTCTTAGTGTTGTCTTCATTTACTTCATCATTATTATTACTGATTTGTGTCGTATTATCATTGTTAGTCTCAGTTTCAGAATTGTTGTTATTTTGTTTTGATTGATTTTCATTATCATTTTGAGTTTCTGTTTTATCTTCTTTTTCTTCGTTGTCTTGTTCATCACCACAAAAATATTGGTTCATTTTTTTTAATGTGTATTCTCTCAAAACTAAATTATCTTCCTCTTTATCACCAAATCCACTAGCTGTACCATTAAATTCTAAAATATATTCTTCATCTTTGGTTGTTACTAAAACATCTACTGTAACAATATCCATACCACCTGCAAATTTAGCACATTCATCTGCCCAGTACTCTGCGTTGATAC
>NODE_2774_length_888_cov_5.245399_g2728_i0
ATTTTCATCCAAAACACACAACATCCAAAATTTACAAAATTGAATTTCGAATAATAATTTACAATTTAGCTTGTTTAGCAGCCAGATTATACATAATTTCTTCACTACCAGCTGCAATAGCACTTGCTCTAACACCTCTATAACCTTCTTCAACTCTACCAGCTCTACCACCTCTAACATAACTTCTACCACCAAATATTTGTGAACAGTCAATTAATATTTTTTGATATGATTTTGTAATATGTACTTTCATTAATCCTACATTACGTGGTATACTTTTATCTTTACGTCCATATTGATCAAATGATAAATCGTATGCCATTTTTTCTAAAAATAATTGACAAGTAATAGCTAATCTTGAAATATCAGCAATTTTATGTTTAATTACTTGATGATCCATTAAACGTTTACCAAATGTTTTTCTTTCTTTTGCCCATAATATTGTATCTTCTATTGCTGCTCTCATACCTGCTACTGCATCTGCACATACTACAAATCTTTCAAAATTAAAATTATACATTAATGGTTTAAATCCTTTATTTTCTGTACCAATGAGATTTTCAACAGGTACTTTCACATTTACGAATGTAACAGATGCTGTATCATTTAAATCAGCTCCTTGCATATTTAATTTTGATGTATATACACCTTTACAACGTGGTATTAACATTAATGATACTTGACCTGGTCGATCACCTGTTTTACATAATGTTGTAAAATAATCTGCTCGGGCGCCACCTGTTATCCAATATTTTGAACCATTTACTACGTAATATTTTCCATCTTCAGATTTAACAGCAGTAGTTTTAATTCTAGCAACATCACTACCACCAGTCATTTCACTAATAGCTAAAGATATTAATTTATCCCCGTACTCTGCGTTGATAC
>NODE_3004_length_845_cov_42.306995_g2957_i0
GTATCAACGCAGAGTACGGGGTAACAAAAATGGATTTCGCCTCCCCAAAAGGTGGTTTATCCCGTGTAGATGGTTCAAGTGTAACAGATTTCGCCAAAGATGAAGAATGTGTTCAATTTACCAAAGATGTTTTGAAGAATCGTGACAATAAAGACAATCCAGGTGAAATAGATACAATCGCATTATCCAAAATAAATAGTAAAGATTACAATGTAATATTTTTTGCAGGTGGTCATGGTACAATGTGGGATTTTCGTGATAATAAAGATGTAAATAGATTAGCAAAAGAAATATATGAAAATGGTGGTATTGTTAGTGCTGTATGTCATGGTCCATGTGCATTATTAGGTATTAAATTATCTAATGGTGAATATTTAATTAAAGATAAATTAGTATGTGGTTTTACCAATGATGAAGAAGAAGCTGTTAATTTAACTAAAGTTATGCCATTTTTATTAGAAACGGAAATGAAAAAAATTGATGGAAAATTTGTTTCAAGAAAAAAACTGGTCTTGTTGTGCTGTATTAGATAGAAGAGTTGTTACTGGACAAAATCCTGCTAGTGCTGGTAGATGTGCAGAATTAATTTTATCTTGTTTTAAACCGGATGAATTAATTGAAGAAGAAGAAGAACAACCAAAACAAAATATGATGTTTGATGATGATGAATAAAGTTGTGTTTAACTTGAAGTCAGATTTGTAAATAGCTAAATTAGTGACTGTTTCTTGTAGCATTATTTTTAAACTTCGACTCAGAAAATGAGCAACATATTTTTGACCTTGAAATGTTTTGGTTTTGTTTGTTCACTAACTTTACTTAATGCACTTTGTGACAGCTGTCTCTT
>NODE_3014_length_842_cov_25.146944_g2967_i0
GTATCAACGCAGAGTACGGGGGAAGAGGAAGAGGTAGAGGTGGTAAATCATTAGAACAATGGCAACCAATTACTAAATTAGGTAGATTAGTTAAAGATAGTAAAATAAATACTTTTGAAGAAATATTTTTACATAGTATTAAAATTAAAGAACCTGAAATTGTTGATTTCTTGCTTGAAAAACTTGGATACGAATTGAAAGATGAAATAATGAAAATAAAACCAGTCCAAAAACAAACAACAGCAGGTCAACGTACACGTTTTAAAGCGTGGGTAGCAGTAGGTGATAGTAAAGGTCATATAGGATTAGGTCAAAAATGTGCAAGTGAAGTAGGTATAGCAATTCGTGGAGCACATATATTAGCAAAATTATCATTAGTACCAATACGTCGTGGTTATTATTTATCAAAATTACGTGATCCTCATACAGTACCTGGTAAATTAACAGGACAATGTGGTAGTGCTAGAGTAAGATTAATACCTGCACCTCGTGGTACTGGGTTAGTTGCTGCTGGTGTATGTAAAAAAATGTTAGGTATGAGTGGTATTGAAGATATTTATGTATCTGCAAGAGGTCAAACTAGAACTACTGGTAATTTTATTACTGCACTTTTCCTTGCATTAAGAAAAACTTATAAATTTATTACTCCTGATTTATGGGCACCATTTGCATTAAGAGATAATCCATTAGATAAATATCAAATTAAACAAGAAGATGATCAAAGAAGTTAAAATTAATATCATTATGACAATTTTGAAAAAATATTATCAATTGATAAAATTGTTGGTTTTTTTTGTAACAAGTTGAAGTTTTTGCATCATATGTTTTGCATGTTATGTCTC
>NODE_3028_length_840_cov_13.483703_g2981_i0
GAGACAGCTCGAATGCGCACTGAATTAAGAACGCAAAATAAGGTTTAATTCACATTTACACAATTTTGAATGACAATAACAGACTAAGATAGTGTATTTTCCAAAGTGCCATCGTGGTTGTTACAGTACTTGCAGTATAAAAGAATTAAACAGACACGTTAACATTTGCACAAAATACAACTTTCTCCCATTTTTCGTTTAATTCTAAATCATAAAATATATCATTACCGCTAATATTGCAATCAAAACAATGATCAAACAAATTTTTTTTCCTTCAGAATGACCCATTAATTTATCTAATTTTTTGGTAAGAGAGTTTATTTTACCCATTGCTGTGTCCATTTCAGTATCAACTTCGTCCAACAAAATTAATTGATCTTCAAGTTCTGCTTTAATATTGTTTCCATGAACACCTAATCGTCGCAATGTACCCAACATATCATCTAATACTAAGTCTTGTTCGTCCCTTTGTATTTGTTTTTGGGTTTGTGTATCCTCAATATAATTTTGATTTATTTCTTCTACCAATCTGTTCTTAGGTTCTGTGTTCTGTTTTTCTGATTTTATACGGTCTTGTTTTAGTTTTTCTCTAGTTCGATCAGATTGCATATTATGACGACATTCATCAATATATTTTTGAGTATTGTTAAGAAATTCTTCTCGAGATTCTAATTCATATTTACTTATATGTGGATAATTTTCAGGATTATTACGCACAGAATCGACTGTAGATTTACGAATTTCTTTTAATGCATGTTTAATATCTTTATATTTTCCTCTTAAATTATCGGTTAATTCTTTAAATCGTTTATTTTTTACCCCGTACTCTGCGTTGATACC
>NODE_3108_length_826_cov_13.136786_g3061_i0
GAAAGAAATGTGAACTTTCAATTTTTGTTTGAGAATTTCAACAAAAATTTTTGCAATTTTAAACACAACAAGCAGTACTAAAAAGAAAAATAATACGAATTCTAATAGTAATAAATTTAATTTTTTTTTCTTTGTGTTTTTGTTTTTTGTTTTTGTTGATTTCTTTTTTTACTACCTTTATTTTTCTTTTTTGAACCTTTTGATTTTTTTTTACTACCTTTATTACTTTTTTTATTATCATCAACATCCATTACATTTTCACCTCTTTCTCTTCTTTGTTTTCTTTCAATTTGTTTACGTTTTTGTTCTTTAATTCTATCTTTTTTAGTAGTACCAAAAAATTCTTTTTTTTCAACAGCAGTTTGAAATCTACCAACACCCATTTTACTCGATGTATCAATAAATTTAAGATTAATTTCTTCAGATGCACCTGAACCACTTGGTAATACAACAGGTTTTCTTAAAGTAATTGGTCTTTTTCTTGGACCAGGACAAGTACCTTTTATCATAATATAATCTTGTTTAACAATACCATAATTAACAAAACCACCAAGTGGAGTTATATTTTTGGATGTTAAATCATTTTCAGTTGATGCATTAAATGATGTTGGTTTACCATTTTCATCATATTCAATTGCTTTACCAATTCTATAAATTTTTTTATGAATTTCTGTTCTATGATGATAACCTTTTTGACCTTCTCTAGCAATTTGATAACCAACACGTGCTGGATGCCATGAACCAATACATGCTACTTTTCTTAAACCTCTATGTGTTTTTCTAGGTAATCTAGTTACACCCCCGTACTCTGCGTTGATACTGTCTC
>NODE_3121_length_824_cov_4.390146_g3074_i0
GACAGGTATCAACGCAGAGTACGGGGGTATATTTAATTTTGAAGGTGGTTGTTATGCTAAAACATCTAAATTATCATTAGATACTGAACCTGAAATTTATCGAGCTGTAAAATTTAACGCATTGATGGAAAATGTTTGGATATCACCATACAGTCATGATATTGATTATTTTAATTTATCAATAACAGAAAATGGTCGTGTATCATATCCAATTGAACATATAGATAATCGTGAAGATTCATTAGCAGGTGGACATCCAGAATATATAATATTTTTATGTTGTGATGCATTTGGTGTATTACCACCAATAGCAAAATTAAATGCAGGTCAAAGTATGTATCATTTTATAAGTGGTTATACAGCTAAAGTAGCAGGTACTGAAAGAGGTATTAAAGAACCACAAGCAACATTTAGTCCATGCTATGGTGCTGCTTTCTTAACATTACATCCAATGGAATATGCAAGATTAATGAAAAAAAAATTAGAAAATCATAATGTTGATTGCTATTTAGTAAATACTGGTTGGACTGGTGGTCCATATGGTGTTGGTGAAAGAATGAGTATTAAAACTACTAGAAATTGTATTAATGCTATATTTAATGGTGCTATCAAAAAATCGAGATTTAGAGAAGATAATTTATTTAAATTTAGTGTACCTGAAAATATACCAAATGTTGATAGTAATTTATTAAATCCTAGAAATACATGGAGTGATAAAAATGCATATGATGAAGCTGCTTTAGATTTAGCGGATAGATTTGCTGTAAATATTGCACAATATACGGATGATGTTAATGAATATGAAGGATGTGGACCTATTGGAC
>NODE_3128_length_822_cov_12.560748_g3081_i0
ATCATGTTGAATTAAGTCTAACAAATGTTCTGTAGACCATAATTCAACATGATCACTGAATCCAAATGAAGTAAATAAACAAAAAACAAGCAAAACACTAGACATTTTATTACTCTACTTTAGTTTTAAATTAATTTGGAATTGTCCAATTAATTTCATCTTTTCCTAATTTTTTCAATAATGAATTTGTTTTTGAATATGGCTTACTTCCATAAAAACCTCTTGAAGCAGACAAACCTGATGGATGCGCAGATTTAACTATTTTATGCAAATTAGTATTTATAATTAATTCTTTTTTTTGTGCTTGTTTACCCCATAATATAAATACAACACCATCTTTTTTTTTATTTGATATTGTTTTAATAACACTATCTGTAAATTCTAACCAACCAAATTTCTTATGGCTATTTGCTTTGTGAGCTTCTACAGTTAATGCAGTGTTTAATAATAATACACCTTGTTCTGCCCATGTAACTAAATTACCATGATTAGGCCTTTTAAATGATTTACCTAAATCACTTTCTGCTTCTTTATACATATTTCTTAATGATGATGGAACTTTAATACCTTTTGGTACACTAAAACATAAACCTTCTGCTTGTCCATCATCATGATATGGATCTTGACCTACTATAACAACTTTTAACTTTTCCCATGTACATAATTCAAATGCTCGATATACTTGATGTTTTGGTGGAAATACCTCTATTTTCGGATCTGATTCTACATTTTTTAAATTTTTAATTAATTTTAAAAAATATGGTTTTTTAAATTCATTTTGCAACATTTCTTTCCATCCCCCGTACTCTGCGTTGATACCAC
>NODE_3147_length_818_cov_35.135570_g3100_i0
TTCAATGAATTGAAACTAAACCACAAAAAAAATCAAAAAACAATAAAATTGCACCCCAATAGACAAGACTTTAAGAGTCAAGAAAAAATTCAAAAAGAAACATGTGCAACAAACAACAAACTAATTAACAAAAATGAACTTACAAAAAAAAAAAAAATAAAATAATTTAATTTTCACCACATTTTGAAATATTTAATTACCACCACGTAATCTTAATACTAAATGTAAAGTACTTTCTTTTTGAATATTATAATCACTCAAAGTACGATTATCTTCTAATTGTTTACCTGCAAATATTAATCTTTGCTGTTCTGGTGGTATTCCTTCTTTGTCTTGAATTTTTGCTTTGACATTTTGAATTGTATCATTAGCTTCAACATCTAATGTAATTGTTTTACCTGTTAATGTTTTTACAAATATTTGCATTGCACCACCACGTAATCTTAATACTAAATGTAATGTACTTTCTTTTTGGATATTGTAATCACTTAAAGTACGATTGTCTTCTAGTTGCTTTCCTGCAAATATTAAACGTTGTTGTTCAGGTGGTATACCTTCTTTATCTTGAATTTTAGCTTTAACATTTTGAATAGTATCATTAGCTTCAACATCTAAAGTGATAGTTTTACCTGTTAATGTTTTCACAAATATTTGCATAGCACCACCACGTAATCTTAATACCAAATGTAATGTTGACTCTTTTTGAATATTATAATCACTTAATGTACGATTATCTTCTAATTGTTTTCCTGCAAATATTAAACGTTGTTGTTCAGGTGGTATACCTTCTTTATCTTGAATTTTAGCTTTAACATTTT
>NODE_3153_length_817_cov_5.100806_g3106_i0
CCACAAATTCTAAAAATAAAAAATATGTTTCCTTGATTCTTGTTTTATTTTTATACAAGATCAGGTACTGCTTCATCATCTTCTTCATCTTCACCATCTTGAACATTATTATCTAAATTAGCTAAATCTTTATTTAACATTTCATTAATAAAATCTTTTTCTTCATCTTTTTCATTAATAATATCTCCACGTGCTTGTAAATTATTCAAAATATTATCACTTGCTTCATTATTTTCTTCAGATTTAACAGGATATTTTTGTCTTAATTCTTCTCTTCTTTTTTCAATTTTTTCTAATTCTTCTTGTCTTTGTTTAGTAATATTTTTCATACATTTATCTAACATATTTTGTGATCTTCAGCATTTGGTGTTAATCTTAATACTTTTTGAAATGCAGTTGCTGCATCTTTATATTTATTTAATTTCATTAATACCATACCACGTAAATGATGTCCTTTGCCATAATTTTCTAATGGATCTAATTCACGAATTTTTTGACAATCTTCTAATGCTTTTTCGTATTGATTATTTTGATAAAACATTAATACTCTATTGCTATATAATATAATATTTGTAGGATGTTTTTCAATTGCTTTTGATACTTTTTCTATTGCTGATGTGTAATTTTTTTCATTAAAATCTTTATTTGCATCTGTTTTTATTGTTTGTGCTTCTGACCAAAGAACATTTAATTCATCATTAGTTCTAAATTCAAGTGGAGTACTTTTTCTTAATTTATTTAATTCTAGTTTAATTTGTTGATATGTCCAATTTTCACATTTTTTATTTCCTATTCTAATTAATTGACTACCAATAGG
>NODE_3211_length_808_cov_24.370068_g2924_i1
GATAGAACATTATCAGATTATAATATTTAAAAAGAAAGTACATTACATTTAGTTTTAAGATTACGTGGTGGTGCTATGCAAATATTTGTAAAAACATTAACAGGTAAAACCATTACTTTAGATGTAGAAGCTAATGATACTATTCAAAATGTCAAAGCTAAAATCCAAGATAAAGAAGGTATACCACCAGAACAACAACGTTTAATATTTGCTGGTAAACAATTAGAAGATAACAGAACATTAAGTGATTACAACATTCAAAAAGAAAGCACATTACATTTGGTATTACGTTTAAGAGGTGGTGCAATGCAAATATTTGTTAAAACGTTAACTGGTAAAACTATTACATTAGATGTTGAAGCAAACGATACAATTCAAAACGTAAAAGCAAAAATTCAAGATAAAGAAGGAATACCACCAGAACAACAACGTTTAATATTTGCTGGTAAACAATTAGAAGATAATCGTACTTTAAGTGATTACAATATTCAAAAAGAAAGTACATTACATTTAGTATTAAGATTAAGAGGTGGAGCAATGCAAATATTCGTAAAAACATTAACTGGTAAAACAATAACATTAGATGTTGAAGCAAATGATACAATTCAAAATGTAAAAGCAAAAATTCAAGATAAAGAAGGAATACCACCAGAACAACAAAGATTAATATTTGCAGGTAAACAATTAGAAGATAATCGTACTTTGAGTGATTATAATATTCAAAAAGAAAGTACTTTACATTTAGTATTAAGATTACGTGGTGGTAATTAAGCTAGAAACCAAATCAATAAATCACTCAAATATTTTC
>NODE_3261_length_799_cov_53.852617_g3212_i0
GTTCTAAACCTCGTGGACCTGGTATGGAAAGATCAGATTTATTACGTGATAATGGAAATATATTTGAAAAATTAGGAAAAGTAATGAATACAAAAGCAGCAAGATATTGTAGAACTACAGTAGTAGGTAATCCATGTAATACAAATTGTTTAATATTAGCAAGCAATTGTCCTGATATTGATCGTAATAATTTTACTGCTATGACTAGATTAGATCATGATAGAGGTTTATCATTAATATCAAGTAAAGTATGTTTACCTGTAAATGAAATAAATTATTTTAGTATTTGGGGAAATCATAGTGCAAGTTTATTTCCTGATTTATCAAATACATTAATACATGGTGTTGAATGGGACGAATTAATTGGTAAATATAAATCTGATAGATTTTTTAGAAATGAATTTATTCCAAGAGTACAACAGCGTGGTGCAACTATTATAGATGTTAGAGGATCATCTAGTGCTGCTAGTGCTGGAAGTGCATGTTTAGCTCATACAAGAGATTGGATATTTGGTACACCACAACCTGATTGGACATCTATGGCAATATTTAGTAATGGCGAATATAATGTACCAAATAATTTAGTATTTTCTTTTCCTGTATGGTGTAAAAATGGTTATTATCAAGTAGCAAGTACACCATATCAAATAAATGCATTTCAACAATATTGGATTGAGAAAAATATTCAAGAATTGAAAGATGAGCGAGATATGGTTTCTAATTTTGTTAGATAATTAATTCTAATGTAAAAATTTTAGAAAACCTTTTTGACTTTTTGTGTTTTTTTTGAGTAATTTAT
>NODE_3269_length_798_cov_24.885517_g3220_i0
ATATAAAAGCGCAAAGATTCCCAAGCCAGGAGAAAAAAAAAAAGAAAACACAAGCAACAAAACAAAAAAACGAATGAAGTAGAAAACTAAAAATAAACTATCACAAAAATATAACGCAAAATGATAAGATTGTATTCAATGAATAATGATGTTTAAAAGATTTTTTATGACTGTGTTTAATTGATATTACTATTTCATAAATTAGTTCTTATGAACTACTGATACAACCTTACCAAGCATAATTAATGAATTACTATCCATAGCAGCAACTCTACCTAATGGTTTACAATCATCAAATGCACATACAACAAATGGCATTTTAGGTTTAAATACAACTTCTGCTTGATCACCAGCTTCAATATATGGTGGCATTTCAACTTTTTGATTATTTGTTGATTTACCACTTTTCCATTTTATTTCAACCATTTGACATGGTGCTTTAGCAGTTCTAATATGTATACTTGGTGTAAAACCACCTTTATATTCATCATTTTGTTTTTTAGCACATTTTAATTGTCCAGGATGATCTTGTACAAATACTAATGCTGTAAATGTATCAGCTTGTTTTGGTGGTTTAGGATCATCTTCATCATCATTACACATAACATCTCCTACTTTTGGCATATTTTCTTTTTTTAAACCTTTGACATTTACACCTACATTATCACCATGGACTGCTTTTTTTACTGTTTTATGATGCATTTCTATGCTGAATGCTTTACCTTTTACACCAGATGGATAAAATCTTACATTTACATCTGGTACTAATTTACCTTGTTCAATACGTCCCCCGTACTC
>NODE_3271_length_798_cov_13.121379_g3222_i0
TAATTTTTGTGTGCTTTATCGAGTACAAAAAAGTTTAACGTTAACACTAACACTAGAACACTAAATCAACGAACAATTATAATATGACGTGCATGACTGTACGTTAATATCAATAGTCTTAGTAAATATTATATAATATGTGAATTTGAATACACAGGTTAATCTAACTCAACAGAAAAAGCAATAATAAATATAATATAAGCAGCACATAATATATATCCATGAAATAATTTTAACGTTAATGATGATTTATACAATAAAAAACATAATACTATTAATAAAACACCTAATGCCAATAAAAATAATTCAAATCCAGTATCTGATTCAACATCTAATGTTTTAAAACCTATCATAACTGATTTTAAAAAAAATGATAATCCAACACATATACAAATATCAAATACATTTGAACCTAATGCATTTGATACTGCCATAGTACCTTTACCTTGTTTTGCAACTAATATACTACTAAAACAATCTGGTAAAGAAGAACCAATTGCTAATAATGTTAATCCCATAACATCTGCATTTAATTTAACACAATTACCGATTTTATTTGCACAATCAACTGCTAAAAATGTTAAAAAACCCATCCATACTATTGATGCTATAAATACTGCTGATAATTTACAATATGTTATTATTTCTGATATTGGACGATCATTTGCATCATGTGGATCATGTTGTACTGCTTGAGAATTTGATGATTTATTTATACTTGTATGATAACCAGGTGAATTTAAATCTGGTATTGTATAACTAAATATAATTCGAAATGGTATTAACAATATTTTAAGT
>NODE_3272_length_798_cov_8.382069_g3223_i0
AAAAAACAATACAAAATGAAATAATATCTCAAATATCCAAATCTGAATCATCTTCTGATTCTTCTTGTTTTATTCTTTTTTTTTCTTGTTTTAATTTTTTTTCAAGACCAAGAAAAGTTTCTTCAAAAAATCCGTATTCATACAATTTTTCTTCTAATTTAATTGTTTCTAATTTACCATCTGGTGCAACCCAATCTAATCCAACTAATTTTCTATCAACTTTAAATTTTTTGGAACAAACTAATGTTGGTAACATCCATATATTTAATTTTTCCATTAAATAACTTGCATTTTTTGCTTCAATTTCTACAAATTTACATTCAATATGTTTTCGAGCCAATAATGTTAAATGTTCCCTTAATGTATTACACCATTTATTTGTTTTATTAAAAAAATGACATATGACATATTCTGATGTTTTAACTTCATTAAAAAATTCACGTTGATCTGGTAATAACACCATACGTCCATGATTTGACGACATCCATTTTTTTCCCCGTACTCTGCGTTGATACNNNNNNNNNNGTTTTAGTTGCTCTAAACGTTTTTTCCTTAGTTCTTGAACATCTTCTTTTGAAAGATTATTTAATCGTTCAATTTCTTCATCAACTTCTCGTTCTTTGTCATTTAAAACTTTTTGCATTGTTGTAGTGACTTTGTCACGAACTAATTGTTCCATTTGCCATTTTTTGTATTGGTTTTGAAGGTCTTGTTGACTTTGTTTTGACATTGCTCACCACACAAGTTTTAAAAAGTTTGTTTTGTACCCGTACTCTGCGTTGATACCACCTGTCTCTT
>NODE_3363_length_786_cov_44.642356_g3313_i0
TCTATATAAAGCTAAACCGATCATTTGTATAGATAAAAAAATGTCCACAAACACAACAAACAAACACAGATGACTCACACCATTAGTACATATGATCAATGACAACCATGACATGCAAAATAGATCCATATGTACACTGAAAATTAATATTAAATTATAAATTTACTTATAAAAATTGTCTTAAAATGTTTTCTAATTTAACGATATCATCAGCAAATCTACGAATACCTTCAGCTAATTTATCATTTGCCATAGGATCTCTACACATACCATATCTAAAATTAGGTTCTGTAACATTTAATTGTTGTTTATAATCACTTTTTTGACTTGGTTCTAATACTCTTGTTATTTGATCATTACTATTAGTTAATTGTTCTAAAAATTTAGGACCAATTGTTAATCTATCACATCCAGCTAATGCTAATATTTGTTCTTTATTTCTAAAAGATGCACCCATTACAATTGTTTTAAAACCAAATGTTTTATAATATCTATAAATTTCTAATACATTTTTAGGACCTGGATCATCTTTAATATCAAAACCGTCAACACCTTGATTTTTTTTATGCCAATCTGTTATTCTTCCAACAAAAGGTGAAATTAAGTAAGCACTACCAATTTTAGCAGCAGCTGCTGCTTGCCATATATTAAATAATAATGTCATATTACAATTAATATTAAATGCATGTAATCTTTGACATGCTTGAATACCTTCCCAAGTTGATGCTATTTTAATAAGTATTCTATTCTTCGCATCTTTAATACCCCCGTACTCTGCGTTGATAC
>NODE_3371_length_785_cov_53.504213_g3321_i0
GAGTACGGGGTGGTCAAAGATGTACATCATTAAGAAGATTATTTTTACATGAATCAATATATGATGAATTTTTAAATAAATTAATGAATAAATATAAAACAATTAAAATAGGTAATCCATTAGAAAATGATACATTATGTGGTCCAATGATAAATAAAGAGGCTGTACAAGATTATGTGAATGGTATAAATTTAATTAAAAAATCATCAAAATCTAAAATATTATGTGGAGGTAATGTTTTAGATAATATGAAAGGTAATTTTGTTGAACCAACTATTGTTCAAACAGAACATACTGAACCATTTGTAAATGAAGAATTATTTGCACCTGTATTATATGTTATGAAATTTAAAACATATGATGAGGTAGTAAAAATGCATAATAGTGTAATACATGGATTAAGTAGTTCATTATTTACTAAATCACATACTAATATATTTAAATGGTTAGGACCAACTGGTAGTGATTGTGGTATTGTTAATGTTAATATTGGTACAAGTGGTGCTGAAATTGGTGGTGCTTTTGGAGGAAATAAATATACTGGAAATGGAAGAGAAAGTGGATCAGATTCTTGGAAACAGTATTGTAGACAAAGCACATGTACCATTAATTATTCAGATGATTTACCGTTAGCACAAGGTATCAATTTTGGCTCTGATGAATAAATTAAGTCTTATTTGTTTGTGAGTGTGTTAATTGCTCACACGTCGTTGTTTTGTTTGTTGAGTGAAATAAATAATGCTTATTTAAAAATTAAAATTTAAAATCAAGTTTTGTGTATGTTT
>NODE_3405_length_781_cov_5.103107_g3355_i0
CCAAAAAGGGGAGTATAGCGACCCTTTGCGGCTAGTATGAATAATAAAACTTAATTATTTTGGTCAACAGGATTTCTACAAATAGGACAAATATGATTTCTTTGCAACCATGTATCAATTTCTTTAGTATGAAATATATGTAAACATGGTAATCTTCTAATTTCATCACCTTCTTTAAATTTTTCTAAACAAATACAACATTTTGAATTTTCATCATTTTGATTATTATTATTATTATTTTTTTTATTATCATCAATTGATGTTGTTTTTTTTTCATGATATTTATCAGTGGGTAATCTTTCAATATCTTGTTGATTAGCACCTCTAGGTGGATTTGGAAATTGTCTTAATAATTCTTCATATGTCATATTATGTATATCTCTATTTAATGAATTCATACCATAAATATCATTACCCATTAAAGCTCTTAAATTTGGTAATCTTGACAATATATTATATGGATTATCAACGTTTGCATTATCAAAATCATCGTTACTGCCACTAATATCACGCATTAAAAATGAATTCGTACCTAAATCAAATGTATTACTATCCATATTATCACTATTATTATTATTATTTCTGCTGTATGTATTTCTAATTGTTGTAAAATTCAATCCGGTGCCATTACTATTATTTCCTGTTGAAGAATAATTATATGAAAATTGTGTACCATCATCATCAGCCCAACTACGTGATTCATCATTATTGTTGTGCATATTTTGTAATCCAAGCAAATTAAAAGGATCTTCACGATCCCCGTACTCTGCGTTGATACCAC
>NODE_3438_length_776_cov_5.551920_g3388_i0
ATCATATATTCTTGTTTTACCTAATTGAATACTTACAACTGCATTTATATGTTCTTCTGCACTTTTAATCAATGGTACATCTTGAATATGTCGTTGTTTTACTAATAATTCTATTTCTGTACAACCTAAAATACAACATTGTGCGCCTTTTATTTTACACAATTCATTTCTTATTACATTAATAAAAAAATTTCGTGATTTTTGTTCAAATTTATTAAAACTTAATTCTTTTTCAATTATTCGCTCCATTTCTATCTGATCAGATTCATTTGATGGTATAACTACTTTTAATCCATGTTGACGTAAACGTGCTTTTAAGTAATCTTGTTGTAAAGTAAAACGAGTGCCAACAAGTCCAACTGTATTAAATCCTTTACTTATAATTGATTTTGCACAACAATCTGCTATATGCAATATTGGAAAATGTGGCAATATTTTAGTCAAATAAGGTACTATCATATGACCAGTATTGGATGCAATAACAAGAAAATCTGCACCTGCATTTCGAACACGTCTAGCAGCATCAGATAATAAACCAACTACTAAATCCATTCGATCTGCTGTCAAGTAAGCAACGTATTCTTCTAAATTTACAGAATACATTACCATTTTGCTTGTATTCCCAGCAAATTTCCATTTCGAACCACTTTGAATTTTTTCATTAATTTGTATGTAATAATCCGCTCCACTAATATGTGATATACCAGTAACAATACCAACAGTTGGCTGAGTAATTACAGTATTCATTTCAACTTGCCCGTACTCTGCGTTGATAC
>NODE_3456_length_773_cov_10.982857_g3406_i0
AGAGACAGAGTAAACACAAATATTTAATTTTGGGTGAAATTTAAATAATTGTCAAAAACAAACAACAAAATATCCCATCCATATACAAAACACGAAAAACGATAAATATTTCATAATGAGATGAAAATATTTAAAAAAAATCAACAATGATGTAACATTTACTCCAAAGAATATTAAGAATAAATATTCCCTTTACGTAACAATATATCGTATGAATATAATATTACAAAATACTAATTTTCATTATTATTCATAACAATATCATCATCTTCCACACTATTATTATTATTATTATTATTATCTGTAGTTTTATCATTTTTATTATCTTCAGTCTCGTTTTCTTTACTTTCTGTTTGATAATTTTGATTATTATTTTCACTATTAGTATTACTATCAGCATTTTCATCAGTTTCATCCCATTTTATAGAAAATCTAGTAACTCTAGGTTTATCAGCTAATACAGTACGTGTAATTTTATTAGGATCTATATTTAAATTTTTTTTTTTATTTGTATTTTCATTTTTATTATTATTATTAGTATTTTCTTCTTTATTATTTTTATTATCGTTAATATTATCATCATTTTTATATTCATTGGATACATAATATCCAATACGTATAAATTCTTCTTCATTATATGAACAAGATAATAAAACTACAGTAACATCCAATAAATCTTGTGGTGATATTAATTTTGAATTTGGTGCTGGTGCTTCAAATACAAATCGATTTTTACCTAAAGATATTGGTCCCCCGTACTCTGCGTTGATACC
>NODE_3462_length_772_cov_13.801144_g3412_i0
AAGAGACAGGTATCAACGCAGAGTACGGGGTACCACAAATTGAAGTTACATTTGATTTAGATGCTAATGGGATTTTATCTGTTTCTGCTAAAGATAAAAAAAATGAAAGTAATAGTAAAAAAATTACAATTGATCAACAAAAAGGTAGATTAAGTGAAGAAGAAATTAAAAAAATAGTTGAAGAAGCAGAAAAATATAAATCTGAAGATGAAGAATTAAAGAAAAAAATAACAGCTAAAAATGATTTAGAATCATTTGCATATCAAATGAGAAATACATTAGATGATGGTAAATTTAAAGATGTAATTAAAAAAGAAGATAAAGAAAAAGTAGAAAAAGCAGTCAAAGAAGTAATTGAATGGGTTGATCAAAATCCAAATGCAGAATTAGATGAATTAGAAGCTAAAAAAAAAGAATTAGAAGATTTATGGAAACCAATTATTATGGAAGCTTATAAATCTACTGGTGGTCAACCTGGACAAGGAGGTATGCCTAATATGGGTGGTATGGGTGGTATGCCTAATATGGGAAATTTTCAGCAACCAACAAATACTCAATCAAATAAAGGACCTGAAATCGATGATGTTGATTAAATTACTATTAATTCATTGATTTATTAAAATACATAATAAAATATTAAATTAAAATATTTTTTTAAAATTATAAATAAGTTTTTGAATTTGGGATTGTGATCTGATTTCTTTTCCAAATTTAAATTTTATATGTTTTGTTTTAATTTGTTTTCATTTTGGTTTTATTGTCGATCTGGCTT
>NODE_3477_length_770_cov_21.413199_g3427_i0
GTGGTATCAACGCAGAGTACGGGGGTAAATGTAAAAGCAGTATTACGAGAATGTGGTGGTAGATCAAGACTAATTGGACCAAATTGTCCAGGAATAATAAAACCAAATGAATGTAAAATAGGTATTATGCCAGGTCATATACATATGCCAGGTAAAATAGGTATTGTTAGTAGAAGTGGTACATTAACATATGAAGCAGTTAATCAAACAACTGGTGTAGGATTAGGTCAATCAACTGTTGTAGGTATTGGAGGTGATCCATTTAATGGTACTAATTTTATAGATGTATTACAAAAATTTAAAGATGATCCTGAAACTATTGGTATTATTATGATAGGAGAAATAGGTGGTGGTGAAGAAGAACGAGCTGCAGAATGGATTAAACAAAATAATTTAACTGAAACTAAACCTATGGTTGGTTTTATATGTGGTGTAACTGCACCTCCAGGAAGACGTATGGGACATGCTGGTGCAATTGTATCAGGAGGTAAAGGTGATGCTAAATCAAAAATGGAAGCTTTAAGATCTGCTGGTGTTGTAGTAAGTGATTCACCTACTATTATGGGTAAAACTATGTTGCGTGTAATGCAAGAAAGAGGATTACACTAAAAATCATCAATTACACCTTATTTATAATTCATATACAATTCAATTACAGTTTGTGTTTTATCAATTTGACTTGTTTGTTTAACTATTTGTTTAAAAAATTAAAAAATTTAATTTTTTTCTTTTTTTGTTTTCACAACAATAAAACGGATAACATATAATAT
>NODE_3481_length_769_cov_29.005747_g3431_i0
GAGTACGGGGGACGTATTGAACAAGGTAAATTAGTACCAGATGTAAATGTAAGATTTTATCCATCTGGTGTAAAAGGTAAAGCATTCAGCATAGAAATGCATCATAAAACAGTAAAAAAAGCAGTCCATGGTGATAATGTAGGTGTAAATGTCAAAGGTTTAAAAAAAGAAAATATGCCAAAAGTAGGAGATGTTATGTGTAATGATGATGAAGATGATCCTAAACCACCAAAACAAGCTGATACATTTACAGCATTAGTATTTGTACAAGATCATCCTGGACAATTAAAATGTGCTAAAAAACAAAATGATGAATATAAAGGTGGTTTTACACCTAGTATTCATATTAGAACTGCTAAAGCACCATGTCAAATGATTGAAATTAAATGGAAATCAGGTAAATCAACTAATAATCAAAAAGTTGAAATGCCACCTTATATTGAAGCTGGTGATCAAGCTGAAGTTGTATTTAAACCTAAAATGCCATTTGTTGTATGTGCTTTTGATGATTGTAAACCATTAGGTAGAGTTGCTGCTATGGATAGTAATTCATTGATTATGTTGGGGAAAGTTGTATCAGTAGTTCATAAAAATTAGATAAAAATTATTAATTTTACAAAGTGAGAAAAAAGGAGCATTTTTTACGTTTTTTTACGTTTTCTTTAATTCTATGTGGTTTGTAACTTTGTTTGTTTGTGTGGGTATGATAAATTGAAATTTTTAATTCTTTTTTTAGCGAAAATTTTTTGCCGGTTCCTAAGCCTCAATG

View File

@ -1,50 +0,0 @@
>NODE_96_length_4637_cov_31.218614_g87_i0
CTTGGATAAAATGGTTATTCTTTTTCCGAATTGTGAGATGATAGAAATCAAAGGATTGACGTTGACTGAGGTTACATTTGAAAATCTTGTTTTATGTGTGGATAATATAAGTATGGAAGTGATTGCTGCACAAGATTTCAATATTTGTACTAAATCATCACAGGCTGGGAAAAAACCTCGTCTTAAGATGGTAAGCAAATTACAAAGGATTTTAATCACCGAACCAGGCATAGAAGTGGATGACATTATGGATGAACTTAATACTATTCGTGAACGTTTCGTAGATTTACATTGGAATATTAAATATTTAGAGAGTTATCATTCTGCAGCATCAATAATTATCACCAGAGATAATGAATATTTTAAAGAACGTGTTCAACAAAAAAAGAAGGAAATTAAGAAACGAATTAAAGCCATCTCTAGTTCAAAGCCACATACGCCACTCAATGAAGCATCAGACAATATATCATCCGATGTACAATCTTCTGCAGTAGCAGGAGAAGAATCTGCTGCTGGTGTTTTAAAAAATGCTGCAGAAAAAGTGGTTGAGGTGAAGAGTGATGATGATGAAAAAAAAGTTGGGGAAGAAGATTTATCTGAAGATGATAATGATGCACAAGATGAAAATGCTTTTATAGAATTTGAAGAAGCAGATTTTGCAAATGATATGCTTGATAATAATGATGACATAAAAGAAGATATGGATGAGGATGATATTGATTGGGATGATTGTCCAGAATATCTACAATTACAAAGTTTTGGATTTGATCGACTATGGGTGAAATGTTCACTCACTTTATACAATAGAAAATTGCAGGATTGTTTGGAATATTTAATATCAGCAGAACATGAAAATGCTAAAAATGCGTATGATGCCATATTAGCTAATGATCCAAATGCATTCACCAATGATAATATTTATAATCCTTTGTGGAAATGTGTAATGTGTGGTTTTAAGAATCGCGGAGCTCAACTCACATGTTTATTATGTTCACTTGGACAAAGACCAGCACCAACCGTTCCTAATACTATGGATTTATTGTTGGATGAAGATGAAGTACTTCCAGCAGTGGTTGAATTAGATTTAGGTCCATGGAATTGTCCATTGTGTTCTTTTGAGAATGAGAATTCTAAATTGGCATTTTGTGATATGTGTCAAAAAGGTAAGAATTCTGTGCAATGGGAACAATTGATATCTGGTGAATCACGCATTGTAAAACGATGTAATTTTAAATCAACAGATGATAGAAATGGTGTAGTATTTTGGATTGGTACACTTTGTGGTCAATCAAAATGGAAAAACCCAGGATTATTCAATCGTATAAGATGTGAAGCATCAACGCTCACTCCCGATTCACATCCAGTGACATCAACACTCACTGATCGAGTAAGTGTACGATGTGTAACAACTGCTACTCCAGGTTCATGGATGATGATTGATTTTGTACATTTTAAAATACGTCCAACACATTATAGTTTGAGACATTATATCTCATGGAGAAGTGAAGCAATGCGTAATTGGGTGTTAGAAGGATCGAACACTCCTTCCAACAATAATAATAATAATTATAATAAAGAATGGATTCAATTGCGTGAACATAAAAAGGATGCATCATTGTTTGGTAAGGGATCAATTTCTACCTGGAAATTGAATGTCACTGATAATGCATATCGTTTTTTTCGTATCAGGCAAACTGGTCTCAATTCTAACAATCATCAATATTTAGCTTGTTCTGGTTTTGAAATGTATGGTATTGTATTTCCTGTTAATGTTTTGCAACCTTTGGTTGAGATTAGAGTTTTAATGCTAAGTGTTGAAGAACCAATTGAAGATATAGATCATGATGAAAGATTATTGCAGAGTATTAAAAAACAACAGATAAATCATTTTATGTCAAATTTGGATTCTAATCTTGATGGACATAAGTTTAATTATAAATCAGATTTCGATGAGAATGGTGTTTTATTTTTTTTAGGTACAGAATATGGAACAGCACCAT
GGAATAATCCAGCATTATCTGGTGTAGTGACTGTTACATCATCTGGGATGGATCCGGAGAGTGTGGCAGCAACTGCTATTTGTGGTAGGAGTGCTGAACGTTGTGTGTGTTTGCCAGTTGATGACAATTGGATAAAGGTTGATTTGAATAATTTCTTCATTCAAATCAGCCATTATACTTTGAGACATTATACTTCAGCACATGAGGCGTTGCGTAATTGGTATTTTGAGGCAAGTGATAATGCTCAGAATTGGACTGTGTTGAAAACGCATAGAAATGATGAGGGTTTGAAGATGGAGAGTGGTAGTACTAAAACGTGGAAATTAATTGGAGAGCGAAGGAGATTTCGTGCTTTTCGTATTCGACAATATTCTAGAAATTCTAATGGGCATATGTGGTTGGCTTGTAGTGGATTGGAATTGTATGGAATTATGTTTTTGCAAAAGTAGGTAAAGGTTGGTAAATGAGAGTAAAATGAAATCATTGAAAACTCAGATGTTTATAAAGGATGTCTGTCTCATAGAAGTGTTGACATTGTTCGCAACAATGTATCTCCTGAATATCAAATACTACAAGTTCTTTCACATGTAAACTTATTAGTTCTATTATTTTATTATTTTATTATATTTCTTGATCTGTTTGTCTCTCGCAACCACAAAAAATTACAAACAATTTCAAACTAGCCTTCCCTTCTATGATCATATTTACTAAAATACATACTGCTGTTGTTGCGCTTTCTGCTGTGACTTAATTGGCAATGAACTTGGATGATTCCACGATGTAGTTCCAGATGTATGATCCACATAATACACAGTACCATTCTCATCATATGATCTCTCCCATCCCGGAGGCAATGGTGCATTCGCATACTGATCATTTGCAACCACTGCAGGTGCAGTAGACATTTGATACACACTCTGTTGACCCAGCTGTTGATCAGCTTCATTGTACGCAAATTGTTTCACATTATATTGTGCTTGAGATTTAGGCCGTGGTTGAATATTATGTGCACGTACATATGATAGAAACTGATCAGGAATCTCAATCAATGTTTCTCTACTCAAATCAGATAACTTATGATACTTATTCATAGGCACAAATTGAACTATATCACGTTTGGCCACTTGTCCTCTGCTATTCATCAATCCATATTGATCACCATCCAATTCATCCATTGCACTAAAATCTGCATTACCAACACCAACAATAATTACAGATATTGGTAAATTCTCATTCGACATTGCAACAATTTCATTGCGAGTTAATTTCATATCATTTATAATACCATCAGTAATAATCAATAATATAAAATATTCCAAATTGCCAGCAGCAGCAGCCGCAATCCCATGAGCTTTTTTCAAAATCGGTGCAAATAATGTTGGACCACTTAAAGCAAAAGAACCACCTCTGATACTCTGTAAATATGTTTGTTCAATACCATATATATCGTAAACTTCAGGATCATGAGAATTAAAATTCAAATTGAAATCATGAAAAACACCATTGTATGTAGTATTCCCAATTCTAATATTACTGAAATGTGCGCCAAATCCCCAAACTGGAAATTTTTGGTCAGAATCGTACACTTTAATAATATTACCAATGGTTCGAATGGCATTCTGATATGGTGATGGATTTTGACCAAAAATATAATGTAAACTTTGATAATCTTTTGGGTGTCCATTAGAGCCAGTGAAATCAATTGCAACCATCAAACTCATATCCAAATCACCATTCATATAATCTAAAAACGAATTCACTGTTTGTGATTCACACGAAACAACATTTAATGTACCATACGTCTTCTTCTTCTTCTGCACTCGTTTCAATGGCATATTTTTCGGCATATTTTTCAATTCGTACAAATTAGTATCAATTTGGCCAATAAAATCATCACTGCCATTGCTATCCCAATCATACACTTTAATGCGTATTGGTCGATATTCATCATTGTTGCATAATCGCTGACTCTCAATTTTGAATGGTTTCCAAACTGGATT
CAACGTGCGTTTGATAAAATTTTCACGATCTCCATAAACGGTAATTTCCTTGCCATCTTCTCTGGTTCTACAGAGAAGAAAATATGGGTCAGCTTTTCCAAACAAACCATCCATTTTTGGTAAATTTTTTGCACTGAATTGCATGGTAATTAGAGTGTTTCCTTGTTCGTTGATTTGTTCTAGTGTTGCTATCACGTGTGAGAAACGTTTGGTTTTTTTATTTTTCAAATTCTTTCCTTTGTACATTAGTTTTTTTGCCATTCGCGAACCAGATGAATGAACCAATTCGCCCAAAACCATAGAACATGATCCAAGAATATCATGATCTTTCAATCGTTTGGAACCTTTCTTATCTTCGTCATAGCAGTCAAATCTAAGAATCTGTTCTTCTTCAAAATAGTAATCCATACGAAATTGAGTGCTAAAATTTGGATTGTGATTATCGTATATTACTTCTGTTCGACCAACTTCTACAAATGATTTATTTTTGCTGCCTTTTACGTACAGTATTACAAATGGGTCTGATTTGCTGTGTACGTCTTTTTTTATCAAATCACGACAGCTGACGAAGATTTCAACACTTTCTTTTAAAATTGGTGCTAAAACGTTGCTATTGTTGTTGTTGTTGTAGTTAACG
>NODE_109_length_4370_cov_38.616755_g82_i2
AGCAATGATGATTGGTCTAAAATATACCCAGGTGTTATTGGTATTGCAGCAGAAACCATGGGTTGGGAAGGAAATAATGCTAGAAATCCTATGCTCGTAAAATTATTCTTGAAAATGATAACTATTGCAATGACCAAAGCCAAAGCTTCTAAAAAAGATACAGGTATGTTTACCATCTATTTATCATGGGCTATGCAGAAATTGCAAGAAAAGAAATTCAATGCAATGGCTGAAGAATGTATAATGGCGACTTGCTTTCAGTATGGACCTAAATTAGTGTTCACTGCAATGGAGAAATTTATTACAGAGCCAGAAAATGCACAAAATGATCCATTTAAGAATGAGAGAGCATTTGGACCAGTGATTAAATTTATGCATCGTATGGTCAAAGAATTTGGTGTGGATAATTGTTATCCATTGCGTATGCTACGTTTGACACATGCGTTGTCTGCCAAATGCAGACAGAAAGATTCTAAAGAAGCATGTTATAATGTGATCACTGAAATGTATCAGCAATTGGGTAAAAATATGAAAGGTGTATGTCTTGGACCATTGGGTAAAGCGCAAGTGAAAGTGTTGACCAAAAGATTTGATGCTATAAAAAATGCAGGAACATTCATGCAATTACGCTGTACACGAACAGAGAAAACACCTGCAGCCAAAGGAGGACAAACAGAAGAATATGAAGAAGAAGTAATTGAGTGGGTTGAAGATACATCAGCAGAAGTGGCAGCAGCACAGGCAGCACAAGCTGAAGTAAAAGCAGCAGAACCAGCTCAAGCTGCTGTTGTTGCGGTTGTTGCGGTTGCTGCTGCACCAGAAGAACCTGAAGTAAAGGAGGAAATAAAAGAACCAGAGAAGTCTCCTACTCCTCCACCACCGGATGGACCAATAAAAAGTACTGCCAAACAAAAAAGTAAACGTTTGAAGAAAGCGAAGGCGAGATACAAACAATTTGAATGGAAGCCTTGGAGTAAAAATGAGATAAAAGAATATGATGCATATTTGTTTCCTAAAAAAGAAGAGGATGAGCCATACAGAGATGAACAGTTTGGTAAGGATTTCAATATCAAGATTGGTTCTTTGAAGAAATGGAAAAAATGCCTTGAGCCAATTGAGATATGGATGGATACAAATTGGAAACAATTCTATTGTGCATCAGATTTGGTTATGTATTGGTCGACATACATTTTTGAGCTTAATCCACAGCCTAATATTGGTAAAGCATTTATTCCTTTGTGGAAGAAGTATTGTGAGCTGTTGCAGGAACATTTGCAGATTATGGATGAAGCAGAAGGAAAATACGTGTTAGCCTTGGTAATAGAGAAAGGTTTCGTTCATCGAGGATTGATTGAAAGTGTCCATGATTTCTTGGATGTTTTTGAAGTAGTTTTCAATCCTGCTCTTTGTATGAAAACATATTTGACCATGGCTGAAAAAAGTAAATCAAATGCAGTAAAATTGCATTGCATGTCACGGTTAGGAATATGCATAGAACGCTATGGTTTGGTGAAATGCGAATCAAAGAAGAATAAATTGAAATCTAGCAAAGCATTCAGAGATGCTAAAGACTTTTCTCGTCAATTTATAAAAATGTATCCCAAATTGATATCAGATAAAAAAACAGCAGTGCAAACTAAAAAGATATTGAAGTATGTGTATGAAGTGATTGGTGATAAAGCATTTTGGAGTGCAATGAAAATCAAAGACAAAACTAAAGACAAAGAAGAAATGACTGATCTCATATCAGATGCAGATAAATTCAAGAAAGGAAAATTTCGTAAACCAGAAGAACATTGGGTGGGAGATATGATCAAAGCTGCCAATAAAAGAAAAGCAAGAGAAGCTGTATTAGGTGGTGGAGCAGGTGGAATGCCAATGGGTGGTCCAGGAATGGGTGGAGGTGAACATGGATTGCTTGATCCAACGCAATATGCACCAAAAGGACCTCTTCCTGCCTTAGTTCTCAATAATTTCACACCAATGCATTTCACTGCACA
ACAATTGTTAGCTTTGCGATATGGTCCAGTTCCACATCTGTGGGAGATAAGTAATATTCGTGATATGGTTGGTAGATTACCAGCAGCTTTCAGCATGGATTGGGAAGAATTCGAACAAAAAAAACAAACAGTTGATACTAAAGCTACCATGCCTTACCTTAGAATTGGGAAAGTGCGAGTGGATGTAGAAACAGGCAAAGAATATCCAGAAATAGTACAAAATGGTAAAATACAATTGTTTGCAAGTGTTCCACCAACAGATGATAGAGCACATCAATTGTTGGTAGAAGAGAAAGAGGCTGCATATATGAATAGATTGAGAAACGATGATAATCATATTCAAGCTATTCGCCAGATTGAAGCAGAAAATTCAGAATATTTTGATCCAGCCAATGATGGTTCATCATGGTTGGCATTGTATGATGTCAATGGTTTTGATGAAACTGAACGTCAAATACAGATGATAGAAATGAGACAGATATATGGGGTAAAGTCATTGCAGACATGGTTGCATACTGGTAAGAATGTTAATGTTATTGAACAACGAGTGGAGAAAATTATTGATAAATTATTGCCGTGGTTTAAAAGTTTGATCAATACTAAAACTGCAATGAGTGATCATCGTAGATATGAGCATGTGTATCAAATATTGCAATTGCTGAAATTTGTTTGCAAGTTTAAAGCGCTTCGATCATTGAATATATCGGTGTTGAATGCGTTTTTTGAGACTTTGTTGAATGCTATCACATATGATAATATGCCTGCGCTTTTCAATCAACATCATAAGCGAGTGCTTGGTGAAATGAATTCAGTTTTATTGACATCTATAAATTACACACCGCCAGTGGAAACATTATGTATTTTAATACAATTGCTATCAAAATGTGATCCAACCAATGATAGTGAATCAAAAAAGTTGTATGCAGAAGCTATTTCTAAATTAATTTACAGACTGTTGAATAAATTCGATCGAGCAATTATGAAAACAGATGAAAGTAAAATAAAAATTCTACAAGAAATTCATCAATTTTTTTCTTTGATATCGTATGAAACATGGCAAATGGCTGATAGTTCGAGGCGTTTTCCATTGAAGATTATTCAAATGGTGATTGCTAGAATTGTGTATTATTCTGGACATGAAACTGCTCAATTGTTGGAAGAGTCAATTCAGCAAATGCAAATCACTCAGAATATGAATGGAGGGCCCACTCCACTCAGAGATACATATGCTAAACGGTTGATATTGAATTTTATTCAATCATGTGAGAATCGAAGATATCAGCAGCAAAATCCGGGGAAAATGAAGAAAGTGCCGAAACCGCATTCGCATTATGATGATATGGAACGAAGGCCAGACAATATTTACCAGGCACAATCTGAAATGCCTTTTGCTCAAGGAAACAGAGGAGGTAGTGTTAGTACGAGTCATCAGCAGGAAGCACCTTCGCAGATTATGCGTGCTAAGAAAGAGATTTCAGTGCAAGCGCAATCGTATGCAAAATCTAATGCTATTGATTTGAATTTCTCGATATGGGCTGGAATGACTGGAAGAGATCGAAGGCAAAACAATTTTGGTGTTTATTAAATGTGAGTTGATAGGAAAAATATATTTATATTGTTTGTGTTGCATCTTGCATGCGTAGTTTTATGTTTTCTGTGCGTTTTTTTTGTTTTCAATCATGAGTTCAAGTAATATCACTGTGCATGTAAATACAGCCACTTTTGCCACGATTTATAAACACTTGCTCCATCAGCATCATTACTACCGGTATGCATATCAATCTCAGAAGTTTCTACGACTAAATTACAAGATTGCTGCTTCTGCTTTTGATCATGCTTGTAATGACTAAAAAATAGATTCATAGTATTTTCATATATAATATCAGACAAGTGATTAATATCCTCTCCTTTGATTGCTGCAATCACTTCCAAAACTTGAATAATATGACATGGCTCTGTTCTTCCTTTCTTCACTTGCAAATAATCTGATGTTTTCTTAT
ATTTCTTTATCTCATTCTTGAAGTGTGTTTGCTGAACATAAGAATATCCAGCAAACGATTTTTTAATTCCACAAAAAGGACAGTCAGTTTCAATCAACAATCGATCATTTGGTATGTATCGAATCATATCCAAATTATCATTGGTCTTCAATGAACATCCATTTATGCCAATGTACATTCCTTCAAATTTCAAAAACTCTTTCATATCTTCAATAGTATCAGTAAAAGAATGCACGACAGCAGTTTTCCATCGATGTCGATTTTTAGACAGGAGTTGAATGAGATCTTTAGTGGATCGTCGGTTGTGTAAAAATATTGGTAAATAATTGCTGGCTTCACACAAATCGAAATGTTTTTCAAAATGTTTGAG
>NODE_156_length_3937_cov_77.750514_g133_i0
TGTTCGGTTGCTGGTATCTCTGATCTTGCTCGCTATGAAGAGCTTACTGAATCATTAATTTTGATGAGAATTGATGAAGGATTACAACATTCTTTATGGGAGATATCATCCGGTATTTTCAATTTAGGTAATATTGAGTTCACTCGCGAAGGCGATGGTTTTGCAGCAATAAACAAAAAATGCCCAAAATTCATCCATGCAGTTGCTGAACTCTGGGGAGTCAAAGATAGTATGATAAATGATCGTTTACTTACTAAGAATATGAGAGTAATGAAGAAGACTATAACCCAACGGATAACTTACGAAAACTCTATTACAAATCGTGATAGTATTGCTAAAGGTATTTATGAGAATATTTTCTTATGGTTGGCGGAGAGAATTAATGCTGAGTTGTATCAAACAGAAGAGGATGTGAAATCTATTCTGTTCATTGGTATTTTAGATGTTTTTGGTTTTGAGAATTTTTATATTAATTCTTTAGAGCAGTTTTGTATTAATTTTACAAATGAGAAATTGCAGCAGTTCTTTAATTATCATATTATTAAATCTGAGCAGGAGGAGTATATTAAGGAGAGTGTGTTTTGGACACCGTTGTCTGTGCCAGATAATATTAATTATGTACATATGGTTGAGAATAAGGATCATGGGTTTTTTGCGTTGTTGGATAGTGCATGTAAAGCACCGAAGCCTTCGGTTGAGGCTTTTATGCAGGAGTTGTTTAAAAAGAATGGGAAGAATCCGTGTTTGGCTACGATTAATGCTCCTGGGGTTGGGATGTTTAGAGGTGGACCAAAGAATGCTAAAAAAAAAAAAAAAGGAAAAGGACGTTCAAGAGGAATATTCACTGGTTTTAGTATTTTACATTTTGCAGATCATGTTGGATATGATGCTAAACATTTCTTAACCAAAAACATGGAATCTGTCCATGCAGATACTGCTAAAATGATGGCCAAATCAAGTAAAGCTTTAACCGCTCAAATCGGTGGTCCAGTGAGTGGCAGTAAGAAATCCAGAAAGAAAAAATCTGTAACTTCCGTATTCTTCAGTGGTATAAAAATATTAATGAAAAACTTATCTGCAACCGAACCATACTTCGTAAGATGTGTGAATCCAAACAAACAGAAATCCAGTAAAGTATGGAATGTAAACTTAGTTAAACACCAATTACGCTGTGGAGGTTTGGTGGAAGCACTAAAAGTATTAAAATTAGGATATCCCACACGTGTACCCTACGCAACCTTATTTGATAAATACCATGGAAATGTCACTAACCCATTGATAAAAAACATGGGACCCGAAGCATTTTCAACTGCATTGTTAATTGCTTTTGATGTAAGTGAAAATGATTACGAATTAGGTTTAACCAAAATATTCTTCAAACCTTCGAAAGCAGCAGTTTTAGAGACAATCATGGGTCAAGCTGGTCAACCCTTAAGTAAAGCACAAAATGAAAAGATCACAAAATGGGTTGTACAAAAACGTATCAAACAAATGATGGGTACTTGCAAAGCGTTCTTGGAATACAGAAAACGTGTACGTTTAACACGTGCTGCTCGCAGATGGCAGTATGCTGGTAGAGTTGCTTCTCTTTTGGGTGGTAGTGTACTCAGTCATTTGTATATGGCTCGTGAGATAATACTAAAACGTAAGAGAGAGGAAGCATCCGTGAAAATGCAATCATTCTTTCGTGGTTCATATGAACGTGGAAGATATATCAAACATATTGTAAAAGTAAAGAAAGCTACTAAAATTGTATGGATATCTTATCGTAGATGGCAAGAGAGAGTCAATCTTCAGATATGGTTGGATGTCAAAGTAGTAGAGACACGTAAAAGAAAAGAAGAGGAAGAGAGAATCCGAAAAGAATTAGAGCGTGAAAGAAAGTTGGAAGAGGAGAGATTAGAAAGAGAGAGGATATTGGAAGAGAAGAGACAGGAAGCAGAACGTTTGAGATTGCAAGCGTTGATGGCAGAAGAGGAAAGACAAGCAGAGATGGAGAG
ATTACGTGATGAAGCAGAACAGTTACGTTTGGAAGCAGACAACAGAAAGGTTGAAGCAGAGAAAGAGAGAATACGTTTGGAAGAGGAAAGAAAAGCAGAGATGGAGAGATTACGTTTGGAAGCTGAGAAATTACGTTTGGAAGCAGAGAAGAGAAGACAAGAGGAAGAAGAAGCAAAACGTGAGGAAGAGCGTATACGTAAGGAAGAAGAAGAGAGAATACGTGCAGAGGAGGAAGCTAAGATATTGGAACAAGAACAGGAGGAGGCTCGTAAGAGACAAGAACAGAAGTCTAAGTCAATCAAACAGGAGAAGAAGATTATTGAGTTAGAGAAACGTGAACATAAGAGAAGAAAACGTGAGGCGAGTATAAAATCGAATAAAAAGAAGAGGAGAAAACGTGAGGATGACAGACAAGATGAAGAAGATGCAAGATTTATAGAGAAGAATTTGATTGAGATGGTTAGTGATACGGATGATGAGTCTGCTAGCAGTGAGACTGACAGTGAAAGTGATGGACCAATTATGAGTGTGAAAGAGTTGTTGAAGAATTTCGATAAAATTGCAAGTACTGGTCAATTGTTTTTGAAGTATACTGGGAAGAGAAGACGTAAACCGCAGGATAGAATTGTGAAAGTGTCGTTTGATAATAATTATAAACCTAAGCAGATCTCATGGGGTAGTGGTTCGAGACATATTGATTTTAGTGATATATTGTATATTGCGCAAGGACATTGGACACCAGTGTTTCAGGCTAGAACAGAGTCTTTGGATGCGAAGTTGTGTTTTAGTGTGGTTGGTAAACAACAGATATTGGATGTGCAAGCACAGACTAAAGATATGTGTGAGTTGTGGGTTAAAGGGTTACGCCGTTTGATTGGACAAACAGATGAACAAGCACTTAAGTTGTGCAAACAGAATTTGGAGAGTGGGAATTTGCCTGGTTTTAAGAGTAAAGATCAGAATGAACAAAAACGTGCAGAAAAGGAGCATAAGAGACGAACGAAATCGTTGATGCTTTTACAGCAGGATTTGTTTGTGATGACAACTACCACTGTTTTTCGTAATTTGGATGAAGAACGCATATGGGATATTGATCAACAAGTGCGTGAACAGTTCAATGCAAAAGTGTTGTATGAGCAAGCGTTGAGAGAAGATATTCCATGGCGACAGTGGAATCACTGGATACGTGAGAAGATTGTTACTTATTTGCGAACGAACAATCGAATTGCAGCTCCACAGCCAATGTATGGTCAACAACAGTTTGGTGGACAACCGCAGTATGGACAGGCACAGCAAATGTATGGTCAACAGCAGTTTCAACAGCCAGTACAACAGTTACAGTATCAGCAGCAGTATAATCAGTATCAACCGCAACAACAGCAGGTGTATGGTGGTGGTCAGCAGTATGGACAGCCACAACAAATGTATGGTCAACAGCAAGCAGCACGGAGTAGTGCTTACAGTCAGGGTGGTGGATATGGTGGAGGAATGATGCAACAGGCACAGCAACAGCAACAACTGCAGCAACAACAGCAACAACAGCAACAGCAGCAGCAACGTCAACAGATGCTGAGACAGCGGCAAAGTAGTCAGGTATCTTTACCGCAGATGAATACTGGGAATTTTAATATGCCTGCTCAACAGCAGACGGATAATTTTGGATATGGTCAACAGCAGCAGCAGGTGCCAATGCAACAACAGCAGATGCCGAATATGGGAGGAGGAGGAGGAGCTGCGTATGGTGGAGGTGGTGGTGGTGGTGGTGGAGGAGGAGAGAATGATGAAAATTGCACATTGATGTGAAAACGATGAAAATCATATCTGAAATTTTTGTGTTCATATAGAAAATGATTGAATTTTGTGCTTCTTATGCTTGAAAAGTACATTTTTTTTATTGGTAATGTTATTTTATTATTTTTTTTTTATTACTTATCCG
>NODE_162_length_3902_cov_67.743317_g126_i1
ATGACTTCCACTTTAGTAACCTTTGAGCCCTGTTCTTGCCCTTCAATCACCACTTTGTTCAAATCACAGCACAAAATGCCGTCGGATATTTCTCAGTCATATGAAAATAAATATTTTATATTGATGTTTCATCACAAATGTCTATTTATTTGTATTCTTATTTCCACTCGCTTTGGATGAACTCTTACCACTGGTGTTCTTACTACTCTTCTTAATGAATGGTGTCATAGGATCATAATTGCACACAGCAGTCGACAAAGAAATATTAAATGCACTATTCTTCACAATCTGTGATGCATGTCCAACACATTGCTCAATAGCAATATCACATCCCATATCCATAGCCTCCTGCAATGGATAACCACTTGACTTACGACTCATATCACCTGAATTGAATTTCTTATTCATAGTCAAAATACGCTTTGGTAATTCATCACGTATTTCAGGCTCCAACAATGCAGAAGATGACAACTGCCGTATCCACAAATGCTTACGCGACATCATTTCCAAATTATGTCTCACCAACGAACCCATAAAAAAAGAATGAGACAATACGTAATATCTGTTGTAAGCTGCTCCCCATCTCTTGGATAATATCGATTGATCTGCTACTTTCATATCTGGACCAGTGTTCTTTGGTGGTTCATAATCTGGTGGATAACACATGAATGTCAATAATTCCACCTTAGATTTAGCCAAACATCGAAGAGGATCAACCACAATACCTGCCCATTTTCTGAATTGCATCTGCCACAATAGCTGACAACTTACATCTATTGCACTGAAAAAGGAATTTGAATCTACACCCACTTCAAATGGATGTGAATGATACCAACCAATAAATCTATCTTTTCTTAGCATTTCAATTCTATCTTGCATACGAGTCATGAAACCTAATACTTTTTCATCATCTGCTACCACTCTTGTTTCTGATCCTTCTACAGGTAATTCAACGCAATCCATAATGATGATAGTTGCATGGTCAATTCTACCCACTAACAATCCCATGATTTCTATGGGTAATCCACCTTTTTTTCTACCTTGATTTACACCTTGTATGGCATGTTTTACCATTTTTGTAGCTGCTAAAACGTTGATTCTACACTTAGTAAAATATCGAGGATTTTTATTCCATGGGGCTTCTTTGTTCAATTGCGCGATTTTCTTATCGTCATAGGTGTATACACTATCTTGTATTGGAATTGCAAATGTTGCTGGTTTTTTTGAACCAGGGTCAGTCTTACGTTCAGAACGCGATGAATTATTTCCTCTGCGCATGATCAAAGTAGTTTTCAAAAAAAAGTAAAATGTAGGCAGAGGACTTGAACAAATACAATCAATCAATCAATCAATCTTTCATAAAAATTCAATAATTTTTCTTCTTTCTTCCCTTGCTATGAATATACCACAATAAAGTTTGAGTATAAATACGAAATTCAAACGTGCCAACTTCCTCATCTCTATTCTTCAAAGTTTTCGACAACAAATCAGGCAACACATTCCAATGTTTCTGATGCTGCTTCAAATCATGTTCACTCCATTTCATCAAATGCTTATAACTCCCCATAAAAAAATCATCATCAGCTTCATCATAGTCATAAACCTTGATATTAATCTCCAATCTCCCACCAGTATTGCCAACAATCCCCCGAGAAATACCACTGTTATCTTTCACAGACAACCCACTATCTCCATATAACTCATCATTCACTCTTTTATTAAAACCTGGTGAACCTGCCCTTGCCCGCTTACTCCCATGCGTCTTCATTTGCATATCAATCAAATCAGGAATATCTTCAACCGGTATTATAAACAACAGTGGTTGTCTCCACTCAGGATTGAGATTCTTTATAATAGTCTCTGTCCGTTTTACTTTCCCAGCAACCGAAACACTAGTGTATGGATCACTTGTACCTGTACTCTCATCATTCTCATCTTCAAAATCATGTGCAGGTAAATTCATTGCACGTTCAACAGCTATAACAATAGGCACTCCTCGA
CCTTCACACATACGCAAAGCATCCTCATGTCTGAATTGTGTCACCAAAGTAGTAGTATAAAACTCCCAAGTGACATCATCGCACACTTTGGCTGCAATCATGAATGTCAACAGTTTGGATTGTTTGGATTTCAATGCATACATCACTGGACTATCATAATCTACCACTGGTAATCTCTCCACAGATTTACCCACCTTCCATAGTAAATCAGTTAAGTATAACTCATTTGTGGTATCATCTGCGATATCTATGAATATCTCTGCCACATCTCTGGCAGATACTACTCCATCATCACTTTGACGAAGGGCAGCACACAATTGAAGAAATTCACGGAAATCGAATGTACGACTTTTATCATCATCGAATGCAAGGAAGAGTTCTTTTATTTCGTTATGGGAACGACTGAAGATTACAGATACCAAACCTTTTGGTGAATCTGCTAACATTTGACCTAAGAAACGACATTCATGAGTGGTCAAATTCTTCAATGAATATGCACTTAATTTTATTGTATGTAGTGTACATTCTGGAGATATGAGTGCTTTAGATACTAATCCAACACTATGTGAATCAAATCCAAAGTCAGTCCAATTCACTGCTGACCGTTTACCTCTCTCAATGGATGGTAAAAGTGGTACAAAATTGAATGAACCTTGCAATACTACTGTCACATATTCATAAAAATGAAGACCTGATTGCTTACTCTCTATTTTAAGTACAACTGTCCATAGAATTAATACTAAAAAAGATGTAATTAGTGATAAAAGAACTGCACCTAAGTCAACATTACTTGGATTTGAACGAGAGAAACGTGCAAAGATATATAATTGTAATACAGTTTGACATATGGTTTCAGCAACCACTTCAGAAACTCTACGCAATTTTTTATACCCTCTTTCAGCATGTGATTCTTCACGCAATCTTTTCTTAGTTATACACAAATAAATTGGTTTGATAATAAAATCTTCAAACAAGAGATAGATATCAATAAATATGAGTATAAGAACACCGATGGGTGCAATTGCAAATAAAATTACAAAGAGTTGTATTTTCTTATCTGGACTTCCAGTCACTTGTTTACTGACAATCTCCTCTGATGAACTTGCGTCATCTTCTTTGTATTTATCATTGCTATCATAATCATAAAATTCTTCTTTTTGTTCGTCAGTTTCACCATCAACAGTGGTGGATACTTTCAATTCTGATTCTGTGGATGATTCCATTGCTAATTTTTGTAGATATCCAAATGCAGCAGCCCATGCAATATAATATGGTGCCAATATAAACACACAACTCAACATAAAAAGCCAATTTTCTTTAGCATCAGATAATGTCTGAGCAACACGAATGTCAGTGATGAGATCTGCAAATATGACACATTCTGAGAAGAAATGTAAGAGTAATATCCCACTGACCACATATATCGGATGTTTTCTTACTTTGACAGTCATCGCTCGTATAAAAGTAATATTATGCAAGAATCCCCATACGCCGGATAACTCTTTTGGTTCAGGAATGTTCACCAATCTCTTCAGTGTTTCTGGTGTTACTTCGGATAGTGTTCCATCGGTTGTGCATGATTTTCTTATCCATCCTTGAACTGGAAGTACTACCTCTAAATATAAACCATTGTCTGCATTTGTTTTACCTGTGGTCACTAACACTTCATACTGCTGAACTAATTTGCATATTTTATAGTCTGCATTTGGTTCGGGATGGATGGGGAAACGCCAATCTTCGTTTACCATATACATTTGACCTTTTTTTAAATCTTTTGGTTTCAAATATGATTGGTCTAGAGTTGGGCCAGAAACTTCAAACGTTATTTCGTTGGCATTTAGAAACTGCTCTGCTGCCTGTTGTACTATAATAGAAGTCATGGTTTGTTTTTGGAATGTAGGA
>NODE_166_length_3897_cov_25.502339_g139_i0
TCACTTAATCATGTCTGGTAAAAAACTATTCAACTTCAAAAAACGTCGCGGCATTTCAACCGACGAATCCCCCAAACAACAAAAACGTAAACACAGCTCCGGCCAAAAACCACGCTCCCAAACCTCATGGGGCCTAATGTCCCTAGCAACCACGCGATCCAACGGCATAAAAAAAACCAGCGCAGTTCCCTCATCAATAATGACAAACGTCCATCGCATGATAAAAACCTCCTCAGCAACCAACACTTCAATAACCCCAACCCCTTTGCACAACCGCAGTATCCACAATTACAATATCAACACTCCCTTGGGTTACTCATCATCAGTATTGAATGCACGCTTAAGATCTCAACACTCCTCACACGCAAACAACAACAACAACAACCCATTGGATAACATCATCAACCAATTGCACAGTCCACGTCGTTCATTACTACCCACACATATGACTCGACCAATGACAGACCGTTCAAAAACCACTTCCAATAAACCTCAACAAAAAATCATCCATAATACAATGTACTCCGTATTCAGCAACGCAGGCATGGAATCATTGATGAATAAATTGCAGGGACAAACCAAAGATACCTTCAGCAGCAATACAAACATCCATCAAACTCCTCGTTTACAACCATATAACAATTTATCCAAGAGTAGTAACAGTCCAACTGCATTTTCATTGCTCAATGGTGGAGGATATGGTCAACGTTCAAGAAATTCAAGTTTGAATGTTGTCAATGGTGAGACATGTACTGTATCCATTGAAACTATGGTTGATAATGTAAGTAATAAATCTGTGAATAGCAATACTAGTGCACCACCCAATAGTTTTATGTTACCTTCGAATGTCATGAAAGCTATGACAGATGATGATGTAAAAGCATGGGAGCAAGAGTTGATTGAATGGGAAGTGAAAGGTGAATGGATTGAATGTAGAGAGACATTGACTGGGAGAGTGATGTGGTATAATTCTTTATCAAATCGTATTATATTTGATTCACCTCCCGATGGCGTACAACCATTACAACATACAATACATACCAAAAGTTTAGGTCCAAAATCGGATGATTACTTTCTAAGTCGACCATTGACACCATACGAAAACGAAGATGTAGTTCAAGGTTTACTAAAATTCGATGATAGCACATGGGACTTCACCAATGATTTGGTAGATGTATATCGTACACGTGCAGCCATGCGAAATCATAAAAAAATGCTACCTAAACCTCAGCAATATTGTTTTCCAGAGTATTTTTCAATGTCTCCATTGCCTGAGTTGCCTGGTGGTCGATTCTTAGCTAAGATACGTTTACCAGAGGAGTTTTCTGCACATCGTAGATACTCAAGTGTGCAAATAAAAGTGGATTACACTGCAGCATCAGATGCTATCAAACAGGGTGTGGATAAATTGGATGAACCATTCAACACTCGCAGACAGGATTACATTCTGAAAGTTGTGGGACAAGAGGCGTATATGTATGGTAGACGTAAGATTATTGATTATGAAGCAGTAAGAGATGCAGTTCGCAATGAGGATGATGTTGAATTTGTACTTATTCAAAGATCAGATTTCAAAGAAAAAGTTGCGGAAGCAAAACAACATCAGTTGGAATATGCTAAGCTTTTTTCTACTGCATATCCTGCGAATTTGGTGGAGTCAGCGAAGGAAACATTGAAATATGATTTCAATAATCTCAGGGATAATTTACCGAATATGAATTCGAAGAAACAACGGTCAATGATAGATTTTGAACCACAGGATTATATATCTTTGTATGATTGCGACTGGCATTATAGAATAAAAATTGAAGGGTTGACTAATGCTACATCTTTGCCGAGATTTGATGATCAGTCAATGAAATCTATTTATGTGGTAGCTGAGTTGTGGATGGGTGATCTTATGTTCGATCATGCGACACTTATGACACGTAATTCATATCCATCGACGAATATTCGTTGGGGACAATGGCTTTCATCTCGTAATCAAACATTTGCGCAGA
TACCGAGGGAATCTGTGCTGTGTTTTATGGTTATGGGTATCAAAGAAGGCGATAATAAACCTCAATGTTTGAGTTATTGTAGATTACCTTTGATTGATCATCGAAATTGTTTGCGTTCTGGTAAATATTTACTTAATATGTGGCAGATTCCAGTGTTCAAAATGGTCAAAGATGGTCCTAAAACTGATCCATATTTAGATCGTCCATTCAGATATCGTGGATGTATGCGTGATAGAAATATGAAAGCAATGGCAGGTAATGATGATGAAGATTATGAACAATGTCAATTGCTCATAGAATTTGATGAGTTTGCATTCGATGTCGTTGCACCCAAGTATTTACCAAAAAAAGATTATAGCGAAGTTGATGTTGGTGGTAAATTGAATCATACACAATTGACCAAACAACAAAAGAGTTCCATACATACAATAATAAATAAAACACCTTTGGAAGTGCTGGAACAAAAAGATAAACATCTTATTTGGCAATCACGTGATTTACTATGGCATGATCCAAGCGCTTTGCCAGCTTTCTTACGTTCTGTGAATTGGACCAATTTGTGTCATATCTCAGAAACACATAAGTATTTGGATTTGTGGGCAAGTCCCAAAAGACCTGAGAACGCTATAGAATTCTTAGATTATAGGTTTGCAGATACGAGAGTGAGAGAGAAAGCTTTGGAATGGTTGGAAGATTTGCATGATGCAGATCTACAAAAATATCTGTTGCAATTGGTTCAGTGTTTGAAATATGAACCTCAGAATGATAGTGCACTCTCTCGATTTCTTATACGTCGTGGACTTAAGAGTCCATATCAGATTGGACACTTTTTATTCTGGCATTTGAAGGCTGAGTATCATAAGGAACAGTATACAGAGAAGTTTGGTTTACTTATGGAAGAATATCTGTTGCATGCTGGAGTACATACAGAACAATTGTTTGTTCAGCATGCATTGCTAAAACGTTTGGAATTGATTGCAGAGAAGATACAACAGGCAAAGAGAAGTATGAGTAGTGATCAATGTAAGAGATTATTTCGTAAGGAATTGTATGCACTGAACAAAGATTTACCAGATATGCCCATACAAATACCATTGAATCCGAAGTGGAGTGCTAAAAAGATAATTATTGATGAATGTCGATATATGAGCAGTAAAAAAGTACCTCTTTGGTTAGTGTTTGAAAATGGTGATGAATATGCTCCACCGATAAAAATTATGTTCAAATCTGGAGATGATTTGAGACAGGATATGCTGACGTTGCAGATTATAACTATTATGGATCGTTTGTGGTTGGACAATAAGTTAGATCTGCATTTGAAGCCGTATTCGGTGATGGCTACGGGTGTCAATCGCCATAATGAGGGAGTTGGTATGCTTGAAATGGTATTGCAATCGTGTACGGTCAATACTATAAATGTTGAATATGGTGGGGCATTCAATGAGAAGACGATTGATTCTTTTTTGCGGAAGTATAATACGTATGATCAGTCATTGAATAAAGCACGAGAGACATTTGCGCGTTCTTGTGCTGGATATTGTGTGGCTACTTGGGTTTTGGGCATTGGTGATCGACATTCGGATAATTATATGGTCACTCAGAATGGGCAATTCTTCCATATTGATTTTGGACACTTTTTGGGGAATTTTAAGTCGAAGTTTGGTTTTAGACGGGAACGGTCACCGTTTGTTTTTACTCCGCAGATGAAGTTTGCCATTGATTCTGGTTTGAGAAAAAATAAATTGTATTATGATTTTCTGGGATGGTGTTCGGAGAGTTATAATGTGTTGCGAGTGAGGAGTCGTTTGTTGTTGGTGTTGTTTTCGTTGATGGTGGCTGCTGAAATGCCGGAGTTGATGAGAGAGTCGGATATTGGGTATTTTCGGCAGATGCTGAA
>NODE_170_length_3872_cov_43.923097_g143_i0
TGTGCTCTTCCGATCTGACTGATTCTTCAACTAGACTCGTAAACTCGGAAACCCATCAAACCCTAGAAAAACAGCCATTCTTAAGAAATATGGATACCGACGACGAAGATGAAGCCCTAGCAAATGCCCCCAAATACGAAAAGAAATTCCTAAACATTGGTGGAATGACCTGCGGTGCCTGCTCTTCAGCAGTAACCAAAATAATCATCGAACAAGAAGGCGTTCAATCTGCAACAGTCTCACTAATGATGCAAAGAGCAGAAGTAATTTTCGATCCACTCATCATTGACCTCCCTCAAATCATTGAAGAAATCGAAGACACAGGTTTTGATGCCAGCGAACTGAAATTATCCACAACCAATTCCAATCAATTCATGATACATATATTATACCCATTAGCATCCACCACTGAAATACAAGTTACTGATATATTACTAACCATAGATGGCGTACTAACTGTAAAAACAGTTGATGAAGGAATTGAGAATCTTGGTCTCATTGATAGTGATTCTATGCGTTCATTATCAATCATAGATGGTGGTGGAATTGGCGCTGATGATTTATATATCTCTAAATATGGTAGCAATGGTGATCGTACATTGGAAAACGCATCAATGTATGTGAACATAAAATTCGATCCAAAAATCACTGGTATGAGAAGTATTACAGAGTGGATCAATTCTGCAGTCAATACTGCCTTCAGATGTAGGATATTGTTTGACAGTAGTGATATTTCACAGCGTAAAAAAAATATTCAACGTGGAAGAGAAGAAGAGATACATAAATGGAGATCACTCTTAAAATTCAGTGCATGTTTTGCTATTCCAGCATTTGTTCTCGCAATGGTATTCCCAATATTCCCAGCATTTCGCAATGCATTTGATACAAAAATACTTCCAGGATGTCTTTTACGCGATGCAATATTATTCTCTTTAGCTACACCCATACAATTTGGTCCACCTGGATTGCTATTCTACAGAGGTGCACATAAATCTTTGAGAGCAGGTGTAGCCAATATGGATGTTCTGGTTGCTTTAGCTACTACCATATCATATATATTTTCATGTTTCAGTATACTTTTATGTATTATCAATGATCATTCATCTGCAGATGAAGAGACAACATTTGAGACATCTGCTTTGTTGATTACAGTTATAATATTAGGTAAGTATATGGAGACTTTAGCAAAAGGTAAGACATCACAAGCTTTAGATAAATTGATGAATTTAGCGCCATCTACAGCGAGATTGGTTGATAATTGGAATGATGATGAAAATGAAGAGAAAATTCATCATCAAGAAGAAGAAAAGTCATCGATGACAGATACTGGCATATTTGCAGCAGAAAAAGTGAATGAAGAGCAACGTGCTTTACACATACGTGAGATTGATGCACGTTTAATACAATTGGGTGATATTGTACAAGTACAACGTGCAATGAAAGTCCCCTGTGATGGCATAATTGTTGAAGGTGTCTCCAGTATCGATGAAAGCTTAATTACAGGTGAGTCATGTCCAGTAAATAAAGAGATCGGTGACCAAGTCATTGGTGCTACAGTAAACATAAGTAATACCATATACTTTCGTGTCAATAAAATTGGAAGTGAAACTGTACTCTCAAAGATTATAACCTTAGTTGAGAATGCACAATCTTCTAAAGCACCCATCCAAAAAACAGCAGATTTGGTGGCCAGTAAGTTTGTACCTGCAGTGGTAGTTATAGCCATATTAGTATTTCTTGGATGGTTTTTTGCTTTGGAATATGGTTTGGCAGATATGCGTTCTTTGTTTGATGTTGCAATGAGAACAAGTGCAGCAGTGTTCTATGCAGTCATATTTGCAGTCACTGTATTAGTAATCTCATGTCCATGTGCATTGGGTTTAGCAACACCTACAGCAGTGATGGTGGCCACTGGAAAAGCAGCAGAGTTAGGTATACTTTTTAAAGGAGGTGAACCATTGGAGATTGCAGGTACTACCAATTGTTTGGTGTTTGATAAA
ACTGGAACTTTGACTGAAGGTAAAATGCAAGTGGTAAATATCATACGTTTGACAGATGGTATGCTGTATAATGTAATACAACAGCCACAGTCATACAATAGTAGTACTTCAGATCATGTGAAAAACAGACATGATTTTTGGAATTTTGTGTACGGTGCTGAATCTCAGAGTGAACATGCAATTGCAACTGCAGTTTGCAAATTCATTGAAGGTAAACCAAATGTTTCTAAATACGTAACTGCTGACAGTAAAACTAACACTATAGATATGAGTATCATAGATAAAAAAAGTAAAACAACAGCATATGAGAGAATAAGTACTGATGATAGCACTATAGATGTTGGTGAACAACAAGAGATAAGTTATAATGCAGATAGTTTTTATAGCATAAGAGATAATGAGTTAGGGAATACATTTGAATTCAAGCAATGGACAGCCAATGAGTTCAATGCAAAAACAGGCAAAGGAGTCAATGCTGTTTATGAATTACCCACTTATTGTCAAATGTTCATTGGCAACATAAAATACATGAAAGAGAACAATATCAGTGCTATATTGATGTCAAAGTTCTTGACCAATGATAGCGTTGACAATGATGTATACTCTGAAGTAAAAAACAGAGCAGATGATGAAGAACAAAAACATGCATACGTAAGTGTTGAAGAAGAGAATGCATATACCATAGTAATGGATAAAAGCAATGAACTAAAACGCAAAGGATATACAGTAATATTTGTAGCATGCAACAAACATTTGACTGCAGTTTTGTCCATTGCAGATAAATTGAAATCCGATTCGTATGCAGTCATACAATATTTGCAGCATGAGATGCATATACCATGTTATATGATCACAGGTGATAATGAATTGACTGCATATGCAATTGGTGATATGTTAGGTATAGATAGAGATCATATCGTTGCTGGTGCACAACCGGAAGATAAACAGAAGATTGTGAAATTACTGCAGCGTACGCGTGCAATACATGTGAGAGAGGGCAGAAAAAATTTCATCACTGCTTTCTTTTCTGGTGGTAGTGGTGGTAGTAAAAAAAGTGTGGAAGCAGTTGAATTTAGTTTGATACAGAATGGAAGTGGTGATGGATATGTTAGCAGTCGTGGTAGCACTAGCAGTAGTGTAAAAATTGATGAAGTGAGAAATATAGTGACATTTGTTGGAGATGGCATCAATGATTCACCATCTTTGGCACAAGCGGATGTTGGAGTTGCAATTGGTGCAGGTACTGATGTTGCTATTGCTTCAGCATCTGTTGTTTTGATGAATGATGGGTTGAGTGATGTTTTGAATGCAATTGATTTATCTAAGGCTACACTTATGCGAATAAAATCTAATTTTGTATGGGCTTTGTTGTATAATACATTGATGATACCGTTTGCTGCGGGACTGTTTTATCCATGGTTGCATTGGGCATTGCCGCCTTTTATGGCTGGTATTTTGATGTGTTTGTCGTCGATCAGTGTTGTGTGTAATTCTATGTTTTTGCGGTTGTACAAACCACCGGATTGGAGGAAGTCGTTTGATGAGAAAGAGAGGAAGAGGAGAAAGATGGGGATTAAGAAATATGGGGATAATCGTAGTCGGGGAAGTCGAAAGAGTAGTTATAAAGTGGAGAGTAAAGCATCGATTAATTTGTTGGAGGAAGATTGAATTTATTGTTTGTTTTTGTGTGTGGAATATTTAGGTGAACTTCTTGATGTGTAGTGAATGAACTGGGTATTAAACATATTTATTTACATTGTACTGTAGTCTGACGTACACTTGTAGTGACAGGGTAGCTTGTTTTTGTATGGAGCCAGTCTCAGTCAGACATCTACTACTTAAATTAAAGTTTTCCACTGAAAAAAAAAAAAA
>NODE_181_length_3796_cov_18.176675_g149_i1
ATAGTACACATGTAAAACCAATATCAACTGCAAATGATATACAATCACAAACTGTTTCTATTCATACATCATCCAAAGATAAAAGTACAACAAATGAAACAAAAATTGAAGAAGAATTGAATACAACTGTGATTGTTGATGATAGCGAAGAAGAAATTGAATATCATATAAGAATATTTTGGGCTGGCGATAGTCGAGCAATACTTATTCGTCATTCATCAAATGTGGAATCAAAAGAAATTCAAAATTTACCATACACAATAAAAAATATTTTACAAAAGAGAGATATCCCCAATGAAAATACAAATCAATCACAGACATATTTCTTTGAAAGTAATACTAATCAATCATCAACGCATACCACCCAACCCAAACAACCCAAACATCAATCACCTCCAACTCTCCCACATTTCATTGATTTAACCATAGACCATTCACCAAAATGCAAATCAGAATACGATCGCGTTCTAGCTGCCCATGGCCAAATAATATCCAACCGCGTTGATGGTAAACTAGCTCTAAGCCGTGCATTCGGTGATAAATCAATGAAAAACAATCCCAACCTATCATTCAACAAACAACGAGTTATCTCTGTATGCGATGTACAAACAATAACTGCCAAATCCAATGATTACTTATTCTTATTCTGTGATGGATTGGTTGAACAATGGAAACATCCCCAACTAATATCTCATCTGTCCACAAATGTTTCTCATTTCAATGACAATGTGTATGCATTGGGTGATACATTCGATGATATCATTGATGCTGGATCAAAAGATAATATGTCTGCAATGCTAATACAATTTACAAATGGTGAACAATATGGAGATATTATGGAAAATATATCAAAACCTAAGACATTTTTACCTGGACCTTTGTATTGTTCGAGACATGATCGTAGATTTGTTACATGTTATATGGAAAATGCAAAGAGATTTGGACATAAGGATAGACCATATCTAAGACGAGCTGCATACAAATCAGATATAAAGTATTTACAGAAGTATGGGATTAAGCATTTGTTGTATGAGCAGAGATGTAATCGTACATGTAAGGATATTATTTCTGATATTCGTAGAGGTATTCATGAGATTGATAAAAGAAGGGAATTGGAGATGAAGAAAATGGAGAAAAAGAAGAATGAGAAACAGGAAAACGTTTTATGTACAAAGAATACAGAAACAATAAATATTTCTAATATAAATACAGAAACGATAGATAAAGAGATTGTTACTGCTGCTGATGATATTGTTATTGGTGATGGTATTGTGGATGAATTGAGTGCAAATGAAATACCTTTAACTCAGGGAAGTGTTGTAAATACAATTGATAGTCAATTGAGTGAAGAAGTTGGAGATGAAGGACCATTGAATATTTTCAGTGATGATGATGAGGATGATGATGAGGTTAGTACTGAGATTATTGATAGTAGTGATTATGATAGTAGTTGTAATTTATTGAGGTATACATCATCGAATATCAGTAGTATTGAATCTCTTTCTCCTGTGAAAGAATCTGTTTCCCCTGTGAAACAATTGCATGAGAATACGGAGTTGAATTTGGAGGATGATGGAATTGAAGATGAAATGAAGATGGATGCTTTGGAAACAGAGGTGGAAGTTGAGGTTGAGGTTGAGGTTGAAGTTGAAGAAGAAGTGGAAGTTGAAGAAGAAGAAGAGATAAATAGCAGTAGTAGTATATCTCCATTCTATAGTGCAAGTGCAATGGTGAAAAGTGCTTTTATATCTGTGAGTGAATTTGCAAGTAATGTGATTAAGAAAGTTATACCGGGTGTTTCTGGGGATAGTATGAGTAGAAAGAGATCATTATCTGAAATATTATCTTCTTCGATTGATTTTACGGATGAAATGTTGCCATCAGCATCGAAAATAATGAATAATACAAATACTATGGAAGATAATGATGGTGGAGTTAATTCAAATGATGCAGAATTATTGAGTCCAGCAACAAAGAAAAGGAAACTTAATAAAATGCAA
TAGAAATATATTTATACTGTTGAAAGATTACAAATTATTAATGTTGCCCTTGAAGTGTAAGTTTTTTATTTAAATGTTTTGTCATTATGACTCTCATTTGACAACTGCTGTTTTTTTCTCTACAATAAACCAAATCAATTGAAATTGTTTTACAAAATGAATTTTTCAAAATCTTCCCCTCTAAATATTTTCCCCAAATGTTCCTCAATATAAACCTCATCAATAACATAAGCCTTCCCTTCCCCCTCTGCAGCTTCAATATTAATATCCTCCATAATCTTCTCAATAACAGTAACCAACCTCCTAGCCCCAATATTCTCCACATTCATATTACAATCAACCGCTGTATCCGCAATACAATTGATTGCACCCTCAGTAAACTCCAACTTACACCCTTCAGTCTCCAACATCTTCACCTGCTGTGAAATCAAATTATACTCCGTTGCAGTCAAAATCCTACACAAATCATCCTTCGTCAACGGTTGCAACTCAACCCTCACCGGCAAACGTCCCTGAAACTCTGGCATCAAATCTGACGGCTTAGAATCATGAAAAGCACCTGCACATATGAACAATATATGATCAGTGTTGATTTTACCATAATTTTGTACAGTGATTGATGTACCCTCAATCAAAGGCAATAAATCTCTTTGTACACCTTCCTGAGATACACGTCTTTCACTTCCACCTCTCATACTATCTCTATTCCCACATATTTTATCTATCTCATCAATGAATACAATTCCATCATTCTCTGCCAATTCTATTGCACTTGCAGCTAAATCTGTGTCAGATAAAAAACTCTTCAATTCTTGTTCCATCAAAATTTGACGAGCTCTCTTTATTGTATGTGACTTCTTTTCATATTTATGTTCACTGTTACTGCCACCTCCACTTCCTGGAGGACCAGCGAACACTAAAAAAGGACTGCGTAGACCACCTCTCAGAAATGAAGATATATCAGATTCACGTTGAACATCTATTTCAACAGTTACAGTGTCCAACAAACCATTGTCCAATTGTGATTGCCAATATTTCTGTTCTGTATCTTGCAATTTACCCATAAGTGAACGGAGGAGTATTTTATCCACGGCTGTTTTTAGTTCTTCACGCATTTCTTTCTCTTTTCTGGATTTTACGAGAATTGCTGCATGTGCTACTAAGTCTTCGATGATAGTGTTCACATCTTTGCCAACATATCCAACTTCGGTGAATTTGGTGGCTTCAACTTTGACGAAAGGTGAGTCAGAGAGACGTGCTAGTCTTCGAGCGACTTCTGTTTTTCCACAACCGGTGGGGCCAATCATTAGTATGTTTTTTGGGGTTACTTCGCGTCTGTAGTCTTCTGGTAATTGTTGGCGACGCCAGCGAGCACGCCATGAAATTGCTACTGCTTTTTTGGCGTCGTTTTGGCCAACGACGAATCTGTCCAATTCTGATATCATTGCGTTTGGTGTCATATTATATGGATCATCATTTGCTTTTTTATCGATGTTGGAGGATTTGTTGGTGTCATCTGTGGTAGTGTCTGTTGGGTTGTTGGTGGTGGTAGTGGTGGTGGGAGGATTTGAAGGGGTATTGGTATTGTTGGTTGTGTTGTCAGATGATGTGAGGGAGGGTGGATTGGATTGTAAGTGATTTAAGTATTGTTGATGGAGTATGGATCTTTGGAGGTTGGGATCATTGAATGCGTTGTTGGTGAGTGATGGGGATGGTTGGATGAAATAATTGTATTTGGAATAAGTTAATATATTATGATGATTGATTTGTAGGAGTTTTTTAAAACAAACAAGTCG
>NODE_194_length_3707_cov_26.335703_g159_i0
ATGAGTTTTGTATTGATATCGATGATGCATTAGGTGATTGGCCATCACCATTAGCATGTTTGAATGGACAAAATCCATTCTCACTTGGTTTTGTTCCTCCAATGAATTCCATGGCTGCAGCAATGATGATTCAACATCCATATATGTCAAAAATGTCACATGCTGCTGGTGGTAATTATCATCGTTCTGCAAGAAATTTGCATATACCACATACAGTAAATCGACGAACAGCTGAAGACCATCACAGTAGAAAAGAAACATTCACTTTATCTGACATTGTAAGCATTACTCGATCTCCTAGAAACTGTGAGGAATCAATATATACTTCTGTTTCTGGCACTTACCCACATACTTCTTCTTACCTTAACCCACCTTTTACAACTACTCTAAAAACATCTCAGTCTCTTATGGAACGTATTTCATCACCAGCATATTCTTCCAGACGTAGAGCAAACACTGGTACACAAAAAGCTCAACGATTATCGTTATGGAAGGAACAAATGCTATATGTTTCTGAACGCCATAATAATTCATCATCGCATGGAAGTCATCCACCAGTTAGTAATAGTGCTTTACCAGGTGGTGGCAATGCTTTATGTTCAATCCCATCTTCGCAGTCTGTTGCTACTTCTCGTGAATCATCTACATTTGCTGTTGTGGAACCACGTGAAACTCAACGGCAACATTTGGTTTTAACGCCAAGTAATTCTGCTCCACCTGCGAGTGAATTACCACAGGAATTGAAGCAACCGTTGATGTCAACGTACGCTAGTGCACAGATATACCATTCGAAAACTGTTGTTGAAGTGGAATATAATATTGGTGATCGTGTATTGGTAGATGGTGGTAAGTTAGGTCAAATTAAGTCGATTGGACAGCAACCTCAGTGGGGAGCTGGTACTTTTTATGGTATTAGATTGGCTGGAAAGAATGGTTCATGTGATGGTGCTTGGAAAGGTATTCGTTGCTTCTCTTGTCCACCAGGCTATGGTGTATTCATAAAGAAACAGCGTATTACACAAAAGTTACCAGATGCGAAGTTTGAATATATGGATGAAAATTTCATTGAAGATAACAAACAAAATACACGCGACCGGCAGTTGAAGGAGATCACTAAAAACGATTCTTCTTCTGATGAAGATCCTGGAGAATCAGACATTCCTACCCCAGGTAATTTTCATCTTTTGCAACACTCAACATCCGAAAGAATCCTATCTTTAACACCAAAAACTGATAAGCGTGTAAAACTTTCATTTGATAAAGCAAGTAAATCATCAAAACCACCATCATCAAAACCACCATCATCTAAAAATAAGACAGCATTGACTCGTAAATGGACTGGTGGTGCACCGCGTACTGTTGTTCTACGTAATAAAATTCCACCAATCAAACTGCATGCTGGATACAAAACATGTCAAGAGGGTATGAAATTACGTGAAGAAGAAAAATATAAATTAGCAATTGAGAAATTTAAGATAGGTATTGCATTATTGCAAGTAACTCTATTGGAAATTGATCGTGGATTGGAAAAACGTAAATGGAGAGGAATAATCGAAGAATTTCGTCATAAAATGAGTCGATGCACATTTGAAATACAGAATTGCAAACGTAATCGCTTTCTAAAAAAACGCAATCAAAATATACAAAGAGCAGCTGAACTAAAAGCACAATTGGAATTAGAGCATCAAAGGAAAGCAATGCAAGAAGCATTATCAAATGCTGTTGGTCGTGAACGTAATTGGACAGTAACTCGTGAAGATATGGAAGTGAAATTGCAAGCAAATCGTGCAGAAATTGCAGCCGCAAATAATGCTGATAGTGATGAGGAAGAAAAATCTGGTAAAAAGAGTAGCAATAAGAAATATATGCGTGATCAGACTTTAGCGAAAACTAAACGAGAATATAGATTAAAAAAGAAAGGCAAAGGAAAAAAAGATTGGAGAGATTATCCAGGATATGGTGGAGATATGGGGAAAACCGGAGATGACAGTGGTGGCGAT
GATGATGATGGTGGAAGTGATGAAGAAGATGGTGGAAAGAAAAAAAAGAAATCTAAATTATCCAAACATGATCAAGAATTGCGTTCCAGAATAGAAGGTGATGTAATGACTGAAGCACCAGATGTTTCATTTTCAGATGTGGTTGGTTTAGCAAATGTAAAGTTAGCACTCTACGAAAGTATTATATTACCTTTTTTGCGACCAGATTTGTTTCAATCTATAAAGAAAAGTACACAAGGTATCCTACTCTTTGGTCCACCTGGAAATGGTAAAACTATGATTGCTAAATGCGTAGCAGCCCAATGTGATTGTACATTCTTCAGTATTTCTGCATCTTCCATAACTTCTAAATTTGTTGGTGAAGCAGAACGTATTATGCGTACATTGTTTGATATGGCTCGTCAACGATCACCTTCAATTATATTTATTGATGAAATTGATTCATTATTGAGAGCACGTGGTGGTGCTAATGAAGCTGAATCTTCGCGTAGAGTAAAAACTGAATTTCTCATTCAATTTGATGGTGTAAAAAGTGCTCAGCAGGCAGATACAACAATAACAGTGATTGGTGCTACAAACTTGCCATCACAATTGGATGATGCAGTACTTCGTCGATTTCCAAAACGTATTTTAGTTCCAAATCCGAATTCTGTTGCACGATATGGTTTAGTTCGCTTATTGATGTGTAAGCAAAATCATGCAATAAATGAGAAACAATTTCAAACAATTGCTAGAAAACTGGATGGTTACTCATGTTCAGATATTGCTATGCTTTGTACAGATGCTAAAATGGGTCCAATACGTTCTATAAAAGGTTCTGATATACTTACAACTCGAAAATCTGAAATACCACCAATATCTATGAATCATTTTCAACAGTCAATAAAAAATATACGCTCTTCATTGTCTGAAGAAGCAGTAAATGCATATCGTGCTTGGGATGATGAGTTTGGTTCGAAACTGTTCTTAACTATGGATGTATTGCCAAAAGATATGTTAGCCAAGGAATTGATGCCTGTTGAAGAAGAAATAGCAAACAAGAAACGAAAAATTAAGGAAGAAGTGGAAAGAATTATTGCATTGGCAGATGCACAAAAGGAGAAAAAGATGCTGAAACAAAAGGAAAAACCAAAGTCGACAGTAACAAATAAGAAGACTGTATCATCATCGTTGTCTCAAACACAACCATCTTCTGAATCGAAATTGAAAGCATCAACGAGTGGAAAATCTGCGAAATCTCAGATGCAATCGAATAAGAAAAAGTCTAAGGCACCTGTAGCAACAGCTCCTGCAACAGTGCAGAGTAAAGCAAAGCGTAGTTCAGTAAAAGGGAAGAATAAAACCAATAGTAATATGAGTGATGCAGTGCAGGTAAAGAAGAATAGCAGTAGTGGTCATAATGGTGGTAGCAGTAGAAGAAAGAAAGGAAGTCTTCGTTTGACTGTTGTAACGAACAATAAAGTTGAATCCAGCAAAAGAAGAAAGAGTTCAAGCAGTGCTAAATAGTTGTGTGATTCATATCATGTATAGGTTGGCAATGCTTAATATAAAATCCGAATAGGAATATATCACCGAAATTTTTTATAATATTTTATATTTAGTTGTTGCTTGTGTTGTTAATAGACAAACTCCATGTACCGTAGGACAGGAAAGTTTTAGAACAATGTAGTGATCTGGCAGTGGTGGTGTGTGGAAGTGGTGGCGGTG
>NODE_200_length_3691_cov_73.986546_g164_i0
TTTTTTTTAGTGTACGTTCTACCACAATATGGTAAAGAAAAAGTAAATCAAAAACAAAAAAGAGCAGAAAAAAAATAGAACGTAAAGGTACTAAAATTTAAATATAAATACTGTATATTATGGAGGGAAGTGCAATCAAATGAAAAAACTTCATTCTCGATTGCTTAACTTCCTTCCTATACAATAAAAATGATTTATAAATATATATATATACTAAAAAAAACTGAGCATAATAAACAATAAGTGCTACAAACAAACCATAAATATTGATATTCAAAGAAAAAAACAAAAACCTATTTATTTTTAGATTTCACTGCAAATTGCTTCAATTTAAGCCATCGATTCTCACGTTGTTCTTTTTTTTGAATCCGCAATTGCTCATTTTCATCAAATTTCTCATCCAACGACTGATGTTGTTTTTCAAGTTTCTCAAAAAACTCTGGATCTCTTCTCTTAAAATCAGACATAATTTGCCGATTAATATTTCGAATGGCAGACAACCAATACTTCTCTTTATTCTCCAACAATGCATTATAAATCATTGGCCATATCTTCTCTCGATGTAAATCAACACAAATACGAACTGTCTCATCTCTCCAAACCATAAGACTCCGTTCAGCAACCTGGTGATGAGGACTAGTCATACAATCCACAAATTGTTTCAAAACTGCAATAGTAATTGTAGCAAATGCAGGATCACTTTGTTTTACATGCGGATGACGTATACATACATTCAGAATATGAACAATCTCAGTAATAAACAATTGCTCTTTCAAAGGAGAGAATTTAGGCCAGAACTTAAGTATCCCACCCATAATCACTGGTGCTGAATGTGGATCTTTAGTTACAAACTGTGTACAACATTGCAACAATTGTTCATGAAATTCATCCAAATGGAAAGACTTATGCAATGGTACAATCACATTACGTAATACCACATGGTAATCAGGTTTTACCGGTACATTCAATCCTTGAAATACACTACAGAATATCTCAAGTATCTCTGGTAATCCTTGCCAAGAACTTGTATCGCGAAATGTGGAAGTATAAATATAGCGATAGCAATAATCCGACAAATGTTTTCGAATCGCTTTACGTAATGCCATAAATCTACCATAAATGGCATGTACAATAATTTTCACATACTGTGGTTCTCTATCATCACAAGAACCAAACAAATGAATGAGATTTTCTAAAAATGCTCCTTGCAAATGTTTTTTCATTGTTTTTTTATCAATGTGTGTGTTGGTAACCACATGAAAAGTGAGTTCGTATATCAATTGTAAATGATCCCATGCTGGATCACGAAAGTCCATTTCATCATCATTTGGGTCTGTACGTATCATATTTTCATCTGGTAATGCACGAAACAAATTCTTCCAAATTGTTTCAATGCATTGTTTGAGCAGTTCAATCGAATTCCATTTATTTCCACTCATACACTGTAGTACTTCCAACAATAAGTCTTTCTTGTTATTTATGCGTATCATCTCTTCTTTGGTTGCTTCTTTGAGTTCAACATTAGTGTTGTTATTGTTGTTGTTGTTGAACTCTTCATCGCTGTTTTCATTCTCATTCTCATCCGTTTCATCCTTGCATAATTCAAAATTGCAAAGTACTGAGCAAAGTTCTAATTTCTCTTTCATGAGAAGTTGTCTTTCATTTACATCAGTAACATCTGACAGTGATTTCATTGAATCACCTGCAGATAATAACTCATGTAACAATCGATTGGTTCTCTCTCTTTGTTTCTCTTTGGATGCATTCAATTTCTCAAACATTGGACTATAATCAGGCAATGGAACTGAAGGTTCATTTCCTCGAAGAGAACCTTTGTTTTTACCTTTCTTTTGTTTCTTTCGAAGAACATTTGGCGAAGATGGACACTCACCAGGTAATACACCACCAACAGCAATGATTTGTTTTTGTGAAAGTACTTTAGCTCCACTAACTTTAGCTTCCTTCTTAGCCATCAATAATTCATGTTTCAATTGAGAAAT
CTCATTATCTTTAGCAATAATCTCACGTTCTTTTCTCTGCAAAATAGAATCTTTTTTGGACAATTGAGCTTCCAAACGATTGATTTTACCATTCAACTGTTGCATATCAGCCAACAATTGCGACTTTTCTCCACGTAACTCACTCACAGGTGGATGATTCATTGCAGTTGAAGGACTATGTCCATTTCTATCATGCAATTGTTTGGCCATTTTAAGATCTTTCTCAAGTGCTTCGATTTTATACTGCAACTCTTGCTTATCTTTGCTGTATCTCTCCAATGATTGTTGAACCATAACTTTATCTCTATTGTGTCGATCTAAAGATAACTGTATGTACTTCTGTCCCAATCGAGCATAGCAACTATAGTGAATTTGATGATGCGGTGGAGGAATCGCAGATTTCAATGTCTTAGCATAATTCATCACTTCATTGCACAATATCTTACCCTCTTTGTATTTGTTGCTGTATTCAAGCAATCGAGAGTAGAAATAACAGATATCACCTGCGAATGGATTGAGTTCCAAAGCTCTTTTATAAAATATTTCAGCATTCTCCAATTTCTCAACCAATACCAAACAATTGGCATAATATGTGAGAGCTTCGGTTGATGTTGGATACTTATCAACTGCTCGTTGATAAATATCCAACGCTTCAACAAACATACCTTGCTCTTCAAAAAACACTCCATGTTTTAGTGGTTCAACTACAATGAACTTTCCATAATCATAGAAATGTGGTTTGGATGCATGTGATTTGAGAACATGAAATTCATGTGCATTCAATAGTATATCATTGTCAGATGTATACCCATTGATTGGTCGAGATTTTCTAAATTTCATCTCAAAAATGGTAGATACATACTGTGACATTTCTCTTATCTTAGCAATCAATTCTTCGTAATTGAAATTGGATGATAGATGAATATGAAAAACAGGTGTTGTGGCAGCAGATGATGTTGTTTGCGTACGTCGTCGTTGATCATTGTCGCAATATATGGCTAACTCAATGGTGGTTGATATCATTTTTATTTTTGTTGTACTCGAAGTCGAAGAAGAACCATTTGTATGTGTGTGCAAGGATGAATGTTTACCTCTTTTACGTCTATGACTTATACCATATGATGCACCATCAGAATCAGAAGAAGATGAACGTGCATGAGTGCTGGGGAGTGCAGCAACCTTTTGAATGCTGGAATTGTTGCATTTTAGAAAATATGCATATTTGGAGCTAGTTTTAAAATAGCGAGTACCTTTGAACATTCCATCACTGTCTCCAATTGCTTTGCGTAACTGTACTCCATAATAGACATCTGAGCAACGATTGGGAGTACCAATGTATCTGATTTGACCAACACTCTTACGACTGCCTACGTACAATTTCACCCAATCTGTGATGCCAATTCTGGAGATGGAGTCAGAGGGCATTACTTCTTGTATGCGAGCGAAACGTAAGAAGAGACCACGTTTAGCTTTGCATTTGAAGTATCGAACATTACCTACCACACCATTTGACTTACCTTTTGATTCATTCAATTCTATGCCGTAGTATATGCCTTTTCTTGCTTGGATCTCTCCTACGAATCGGACTACTCCTAATAAGTCTTTTGTTAAACGGACGCGGTCACCGACTGTTACAACGTCGTTCAAGTTTGTAGTATGGCCGTTTGTTCCAGACATGATTTTACACAATA
>NODE_202_length_3674_cov_52.950345_g165_i0
TTTTCTTTAATGTTATATGAATCTTATCTAAGCACTTTCTCGAAAAAAGAAACCAACCACCCCCAAAAGCCCTTGAGCTAAGTGCCCTAATATAAAAAATATTTTCAGTTTTCGTTGCATGCATTTACCACATGTGTAAACGCTGTGCAATAGAATATATAAATATGATATCATCAAATTATAATCGTATTCTTACGTACAGAATACAACCATAACGTAATCAAACAACAAGCAACAACAATAAAGAAAAGCGTCAGCATAAGCATCAACGATTCCCAAGCAGCCATCCGTTGCTCATGCAAAAATACCTTCCGCACATTCTCCTCCATAACTTTCTGTTCTGCATCTGTACGAATAACACCACCACCTCCTTTATCACCGAATTCCCCACCAACCTCATCACCAAAATAATATTTATCAAAATAATGATTCGCCACTTTAGCATCATAGATATCTTGCATTTCAGATAATCTCTGATTCCTAACACTATCCATCTCCAACTCATCATCAACACCATCACCATCACCAAAACCATATTCATCATCAACAAATCCACCTTCAAAACCATAATTATCATCACCCACATACCCACCATCATCAATTCCATCAACCATCATATATTCCTCAATATCAGCAGCATCTGCATTCATCCCATTACCATTCTCATGTTTATCCACATAATCACTCAACCACTCAGCAGTCTTCTCCTTATCATTATTCAAAAATTTATTGTTCAACATAATCTTCTCTAAAATTGACCGCACAACCCGTACCTTACTATGTATAAGTTTCCTCTTTCCATCATTATCAAAATAATAATCATTTATCTCACGATATACTTTGATATTAGCAAAAGTAGATGCCATACCCGCCTGTCTCGGTTTACCTCTACTTCTCTTCAACATATTCTGCCATACAATACCACGTCCATAACAACGTCTCAAACTTCCAAACCCATGTCCTTTCACATTACGACGTACTTTTGGAGAGAGTACAAAATTGATTGCACGTTGCAACATATATGGATCACGATATACAGCACCAATTGCACCTAAAATCACTGTCTGCACTTCTGTATTTGCACTACCATATATGTATAACAAATTATTGTAAATCAGTTTAGTATTGCGTGACTTCTATGCTTATTTTCATTCATTTTACTTAAGCCAGCTTCAAATATAGCTTGTAAAACATTAGCATCGATATCTGCTTTGAAGTATCCTTCTTTAGTTACTCCAAACTTATGATTGTTTTTGCGCATTATACTCAACAATAAATTATATCCAGTGTTGATTATTGTTTGATCTTCAAATCGTATGAGTGCAGATAAAATTAAGGGACGAAGTAATATATGATCATTTGATTCGCTATTGCTATCTTTTTTATGTCTATGATATGAGCTACTTCCATCTCCATCCCATTTTCCAATTGCTCTCCATATTTTATGTATGATGTTTTTAGCAAAATAACGAAAGTCTTTTCGTAATAATTGTAGAGTCATATCTGGTGAATGGAAGAGTTCACAGTATATATCATCGATATGGATGAATGATTCAATGATCAAACGCCATAATAAATAATTTATTTTTCCGTGTTTACCATTGTTGATTTTACTGACAATTGATGATACAAATGATATATAATTACCAGCTGATATGTAGTTGAACATTAGTAATGCGAATCTGTCTGCAACAATACTGAATTGGTCCACTTTTGATAAATGGTCGAATGATGAGATAATTAGTTTGAGACTTTTTTTATCATAGTGTGTACGAAATAGAGATTTCATTTCTGGATTTATGATATAATATTCGTTACCTTTGAGGCATTTCTTTCTAGCCATAGTTATCTTAAGGGCAGTAGAATCAAACAATCGCTTATTATCATATGTGCTATAACAATTTCTCACACGCAATGGTATACTCCACAACTGATCCATATTAAGTTTAGTTTGCGAATGTAGATTAGGTATCATCCGATCTTGACTCAAAACCAATGCA
ACTCTACCATCACTCATATCTATCGACACTTTCACTATAGGTAAACCAATATTCAACACCCAATCATTCATCATACCTAAAATCTTCTTACGTGTCATATACTTATGCATATTTCCAATCATCGCATTATCCGCTAATATTTCAAACAAATCATCGGTATCTGAATTAGAATATTGATATTTCTTCAGATATGCTTGCAAACCATTCATAAATCTATAATTTCCAAAATATTCACTCAACATATGCAAAAAACCTGCTCCCTTATGATATGTTATACCATCGAACAAAGAACGAACATCTCTTGGACGAACAGCAACTTCACCATTCTGACTCTCTTTATCAACAACAATAGCATGTGTTTCATGCGAACAATCCAACAGCATAGCACGATGTGTATCATAATAGTATTGATCCCAAAAATTCATCTCTGGATATGCATGTTGTGCACCAATAATCTGGAAATAAGCTGCAAATCCTTCATTCAACCATATATGATTCCACCATTTCATAGTTACCAAATTACCAAACCATTGATGTGCTTCTTCGTGTGCAATAACCATTGCTACTTGCATAACATCCATTAGTGTGGAATGTGCTGGGTCTACCAACAAACGTATATCACGATAAGTTACCATTCCCCAATTCTCCATTGCTCCCATTTTAAAATCGCTTACTGCCAATGAATACATTTTAGGTAGTTTATAAGTCATATGAAACAATTTCTCATAATAAGGCATTATGGATAGAGTTGATTTTAAAGCAAATTGAGCACGAATGTGCTTTTGAATGGGGAAGTATATGCGTTGGGATATATTTCCAGATTTACCAGCAACATATTCATAATCGCCAATGGCAAATGCAACTAAATATGTTGACATTTTAGGGGTGGTGTCGAAACGCATTAATTTACATTTGGAAGTATGACCGACATGAGATCGCCAAAGGCAATCATGTTTATCTGTTTTTTCTTCATATTTGACGAGTGTATTGGATATAACAGTTGCTGGATAAGGTGCATATACATTTAGATTGAATTTCGCTTTGAAATGTGGTTCATCAAAACAAGGAAATAAACGACGAGCATTTGTTGCTTCAAAATGTGTGATTGCATTATGTATCATAACACCATTATGCTCATATGAAGTAATGTACAATCCATTCATTTCTGTCTGCATTTTACCCTTGAATTCCAGATATAAAATGGCTGATATATAATCATTATTTTTCTGTTTTCTAAGAATAGATAGTAATTTAGTGTTAGCTAAATTGAAATCGAATTTGACTGTCTGTGCATGTTTATTGTAATGAGCATTTGTTTTATGGTACACTTTATTATGGTTGTTATATAAGTATAAGCGACATTTTATTACTTTTATATCACGTGCGTTGATGGTAATTTGTAAGTTTTCATTGCTGTTTTGTAAATGATCTTTGGTTATGCGAATACTGATGTATTCGAAGGCTTCGATTGTGAGATCACTAAAAAATGGATGAATAGTTATATTATATTTCACAGGGTATATAGTTTTTGATAAGCGATAGGTTGCATGGGATGATGATAATGAAAATGAGGTTATTGATATTAGAAGAAATACTAGGATTGTGTTGATTAAGTATGACATACAGTGTTGAAAAACTGCAGACGCTTAGAACTTTGTGCTAATTCACTCCATT
>NODE_226_length_3556_cov_34.562589_g181_i0
TTGTCAACACATGGAACATGCACGCTTTACGTGCAGTTCAACTTTTTTGTAGGAAATCTTGTTATTCACATCTTCAATCTGTTGCTATCATGACGTCCATGCCAACCGTTATCAAATCTCCTGCATCATCTTCTTCCTTATCTTCATTTCAATCAATGATGTATTTTTTCCACAACAAAGAACAGCAACCTACCCAATTCATTTCCAATTTGAATTCGATTGAAAACTCATCACAAATGTTGTCAATCGATTGGCAAACATTAAAAATACAAGCACAAAAACAACAAACATCTGAAACAGAATCAGAATCAGAATCAAATTCAGAGACAAGAAATATTTCCGTGATTTCCAATTCCAAACGCGGTCTCAAATGGGACAATGGCTATTTTTATAACAAATTACAAACGAAATGGCTTGGACAAACATTTTCATTTTGCGATACTTTGTCTTCCACCCAAACGTTTCTGAAATCAAATGCTCAACAGCTTCCATGCGGAACTCTCTGCACATGCAATACCCAAACATGCGGTCGAGGAAGAGGAGGAAATGTTTGGGAATCTCCGAAAGGCTGTCTCATGTTTTCATTCACTGCTGATTTGCATTCATCGAAAGCACGTTTTTTGCCAGCCATGCAATATTTGATATCTGTAGCAATAGTGCAATCCATCCTACATTTGACTGATGATCAATTGCAACTGCAGCTCAAATGGCCGAATGACATCTACCATGGAAGCACTGAAAAAATTGGAGGAGTGCTTTGCGAAAGTTTATATTTACAGGAAAGCAAGAAATATCGAGTGATTGCTGGAATTGGATTGAATGTCGATAATGAACAACCAACCACTTGCATTTCTAAAATTATCAATGATTTGAATGGTAGTAGAAATGAAAATGAAACTGGAGAGAAACTGTCGATCATATCGAGCATCAATCGCGAAGATGTTCTCAGTACTTTTTGTAATATTTTTGAAGGCATGTTCGATGATTTTTGTTGTCAAGGATTTTCAAATCATCATCGCCAGCAATATTTGCAATATTGGATGCACTCTGGGCAAACAGTGACTGTCAGAGAATCTGAGGATTCGGTGGTGAGCAAGGAAGTGACCATCACTGGTATCAGCAAATACGGATTGTTATTGGCTCACAATGCTCTCGGGGAAGGATTTGAACTACATCCGAATGGAAACTCATTTGATTTTTTGAAAGGTCTGATTTGTAGAAAAATACAAAATATAAGCGTCAAATATTGATATTTATGTAGTGATTTGTTAGGATGAACTATTACTTTTGTTCGTATGCCGTATTTATATCACAACAATACCGTACACACATTTTCAACAAATAATCTATTTCTATGGTAAATGCTCATCAATATTTTTACTAATACCATTTTTTGGATAGATAGCAATGGTATTCTTCTGCACACGAGTCGTTGGTTCCGACTTAGCATAAGTAATCATCCCCAACAAAGGTTCATACTCTGATGATTCACTTCCCTCCAATAAATTCTTTTTCAATCCTCCATCAAATATAACATCACTGCACTGTTTCTCCGGAAATTCCACAATATATTTACTCTCTCTTTCCATCAAAGTACCATACATACAATACTGATACCACTTAAAAGCCAAACAAAACCCACCAGCAATAACAACAACAGCCACCACAAAAGTCATCCATTTCAACACTTTCTCAAATCTCTTAACACTCACAGGTCCACCAATCCGCTTCACCAACATAGCATTCACCGCTTCATGTGCTATTTTATATTCCATAGCCATTGCATAATACTTAGATTGACTCACATCAATATCTGTATATTTCACATCTGCCGACATCAATGATTTATCAACCAATACTTCCTCAAATTCATATTTAATTCCATTCAATCCATAATTCTTCATCCATGGCATCCAAAACCCTCTAAAACGCAATGGATCACTATTGGTATCAAAAGAATTCTCATATTCTTCAACCAAAGTAACAGTAGAAATATCATC
ATATGCCAACAACATATTATACATATATTGATACAAATTTGAAAATCTTATATCATTGCTGAGATCATAATATTCGCAAGGATCACTTACGATATCAAACAAACATGGTTTATCACTGCATGGAACATCATCTTCACTCACAGCACTCTGTTGAACATCATCTGGAATACTACCACATCCCAGATAATCCATATATCCATCACTGTGTGCATTAATTGCGAAATCACTCGTCCAAAATGCATATGCACTATTAACATTTGAAGCACCAATGATTTTATATCTTCCATATCGATATGCACCACATACCTTACCATCCTTACATTCATATGAATCAATATTATACAGAAATTCTCTGTTCTCCAATGATAACTCATTATCAATTGTCCCATATTGCAAAGCATTCCACATATTTTTACCATCGAATACACGTCGATCACTGAATATCTCATCATCTGCCAAACCAATAGCTGATAGAATGGTAGGATACCAATCTGTGACATGCATAACACTATCAAACACTTTACCTTTTCTATCATCATTCAAATATCCACCAGCAATAAATGCAGGTGTACGTACTCCACCTTCATACAAATATGCTTTCCCACCGCGCAATGGATAATTATCACCAGATAATATGAATGCACCATTGTCTGATATACCAATCAACAATGTATTCGACCATACATTTGATAAGGATTTAAGATGGTCTATCATTTGTTTGAGAGCATGATCAGCACATGTGGTCACTGATTGTGCAGCTATTCTCAAATCACATGAGTTGATTGCTGTGAGTAATGCTGGGAAGCCATATTGTTTGAGACAGGTGGTTGATTGTAATTTTTCCATGTATGGTTGTATGCATTCTTCTGTTGTTTCTAAAGGGAAATGGGAAGCATGCATAGCTAAGTAAATGAAGAATGGTTTGTTGGTGGATGCAAATGCAGTGGTTATTTGTATGAATTGTTCTGTTTCCCATATACTTAGATATTTATTGTTGTTTTCAATGAAATGATCATTTATTCGTAGATCATAGCCATTGTATATTTTATTATCAACAGTGAGAGAGTAATCATGTGAGTAGTAATGTAACATGGGTGATAATGAACCATGGAATGCATCAAAGCCTCGATGTAAAGGACATAAATCTGGTCGATGGAAACCTAAGTGCCATTTTCCAACTGCTGCTGTTTTGTACTCATTCTCTTGCATGTCATTTGAGATCATTTTCTCAAATACTGATAAACCAAATGGGTAGCCATTCTCGATTACAGATGATTGGAGACCGAAACGTATTGGGTAACGACCTGTGACTAATGCTGAACGAGTGATGGAACATAAAGGGTATACGTAATGGGAATCTATGGTTAGAGCTTCGTTTTTTATGAAATCTGTGAGGAATGGCATTTGGGTGTCTGTGTGTTTGTGCCATGGAACGTCGTTGAATCCCCAATCATCTGTTACTAGTAAAATTACATTGGGTTTGGTATTTTTATCTTTTCCATGTACTTGGAGAGCTAGAAAAAATAAGCAGAGACCTAGAAAAGTAGATGAATGGT
>NODE_238_length_3524_cov_80.640288_g191_i0
CAACAATTTAAAAACAGACATGTACGCGTCTGTTAAAAAATTAATTTTCAAAAGGATCACTCATCGTTCATTTTCTTCTCGTGCGTTTGATGAATGGAACCCACGAATCCACTCTCTATCTGCAAAATATCCAGGGTTGTTCAATCGTCAATCATTTGTCGATGATCAAATAGAACTTCTTAAATCGGCGGAATCTTCCTTGCCTGGAATTGCAGAAGAATACGAGAAAGGAAGGAAAATGGTAAATAATATGATAGGGGATGAATTGTTTCCAAATGTGAATGAGGTTCAACCGCAGAATGAACCAATATTCTCCAATACGTATTTTGAACAAAATTCCACTGAATTTGAACAAAAGCTGAGCACAGGTGAATCAATAATTGCCAAATTGCTTGCTTTGCAGCAATCGAGTGCATCATCGGTTGAAGAATCAAATCTGCACATGATTGAAGTCTCAAACTTTGCCGATGAGATTGAAGAATGGCGTTTATCATTGCTCAGCAATGGAACATTCAATGATACACTTGGGTTTCCAATTTCCGATAAAAAGTTGGATGCATGGGTAACCAATGAATACTACCATGCACCAGATTATGTTCGAGAACAAGGGATGATAACATATGAAGAACATAAGAAGTTAGAGAATCAATGGATGCTCGAGGTTGATGAACGATTGGTAATTAAGTTGAAGAAAGATTGTTCAACCAAAGGAATTGATTTTAATGAAGATGCTTGGGTGAGATTTTGTGGAATAGTGAATGATATCTTCCATCCTCATAACATCGATCATTATACTCTCTCAAATGCAAGTAAATCTTTATTTCGAGCAGCGCTTTGGCAATCTGTCAATAATGATGGTAGTGATCGACCAACAATGGTTGAAGAAAGTATTCTTCAATTGAAAGAGATATCGGAAGATATGCGCGATAATGATTGTCAAATTGTTCGACGGTGCTATGAACAGAATATTCAAATGAAGTATGCATCATTGGTGGCATTGCAATTGCAAATCACTGATCCATTGTTTGCTGCCAAATTATGTGATGAAGTACGGTTGACTCTGGAAAAGGTTGCTTTTGATAGAGCAGTATGTGATGTTATGCGTGATTTGGAGATACCCAGTATTGAAGCAGAGAAGAGAGCAAATGAAGAAGAATTACGAGAGACAATGTCAACAAATCCCGTGAATTTACTGCGCAATATCAAACGATTAGAAAATGAAGCACAAAAGTTCAATATTGAAGTACCAGATTTAGATTTTGTGTTAGATTCTATTTTGCCAATAAAATTATGGGGCAATGATTTCTCTCGAGTGAAGACTGCCATTCTAGCATATGCTACAGGAGATACATCCGTGAAATTGTTGGACATTCATTCTGCGATTGAATCTGTTCTCATTTCTTCGAATTTGGTGCAGTCCATTGAGAATGATGGAGGATTGAATGATGAGGAAATTACAAAATTCAGTGTTGAATTTCATAATGCACGTGCATCAAATCCAGAATGGTGGAATTTGAAGCAGTTTTTGGTGAATCATTGCAATCAAAAACGAGATCAATCATCCATAGTAAGCAATCAAAGAATTGTGAGAAGAAAATTGAGTTTGATTGATCCCCTGCTGAGCAATCTCATTTGTATAATGTTTGCAGATGGTACATGTGATTTGAATTCATTTCATTCACTTCTCGCAGATTATCAAACAATAATGCGTAGATACCGAGGAGAGGTGGATGCAGTGATCACTAGTGCACAAGCACTCGATGAAACTACATTCAATTCTATTTTGATGGCGTTGGAGAATGCAAATCCAGAGAAGAAAATTACTTTGAACCAAGCAATTGATACTGGAGTTCAAGCTGGATTTATTGTCAAAGCTGGATTGCAGAAATTTGATTTTTCATTGGCCTCGCATTTGCAGCAGGCACTATGACAGGCACTATGAATTTTTATAAAATTATAAAATAAAACGGTTTTGAACCTTGATTTAAAAATGAATTGCT
TTCATTCCACATGCATTCATACATGCTTCTTTAAATGCTTCGCTAAGCGTTGGATGTGCATGACATGTGCGAGCTAAATCTTCAGCAGATGCTCCATATTCAATACCAATCACACCCTCTGCAATCATTTCTCCAGCAGCAGATCCAATGATATGTACACCCAACAATCTGTCTGTTTCTTTATCTGATAATATTTTCACAAAACCTTCCGTTTCTCCATTTGTCCTCGCTCGAGAATTTGCTGCAAAAGGAAATTTACCCACCACATATGAAATATTACTCTCTTTCAACTGTTCTTCAGTTTTCCCAACACACGCAATCTCTGGTGAAGTATATATCACAGATGGTATGCTATTATAATTTACATGTCCAGACTTTCCAGCAATGACTTCAGCCACTGCTATTCCTTCTTCCTCTGCTTTGTGTGCAAGCATTGGTCCTTCAATGCAATCACCGATAGCATAAATATTGTTATGAGTGATACTCCTCCAATCCCCATGAACTCCATCACGTATTTTTATTCTTCCTTTGTCTGTCTCCATACGAATGGACTCTGTATCTAAGCCAAGATTGCTAGTGTTTGGTCTACGCCCAATTGCAATGAGTAATTTTTCACATTCATACACTTTATCATCAGTGCATGAAACTTCAATCATATCTGATGTTGTTTTCACTGATGATAAACCAGTGGATAGTTTGATATCTATTCCTTGTTTTTTCAACATCTTCTGAAATGCATTTGCAACTTCTAAATCTGTGCCAGGTAATATCCTATCTAGAAATTCGATTACAGTTACTTTTGTTCCGAGTCTGCTCCAAACACTTCCCAATTCAAGTCCTATGACTCCAGCACCAACCACAATCATAGATTTTGGTATTTCAGAAAATGAGAGAGCTCCAGTGGAACTGACAATATTTTGTTCATTCACTTCTACTCCAGGTATATCTGTACTCACACTTCCACTCGCAATTACAATATTCTTGGCAGTCACTATTTTTTCACCAGAATCAGAATTTATTGAGATTTCTGTTGGACTCTTCAATTGTGCCCATCCTTTTTCATAATGTACCTTATTTTTTGAGAATAAGCCTTCAATGCCTTTGGTGAGACCAGAAACTGCATTTTCTTTGTATTTCATGAATTGATTGAAATCGAATTTCACATCTGCAATAATTCCTCGATTGCTCAAATCATGCAATGATTGATGATAGAGATGGCTATTGTGCAATAATGCTTTTGAAGGAATACAACCTACGTTCAGACATGTGCCTCCTAATGATCCACGTCCTTCGACACATGCTGTATTAAGGCCCAATTGAGCTGCTTTGATTGCACAACAATATCCTCCAGGACCACCGCCAATTACCACTACATCGTATGCTGATGTTGAATTTCTGTTCTGTCGAATACATAGAAAACTCCCACTCTGTATATTTGTGCTATGTTGCAGTCTTTGACCAACGCGTTTTACAGCTAGCGCTAACATTGAATTTAAGTCACGAATTAATTTTATTAATTAGATC
>NODE_257_length_3425_cov_46.478969_g206_i0
GTGCTCTTCCGATCTTTTTTTTTTTTTCTGTCGACCATTCGTTCGTTTTCATGTCGTTCTTGTGTCTTCTTGGTTTCTCTTGCTTACTCTTCCACTTCACACAAAGTTCCAATAGCCACGACTCACATGGATTTACCCAAACGAATAAGTTCCAACCAAATATAATCTTCATGTTTTGCGATGATTGTGGCTTCAATGATTTTGGTTTCAATCATGAATCAAATGTCCAAACACCATTCATCGACACTCTAGTAAAAAATGAAGGTTTGATAATCAATACACATTATGTCCATAATTTATGTTCCCCAACCAGAGCAGCATTTCTTAGCGGTAGATATGCACATAAACTTGGATTACAAGTTGGAATGCTCACTGAAAATACAGACTATGCATTAACCAGACAAGTATCTCTACTTTCAAATGAATTTCAATCTCAAGGATATGCTACTCACGCAATTGGAAAATGGCATTTAGGCTACCAAACATGGGAATACACCCCAACCTACCGTGGCTTCGACACTTTCGCAGGTTTTTACAACGCAAAATCTGAATATTTCACCCATAAATTCAAAAATTCCCAAAACATGGATTACTACGACTTAAGATTGAATGAAGAACCAGTAAGAGATGCAACAAACATTTATGGTACTCAATGGGAACAAGAACAAGCAATATCAATTCTAAAAACAAAACATAAATTAGATGAACAACCATTTTTCATGTATCTTGCATGGCAAGCAGCACATGAACCATCAGAAGCACCAATGAAATATCAAGACATATATTGTGTCAGTGACACATGTGATGATAGTGATATACATAAAGCACAACTCACATCATTAGATGATAATATACAAACAATTGTAACTTACCTCAAAGAGAATGATTTATGGTCCAGAACATTACTTGTATTCTCTGGTGATAATGGAGGTGCAGTTGGATTTGGTGATAACTTCCCATTACGTTCATATAAATTCTCACCATTTGAAGGTGGAGTACGTGTACCTGCATTTGTTACAGGTGGTTTTCTTAATCCAATGCGCTATGGAGAACATGTGGATGAATTTGCTGTTCATGTCACTGATTGGTATCCAACATTATTGTCTGCTGCTGGTCTTTCAATAACACATGCAAAATCACTTGCATATCATTCTAGTACGTATGATGAAAATACATTGGAATTGATTGATTTTGATATACCTTTGGATGGCTATGATATTTGGCAATTTATTCAATATGGAACATATTCAAAAGATGGTAAAATTGCAGATTTCTTCATACGTAACAATGATGATGGTAAAGGAAGCAGTAGAAATTTGGGGAATACTTTGAGTCATGGACGTGAGATTATATTGAATATTAATAATATGAATTGTAAGTGGGATTCATGTGGATCTATGATTATTGGTGGGAGATGGAAATATGTGAGAGGTGGGAATATGTGTGGTTCTATTGTGGATTGTAATGCATGGATGGAAACAAGTGATACTAATATTCTGAAGTGTGAGTCGTATTCTGATATTAGTGGTAAAAGTGTTGATGAAGGTGGTATTAGTGCTATTGACTGTATTGAGACTGAGGATGGATGTTTGTTTGATATCATTGGTGATCCATGTGAATATTTTAATTTGGGTGAACGCAATCCAAATATTGTGAATGTGGCGAAGAAGAGATTGCAAGCTGTTTATGATGGAGATGCTATTGCTCCATTGGATTTAATGGGGAAGTTGCGTGGGGATGTTATTGATCCTGCTTTGCATGGGGATGTTAGTGATTTTTGGGGTCCTTTTCAGTTGTTTGAGGATGTGCAGTTTGAAAAGCTAATATTTCGTGATTTTACATTGCTTTATGAGGGGAGGAAAGTAGAGGTTGGGGATTATGAGGATGATGGTTTGATTGTTGATGTTGTGAGTGGGGAAGAGTCTGGATATCATAGTAAGGGAATGTTTGAGTTTGATGATGATGATATATTTGCAATTATTATCATTGGAGTGGTGTCG
ATATTGTTGTTGATGTTGTGTGCTGGTATTTCGTATTTTTGGAGAAAAGTGGATCGGAAATCGGAAGTAATGGCAGAAATGCAGCCCTTACTTGTTTTGTAGCAGGATGAGTGAACCTAATGAACGACGTTGAATTCTATTGACTGTTTCATCATGCGTTTTGCTCGTATTTTTGCCTTTGTGTCGCGTACTACTCGCGTTTTATTTAGAAAATCAAGTTCTTCCTCTACCACAGAACTGAAGAAAACCAAATTAAATGCTTTCCACATCGCAAATAACGCAAAATTAGTAGATTTCGGAGGATTTTCAATGCCAGTTCAATACAAAAATTTATCCATAACAGATTCCACTCTCCACACACGCAAATCATGCTCATTATTCGATGTATCACACATGGGACAATTGAAATTCTACGGAAAAGATCGCATTCGATTCTTAGAATCAATATTAGTGTCCAATGTATCTCCAATAAAAACAAATCAATTGAAATATTCATTAATGCTCAACAAAAATGGTGGAATCATTGATGATTTAGTAATTGCCAATTGCGATCAATCTAAAAACGATAATCCCCATCATTATATGGTTATAAATGCTGGACGTATCCCAGAAGATTTACAACATATTGATCATCAACTCTCTCAATTCAATGGTGATTGTAATTATACATTCATGGATAATCAATCATTGATTGCATTACAAGGGCCAAAAGCAGTTAATGTTCTGCAAAGATTAATAACTACTGATTTTGATTTCAATGGTTTGAAATATTACAATATGAGTGATATGAATATCAGTGGTATTCCAATTCAAGCATCGCGCAGTGGATATACTGGTGAAGATGGATTTGAGATTTCTGTGAGCAGTGATCAGATTGTTGAATTAGCTGAGGTTTTGTTGAATGAAAGTGAAGTTGAACTGGCGGGTTTGGGAGCGAGAGATGCTTTGAGATTGGAAGCTGGAATGTGTTTGTATGGAAATGATTTGAATGAGAATATAACTCCAAATCAAGCATGTTTGTTGTGGACAATGAGTAAGAAGAGAAGAATGGATGGAGGGTTTCTTGGGTTTGAAGCTGTTCGACATCAAATTGTGAATGGAGTTAAGATACAGAGGGTTGGGTTGATTGGGGAAAAAGGACTTACGCCGAGAGGTCATCAGAGAGTGGTGAATGAGAGTGGTGTGGATGTTGGGGAGGTTACATCTGGTACTTTTTCTCCGTGTTTGCAATTGCCGATTGCATTGGGGTATGTTGGTAGTGATTGGTGTGAGATTGGTACACGTTTGGAAGTTATAATTAGGGAAGGTAAGAAAATAAAGGTAGAAGTTTGTAAATTACCTTTTGTAGAAACGTCTTATTATAAGTAAACATTTGTAAGGGACACAAGGAACATAATTTTTAATTTATTTGACGATTTAAGGGGC
>NODE_284_length_3345_cov_73.038532_g225_i0
ACGTCAGTTCTTGTGCCTTACAAAAAATAAAGACACAATAGATTTAAAAATAAAGGGTGAAGCAAGAAAAAAAAATATAAAATAAAAATCAAAAAAGTATGGAAAAAAACTGTTTATAGCACAGATAATTGCAAAAAGAGAAGTAAAATACATATTACACACACAAACAAGAAATATATTCACCATCCAACAAACTTATACAGATTCTGAATTAGATGGTGAACTATCAGAACTTGTTCCATGAGCCAATTGCAATTGCGACATTGGCAAATCATCTGAATTTAAAACAGAGATAATATCGAAATGTGTACTATACTCATCACTTTCAATATCTCTTACCACCAACACATAATGTAATGATGTCTCTTGTACTGCTAAATACGTACCAGCAAATTCATTGCTCTCAAGTGTACCATATTTACTGTTATATCTAAATAATGAATATTTACTACCACTTCCATCATTCACATCTACACTCCCACCCTTATTGTACACACGTAAGTATCGACCAGTTTTATTATTTCTAAATTTTACTAAATCTCCATATATTTCTAAAATCCATTGTGAATTAATTTTCTCATATCCATTACCACGTGCATTAACCTCTTCATCATCCGATGCCACTCTCAAATGCTTACCAAATGCATGCTGTAACAGCACAACGCTATAATTCTTCAAAACACTCTCTCCATTCATTATCTTCCCATGTCTTACATCAGATATCATATGTTCATCATTATAACTTCTATTACGTCTATTATTATTATTCCTATTACCATTCATCCTCAATACTTCCATATCATCCTCATATTCATCATCATAATCACTTCCACCAGCACTACTAAAATTATTTACATTTATATTTGCAGAAAAGATAACTTTCGACCATGAATTCTTTCTCTTCACTCGTTTCTCAGCAAGTGTCAAAGGTTTAGGTTTCTTTCCTCTCAACTTCTCTTTTGCATGCTTTTGAAGTACAATTTCATATGGATCACGATTGTTAACCTCCGCATCCGATATCGCTTTTGCAATCGCATTCTCCTTTCTATCTTTCTTTGATAAATATTGCTGCATCTTCTTCCAATATGCACTATCCTTACTACGATATATCTTCTCCACTGAACATGAATTCTGTTTTAATGCTTGATTTTTATGATCACTATTGCGATAAAAAGCTTCACACAATTTGGGTAAGAACGCTTTTCTATCATAATCCACCAACCGTTGCATACTCTGTTCTTTCCATGCAGCAATTGCACGTTCAGCTACCGGTTGTCTGGTACTCAACATACAATCTGTAAGTTTTGTGGCAGCCGCCACCAATATCTCTTTATAGTTCTCGTACACAAATCCATGCTGGTGAGTGATCAATACATTGATGATATTCACTACTTCCATAATGAATATCTCTTCCTTTTGCGGTGATTGCTGTGGCCAGAAACGAAGCAGACCACCTAAGATTACTGAACCACCATTGCCATCCTTAGCAACATAATTCACACAACATTGTGTCAACTGTTCCCAGAACTTCTTCAATCGACGACATTTGTGCAATGGCACTAAAACATTTCGCACAAATGATTGCCAACTTTCTTTTACTGGTACAGTCAAACCTGGGATTATTGCACATATGATTTGCAATAATTCAATGATACCATTGACATGTTCAAAGTCTGAGGTATATATCATACGATAGAAGTATCCACACATGATTTGTATGATATATGGACGTAGTTTTAGACAACGACCATATATTTTATGAAGTATCATCATTAGATATGCACGTTCGCGGTGATCTTCAGATGCGAAGAGTTCTACTAAGCATTTGAGGAATTTACCAGTCATGTATTTCTCCATCATTTGGGCAGTGACCATTGGAGTGTTAATCACTCGCCAGGTTAGATCGTAGACTAATTTTAAGTGACGCCAGCTGGGATCTTCAAAGTTCGACTCTTTGTCGAATACTTCATCGTTGAAGAAGGAGAGAGCGGGTGGGCGAT
CTTGATATGGAAGAGGACGAAATAGATTGGCACCTACGGTTTCTAAGCATTTTTCGTAGAGGTTCTCTTTGAACCATACATGTCTTGCTATGAATTCACTCAATTCTAAGAGTAATTTCTCTTTTTTTATTATTAATTCATCCATTTCTGGTGTTTGAAAAAAGAAATCACATCGTGCTTGGCAATAATACAACTTAAGTACAAACATATCTTCTCTTTCATCTTCTGGATAATCAGCAATGGGTGATAGATTACTGTAATACAATTCTTGAGCAATACCACCTAATTTTCTGCTTTGCGCGAGATATATCTCTAAAACTCGATTGGTGGATGAATATGAACGAGAGTCTTCTGGTGAAGGTTTAGGTGTAGACGATGATGAATTCTGCTGTCTTGGCTCTACTTTTCTATCATCAGAGTCCTCATCACCTGTACCACTTTCATACAAAGATACCCAACCTTTGAACATCAATTTCTCACTCATACGAAGTTTTTTTATTCGTTTTATTTGTGGTTTGGGTGTTTTTGGCTTCACTTCAATGAAGAATGGATCATCTCGATTGGTATTACTTTTATCTGGTTTTTTTATTGATGGATCCACTGGTGCTGGAATAATTCTTTTGAAAGGTAACATTACTCCATGATTATCTTCAGCATCAAAATAGAATTTCTTTTTTACTGTTCCATTGTTTTTACCTTTTGGTGCATCCAATTCTACACCAAACCATACACCCTTATCAAAATGTGTGGGTCCAACATATTTGATAATACCAGGTGTCTTCTTCTTTACATATATAACTCTATCACCAATGGTAAGATTAAATTTCTTACCACGATGGAGTACACGTTCAACTCTCTGTTTTGGTACCCATATGGTTTTATTTCTGGGCGATTGATTATCAAATACATTTTCACTGCTTAGATGAACATCCATGACCACACGTACCCATACTCCACGTTTTTCTGGTTTAGCACCAATTGAAGTTACTTTTCCACTTCTTTTGTTTGCTAATAAGATTTTATCTCCAACATATAATTGTAGATCATCATTTTGATAGCTTATGTACTTCTTTTTATTTTTATGACCAGTGCTTTTGGTTCTATATTTTTTTGGATGTTGAGGTGAACGAGCAGTGCCATTGTGGCTGTTGTTATCTGTGTTAGTATCTATATTTTCAGGTTCTTCTTCATTATCGACATCTGGCATTGTTGTTGAATCACCTCTTTTTAGAGATGCTAATAATTCGTCATCATTGCGTTTCTTTTTCTTAAACCAACCAGACATTTTCTATGTTTATGATTTTTACTTTATTTCCTACGTTTTTTACGGTTCGCTTGAGCAAAA
>NODE_285_length_3344_cov_82.848558_g226_i0
TCAATTAAGACCTGTTGTGCTTCAAAAATCAAATCAAATCACAAAACCAAACCACAAAAACCACAAACATAAAATCATAGATGAGTTTCATAGCTGCCACAGAACAATACTACACCCACTACGAACAAGAAGGTCCTCTAAAACGCACTGAAGACATCCGCGATCACAAATGGCTCTTCTCCAGACACGACCTCTGTTCACTCATGGCCAATTTCCAAGAAAAACGCTTAGCACTCTCCACACTCGCCCAAATGGGCGGTAGCCAAGGCTTATGCTACGCCCTACGCAGTCACTCACGCACTGGCCTCGGCATCGATGAAATAGAAGACAACTCAGAAATCAATGCATTAGACATCAGACGCAGTAAATTCGGCATAAATGAATTGCCCCCACATAAATCTGATCCATATTACAAACTATGCTATGATGAATTGCAAGACCCTATGCTCTGTGTGTTAGTGGTCGCAGGTGTCATCTCTCTAGTCGTTGGTGCAGCATTGCATGCAGCGGATGGTGGATATATTGAAGGATTAGCAATACTCATTGCAGTAGTTATTGTAGTAAATGTTGGAGCCATAAACAATTGGCAAAAAGAGAAACAATTCCGAAAAATGGATGAAGAGAATAAAAAGAAGAATACAATAGTAATGAGATCTACAGAAATGGAGATTCCATGGAGTGAAGTGGTAGTTGGAGATCTAGTTATACTTAGAAATGGTTTTACTGTTCCAGCTGATGGTGTATTTGTATTAGGTACTGAAAATCTGCATACGGAGGAAAGTTCACTCACTGGTGAATCGCGTGAATTATCGAAGAATAGCGAGAATCCATTGTTGATGAAAGGTACGAATGTTGTGGAAGGTGAAGGACTTATGCTGGTGGTTACTGTTGGTCCATACACAGAATGGGGAAAACTTATGCTCGGATTGCAAGAGGAAAGAAAAGATACTCCATTGCAAGAGAAATTGGATCGTTTGGCTGGGTTGATTGGCTATGGCGGTGGAGCAGTAGCAATTCTTTTGTTCATAATATTGACCATCAACTGGGGAATCAATGGTGGACATGATGCAGATATAAATATATTGAATTTCTTGATCATTGCAATAACTATTGTAGTGGTTGCTGTTCCAGAAGGTTTACCATTGGCAGTTACCATCTCACTTGCATACTCAATGAAGAAAATGTTGTTGGACAATAATTTTGTACGTCATTTGAAAGCATGTGAGACAATGGGTAATGCAACAACCATATGTTCAGATAAGACTGGAACATTGACTACCAATCGAATGTCTGTTCAGCAAGTGTTTATGTATGGCAGACGATTCATTCGTAGTGAAATATGTGATACTCCAGAGATTGCTGCAAATGCTTTGACACCAGTTGTGCATCAATTGCTGATGAATTGCATATGCACTAATACCAAATCATTTCAGGAAGAGCCTAAAACTGTAGATGAAAGAGCAGCCATCGATGCTGGGAAACGTAAAAAAAGACTCACTGGCGGTAACCAAACGGATTGTGCAATGTTGCAGTTTGCAATTGATTTGGGTGCACATGATTACAAAGAACGTCGTAGAAACAGCCCAGTTACTAAAATGTTCCCTTTCAATAGCAAAGTAAAACGTAGTTCAGTGTTAGTGAGAGATAAGAATCGATATATAATGTATACCAAAGGCGCAGCTGAAGTAACACTTGAGATATGTACACATTATATGTCCAACAATGGAGAAAGTGTACGTATGTCCAACGAAGATAAAGCAAAAGTCTTAAAAGCTATGAATCTGATGACAAAACGCGGTTTACGCTGTCTCGGCACTTGTTACAAAACATTTGACAAATCTGAAATACCTTTTTCTTCCATAAGTTTGAATATCGCAGAAGAAGATTGCGGAATATTGTTTGAGAATATGATATGGATAGCAGTGATGGCCATACAAGATCCAGTACGCGATGAAGTACCAGATGCAGTTCTAACATGTCAGAGAGCAGGTATTGTAGTA
CGTATGGTAACTGGTGATCATTTGGAGACTGCCAAACATATTGCAAAAGAATGTCATATCCTCACATGTGCTGACCATGTGTGTATGACTGGAGAACGTTTTCGTTCTTTGACAGATGATGAAAAATTTGATCTTCTGCCAAGATTGAGAGTGTTGGCTCGTTCTAAACCTAAAGATAAGGAACAATTGGTGAAATGGTACAAAGAGAACAACAATGACATCGTAGCTACCACCGGTGATGGTGCAAATGATGCATTGGCATTGAAAGAAGCGAACATTGGACTGTCAATGGGTATACAAGGGACAGATGTTGCAAAAGAAGCATCAGATATTATTATTATGGATGACAATTTTGCATCTATAGTGCAGACAGTGATGTGGGGCAGATCTGTTTATGATAATATAAGAAAGTTTGTACAGTTTCAATTGACTGTGAATGTGGTTGCTTTGACATTGTCTTTGATTGCGGCATTTTGGACAGAGTTTGCTAATCCTTTGACAGCTGTGCAGTTGTTGTGGGTGAATTTGATTATGGATACAATGGCTGCGTTGGCATTGGCTACGGAGGACCCTACTCCAAAACTGTTGGACAGACATCCTTTTACACCTGATTCGAATTTGATCACTCAGATTTTGTGGAGGTTTGTGTTTGGACATTCTTTGTATCAATTGGTACTGCTTTTGATGACAATGTTTGTAGCGGATGAGTGGTTGGGTATTAGAGATATGGAGAAAGGGGAGGAACAGAATCGGAGACATTTGACTGTGATCTTCAATACTTTTGTGTGGATGCAGATTTTTAATGAGTTCAATGCACGTAAGGTGAATAATGAGTGGAATATTTTTGAACATTTGTTCGATAATCTGTACTTTTGGTTTATTATGGGAGTGACTGTGCTTTTGCAGATATTTATGATTGAGTTTTTTGGGGATTTTGCGTCAACAGAGGGTTTGAATGGAAAGGAGTGGGGATATTGTTTGGCATTGGGTGCAGGTTCTTTGCTGTGGCATCAGTTGGTGAGATTGGTCCCAGTGGATTTCAATGATGGCATTAAAATTGTGGATAGTGATGTGCTTTTTAAGACTGAGGTTGAATTTGAACCAGGGTATGTTGCTCCGAATGCGGAGGTCATTGATGATGAGAAGGATGATAGTACTTTGACTGCTGATAATATGGTACATGTTGCAAGTCATAGTACACAGCAGCATCTGTGATTTTTTTTTTGTGTTTTGTGCATAACATCAATGAAAACTATTTTTTGGGTATTTACTTTTTTTTAGGTGTATGTTCGTTGTTGGTTTGTAGTTGTACATATTTGCGGTTTATTTTTATATGAGTGTAGT
>NODE_294_length_3304_cov_64.247619_g234_i0
GAAAGGATTAGAATGGGCGATTTTTTATCTCATGTTCCATGTTCGATGAATAATCAGGAATTGATAGTGCATATGATTGAGAATTATTTGGAAGAGAGGGAGAGTCAAAGTAAAAATCACGCGGCATTTTATGAGAATGGAAATATTCCAAATTATATCCAGCAATTATTTAACGAGTGCACTTCTGACAGATATGCATTTCTTCTCAATTCATATGCAGTCAAGCATTCAATCACTAATGAACAATTGCTGAAATATTTGGATGGAAGTGAGAATGGAAAGAATTCTCCTTTTTTGAGTGTTTTTGGGAGGATTGCTGAAGCACATGTGTATGAATGGTTGGTGACTGGAGATGAATACGTTTCATATTTTTCAGATTCTCATCCATGGGTAGTAAATCAGCAATCATATCGAATAAATCTCGCAAAAGAATCTGAGGATGAGCCATTGTATCTAAAGTTCCATTGGGAATTAAACAATGATAAAAGTGATGGTTTATTATGGTGCTTATTGGTCTGTGATCAATTGCCAATGCAAGTGGAAGAAGTGAGAATGAATTTAGGGATATATATTCCAGAAACGAAATCTGAAACGAATGAGGGTTCTTTTTTGAATCGCCAACAGTTTAGAGGCAATCAATTGCACAGCATTGTAGAAAGCAAGAAGTTATTGATGGAGAATAATCCGCAAGTAAAGAAACCTGAATTTAATCTCAAATATATTTTCAGATGCTGAATGTCAAAAATAAATAGTTTCACTATGAAATATGTACACATACAAAACAAAAACAAAAACAGTCAAAATGGTAAAAAATAACAAAACAGTCAAAATGGTATATAAAAATACAATAAATATGGCCAACCCTCCCCAAATCAATCACTTTAAACTTCTCAAAATTGTCTCTCAAAATATTTCCCAAAATCATGCTTCGGATCAATAGGCTTAGCATCAAGCGCATCATTCGGATCAATCTGAATCTTCGCAAAATTCCCCAAATAAGTCGAACTCTTAACCTTATTAACAATAGGTGCAGGCATAGTCCCATCATTCAACTTAGCCCACGAAAAACTAGAATACCACGGATGCTTACGAATATTATTAGCACCACCTCTCATAACTCCCAACCTCCGAGTAGGCTTATTATGCAACAAACCTTTGATCAAATCACGCACTTCAGCACTGAAATATCTCGGAAAACGTATTCTCCCACGTATAATCTTACGATATGTCTCAATTGGATCATCAGCCACAAAAGGTGGAAAACTAGCCAACATCTCATATATCAATATCCCAAGCGTCCACCAATCAACACCTTTTCCATGTCCTTGACCAGTGACAATCTCAGGACACAAATAATCTGGTGTTCCACACAAAGTAAATGTCTTACCTGTCAAAAATTTAGCAAAACCAAAATCAGTAACTTTTAAGTATCCATCATTGTCCAACACCAGATTCTCTGGTTTCAAATCACGATAAATGATATTCATACTGTGCATGTAATCAAATGCTTCAATGACACAACCAGCATAAAAACGAGATGCTGGTTCATTGAAGTATCTGCGAGAACGTAGGATAGTAAATAATTCACCACCCAAACATACATCCAATAAAAAGTATACACGTAGTGGGTCATTGTAAGTGCAGTGTAAGTTCACTAAGAACTTGTTTTTCATAACATCCATCACTTGTTTTTCACTTACGATGTGTTTTTGCAGTTCAAGTTCTATCACTTGGAACTTTTTTATGGATTTTAAGGCGTATGATTTTTTTGTATGTGGATCGACCACTAAAGTTACTAAACCAAAACCACCTTTGCCAAGCACTCCAACAGTCTGCAACTCATGCAAATCACAAATCTTTTTAGGTTTCGTAGCTGCGTTTTTATTTTGCTCTGCAGCTTTCGATCTTATTTGCTTTTTCTTCTCACGAACAGCATCTTCAGCTATCATTTTTGCAGAACGACTGTACTCTGATATCTGATCATCCACTAATTCAATGAC
TGGTCCAAGCAAATCAAAGAAATGTTGACTGTCCATTTCTAAGCAGGTTAAGTCAGTTTTTGCACTGATTGTTGCTGCACGTGCTTGTTTTGTACGCAAAGCACGTTCACCAAAGAATTGTCCTTTAGTTAATATACCTTTTTCACCATTAACTTTTTTCCATTCTGCTGAACCTTTATATATAACATAAAATCGTTCACCAACTTCCCCCTGCCTAAAAACCACAGTTTTTGCTGAATATACCTTCTCTTCCAGCGACGAACCCAAATCCATAATCTCTGATCTCAGCAATGGCTTAAACAACGAAACAGTAGGCAAAAAAGCCATTAATTCTTCATCTTTCTGTTTACTGACAACAATAACCTCATATCTAAACATCTGTCGTTCAATCACCCATAAACGACCACCTTTTTTAGATACACATTTCACAGTTGCAGCTCTGGGTGCATCATTAATTAATGCAAGTTCACCAAAACACCCACCTCTTTTGTATGAATGTACTTTACATTTATCTTTTAATACATTATAATCTCCACTCTCAACCACATAAAAAGTCGTTGCTTTTAAGTCTCCTTGATTGATAATACATTCATTGTAATCAACAGTAATCATTCTCATACGTGATAATATTCGTCTCTTCATTGCTTTATCAAAATTCTGAAATAACAAATTATCAGCTACAGATTTCATCAACCATTTCATAGCACTATCACTCACATTCGACAAAGGTGCTGCAGCATATTTACTACTACCACTACCACTGCTGGTATGAGCATTATTATTATTGATATTACCACTGCCACCACTCTTGTACTCATCGCGTAATGCACTCAAATTTATTGGAGTAATAAAAGCATTTCGTTTTGCTTCACGTTTGGCAAATCTCACGGAAGAACTCGATTGTATAACCTTTTGAAACAATTTCTGGTTGCATACCAGACATATAACTTTAGAACATGCTTGTAAGCTGGCATTTCTTTTGGCATTTTGGAGTAATGCTTGTTCACCTGCGTAGTCACCTGTTTCTAGTGTTGCTACCACTTTGTTGTCTGTTGATAGGACGTTGACTATGCCTTCTACTATGATGTAGAATGAATCACCAGTCTCGCCTTCTTTCATTATATATTCACCTTTAGCGAACTGTTGGGTTACTAAACCATTTGAGAGGTTCTTACGTTCGATTGGTGATAAACCAGAGAGTAAAGGAACTGATGCTAGAATGGTGTTTATTACGGATTGTCTAGTTTTAGAGTACTTTGAACTGGTCTTTGTTTTATTTGTAGTGCCAGACATTATTTTTTTAT
>NODE_373_length_3084_cov_119.730807_g297_i0
TTTGCTTATTTTTTTCTTATTCCTTGGTTCTTCTTGGTTTTGTGTTTGTTTAATGACAACAACAACACCTGCAGTAACGATAACAAACGTTGATCGGAACAACCCGTATGCATCTGCTTCTTTGTATGTGGGTGATTTGGCAGCAGATGTGACTGAAGCCACTCTCTTCGAATTATTCAACGCAGTTGGTCCAGTTGCCAGCATACGTGTGTGCAGAGATGCAACCACACGTCGTTCCTTGGGTTATGCTTATGTAAATTTCCATTCAGTGCACGATGCAGAAGTGTCATTGGATATCATGAATTTTACAAACATTCGTGGTCGCCACTGTCGAATTATGTGGAGTCAGAGGGACCCTCGCTTACGCAAATCTGGCAAAGGCAACATTTTTGTCAAGAATTTGCATGAGTCCATTGACAACAAAACACTTTATGATACGTTCTCTGTGTTTGGAAGTATTCTCTCCTGCAAAGTAGTGGTTGATAGAGATTCAGGACTTTCTCGCGGTTATGGATATGTACACTACGTGGATGATAAGTCAGCAGCTAAAGCTATCGAAGGAGTAAATGGAATGAAAATCAATCAATGCCAAGTGCATGCTGAGCTTTTTAAACCGCGCGAAGAGAGAATGAAAGACCCGAAGTATGAGTTCACTAATATTTATGTGAAATACATTCCAAGTGGTGTGAATGAGAAAAAGTTGGTGGAGCTTTTCCAGCGCGAAGCGGAAGAAGTTTGTAATAAGTATGATTTTTGGTACAGAGAGTATGGAATATCTGCTTGTTTTAATTTCAAGTCTACGGCTGGTGCTCGTAGAGCAATCAGAGAAATGAATGGCAAGTTTTTGCATGATTTCAAAGACATTGATCAATTGTTTGATGAGAAGGAGGCAGTAGCAGTGGTTGAAGATAAAGTTGCAGATGAAGCAGTTGTCAATGGTAATGAAGAGAAAGAGAACAGTGGGCAAGCTGAAGAATCTGTTGCCGGCGGTGATGAGAATAAAAATGCGAAGGCAGAAGAAGAAGAAAAAGATTCTGAAGCAGATATAATTACCACTTCTGCTGCCACTACCACTGGTAATTCAACTGCTATCGCTGCTGTTGCTTCAGAAGATAGCAAAGCAGATGATAGCAAAGATGACAGTAAATCGCGTGAAGATCGTCTTCCAGTGCCAACCACAAATGGTGCTTTGATGCGTGTGCAACAGCGTGGATTGTATGTTGCGCGTGCACAAAAAGGAAATGAGCGCAAAGAGTTTTTAAATCGCATGGCTCGAAGTGTAACTCTCAATGGAAGACGAATTGGAATTCCTGGGGCGAATTTGTATGTAAAGAATTTGAGTCCAGAGGTGAATGATGACAAATTACGTGAGATGTTCGCTATTTTTGGTACAATTACATCTGCAGTAGTGATGACAGAGAAAGATAGTAAGAAATCGAGAGGTTTTGGTTTTGTAGCATATCAAAAGAAAGAGAGTGCTGCTCGTGCAATTCATGAAATGATGAATAGTTTGCACAATGGTAAACCTTTGTATGTGTCGCGTGCTCAAAGCAAACAATTTCGCCAACAGTTTATAGCGAAACAACTTCGCCAAAGAGGTAATTTCAGTGGTAGAGGTCGTGGTGGATACCGTGGACGTGGAAGAGGTAATTACCGTGGTCGAGGTAGAGGTCGAGGAAGTGGAGGTTCATACAGTGGAGGATATCGTTCATTTCGCGGTGGATATCGCGGAGGATACAATAATTACCGAGGTAGAGGCAGAGGACGTGGACGTGGACGATCCAATTTCCCACCAACTTCGGTCTACAGTGGATATCCTCCTCAACAGCCTACCCCTTACGCAGGTGTATATGGAGGTGGACAAGCATATCCAGCATCGAGTCCATACGCTCGACAGTATTCTGGTCAATATCCGAATTATGCTCAGGCATCTCGTGTACCATATGGTTATCCTGGTCAAAGTGCAGCTAATGCACAAATGGCGAGAACTGCGCAACCAT
CAGTGCCTATGAATTATTCTGCACAGGGAGTTGTGCCGAATATGAATCGAGCATATATTCCGCAAACAGCACAAGGGTACATTCAACAACCGTATGTGAATCCGCAGCTTCAATTGAGACAAACGCAGAATATTCCGTCTCAGCCCGCAAGTGCTGCATTAGCGGTAGCTCAACCGCAGCAAATTCAATCTTTGCCAGCTGGACAGCAATCTGGTTCAATGATGTATGGAACATCATCTGTTCCTCGTCCACCGTACAGTGTGAATCAAATTGTGCATCAAGTGCCAATGCCATCACAGCCATTGGCACAACCACAGACATCTGTGAGTCAATTGAGTGCAAACAAGGATTCTGCTCCAGTCTTAGAGAACCATCCATTGACCAGTGAAATGTTGAAGGAAGCAAAGCCGCCAGAGCGCAAACGATTGATTGGTGAGAGATTGTTTCCAAAAATTCAAGTGGTTGAGCCTCGATTGGCAGGGAAGATTACAGGAATGTTGTTAGAGATGGATAATACTGAACTGTTGGTACTGTTGTCAGACCAGGCTGCGTTGATGAGTAAGATTAATGAGGCATTGGCTGTGTTGAAGGATCATCAACAGAAGCAGTCTCAGCGCAATCCTGAATCATCCAAGAATCAGTCATCCCAGGCAAACAAAGCAGGTTCGCAGTCAAATCAAGCAAGCTCTTCTGCTGCACAAGCGAATCAAACTAGTGTTGGACAACATGTTGCGCAACCGAGATCTGCTGCCAATCCATAAACAATTGTATGATGCTCGATGGGTGTTCAATTGCCTCATCCATCTCCATTGCATTCACTATTCTTATTATTTTTAGTTCTAATTTTTATTTAATTTTGCCATACACAAAAAAAAATAAAAAAAATAAAAAAAAAACTGATTTTCTATTGAAGTGTATGGTGTTTGACCAAGTTTTGTGTTTAAATTGTTTCTTTTTTCGATTTTATTTATTTTTTGCCTTTTGTTTAGTTTATATCACGGATATATATATACAATATTATGTTTCTAGAATGTGTTTATTACTATTTCCGAAACCATTGTCCACTTTGAAAAATTGAAAAAAG

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

View File

@ -1 +0,0 @@

View File

@ -1,12 +1,12 @@
# Last updated Jan 2024
# Authors: Auden Cote-L'Heureux and Mario Ceron-Romero
# This script chooses orthologs to concatenate OGs. This can be done as part of an end-to-end PhyloToL run,
# This script chooses orthologs to concatenate OGs. This can be done as part of an end-to-end EukPhylo run,
# or by inputting already complete alignments and gene trees and running only the concatenation step.
# Use the --concatenate flag to run this step, and optionally use the argument --concat_target_taxa to input
# a file containing a list of taxon codes to be included in the concatenated alignment. If a GF has more
# than one sequence from a taxon, a representative ortholog must be chosen to include in the concatenated alignment.
# To do this, for each taxon PhyloToL keeps only the sequences falling in the monophyletic clade in the tree
# To do this, for each taxon EukPhylo keeps only the sequences falling in the monophyletic clade in the tree
# that contains the greatest number of species of the taxon's minor clade (or major clade, if the target taxon list
# uses major-clade codes). If multiple sequences from the taxon fall into this largest clade, then the sequence
# with the highest score (defined as length times k-mer coverage for transcriptomic data with k-mer coverage
@ -118,17 +118,15 @@ def remove_paralogs(params):
#Getting a clean list of all target taxa
if type(params.concat_target_taxa) is list:
target_codes = [code.strip() for code in params.concat_target_taxa if code.strip() != '']
if os.path.isfile(params.concat_target_taxa):
try:
target_codes = [l.strip() for l in open(params.concat_target_taxa).readlines() if l.strip() != '']
except AttributeError:
print('\n\nError: invalid "concat_target_taxa" argument. This must be a comma-separated list of any number of digits/characters to describe focal taxa (e.g. Sr_ci_S OR Am_tu), or a file with the extension .txt containing a list of complete or partial taxon codes. All sequences containing the complete/partial code will be identified as belonging to target taxa.\n\n')
elif params.concat_target_taxa != None:
if os.path.isfile(params.concat_target_taxa):
try:
target_codes = [l.strip() for l in open(params.concat_target_taxa).readlines() if l.strip() != '']
except AttributeError:
print('\n\nError: invalid "concat_target_taxa" argument. This must be a comma-separated list of any number of digits/characters to describe focal taxa (e.g. Sr_ci_S OR Am_tu), or a file with the extension .txt containing a list of complete or partial taxon codes. All sequences containing the complete/partial code will be identified as belonging to target taxa.\n\n')
target_codes = [params.concat_target_taxa]
else:
print('\nERROR: missing --concat_target_taxa argument. When concatenating, you need to give the taxonomic group (sequence prefix), groups, or a file containing a list of groups (multiple prefixes) for which to select sequences to construct a concatenated alignment\n')
exit()
target_codes = [leaf.name[:10] for leaf in tree]
monophyletic_clades = { }

View File

@ -1,4 +1,4 @@
# Last updated Jan 2024
# Last updated Jan 2025
# Authors: Auden Cote-L'Heureux, Mario Ceron-Romero.
# This script contains the entirety of the contamination loop, an iterative tool to assess
@ -323,15 +323,45 @@ def cl_mafft(params):
if file.split('.')[-1] in ('fasta', 'fas', 'faa'):
os.system('mafft ' + params.output + '/Output/Pre-Guidance/' + file + ' > ' + params.output + '/Output/NotGapTrimmed/' + file)
os.system('Scripts/trimal-trimAl/source/trimal -in ' + params.output + '/Output/NotGapTrimmed/' + file + ' -out ' + params.output + '/Output/Guidance/' + file.split('.')[0] + '.95gapTrimmed.fasta' + ' -gapthreshold 0.05 -fasta')
os.system('Scripts/trimal-trimAl/source/trimal -in ' + params.output + '/Output/NotGapTrimmed/' + file + ' -out ' + params.output + '/Output/Guidance/' + file.split('.')[0] + '.95gapTrimmed.fasta' + ' -gapthreshold ' + str(params.trimal_cutoff) + ' -fasta')
#Utility function to run FastTree in between iterations (if this is the chosen tree-building method)
def cl_fasttree(params):
    """Run FastTree on every alignment in the Guidance output folder.

    Each file in <params.output>/Output/Guidance whose final extension is
    'fasta', 'fas' or 'faa' is passed to the FastTree executable, and the
    program's standard output is redirected to
    <params.output>/Output/Trees/<name before the first '.'>.FastTree.tre.
    Files with any other extension are ignored.

    params -- run parameters; only params.output is read here.
    """
    out_root = params.output
    for aln_name in os.listdir(out_root + '/Output/Guidance'):
        pieces = aln_name.split('.')
        # Only alignment files are tree-building inputs; skip everything else.
        if pieces[-1] not in ('fasta', 'fas', 'faa'):
            continue
        aln_path = out_root + '/Output/Guidance/' + aln_name
        tree_path = out_root + '/Output/Trees/' + pieces[0] + '.FastTree.tre'
        os.system('FastTree ' + aln_path + ' > ' + tree_path)
#Shared implementation for cl_iqtree() and cl_iqtree_fast(); the two differ only in the '--fast' flag
def _cl_run_iqtree(params, fast):
    """Build an IQ-TREE tree for every alignment in the Guidance output folder.

    For each file in <params.output>/Output/Guidance whose final extension is
    'fasta', 'fas' or 'faa', iqtree2 is run with '-m LG+G -T 10' into a
    per-OG folder under <params.output>/Output/Intermediate/IQTree. If a
    '.treefile' was produced it is copied to <params.output>/Output/Trees as
    '<OG>.IQTree.tree'. The intermediate IQTree folder is then emptied.

    params -- run parameters; only params.output is read here.
    fast   -- when True, '--fast' is added to the iqtree2 command line.
    """
    import shutil

    guidance_dir = params.output + '/Output/Guidance'
    iqtree_dir = params.output + '/Output/Intermediate/IQTree'
    trees_dir = params.output + '/Output/Trees'
    fast_flag = ' --fast' if fast else ''

    for file in os.listdir(guidance_dir):
        if file.split('.')[-1] not in ('fasta', 'fas', 'faa'):
            continue

        #OG name = file name up to the first '.', without any '_preguidance' suffix
        og_name = file.split('.')[0].split('_preguidance')[0]
        tax_iqtree_outdir = iqtree_dir + '/' + og_name

        #A folder left behind by an interrupted run must not crash this run, and its
        #old contents must not be picked up as this iteration's tree, so start clean.
        if os.path.isdir(tax_iqtree_outdir):
            shutil.rmtree(tax_iqtree_outdir)
        #makedirs also creates the IQTree parent folder (and Intermediate) if missing
        os.makedirs(tax_iqtree_outdir)

        prefix = tax_iqtree_outdir + '/' + og_name + '.IQTree'
        os.system('iqtree2 -s ' + guidance_dir + '/' + file + ' -m LG+G -T 10' + fast_flag + ' --prefix ' + prefix)

        #Copy over the final output (only present if iqtree2 succeeded)
        if os.path.isfile(prefix + '.treefile'):
            os.system('cp ' + prefix + '.treefile ' + trees_dir + '/' + og_name + '.IQTree.tree')

        #Intermediate files are not needed once the tree has been copied
        os.system('rm -r ' + iqtree_dir + '/*')


#Utility function to run Iqtree in between iterations (if this is the chosen tree-building method)
def cl_iqtree(params):
    """Run iqtree2 (standard search) on every alignment in Output/Guidance."""
    _cl_run_iqtree(params, False)


#Utility function to run Iqtree_fast in between iterations (if this is the chosen tree-building method)
def cl_iqtree_fast(params):
    """Run iqtree2 with '--fast' on every alignment in Output/Guidance."""
    _cl_run_iqtree(params, True)
#Wrapper script to manage parameters and iteration
def run(params):
@ -440,11 +470,15 @@ def run(params):
if params.cl_tree_method == 'fasttree':
cl_fasttree(params)
else:
if 'iqtree' in params.cl_tree_method:
os.system('rm -r ' + params.output + '/Output/Intermediate/IQTree/*')
elif params.cl_tree_method == 'raxml':
os.system('rm -r ' + params.output + '/Output/Intermediate/RAxML/*')
elif params.cl_tree_method == 'iqtree':
cl_iqtree(params)
os.system('rm -r ' + params.output + '/Output/Intermediate/IQTree/*')
elif params.cl_tree_method == 'iqtree_fast':
cl_iqtree_fast(params)
os.system('rm -r ' + params.output + '/Output/Intermediate/IQTree/*')
elif params.cl_tree_method == 'raxml':
os.system('rm -r ' + params.output + '/Output/Intermediate/RAxML/*')
trees.run(params)

View File

@ -2,7 +2,7 @@
# Author: Auden Cote-L'Heureux
# This script is what users should call when running any or all components of
# PhyloToL 6 part 2. It briefly determines which parts of the pipeline should be
# EukPhylo part 2. It briefly determines which parts of the pipeline should be
# run (pre-Guidance, Guidance, tree building, contamination loop, and/or
# concatenation) based on the --start and --end parameters, and then runs all
# of these components. Each component is actually run by the run() function in

View File

@ -1,12 +1,12 @@
# Last updated Apr 2 2024
# Last updated Jun 02 2025
# Authors: Auden Cote-L'Heureux and Mario Ceron-Romero
# This script runs Guidance in an iterative fashion for both MSA construction
# and more rigorous homology assessment than what is offered in PhyloToL 6 part 1.
# and more rigorous homology assessment than what is offered in EukPhylo part 1.
# Guidance runs until the input number of iterations (--guidance_iters, default = 5)
# has been reached, or until there are no sequences below the sequence score cutoff.
# All sequences below the score cutoff (--seq_cutoff, default = 0.3) are removed at
# each iteration. By default, PhyloToL does not remove residues that fall below the
# each iteration. By default, EukPhylo does not remove residues that fall below the
# given residue cutoff (--res_cutoff) and columns that fall below the given column
# cutoff (--col_cutoff, defaults are 0), though this can be turned on by adjusting
# these parameters. Outputs at this point are found in the “Guidance_NotGapTrimmed”
@ -14,9 +14,14 @@
# that are at least 95% gaps (or --gap_trim_cutoff) generating files in the “Guidance”
# output folder.
# Users should note that there are two versions of Guidance. This script, by default, uses
# the newest version (v2.1). Users who wish to use the older version of Guidance will have
# to make a small change in guidance.py (look for a comment in the script with the phrase
# "UNCOMMENT THE FOLLOWING LINE IF USING v2.0.2"). See the Wiki for more information here.
# This step is intended to be run starting with --start = unaligned (but not raw)
# inputs, meaning one amino acid alignment per OG. It can also be run directly after the
# preguidance step. The run() function is called in two places: in phylotol.py generally,
# preguidance step. The run() function is called in two places: in eukphylo.py generally,
# and in contamination.py if the contamination loop is using Guidance as the re-alignment
# method.
@ -24,7 +29,7 @@
import os, sys, re
from Bio import SeqIO
#Called in phylotol.py and contamination.py
#Called in eukphylo.py and contamination.py
def run(params):
if params.start == 'raw' or params.start == 'unaligned':
@ -53,6 +58,25 @@ def run(params):
guidance_removed_file = open(params.output + '/Output/GuidanceRemovedSeqs.txt', 'w')
guidance_removed_file.write('Sequence\tScore\n')
too_many_seqs = False
#For each unaligned AA fasta file
for file in [f for f in os.listdir(guidance_input) if f.endswith('.fa') or f.endswith('.faa') or f.endswith('.fasta')]:
nseqs = len([rec for rec in SeqIO.parse(guidance_input + '/' + file, 'fasta')])
if nseqs > 2000:
too_many_seqs = True
#Print if OG has > 2000 seqs
guidance_log = open(params.output + '/Output/GuidanceLog.txt', 'w')
guidance_log.write(file + ' has more than 2000 seqs.\nStopping run')
print(file + 'has more than 2000 seqs')
print('Do you want to run this?')
print('Stopping run.')
break
if too_many_seqs and not params.allow_large_files:
return False
#For each unaligned AA fasta file
for file in [f for f in os.listdir(guidance_input) if f.endswith('.fa') or f.endswith('.faa') or f.endswith('.fasta')]:
tax_guidance_outdir = params.output + '/Output/Intermediate/Guidance/Output/' + file.split('.')[0].split('_preguidance')[0]
@ -77,8 +101,17 @@ def run(params):
else:
mafft_alg = 'auto'
#Running Guidance (one per OG per iteration)
os.system('Scripts/guidance.v2.02/www/Guidance/guidance.pl --seqFile ' + guidance_input + '/' + file + ' --msaProgram MAFFT --seqType aa --outDir ' + tax_guidance_outdir + ' --seqCutoff ' + str(params.seq_cutoff) + ' --colCutoff ' + str(params.col_cutoff) + " --outOrder as_input --bootstraps 10 --MSA_Param '\\--" + mafft_alg + " --maxiterate 1000 --thread " + str(params.guidance_threads) + " --bl 62 --anysymbol' > " + params.output + '/Output/Intermediate/Guidance/Output/' + file[:10] + '/log.txt')
#For Guidance v2.1 (2025 version) on the grid ... COMMENT OUT THE FOLLOWING LINE IF USING v2.0.2
os.system('python ' + params.guidance_path + '/script/guidance_main.py --seqFile ' + guidance_input + '/' + file + ' --msaProgram MAFFT --seqType aa --outDir ' + tax_guidance_outdir + ' --seqCutoff ' + str(params.seq_cutoff) + ' --colCutoff ' + str(params.col_cutoff) + " --outOrder as_input --bootstraps 10 --MSA_Param '\\--" + mafft_alg + " --maxiterate 1000 --thread " + str(params.guidance_threads) + " --bl 62 --anysymbol' > " + params.output + '/Output/Intermediate/Guidance/Output/' + file[:10] + '/log.txt')
#For Guidance v2.0.2 (original version in PhyloToL 6). UNCOMMENT THE FOLLOWING LINE IF USING v2.0.2
#os.system('Scripts/guidance.v2.02/www/Guidance/guidance.pl --seqFile ' + guidance_input + '/' + file + ' --msaProgram MAFFT --seqType aa --outDir ' + tax_guidance_outdir + ' --seqCutoff ' + str(params.seq_cutoff) + ' --colCutoff ' + str(params.col_cutoff) + " --outOrder as_input --bootstraps 10 --MSA_Param '\\--" + mafft_alg + " --maxiterate 1000 --thread " + str(params.guidance_threads) + " --bl 62 --anysymbol' > " + params.output + '/Output/Intermediate/Guidance/Output/' + file[:10] + '/log.txt')
#For UMass Unity users, use the following line and comment out the others:
#os.system('python3 /work/pi_lkatz_smith_edu/Guidance/guidance_Linux/script/guidance_main.py --seqFile ' + guidance_input + '/' + file + ' --msaProgram MAFFT --seqType aa --outDir ' + tax_guidance_outdir + ' --seqCutoff ' + str(params.seq_cutoff) + ' --colCutoff ' + str(params.col_cutoff) + " --outOrder as_input --bootstraps 10 --MSA_Param '\\--" + mafft_alg + " --maxiterate 1000 --thread " + str(params.guidance_threads) + " --bl 62 --anysymbol' > " + params.output + '/Output/Intermediate/Guidance/Output/' + file[:10] + '/log.txt')
#For Smith College Grid users, use the following line and comment out the others:
#os.system('python /gridapps/software/Guidance_mid/2.1b-foss-2023a/bin/script/guidance_main.py --seqFile ' + guidance_input + '/' + file + ' --msaProgram MAFFT --seqType aa --outDir ' + tax_guidance_outdir + ' --seqCutoff ' + str(params.seq_cutoff) + ' --colCutoff ' + str(params.col_cutoff) + " --outOrder as_input --bootstraps 10 --MSA_Param '\\--" + mafft_alg + " --maxiterate 1000 --thread " + str(params.guidance_threads) + " --bl 62 --anysymbol' > " + params.output + '/Output/Intermediate/Guidance/Output/' + file[:10] + '/log.txt')
#Checking for a sequence score file; if not available, Guidance failed.
if os.path.isfile(tax_guidance_outdir + '/MSA.MAFFT.Guidance2_res_pair_seq.scr_with_Names'):
@ -150,10 +183,10 @@ def run(params):
os.system('mafft ' + tax_guidance_outdir + '/postGuidance_preTrimAl_unaligned.fasta > ' + tax_guidance_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '.postGuidance_preTrimAl_aligned.fasta')
#Gap trimming
os.system('Scripts/trimal-trimAl/source/trimal -in ' + tax_guidance_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '.postGuidance_preTrimAl_aligned.fasta -out ' + tax_guidance_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '.95gapTrimmed.fasta -gapthreshold 0.05 -fasta')
os.system('Scripts/trimal-trimAl/source/trimal -in ' + tax_guidance_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '.postGuidance_preTrimAl_aligned.fasta -out ' + tax_guidance_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '.70gapTrimmed.fasta -gapthreshold ' + str(params.trimal_cutoff) + ' -fasta')
#Copying over final alignments (pre and post gap trimming) into output folder.
os.system('cp ' + tax_guidance_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '.95gapTrimmed.fasta ' + params.output + '/Output/Guidance/' + file.split('.')[0].split('_preguidance')[0] + '.95gapTrimmed.fasta')
os.system('cp ' + tax_guidance_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '.70gapTrimmed.fasta ' + params.output + '/Output/Guidance/' + file.split('.')[0].split('_preguidance')[0] + '.70gapTrimmed.fasta')
os.system('cp ' + tax_guidance_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '.postGuidance_preTrimAl_aligned.fasta ' + params.output + '/Output/NotGapTrimmed/' + file.split('.')[0].split('_preguidance')[0] + '.postGuidance_preTrimAl_aligned.fasta')
#Removing intermediate files if not --keep_temp
@ -168,6 +201,8 @@ def run(params):
os.system('mv ' + tax_guidance_outdir + '/' + gdir_file + ' ' + tax_guidance_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '_' + gdir_file)
guidance_removed_file.close()
return True

View File

@ -2,7 +2,7 @@
# Authors: Auden Cote-L'Heureux, Mario Ceron-Romero, Godwin Ani
# This script is only run when --start = unaligned. This typically means that a user
# is inputting ReadyToGo files as output by PhyloToL 6 part 1. The script contains two optional
# is inputting ReadyToGo files as output by EukPhylo part 1. The script contains two optional
# filters. One filter aims to remove sequences outside silent-site GC content ranges set by
# the user, and relies on the output of the utility script GC_Identifier_v1.0.py. See the manual
# for details on using this filter. Sequence filtration by composition is set using the --og_prefix
@ -27,7 +27,7 @@
import os, sys, re
from Bio import SeqIO
#This function is called ONLY in phylotol.py.
#This function is called ONLY in eukphylo.py.
def run(params):
#Reading in the list of gene families to use (--gf_list)

View File

@ -2,7 +2,7 @@
# Authors: Auden Cote-L'Heureux and Mario Ceron-Romero
# This is a relatively simple script that only runs trees, using either IQ-Tree
# or RAxML. The run() function is called in two places: both in phylotol.py, and
# or RAxML. The run() function is called in two places: both in eukphylo.py, and
# in contamination.py, where it is used to re-build trees. When starting at this
# step, users must input one aligned amino acid fasta file per OG. Otherwise, if
# starting at the pre-Guidance or Guidance steps, this step will be run if --end = trees.
@ -12,7 +12,7 @@ import os, sys, re
from Bio import SeqIO
from color import color
#Called in phylotol.py and contamination.py
#Called in eukphylo.py and contamination.py
def run(params):
#Checking whether aligned files were input, or it should just start with the Guidance outputs from the previous step.
@ -34,7 +34,7 @@ def run(params):
for file in [f for f in os.listdir(guidance_path) if f.endswith('.fa') or f.endswith('.faa') or f.endswith('.fasta') or f.endswith('.fas') or f.endswith('.aln')]:
#Run IQ-Tree
if params.tree_method == 'iqtree':
if params.tree_method == 'iqtree' or params.tree_method == 'iqtree_fast':
#Make intermediate folders
if not os.path.isdir(params.output + '/Output/Intermediate/IQTree'):
os.mkdir(params.output + '/Output/Intermediate/IQTree')
@ -42,10 +42,21 @@ def run(params):
tax_iqtree_outdir = params.output + '/Output/Intermediate/IQTree/' + file.split('.')[0].split('_preguidance')[0]
os.mkdir(tax_iqtree_outdir)
#Run IQ-Tree
os.system('iqtree2 -s ' + guidance_path + '/' + file + ' -m LG+G -T 10 --prefix ' + tax_iqtree_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '.IQTree')
#Copy over the final output
#Examples on how to run IQ-Tree
#Comment out the lines that do not fit your system
#Run IQ-Tree on the Smith College grid
if params.tree_method == 'iqtree':
os.system('iqtree2 -s ' + guidance_path + '/' + file + ' -m LG+G -T 10 --prefix ' + tax_iqtree_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '.IQTree')
elif params.tree_method == 'iqtree_fast':
os.system('iqtree2 -s ' + guidance_path + '/' + file + ' -m LG+G -T 10 --fast --prefix ' + tax_iqtree_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '.IQTree')
#Run IQ-Tree in HPC Unity Cluster
#if params.tree_method == 'iqtree':
#os.system('iqtree2 -s ' + guidance_path + '/' + file + ' -m LG+G --prefix ' + tax_iqtree_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '.IQTree')
#elif params.tree_method == 'iqtree_fast':
#os.system('iqtree2 -s ' + guidance_path + '/' + file + ' -m LG+G --fast --prefix ' + tax_iqtree_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '.IQTree')
# Copy over the final output
if os.path.isfile(tax_iqtree_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '.IQTree.treefile'):
os.system('cp ' + tax_iqtree_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '.IQTree.treefile ' + params.output + '/Output/Trees/' + file.split('.')[0].split('_preguidance')[0] + '.IQTree.tree')
#color(params.output + '/Output/Trees/' + file.split('.')[0].split('_preguidance')[0] + '.IQTree.tree')

View File

@ -2,33 +2,33 @@
# Author: Auden Cote-L'Heureux
# This script is a general utility script that does two main things. First, it has
# a function to read in all PhyloToL parameters, which is called in phylotol.py.
# It also has a function that checks for and cleans up existing PhyloToL part 2
# a function to read in all EukPhylo parameters, which is called in eukphylo.py.
# It also has a function that checks for and cleans up existing EukPhylo part 2
# output files from previous runs, and creates a new, empty Output folder structure
# for the new run. This function is also called only in phylotol.py.
# for the new run. This function is also called only in eukphylo.py.
#Dependencies
import os, sys, re
import argparse
import shutil
#Reading in all parameters. This function is only called once, in phylotol.py
#Reading in all parameters. This function is only called once, in eukphylo.py
def get_params():
parser = argparse.ArgumentParser(
prog = 'PhyloToL v6.0',
description = "Updated January, 2022 by Auden Cote-L'Heureux. Link to GitHub: https://github.com/AudenCote/PhyloToL_v6.0"
prog = 'EukPhylo v1.0',
description = "Updated January, 2022 by Auden Cote-L'Heureux. Link to GitHub: https://github.com/Katzlab/EukPhylo"
)
common = parser.add_argument_group('Commonly adjusted parameters')
common.add_argument('--start', default = 'raw', choices = {'raw', 'unaligned', 'aligned', 'trees'}, help = 'Stage at which to start running PhyloToL.')
common.add_argument('--end', default = 'trees', choices = {'unaligned', 'aligned', 'trees'}, help = 'Stage until which to run PhyloToL. Options are "unaligned" (which will run up to but not including guidance), "aligned" (which will run up to but not including RAxML), and "trees" which will run through RAxML')
common.add_argument('--start', default = 'raw', choices = {'raw', 'unaligned', 'aligned', 'trees'}, help = 'Stage at which to start running EukPhylo.')
common.add_argument('--end', default = 'trees', choices = {'unaligned', 'aligned', 'trees'}, help = 'Stage until which to run EukPhylo. Options are "unaligned" (which will run up to but not including guidance), "aligned" (which will run up to but not including RAxML), and "trees" which will run through RAxML')
common.add_argument('--gf_list', default = None, help = 'Path to the file with the GFs of interest. Only required if starting from the raw dataset.')
common.add_argument('--taxon_list', default = None, help = 'Path to the file with the taxa (10-digit codes) to include in the output.')
common.add_argument('--data', help = 'Path to the input dataset. The format of this varies depending on your --start parameter. If you are running the contamination loop starting with trees, this folder must include both trees AND a fasta file for each tree (with identical file names other than the extension) that includes an amino-acid sequence for each tip of the tree (with the sequence names matching exactly the tip names).')
common.add_argument('--output', default = './', help = 'Directory where the output folder should be created. If not given, the folder will be created in the parent directory of the folder containing the scripts.')
common.add_argument('--force', action = 'store_true', help = 'Overwrite all existing files in the "Output" folder.')
common.add_argument('--tree_method', default = 'iqtree', choices = {'iqtree', 'raxml', 'all'}, help = 'Program to use for tree-building')
common.add_argument('--tree_method', default = 'iqtree_fast', choices = {'iqtree', 'iqtree_fast', 'raxml', 'all'}, help = 'Program to use for tree-building')
common.add_argument('--blacklist', type = str, help = 'A text file with a list of sequence names not to consider')
common.add_argument('--og_identifier', default = 'OG', choices = {'OG','OG6','OGA','OGG'}, help = 'Program to use for selecting seq by GC width')
common.add_argument('--sim_taxa', default = None, help = 'Path to the file with the taxa (10-digit codes) to apply the similarity filter on.')
@ -39,15 +39,18 @@ def get_params():
core.add_argument('--similarity_filter', action = 'store_true', help = 'Run the similarity filter in pre-Guidance')
core.add_argument('--sim_cutoff', default = 1, type = float, help = 'Sequences from the same taxa that are assigned to the same OG are removed if they are more similar than this cutoff')
core.add_argument('--guidance_iters', default = 5, type = int, help = 'Number of Guidance iterations for sequence removal')
core.add_argument('--guidance_path', help = 'Path to the downloaded Guidance folder (probably called guidance_Linux or guidance_MacOS-arm64, this folder should contain a folder called "script" which contains the guidance_main.py script). You can download this folder from this link: https://github.com/XseniaP/Guidance_mid/tree/main')
core.add_argument('--seq_cutoff', default = 0.3, type = float, help = 'During guidance, taxa are removed if their score is below this cutoff')
core.add_argument('--col_cutoff', default = 0.0, type = float, help = 'During guidance, columns are removed if their score is below this cutoff')
core.add_argument('--res_cutoff', default = 0.0, type = float, help = 'During guidance, residues are removed if their score is below this cutoff')
core.add_argument('--guidance_threads', default = 20, type = int, help = 'Number of threads to allocate to Guidance')
core.add_argument('--trimal_cutoff', default = 0.3, type = float, help = 'Gap masking threshold for TrimAl. The maximum proportion of sequences without gaps for a site to be removed (i.e. to remove sites with 70% or more gaps, set this parameter to 0.3).')
core.add_argument('--allow_large_files', action = 'store_true', help = 'Allow files with more than 2,000 sequences to run through Guidance.')
CL = parser.add_argument_group('Contamination loop parameters')
CL.add_argument('--contamination_loop', default = None, choices = {'seq', 'clade', 'both'}, help = 'Remove sequences by looking at the sisters of each sequence in a rules file or by picking the best clades')
CL.add_argument('--contamination_loop', default = None, choices = {'seq', 'clade'}, help = 'Remove sequences by looking at the sisters of each sequence in a rules file or by picking the best clades')
CL.add_argument('--nloops', default = 10, type = int, help = 'The maximum number of contamination-removal loops')
CL.add_argument('--cl_tree_method', default = 'fasttree', choices = {'iqtree', 'raxml', 'fasttree', 'iqtree_fast'}, help = 'Tree-building method to use in each contamination loop iteration.')
CL.add_argument('--cl_tree_method', default = 'iqtree_fast', choices = {'iqtree', 'raxml', 'fasttree', 'iqtree_fast'}, help = 'Tree-building method to use in each contamination loop iteration.')
CL.add_argument('--cl_alignment_method', default = 'mafft_only', choices = {'mafft_only', 'guidance'}, help = 'Alignment method to use in each contamination loop iteration.')
CL.add_argument('--cl_exclude_taxa', type = str, default = None, help = 'Path to a file containing taxon names present in input MSA/tree files but which should be removed in the first iteration of the contamination loop.')
@ -65,7 +68,7 @@ def get_params():
other = parser.add_argument_group('Other arguments')
other.add_argument('--concatenate', action = 'store_true', help = 'Remove paralogs and generate an alignment for concatenation')
other.add_argument('--concat_target_taxa', nargs = '+', default = None, help = 'The taxonomic group (sequence prefix), groups, or a file containing a list of groups (multiple prefixes) for which to select sequences to construct a concatenated alignment')
other.add_argument('--concat_target_taxa', type = str, default = None, help = 'The taxonomic group (sequence prefix), groups, or a file containing a list of groups (multiple prefixes) for which to select sequences to construct a concatenated alignment')
other.add_argument('--tree_font_size', default = 12, help = "Change this if you're not quite happy with the font size in the output trees. If you want smaller font in your trees, you can lower this value; and if you want larger font in your trees, you can raise this value. Some common values are 8, 10, and 12. Size 16 font is pretty big, and size 4 font is probably too small for most purposes. Iconoclasts use size 9, 11, or 13 font.")
other.add_argument('--keep_temp', action = 'store_true', help = "Use this to keep ALL Guidance intermediate files")
other.add_argument('--keep_iter', '-z', action = 'store_true', help = 'Keep all Guidance iterations (beware this will be very large)')
@ -74,7 +77,7 @@ def get_params():
return parser.parse_args()
#Cleaning up existing output and creating a new output folder structure. This function is only called once, in phylotol.py
#Cleaning up existing output and creating a new output folder structure. This function is only called once, in eukphylo.py
def clean_up(params):
#If an output folder doesn't exist, create one.
@ -122,7 +125,7 @@ def clean_up(params):
if params.end == 'trees' or params.contamination_loop != None:
os.mkdir(params.output + '/Output/Trees')
os.mkdir(params.output + '/Output/ColoredTrees')
if params.start == 'trees':
if params.start == 'trees' and params.contamination_loop == None:
copy_input('Trees')

70
PTL2/run_eukphylo.sh Normal file
View File

@ -0,0 +1,70 @@
#!/bin/bash
## Last updated Jan 2025 by Auden Cote-L'Heureux; modified Sept. 2025 by Adri K. Grow
## This shell script is used for running EukPhylo part 2, and includes a general setup for use on an HPC that uses
## the Slurm workload manager. It also includes several example run commands, which correspond to examples explained in more detail in the
## EukPhylo Wiki (https://github.com/Katzlab/EukPhylo/wiki/EukPhylo-Part-2:-MSAs,-trees,-and-contamination-loop).
## These run commands can also be copied and run in the terminal / command line separately, without a shell script.
## For the contamination loop, we recommend iterating the sister/subsisters loop multiple times, as branches will shift. In contrast, we recommend running clade grabbing only once.
##
## HOW TO USE THIS TEMPLATE
##   1. Keep only ONE of the two cluster-specific sections below and delete the other. Slurm stops reading
##      #SBATCH directives at the first executable command in the script, so if both sections are kept the
##      directives of the second section are ignored.
##   2. Set the 'parent' variable to your working folder (keep the trailing '/').
##   3. Keep only the example command(s) you want to run. As written, all three srun commands below
##      run one after another, and the first two both write to Output.out.
## SLURM-SPECIFIC SETUP BELOW
############### FOR UMASS UNITY HPC ############### (DELETE section if not applicable):
## NOTE(review): this section requests a job array (--array=1-600%50), but the example commands below do not
## use SLURM_ARRAY_TASK_ID, so every array task would run the same command -- confirm the array is intended.
#SBATCH --job-name=EukPhylo
#SBATCH -n 10 # Number of Cores per Task
#SBATCH --mem=125G # Requested Memory
#SBATCH -p cpu # Partition
#SBATCH -q long # long QOS
#SBATCH -t 334:00:00 # Job time limit
#SBATCH --output=Run_EP.%A_%a.out # Stdout (%A expands to the array's master job ID, %a to the array task index)
#SBATCH --mail-type=ALL
#SBATCH --mail-user=email@email.edu
#SBATCH --array=1-600%50
module purge #Cleans up any loaded modules
module load conda/latest
module load mafft/7.505
module load diamond/2.1.7
conda activate /work/pi_lkatz_smith_edu/Conda_PTL6p2/envs/PTL/
############### FOR SMITH GRID HPC ############### (DELETE section if not applicable):
#SBATCH --job-name=EukPhylo # Job name
#SBATCH --output=Run_EukPhylo.%j.out # Stdout (%j expands to jobId)
#SBATCH --nodes=1
#SBATCH --ntasks=10 ## On the Smith College HPC (Grid), we have to change this to be double the number of task/batches you want to launch
#SBATCH --mail-type=ALL
#SBATCH --mail-user=email@email.edu ##add your email address for job updates
#Load required modules
module purge # Cleans up any loaded modules
module use /gridapps/modules/all # make sure the module location is loaded
module load slurm
module load ETE/3.1.3-foss-2024a
module load Biopython/1.79-gfbf-2023a
module load DIAMOND/2.1.8-GCC-12.3.0
module load MAFFT/7.526-GCC-13.3.0-with-extensions
module load RAxML-NG/1.2.2-GCC-13.2.0
module load IQ-TREE/2.3.6-gompi-2023a
module load tqdm/4.66.1-GCCcore-12.3.0
module load Python/3.12.3-GCCcore-13.3.0
module load Guidance_mid/2.1b-foss-2023a #Smith College HPC specific
export PATH=$PATH:/beegfs/fast/katzlab/grid_phylotol_setup/programs/standard-RAxML-master #Smith College HPC specific #export PATH=$PATH:/Path/To/Executable/Files
## PROVIDE YOUR PARENT PATH
## The value must end with '/' because the commands below build paths as ${parent}Scripts/..., ${parent}Input_folder, etc.
parent='/Your/Home/Folder/' # The folder where you are running EukPhylo (this should contain the Scripts and input data folders)
## EXAMPLE RUN COMMANDS BELOW
## Each command launches a single task (-n 1) with its working directory set to ${parent} (-D), and
## redirects the program's standard output to a file.
# A simple run of part 2, starting from ReadyToGo files and running through tree building
srun --exact -n 1 -D ${parent} python3 ${parent}Scripts/eukphylo.py --start raw --end trees --gf_list ${parent}listofOGs.txt --taxon_list ${parent}taxon_list.txt --data ${parent}Input_folder --output ${parent}Output_folder > Output.out
# Another example starting from ReadyToGo files and running through tree building, with the commonly used similarity filter cutoff, blacklist, and "sim_taxa_list" arguments (see Wiki)
# Note: sim_taxa_list.txt is given without ${parent}, so it is looked up relative to the working directory set by -D.
srun --exact -n 1 -D ${parent} python3 ${parent}Scripts/eukphylo.py --start raw --end trees --gf_list ${parent}listofOGs.txt --taxon_list ${parent}taxon_list.txt --data ${parent}Input_folder --output ${parent}Output_folder --similarity_filter --blacklist ${parent}Blacklist.txt --sim_cutoff 0.99 --sim_taxa sim_taxa_list.txt > Output.out
# An example of running just the concatenation step of part 2, starting from trees
srun --exact -n 1 -D ${parent} python3 ${parent}Scripts/eukphylo.py --start trees --concatenate --concat_target_taxa Sr_rh --data ${parent}Output > log.out
# See the Wiki (https://github.com/Katzlab/EukPhylo/wiki/EukPhylo-Part-2:-MSAs,-trees,-and-contamination-loop) for more details!

View File

@ -1,33 +0,0 @@
#!/bin/bash
#SBATCH --job-name=meta033 ##change this to a shortened name of your project
#SBATCH --output=Run_phylotol.%j.out # Stdout (%j expands to jobId)
#SBATCH --nodes=1
#SBATCH --ntasks=10 ##change this to be double the number of task/batches you want to launch
#SBATCH --mail-type=ALL
#SBATCH --mail-user=youremail@smith.edu ##add your email address
module purge #Cleans up any loaded modules
module use /gridapps/modules/all #make sure module locations is loaded
module load slurm
module load ETE
module load Biopython/1.79-foss-2021b
module load DIAMOND/2.0.13-GCC-11.2.0
module load MAFFT
module load BioPerl
module load RAxML
module load IQ-TREE/2.1.2-gompi-2021b
module load tqdm/4.64.1-GCCcore-12.2.0
module load Python/3.9.6-GCCcore-11.2.0
export PATH=$PATH:/beegfs/fast/katzlab/grid_phylotol_setup/programs/standard-RAxML-master
parent='/beegfs/fast/katzlab/Adri/p2PTL/033_meta/B1_meta_033/' #add your path starting with the name of your folder, should begin with /beegfs/fast/katzlab/
# One srun step is launched in the background per batch; add or remove batch
# labels in the list below to match the number of batches you are running.
for batch in B1 B2 B3 B4 B5; do
    srun --exact -n 1 -D ${parent} python3 ${parent}Scripts/phylotol.py \
        --similarity_filter --sim_cutoff 0.95 --sim_taxa sim_taxa.txt \
        --blacklist GuidanceRemovedSeqs_allConservedRuns_ML_nov_dec_2023.txt \
        --start raw --end trees \
        --gf_list ${batch}_listofOGs.txt --taxon_list taxon_list.txt \
        --data OutgroupR2Gs --output ${parent}Output_folder_${batch} > Output_folder_${batch}.out &
done
# Block until every backgrounded batch has finished before the job ends
wait

View File

@ -1,4 +1,4 @@
<img src="https://github.com/Katzlab/PhyloToL-6/blob/main/Other/Katzlab.png">
**PhyloToL version 6** is the latest version of the PhyloToL pipeline from the [Katz Lab](https://www.science.smith.edu/katz-lab/) at Smith College. PhyloToL is a phylogenomic toolkit for processing transcriptomic and genomic data that includes novel phylogeny-informed contamination removal techniques. See our [Wiki](https://github.com/Katzlab/PhyloToL-6/wiki) for more information on installation and usage!
**EukPhylo version 1.0** is an updated version of the PhyloToL pipeline from the Katz Lab (https://www.science.smith.edu/katz-lab/) at Smith College. EukPhylo is a phylogenomic toolkit for processing transcriptomic and genomic data that includes novel phylogeny-informed contamination removal techniques. See our Wiki (https://github.com/Katzlab/EukPhylo/wiki) for more information on installation and usage!

View File

@ -0,0 +1,86 @@
'''
Author & Date: Adri K. Grow + ChatGPT, Nov 11th 2024
- Updated 02/13/25 to accept either transcriptome and genome assembled data in command line
Motivation: assess and rename assembled transcript or genome files for use in EukPhylo Part 1
Intention: warn if any 'transcripts.fasta' or 'contigs.fasta' files are missing or empty for an LKH, otherwise rename and copy them with their assigned 10-digit code by LKH
Input:
- a base directory containing subdirectories for each LKH, named either 'WTA_LKH<xxxx>' or 'WGA_LKH<xxxx>', each containing a 'transcripts.fasta' or 'contigs.fasta' file
- a mapping .txt file with LKH#s tab-separated with corresponding 10-digit codes
Output:
- a folder named 'renamed_transcripts|contigs' with assembled files now named by 10-digit codes; e.g. "Sr_rh_Ro04_assembledTranscripts.fasta"
Dependencies: python3
Usage:
- for transcriptomes: python3 ProcessAndRenameAssembledData.py <assembled transcriptomes directory> <mapping_file.txt> transcriptomes
- for genomes: python3 ProcessAndRenameAssembledData.py <assembled genomes directory> <mapping_file.txt> genomes
'''
import os
import shutil
import sys
def read_lkh_mapping(mapping_file):
    """Read the LKH-number to 10-digit-code mapping from a tab-separated file.

    Every non-blank line must hold exactly two tab-separated fields: the LKH
    identifier followed by its code. Blank lines (for example a trailing
    empty line at the end of the file) are skipped rather than aborting the
    run with an unpacking error.

    Args:
        mapping_file: Path to the tab-separated mapping file.

    Returns:
        A dict mapping each LKH identifier to its code. When an identifier
        occurs more than once, the last occurrence wins.

    Raises:
        ValueError: If a non-blank line does not contain exactly two
            tab-separated fields; the message names the offending line.
    """
    mapping = {}
    with open(mapping_file, 'r') as handle:
        for line_number, raw_line in enumerate(handle, start=1):
            record = raw_line.strip()
            if not record:
                # Tolerate empty lines instead of failing on the unpack below
                continue
            fields = record.split('\t')
            if len(fields) != 2:
                raise ValueError(
                    f"{mapping_file}, line {line_number}: expected 2 tab-separated "
                    f"fields (LKH number, 10-digit code) but found {len(fields)}"
                )
            lkh_number, code = (field.strip() for field in fields)
            mapping[lkh_number] = code
    return mapping
def process_directory(base_dir, mapping, output_dir, data_type):
    """Copy each sample's assembly into output_dir under its mapped code name.

    Scans the immediate subdirectories of base_dir whose names start with
    'WTA_LKH' (when data_type is "transcriptomes") or 'WGA_LKH' (any other
    data_type). For each one, the expected fasta file ('transcripts.fasta'
    or 'contigs.fasta') is copied to output_dir as '<code><suffix>', where
    <code> is looked up in mapping by the LKH identifier taken from the
    folder name. Missing files, empty files and unmapped identifiers are
    reported on stdout and skipped.

    Args:
        base_dir: Directory holding one subdirectory per sample.
        mapping: Dict from LKH identifier to output code.
        output_dir: Destination directory; created if it does not exist.
        data_type: "transcriptomes" selects the transcript naming scheme;
            anything else selects the contig naming scheme.
    """
    # Pick the naming scheme for this kind of assembly
    if data_type == "transcriptomes":
        folder_prefix = "WTA_LKH"
        fasta_filename = "transcripts.fasta"
        output_suffix = "_assembledTranscripts.fasta"
    else:
        folder_prefix = "WGA_LKH"
        fasta_filename = "contigs.fasta"
        output_suffix = "_assembledContigs.fasta"

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    for folder_name in os.listdir(base_dir):
        folder_path = os.path.join(base_dir, folder_name)
        if not folder_name.startswith(folder_prefix) or not os.path.isdir(folder_path):
            continue

        # The identifier is the second '_'-separated piece, e.g. 'WTA_LKH1234' -> 'LKH1234'
        lkh_number = folder_name.split('_')[1]
        fasta_file = os.path.join(folder_path, fasta_filename)

        if not os.path.isfile(fasta_file):
            print(f" WARNING: file '{fasta_filename}' is missing in folder {folder_name}.")
            continue
        if os.path.getsize(fasta_file) == 0:
            print(f" WARNING: file '{fasta_filename}' is empty in folder {folder_name}.")
            continue
        if lkh_number not in mapping:
            print(f"Notification: No 10-digit code found for LKH number {lkh_number} in folder {folder_name}.")
            continue

        destination = os.path.join(output_dir, f"{mapping[lkh_number]}{output_suffix}")
        shutil.copy(fasta_file, destination)
def main():
    """Command-line entry point: validate the arguments and rename assemblies.

    Expects exactly three arguments: the directory of assembled data, the
    LKH-to-code mapping file, and the data type ("transcriptomes" or
    "genomes"). Prints a message and exits with status 1 when the argument
    list is wrong or when either path does not exist. Renamed copies are
    written to 'renamed_transcripts' or 'renamed_contigs' inside the current
    working directory.
    """
    args = sys.argv
    if len(args) != 4 or args[3] not in ["transcriptomes", "genomes"]:
        print("Usage: python script.py <base_dir> <mapping_file> <transcriptomes|genomes>")
        sys.exit(1)

    base_dir, mapping_file, data_type = args[1], args[2], args[3]

    if not os.path.isdir(base_dir):
        print(f"Error: The directory '{base_dir}' does not exist.")
        sys.exit(1)

    if not os.path.isfile(mapping_file):
        print(f"Error: The file '{mapping_file}' does not exist.")
        sys.exit(1)

    # The output folder name depends on the kind of assembly being renamed
    if data_type == "transcriptomes":
        renamed_folder = "renamed_transcripts"
    else:
        renamed_folder = "renamed_contigs"
    output_dir = os.path.join(os.getcwd(), renamed_folder)

    mapping = read_lkh_mapping(mapping_file)
    process_directory(base_dir, mapping, output_dir, data_type)


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,101 @@
#!/usr/bin/env python3
#Author, date: Giulia Magri Ribeiro and Adri K. Grow updated from Xyrus Maurer-Alcala and Ying Yan; June 13 2025
#Motivation: Trim adaptors from reads and quality trimming before Assembly
#Intent: clean up reads
#Dependencies: biopython and bbmap folder
#Inputs: parameters.txt, fastq.gz forward and reverse reads
#Outputs:trimmed reads in ToAssemble folder
#Example: python3 Trim_Reads.py parameter.txt
#Katzlab parameters are 24 for quality trimming and 75 for minimum length as of June 2025
from Bio import SeqIO
import sys,os
import time
#------------------------------ Checks the Input Arguments ------------------------------#
# With no arguments, describe the script and exit; with the wrong number of
# arguments, show the expected layout of the parameter file and exit.
# Otherwise the single argument is taken as the path to the parameter file.
if len(sys.argv) == 1:
    print('\n\nThis script will remove Adapters, do quality trimming and length trimming on given score and assembly from your raw reads')
    print('\n\nChecking the overall quality and reads size on FastQC is recommended\n\n')
    print('Example Usage:\n\n\t' + 'katzlab$ python3 Trim_Reads.py parameter.txt\n\n')
    print('\t\tQuestions/Comments? Email Giulia (author) at gribeiro@smith.edu\n\n')
    sys.exit()
elif len(sys.argv) != 2:
    print('\n\nDouble check that you have added all the necessary command-line inputs! (see usage below for an example)\n\n')
    print('Example Usage:\n\n\t' + 'katzlab$ python3 Trim_Reads.py parameter.txt\n\n')
    print('Please also check that you have a parameter.txt (tab separated values) file which should contain your current filename, new filename, score of quality trimming, and minimum length (see an example below)\n\n')
    print('parameter.txt example:\n\n\t' + 'XKATZ_20161110_K00134_IL100076423_S41_L005\tLKH001_Spirostomum\t24\t100\n\tXKATZ_20161110_K00134_IL100076416_S17_L005\tLKH002_Loxodes\t28\t100\n')
    sys.exit()
else:
    parameter_file = sys.argv[1]

mailaddress = 'your_email@xxx.edu' # default email

# Create the output folder for the trimmed reads directly from Python
# rather than spawning a shell just to run `mkdir`; exist_ok makes this a
# no-op when the folder is already there.
os.makedirs('ToAssemble', exist_ok=True)
### takes your raw read data and renames the files with your assigned new names and alters the end to either FwdPE or RevPE
def rename(code):
    """Rename raw paired-end read files in the current directory.

    Every '*.fastq.gz' file whose name contains a forward tag ('_FwdPE',
    '_R1', '_FPE') or a reverse tag ('_RevPE', '_R2', '_RPE') is considered.
    The part of the file name before the first matching tag is looked up in
    code; when found, the file becomes '<new_name>_FwdPE.fastq.gz' or
    '<new_name>_RevPE.fastq.gz'. A folder named '<new_name>' is also created
    for every forward file that is renamed. Files whose prefix is not a key
    of code are left untouched.

    The renaming is done with os.replace / os.makedirs rather than by
    building `mv` / `mkdir` shell command strings, so file names containing
    spaces or shell metacharacters are handled correctly and a failed rename
    raises OSError instead of being silently ignored.

    Args:
        code: Dict mapping the current file-name prefix to the new name.
    """
    forward_tags = ['_FwdPE', '_R1', '_FPE']
    reverse_tags = ['_RevPE', '_R2', '_RPE']

    def prefix_before_tag(filename, tags):
        # Text preceding the first tag (in list order) present in the name,
        # or None when the name contains none of the tags.
        for tag in tags:
            if tag in filename:
                return filename.split(tag)[0]
        return None

    for filename in os.listdir(os.curdir):
        if not filename.endswith('.fastq.gz'):
            continue

        # Forward read patterns
        cur_name = prefix_before_tag(filename, forward_tags)
        if cur_name is not None and cur_name in code:
            new_name = code[cur_name]
            print(cur_name, new_name)
            os.replace(filename, f'{new_name}_FwdPE.fastq.gz')
            os.makedirs(new_name, exist_ok=True)
            # The file has been moved, so there is nothing left to test
            # against the reverse tags.
            continue

        # Reverse read patterns
        cur_name = prefix_before_tag(filename, reverse_tags)
        if cur_name is not None and cur_name in code:
            new_name = code[cur_name]
            print(cur_name, new_name)
            os.replace(filename, f'{new_name}_RevPE.fastq.gz')
### Uses the adapters.fa file in the bbtools resources folder (and BBDuK) to remove adapter sequences -- update if necessary
### Uses BBDuK to quality trim reads with the quality score (trimq) and minimum length (minlen) given for each sample in the parameter file -- Katzlab defaults are q24 and a minimum length of 75
def QualityTrim(qtrim, minlen):
    """Run BBDuk adapter and quality trimming on each forward/reverse read pair.

    For every entry in the current directory whose name contains 'FwdPE',
    the sample name (the text before '_FwdPE') is used to look up the
    trimming quality and minimum length, and bbduk.sh is invoked through the
    shell on that file and its 'Rev' counterpart. Trimmed reads are written
    to 'ToAssemble/'; BBDuk's stats file and a log of its console output are
    written inside a folder named after the sample.

    Files without an entry in qtrim/minlen are reported and skipped, so one
    unexpected file no longer aborts trimming of all remaining samples with
    a KeyError. The per-sample folder is created when missing, because the
    shell redirection into the log file fails if it does not exist.

    Args:
        qtrim: Dict mapping sample name to the quality-trim score (string).
        minlen: Dict mapping sample name to the minimum read length (string).
    """
    for filename in os.listdir(os.curdir):
        if 'FwdPE' not in filename:
            continue

        new_name = filename.split('_FwdPE')[0]
        if new_name not in qtrim or new_name not in minlen:
            print(f"WARNING: no trimming parameters found for '{filename}'; skipping.")
            continue

        qscore = qtrim[new_name]
        lscore = minlen[new_name]
        qtrimcmd = '_q' + qscore + '_minlen' + lscore

        # Folder that receives the stats and log files for this sample
        sample = filename.split('_Fwd')[0]
        os.makedirs(sample, exist_ok=True)
        log_file = sample + '/' + sample + qtrimcmd + '_bbduk.log'
        stats_file = sample + '/' + sample + qtrimcmd + '_Stats.txt'

        command = (
            './bbmap/bbduk.sh -Xmx20g in1=./' + filename
            + ' in2=./' + filename.replace('Fwd', 'Rev')
            + ' out1=ToAssemble/' + filename.replace('FwdPE', 'FPE' + qtrimcmd)
            + ' out2=ToAssemble/' + filename.split('Fwd')[0] + 'RPE' + qtrimcmd + '.fastq.gz'
            + ' qtrim=rl trimq=' + qscore
            + ' minlen=' + lscore
            + ' mink=11 k=23 hdist=1 ktrim=r ref=bbmap/resources/adapters.fa'
            + ' stats=' + stats_file
            + ' overwrite=true'
            + ' > ' + log_file + ' 2>&1'
        )
        os.system(command)
### Calls on rnaSPAdes to do the transcriptome assembly on the quality trimmed files.
#def rnaSPAdesAssembly():
# for filename in os.listdir(os.curdir+'/ToAssemble'):
# if 'LKH' in filename:
# if 'FPE_q' in filename:
# os.system('python rnaSPAdes-0.1.1/bin/rnaspades.py -m 26 -k 21,33,55,77 --min-complete-transcript 300 -1 ToAssemble/' + filename + ' -2 ToAssemble/' + filename.replace('FPE','RPE')+' -o ' + filename.split('_FPE')[0] + '/; echo "Finished assembling ' + filename.split('_FPE')[0] + '" | mail -s "Finished Transcriptome Assembly ' + (time.strftime("%d/%m/%y")) + '" ' + mailaddress) > out.txt
def main():
    """Parse the parameter file, then rename and quality-trim the reads.

    Each non-blank line of the parameter file must hold at least four
    tab-separated fields: current file-name prefix, new name, quality-trim
    score and minimum length. Blank lines are skipped, and line endings
    (including Windows '\\r\\n') are removed before splitting so that the
    last field never carries a stray carriage return into the BBDuk command.

    Raises:
        ValueError: If a non-blank line has fewer than four fields.
    """
    code = {}
    qtrim = {}
    minlen = {}
    # `with` guarantees the parameter file is closed once it has been read
    with open(parameter_file, 'r') as handle:
        for line_number, line in enumerate(handle, start=1):
            record = line.rstrip('\r\n')
            if not record.strip():
                continue
            fields = record.split('\t')
            if len(fields) < 4:
                raise ValueError(
                    f"{parameter_file}, line {line_number}: expected 4 tab-separated "
                    f"fields (current name, new name, quality score, minimum length) "
                    f"but found {len(fields)}"
                )
            code[fields[0]] = fields[1]
            qtrim[fields[1]] = fields[2]
            minlen[fields[1]] = fields[3]
    rename(code)
    QualityTrim(qtrim, minlen)
# rnaSPAdesAssembly()
main()

View File

@ -0,0 +1,18 @@
#!/bin/bash
#
#SBATCH --job-name=Gigi_spades
#SBATCH --output=rnaSPAdes_run.%j.out # Stdout (%j expands to jobId)
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --ntasks-per-node=64
#SBATCH --mem=180G
#SBATCH --mail-type=ALL
#SBATCH --mail-user=xxx@xxx.edu
module purge # Cleans up any loaded modules
module load SPAdes

# Assemble each sample in turn from its trimmed read pair in ToAssemble/;
# list the sample prefixes to assemble here.
# NOTE(review): -m 500 is larger than the 180G requested with --mem above -- confirm the intended memory limit.
for sample in SRR26595464 SRR26595465 SRR26595468; do
    rnaspades.py -m 500 -t 50 -1 ToAssemble/${sample}_FPE_q24_minlen75.fastq.gz -2 ToAssemble/${sample}_RPE_q24_minlen75.fastq.gz -o Assembled/${sample}
done

View File

@ -1,11 +1,12 @@
#Author, date: Xyrus (last modified by him Sept 17 2020), most recently updated by Auden on July 19 2023
#Author, date: Xyrus (last modified by him Sept 17 2020), most recently updated by Auden on October 17 2024
#Motivation: Generate lots of codon usage statistics to aid in identifying useful characteristics for de novo ORF calling
#Intent: Summarize nucleotide composition statistics for a fasta file or folder of fasta files
#Dependencies: Python3, numpy, BioPython
#Inputs: Fasta file or folder of fasta files
#Outputs: A fasta file filtered for properly formatted sequences and several spreadsheets summarizing GC, ENc, RSCU, etc.
#Outputs: A fasta file filtered for properly formatted sequences and several spreadsheets summarizing GC3S, ENc, RSCU, etc.
#Example: python3 CUB.py -i seqs.fasta
#Note: Use "python3 CUB.py -i seqs.fasta --require_start --require_stop" when using the on R2G files.
#Note: Use "python3 CUB.py -i seqs.fasta --require_start --require_stop" to see more conservative estimate
#Note: in this script we use GC3 and GC3S interchangeably, though the abbreviation GC3S is probably more correct
#Dependencies
import os
@ -19,11 +20,11 @@ import argparse
class CalcCUB:
"""
Returns the Effective Number of Codons used (observed and expected)
Returns the Effective Number of Codons (ENc) used (observed and expected)
following the equations originally from Wright 1990.
"""
def expWrightENc(gc3):
# Calculates the expected ENc from a sequence's GC3 under Wright 1990
# Calculates the expected ENc from a sequence's GC3 (GC3S) under Wright 1990
if gc3 > 1:
# If GC3 looks as though it is > 1 (e.g. 100%), converts to a float ≤ 1.
# Calculations expect a value between 0 and 1
@ -32,7 +33,7 @@ class CalcCUB:
return round(exp_enc, 4)
def nullENcGC3():
# Calculates the expected ENc from the null distribution of GC3
# Calculates the expected ENc from the null distribution of GC3S
# values (0, 100% GC)
null = [CalcCUB.expWrightENc(n) for n in np.arange(0,.51,0.01)]
null += null[:-1][::-1]
@ -356,6 +357,12 @@ class GCeval():
return round(GC(''.join([seq[n] for n in
range(2, len(seq)-len(seq[2:]) % 3, 3)])), 4)
def gc3s(cdnTbl):
    # Returns the GC content (rounded to 4 decimals) of third codon positions,
    # leaving out entries whose first value is 'W' (Trp) or 'M' (Met).
    # Each codon's last base is repeated by the entry's last value before GC()
    # is applied -- presumably that value is the codon's count; TODO confirm.
    third_positions = ''.join(
        codon[-1] * info[-1]
        for codon, info in cdnTbl.items()
        if info[0] not in ('W', 'M')
    )
    return round(GC(third_positions), 4)
def gc3_4F(cdnTbl):
# # This function return the GC content of the third position of four-fold
# # degenerate codons
@ -385,7 +392,7 @@ class SeqInfo(object):
def ENcStats(self):
# Stores the various Effective Number of Codons calculations in the class
self.expENc = CalcCUB.expWrightENc(self.gc3)
self.expENc = CalcCUB.expWrightENc(self.gc3s)
self.obsENc_6F = CalcCUB.calcWrightENc(self.cdnCounts_6F)
self.obsENc_No6F = CalcCUB.calcWrightENc(self.cdnCounts_No6F)
self.SunENc_6F = CalcCUB.SunEq5(self.cdnCounts_6F)
@ -396,6 +403,7 @@ class SeqInfo(object):
for k, v in self.gcFuncs.items():
setattr(self,k,v(self.ntd))
self.gc4F = GCeval.gc3_4F(self.cdnCounts_No6F)
self.gc3s = GCeval.gc3s(self.cdnCounts_No6F)
def RSCUstats(self):
@ -429,23 +437,23 @@ def CalcRefFasta(fasta, gCode):
def WriteWrightOut(seqData, outName, comp):
if comp == False:
with open(outName+'/SpreadSheets/'+outName.split('/')[-1]+'.ENc.Raw.tsv','w+') as w:
w.write('SequenceID\tAmbiguousCodons\tGC-Overall\tGC1\tGC2\tGC3\t'
w.write('SequenceID\tAmbiguousCodons\tGC-Overall\tGC1\tGC2\tGC3\tGC3S\t'
'GC3-Degen\tExpWrightENc\tObsWrightENc_6Fold\tObsWrightENc_No6Fold\t'
'ObsWeightedENc_6Fold\tObsWeightedENc_No6Fold\n')
for k, v in seqData.items():
name = [k]
gcs = [str(v.gcOverall),str(v.gc1),str(v.gc2),str(v.gc3),str(v.gc4F)]
gcs = [str(v.gcOverall),str(v.gc1),str(v.gc2),str(v.gc3),str(v.gc3s),str(v.gc4F)]
ENc = [str(v.expENc),str(v.obsENc_6F),str(v.obsENc_No6F),
str(v.SunENc_6F),str(v.SunENc_No6F)]
w.write('\t'.join(name+[str(v.amb_cdn)]+gcs+ENc)+'\n')
else:
with open(outName+'/SpreadSheets/'+outName.split('/')[-1]+'.CompTrans.ENc.Raw.tsv','w+') as w:
w.write('SequenceID\tAmbiguousCodons\tGC-Overall\tGC1\tGC2\tGC3\t'
w.write('SequenceID\tAmbiguousCodons\tGC-Overall\tGC1\tGC2\tGC3\tGC3S\t'
'GC3-Degen\tExpWrightENc\tObsWrightENc_6Fold\tObsWrightENc_No6Fold\t'
'ObsWeightedENc_6Fold\tObsWeightedENc_No6Fold\n')
for k, v in seqData.items():
name = [k]
gcs = [str(v.gcOverall),str(v.gc1),str(v.gc2),str(v.gc3),str(v.gc4F)]
gcs = [str(v.gcOverall),str(v.gc1),str(v.gc2),str(v.gc3),str(v.gc3s),str(v.gc4F)]
ENc = [str(v.expENc),str(v.obsENc_6F),str(v.obsENc_No6F),
str(v.SunENc_6F),str(v.SunENc_No6F)]
w.write('\t'.join(name+[str(v.amb_cdn)]+gcs+ENc)+'\n')
@ -473,7 +481,7 @@ def getCompFasta(fasta, gCode, require_start, require_stop):
def WriteNullENcOut(outName):
with open(outName+'/SpreadSheets/' + outName.split('/')[-1] + '.ENc.Null.tsv','w+') as w:
w.write('GC3\tENc\n')
w.write('GC3S\tENc\n')
w.write('\n'.join(CalcCUB.nullENcGC3()))
@ -558,14 +566,14 @@ if __name__ == "__main__":
o.write(folder.split('/')[-1] + '\t' + line)
with open('CUBOutput/SpreadSheets/ENc.Raw.tsv', 'w') as o:
o.write('File\tSequenceID\tAmbiguousCodons\tGC-Overall\tGC1\tGC2\tGC3\tGC3-Degen\tExpWrightENc\tObsWrightENc_6Fold\tObsWrightENc_No6Fold\tObsWeightedENc_6Fold\tObsWeightedENc_No6Fold\n')
o.write('File\tSequenceID\tAmbiguousCodons\tGC-Overall\tGC1\tGC2\tGC3\tGC3S\tGC3-Degen\tExpWrightENc\tObsWrightENc_6Fold\tObsWrightENc_No6Fold\tObsWeightedENc_6Fold\tObsWeightedENc_No6Fold\n')
for folder in folders:
for line in open(folder + '/SpreadSheets/' + folder.split('/')[-1] + '.ENc.Raw.tsv'):
if 'SequenceID' not in line:
o.write(folder.split('/')[-1] + '\t' + line)
with open('CUBOutput/SpreadSheets/CompTrans.ENc.Raw.tsv', 'w') as o:
o.write('File\tSequenceID\tAmbiguousCodons\tGC-Overall\tGC1\tGC2\tGC3\tGC3-Degen\tExpWrightENc\tObsWrightENc_6Fold\tObsWrightENc_No6Fold\tObsWeightedENc_6Fold\tObsWeightedENc_No6Fold\n')
o.write('File\tSequenceID\tAmbiguousCodons\tGC-Overall\tGC1\tGC2\tGC3\tGC3S\tGC3-Degen\tExpWrightENc\tObsWrightENc_6Fold\tObsWrightENc_No6Fold\tObsWeightedENc_6Fold\tObsWeightedENc_No6Fold\n')
for folder in folders:
for line in open(folder + '/SpreadSheets/' + folder.split('/')[-1] + '.CompTrans.ENc.Raw.tsv'):
if 'SequenceID' not in line:

View File

@ -1,14 +1,13 @@
'''
#Author, date: Godwin Ani and Laura Katz, 9th- Feb - 2023.
#Author, date: Godwin Ani and Laura Katz, Feb 9th 2023
#Modified: Adri Grow, April 6th 2025 to allow clustering at 100% (1.0) and output renamed file(s) with id clustered appended to file name
#Dependencies: Python3, CD-Hit
#Intent: For clustering nucleotide or amino acid sequences with the CD-Hit program.
#Inputs: A folder of containing Amino acid or DNA fasta files.
#Outputs: A folder of clustered files.
#Example: python Cluster.py --type dna --identity 0.95 --overlap 0.67 --input input_folder_dna --output output_folder_dna
#Intent: For clustering nucleotide or amino acid sequences with the CD-Hit program
#Inputs: A folder of containing AA or DNA fasta files
#Outputs: A folder of clustered files
#Example: python Cluster.py -t dna -id 0.95 -ov 0.67 -i input_folder_dna -o output_folder_dna
'''
import os
import argparse
from tqdm import tqdm
@ -16,8 +15,10 @@ import subprocess
def input_validation(value, error_message):
try:
integer, fractional = value.split('.')
value = float(value)
if value == 1.0:
return value
integer, fractional = str(value).split('.')
if int(integer) == 0 and len(fractional) == 2:
return value
except ValueError:
@ -28,39 +29,44 @@ def input_validation(value, error_message):
def cluster_sequences(program, identity, overlap, input_folder, output_folder):
for file in tqdm(os.listdir(input_folder)):
if file.endswith('.fasta'):
subprocess.run([f'{program}', '-i', f'{input_folder}/{file}', '-o', f'{output_folder}/{file}', '-c', f'{identity}', '-d', '0', '-aS', f'{overlap}'])
output_name = f"{os.path.splitext(file)[0]}_{int(float(identity) * 100)}clustered.fasta"
subprocess.run([f'{program}', '-i', f'{input_folder}/{file}', '-o', f'{output_folder}/{output_name}', '-c', f'{identity}', '-d', '0', '-aS', f'{overlap}'])
for file in os.listdir(output_folder):
if file.endswith('.clstr'):
os.rename(f'{output_folder}/{file}', f'{output_folder}/{file.split("FILE")[0]}Clustered.txt')
base_name = os.path.splitext(file)[0] # removes .clstr
if base_name.endswith('.fasta'):
base_name = base_name[:-6] # removes .fasta from end
new_name = f"{base_name}.txt"
os.rename(f'{output_folder}/{file}', f'{output_folder}/{new_name}')
def main():
parser = argparse.ArgumentParser(description='Cluster amino acid or DNA sequences using CD-HIT.')
parser.add_argument('--type', choices=['aa', 'dna'], required=True, help='Type of sequences (aa for Amino Acids, dna for DNA)')
parser.add_argument('--identity', type=str, required=True, help='Sequence Identity Threshold (e.g., 0.99, 0.95)')
parser.add_argument('--overlap', type=str, required=True, help='Sequence Alignment Overlap Value (e.g., 0.67, 0.75)')
parser.add_argument('--input', type=str, required=True, help='Input folder containing sequences in fasta format')
parser.add_argument('--output', type=str, required=True, help='Output folder for clustered sequences')
parser = argparse.ArgumentParser(description='Cluster amino acid or nucleotide sequences using CD-HIT.')
parser.add_argument('-t', '--type', choices=['aa', 'dna'], required=True, help='Type of sequences (aa for amino acid, dna for nucleotide)')
parser.add_argument('-id','--identity', type=str, required=True, help='Sequence identity threshold (e.g. 1.0, 0.99, 0.95)')
parser.add_argument('-ov', '--overlap', type=str, required=True, help='Sequence alignment overlap value (e.g. 0.67, 0.75)')
parser.add_argument('-i', '--input_files', type=str, required=True, help='Input folder containing sequences in fasta format')
parser.add_argument('-o', '--output', type=str, required=True, help='Output folder for clustered sequences ending with -id value')
args = parser.parse_args()
if not os.path.isdir(args.input):
print(f'Error: Input folder "{args.input}" does not exist.')
if not os.path.isdir(args.input_files):
print(f'Error: Input folder "{args.input_files}" does not exist.')
exit(1)
if not os.path.isdir(args.output):
os.mkdir(args.output)
if args.type == 'aa':
identity = input_validation(args.identity, 'ERROR! Use format 0.## for Amino acids sequence identity threshold.')
overlap = input_validation(args.overlap, 'ERROR! Use format 0.## for Amino acids sequence alignment overlap value.')
cluster_sequences('cd-hit', identity, overlap, args.input, args.output)
identity = input_validation(args.identity, 'ERROR! Use format 0.## or 1.0 for amino acid sequence identity threshold.')
overlap = input_validation(args.overlap, 'ERROR! Use format 0.## for amino acid sequence alignment overlap value.')
cluster_sequences('cd-hit', identity, overlap, args.input_files, args.output)
elif args.type == 'dna':
identity = input_validation(args.identity, 'ERROR! Use format 0.## for DNA sequence identity threshold.')
overlap = input_validation(args.overlap, 'ERROR! Use format 0.## for DNA sequence alignment overlap value.')
cluster_sequences('cd-hit-est', identity, overlap, args.input, args.output)
identity = input_validation(args.identity, 'ERROR! Use format 0.## or 1.0 for nucleotide sequence identity threshold.')
overlap = input_validation(args.overlap, 'ERROR! Use format 0.## for nucleotide sequence alignment overlap value.')
cluster_sequences('cd-hit-est', identity, overlap, args.input_files, args.output)
else:
print('Invalid sequence type. Choose "aa" for Amino Acids or "dna" for DNA.')
print('Invalid sequence type. Choose "aa" for amino acids or "dna" for nucleotides.')
exit(1)
if __name__ == "__main__":

View File

@ -27,7 +27,7 @@ enc_null <- data.frame(read_tsv('ENc.Null.tsv'))
#you need as.numeric to ensure R is reading the variable correctly
gc3_plot <- ggplot(gc3, aes(as.numeric(GC3.Degen), as.numeric(ObsWrightENc_No6Fold)))+
geom_point(size = 0.1)+
geom_line(data = enc_null, aes(GC3, ENc))+
geom_line(data = enc_null, aes(GC3S, ENc))+
theme_classic()+
labs(x = 'GC3 Degen', y = 'ObsWrightENc_No6fold')+
theme(legend.position = 'none')+

View File

@ -0,0 +1,65 @@
#!/bin/bash
## Last updated on Jan 9th 2024 by Auden Cote-L'Heureux
#Intent: Calculate TPM for assembled transcripts
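#Dependencies: Salmon (the commands below expect the 'salmon-1.9.0_linux_x86_64' folder to sit next to the 'Transcriptomes' and 'RawReads' folders)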
#Inputs: Must be in a folder along with a folder called 'Transcriptomes', containing assembled transcripts as output by rnaSpades (transcripts.fasta),
## and a folder called 'RawReads' containing the fwd and rev reads prior to assembly, with the same file prefixes as the corresponding assembled transcript files
#Outputs: A folder, containing a 'quant' file which has TPM data.
## If running on an HPC, include parameters here! For example, on a Slurm system you might use
#SBATCH --job-name=tpm
#SBATCH --output=Salmon.%j.out # Stdout (%j expands to jobId)
#SBATCH --nodes=1
#SBATCH --ntasks=60
#SBATCH --mem=60G
mkdir -p Indices

## First, build one Salmon index per assembled transcript file
cd Transcriptomes || exit 1

for TRANS in *; do
    # The first 10 characters of the file name identify the taxon
    tax=${TRANS:0:10}
    ./../salmon-1.9.0_linux_x86_64/bin/salmon index -t "$TRANS" -i "../Indices/$tax"
done

## Now calculate TPM
# We are still inside Transcriptomes at this point, so the indices folder
# has to be reached through the parent directory.
cd ../Indices || exit 1

for TRANS in *; do
    tax=${TRANS:0:10}
    fpe='NA'; rpe='NA'; fpesub="FPE"; rpesub="RPE"

    # Find the forward/reverse read files whose names start with this taxon
    for TRIM in ../RawReads/*; do
        trif=${TRIM##*/} # file name without the leading ../RawReads/
        if [ "${trif:0:10}" == "$tax" ]; then
            if [[ "$trif" == *"$fpesub"* ]]; then
                fpe=$trif
            fi
            if [[ "$trif" == *"$rpesub"* ]]; then
                rpe=$trif
            fi
        fi
    done

    # Without a forward read file there is nothing to quantify
    if [ "$fpe" == 'NA' ]; then
        echo "WARNING: no read file containing '$fpesub' found in RawReads for $tax; skipping" >&2
        continue
    fi

    if [ "$rpe" != 'NA' ]; then
        # Paired-end reads
        ./../salmon-1.9.0_linux_x86_64/bin/salmon quant -i "$TRANS" -l A -1 "../RawReads/$fpe" -2 "../RawReads/$rpe" --validateMappings -o "../quants/$tax"
    else
        # Only a forward file was found: quantify as single-end reads
        ./../salmon-1.9.0_linux_x86_64/bin/salmon quant -i "$TRANS" -l A -r "../RawReads/$fpe" --validateMappings -o "../quants/$tax"
    fi
done

View File

@ -1,51 +1,56 @@
'''
#Author, date: ?
#Uploaded: updated by Adri Grow, 2024 (previous Adri Grow 2023)
#Intent: map a group of trimmed reads to a reference.
#Dependencies: Python3, hisat2, samtools, sambamba
#EDIT LINES: 18 & 32
#Inputs: Folder named 'TrimmedReads' containing all the trimmed reads.
#Outputs: Folders with the names of the LKHs containing the sam/bam files.
#Example: python ReadMapping.py
#Uploaded: updated by Adri Grow, Aug 2025
#Intent: map a group of trimmed reads to a reference
#Dependencies: Python, HISAT2, samtools, (optional: sambamba)
#EDIT LINES: 19 & 36
#Inputs: Folder named 'TrimmedReads' containing the forward and reverse trimmed reads that start with the same unique identifier for each sample/cell
#Outputs: Folders with the names of the unique identifier (e.g. LKHs) containing the bam files
#Usage: python3 ReadMapping.py
#IMPORTANT: Lines 34-42 manipulate the output files in several different ways including converting .sam to .bam, sorting, optional deduplicating, optional quality filtering, and retaining only mapped reads. It is the responsibility of the user to determine exactly which commands are needed for their dataset.
'''
import os
from Bio import SeqIO
#this first command builds your reference with Hisat.
#If you've already done this, DON'T run this command! Instead, comment it out (use a # in front of it).
#It will output several files. Don't worry about them, Hisat will know what to do.
os.system("hisat2-build Foram_reference.fasta Foram_Index") #change to your reference.fasta and rename the index
#This first command builds your reference with HISAT
#If you've already done this, DON'T run this command! Instead, comment it out (use a # in front of it)
#It will output several files. Don't worry about them, HISAT will know what to do
os.system("hisat2-build Foram_reference.fasta Foram_Index") #Replace "Foram_reference.fasta" with your reference fasta name, and optionally change "Foram_Index" to your preferred index name
folder = os.listdir("TrimmedReads") #Insert the name of the folder which has your trimmed reads inside the quotes
folder.sort() #This sorts the folder so that all the LKHs are in order.
folder = os.listdir("TrimmedReads") #Replace "TrimmedReads" with the name of the folder containing your trimmed reads, if different than TrimmedReads
folder.sort() #This sorts the trimmed reads folder so that all the files are passed in order
for x in folder:
if "LKH" in x and "FPE" in x: #assigning a variable to forward reads. Make sure you have both forward and reverse reads for each cell!
#This is specific to file names that start with 'LKH' unique identifiers, formatted like 'LKH###_FPE.fastq.gz'
if "LKH" in x and "FPE" in x: #Assigning a variable to forward reads. Make sure you have both forward and reverse reads for each cell!
FPE = x
if "LKH" in x and "RPE" in x: #assigning a variable to reverse reads.
sample_id = FPE.split("_FPE")[0]
if "LKH" in x and "RPE" in x: #Assigning a variable to reverse reads
RPE = x
if(FPE[:7] == RPE[:7]):
#The next few lines are several Hisat commands that will create new files.
#EDIT the name of the index and the name of the trimmed reads folder in the first command below
os.system("hisat2 -x Foram_Index -1 TrimmedReads/" +FPE+ " -2 TrimmedReads/" +RPE+ " -S sample.sam")
os.system("samtools view -bS sample.sam > sample.bam")
os.system("samtools fixmate -O bam sample.bam fixmate_sample.bam")
os.system("samtools sort -O bam -o sorted_sample.bam fixmate_sample.bam")
os.system("sambamba markdup -r sorted_sample.bam sorted_sample.dedup.bam")
os.system("samtools view -h -b -q 40 sorted_sample.dedup.bam > sorted_sample.q40.bam")
os.system("samtools view -h -b -q 20 sorted_sample.dedup.bam > sorted_sample.q20.bam")
os.system("samtools view -h -F 4 -b sorted_sample.dedup.bam > defaultparameters_sample.bam")
if FPE.split("_FPE")[0] == RPE.split("_RPE")[0]: #Match sample IDs dynamically
#The next few lines are several HISAT commands that will create new files
#If necessary, EDIT the name of the index and the name of the trimmed reads folder in the very next line only
os.system("hisat2 -x Foram_Index -1 TrimmedReads/" +FPE+ " -2 TrimmedReads/" +RPE+ " -S sample.sam") #running HISAT2
os.system("samtools view -bS sample.sam > sample.bam") #converts .sam file to .bam file
os.remove("sample.sam") #remove the .sam file (already converted to .bam, sam files are large and unnecessary to keep)
#os.system("samtools fixmate -O bam sample.bam fixmate_sample.bam") #use this command if you will be using the sambamba markdup command to remove duplicate reads (Katzlab default for transcriptomics and amplicon is to not remove duplicates)
os.system("samtools sort -O bam -o sorted_sample.bam sample.bam") #sorts the .bam file alignments by leftmost coordinates
#os.system("sambamba markdup -r sorted_sample.bam sorted_sample.dedup.bam") #removes duplicate reads - may not be appropriate for your study or protocols, user will need to determine if this is best practice for their study
#os.system("samtools view -h -b -q 40 sorted_sample.dedup.bam > sorted_sample.q40.bam") #only keeps reads with mapping quality ≥ 40, input is the dedup file but can easily be modified to use the sorted .bam file
#os.system("samtools view -h -b -q 20 sorted_sample.dedup.bam > sorted_sample.q20.bam") #only keeps reads with mapping quality ≥ 20, input is the dedup file but can easily be modified to use the sorted .bam file
os.system("samtools view -h -F 4 -b sorted_sample.bam > sorted_mapped_sample.bam") #only keeps mapped reads, using the sorted .bam file as input - this is the Katzlab transcriptomic and amplicon final output that should be used for continued analyses
if not os.path.isdir(x[:7]):
os.mkdir(x[0:7]) #making folders with the names of the LKHs
if not os.path.isdir(sample_id):
os.mkdir(sample_id) #making folders with the names of the LKHs or unique identifiers
for file in os.listdir('.'): #These lines move the sam/bam files that Hisat creates into the new LKH folders.
for file in os.listdir('.'): #These lines move the bam files created into the new LKH/unique identifier folders
if(file.endswith('.sam') or file.endswith('.bam')):
os.rename(file,x[:7] + '/' + file)
os.rename(file, f"{sample_id}/{file}")
print("~~~~~~~~~~~:>~") #When the snake appears, your script has run!
print("~~~~~~~~~~~:>~") #When the snake appears in terminal, the script has finished running for all samples/cells!

View File

@ -1,10 +1,10 @@
#Author, date: Auden Cote-L'Heureux, last updated Apr 1st 2024 by GA
#Author, date: Auden Cote-L'Heureux, last updated Aug 18th 2025 by AKG
#Motivation: Select robust sequences from trees
#Intent: Select clades of interest from large trees using taxonomic specifications
#Dependencies: Python3, ete3, Biopython
#Inputs: A folder containing: all PTLp2 output trees and all corresponding unaligned .fasta (pre-guidance) files
#Outputs: A folder of grabbed clades and filtered unaligned fasta files
#Example: python CladeGrabbing.py --input /Path/to/trees --target Sr_rh --min_presence 20
#Example: python3 CladeGrabbing.py --input /Path/To/TreesandPreGuidance --target Sr_rh --min_presence 20
#IMPORTANT: key parameters explained in "add_argument" section below
#Dependencies
@ -18,7 +18,7 @@ def get_args():
parser = argparse.ArgumentParser(
prog = 'Clade grabber, Version 2.1',
description = "Updated Aug 1st, 2023 by Auden Cote-L'Heureux, modified by GA Feb 13th 2024"
description = "Updated Aug 1st, 2023 by Auden Cote-L'Heureux, modified by AKG Aug 18th 2025"
)
#add_argument section with parameters explained
parser.add_argument('-i', '--input', type = str, required = True, help = 'Path to a folder containing input trees (which must have the file extension .tre, .tree, .treefile, or .nex)')
@ -28,6 +28,8 @@ def get_args():
parser.add_argument('-nr', '--required_taxa_num', type = int, default = 0, help = 'The number of species belonging to taxa in the --required_taxa list that must be present in the clade. Default is 0.')
parser.add_argument('-o', '--outgroup', type = str, default = '', help = 'A comma-separated list of any number of digits/characters (e.g. Sr_ci_S OR Am_t), or a file with the extension .txt containing a list of complete or partial taxon codes, to describe taxa that will be included as outgroups in the output unaligned fasta files (which will contain only sequences from a single selected clade, and all outgroup sequences in the tree captured by this argument).')
parser.add_argument('-c', '--contaminants', type = float, default = 2, help = 'The number of non-ingroup contaminants allowed in a clade, or if less than 1 the proportion of sequences in a clade that can be non-ingroup (i.e. presumed contaminants). Default is to allow 2 contaminants.')
parser.add_argument('-ft', '--first_target', type=str, default='', help='[Optional] A comma-separated list or .txt file of complete/partial taxon codes for an initial, broad clade search. If provided, the script will first find clades with these taxa before applying the main --target filter.')
parser.add_argument('-fm', '--first_min_presence', type=int, default=0, help='[Optional] Minimum number of sequences from --first_target required in a clade for it to be used in the second-stage search. Ignored if --first_target is not provided.')
return parser.parse_args()
@ -85,86 +87,155 @@ def reroot(tree):
def get_subtrees(args, file):
newick = get_newick(args.input + '/' + file)
newick = get_newick(args.input + '/' + file)
tree = ete3.Tree(newick)
tree = ete3.Tree(newick)
majs = list(dict.fromkeys([leaf.name[:2] for leaf in tree]))
majs = list(dict.fromkeys([leaf.name[:2] for leaf in tree]))
# Only try to reroot trees with more than 2 major clades (original behavior)
if len(majs) > 2:
tree = reroot(tree)
#Only try to reroot trees with more than 2 major clades. This was added to fix the ETE3 "Cannot set myself as outgroup" error
if len(majs) > 2:
tree = reroot(tree)
# -------------------------------
# FIRST-STAGE (optional) FILTER
# -------------------------------
def get_outer_leafsets():
"""
Return a list of sets, each set = leaf names of an outer clade
that passes --first_target, --first_min_presence, children_keep,
and contaminants logic (using args.contaminants).
If --first_target is not given, or --first_min_presence is 0, return one set containing ALL leaves.
"""
if not args.first_target or args.first_min_presence == 0:
return [set(leaf.name for leaf in tree)] # no outer filter → whole tree
#Getting a clean list of all target taxa
if '.' in args.target:
try:
target_codes = [l.strip() for l in open(args.target, 'r').readlines() if l.strip() != '']
except AttributeError:
print('\n\nError: invalid "target" argument. This must be a comma-separated list of any number of digits/characters to describe focal taxa (e.g. Sr_ci_S OR Am_t), or a file with the extension .txt containing a list of complete or partial taxon codes. All sequences containing the complete/partial code will be identified as belonging to target taxa.\n\n')
else:
target_codes = [code.strip() for code in args.target.split(',') if code.strip() != '']
# Parse first_target codes
if '.' in args.first_target:
first_target_codes = [l.strip() for l in open(args.first_target, 'r').readlines() if l.strip() != '']
else:
first_target_codes = [code.strip() for code in args.first_target.split(',') if code.strip() != '']
#Getting a clean list of all "at least" taxa
if '.' in args.required_taxa:
try:
required_taxa_codes = [l.strip() for l in open(args.required_taxa, 'r').readlines() if l.strip() != '']
except AttributeError:
print('\n\nError: invalid "required_taxa" argument. This must be a comma-separated list of any number of digits/characters (e.g. Sr_ci_S OR Am_t), or a file with the extension .txt containing a list of complete or partial taxon codes, to describe taxa that MUST be present in a clade for it to be selected (e.g. you may want at least one whole genome).\n\n')
else:
required_taxa_codes = [code.strip() for code in args.required_taxa.split(',') if code.strip() != '']
outer_sets = []
seen_leaves = []
target_codes = list(dict.fromkeys(target_codes + required_taxa_codes))
for node in tree.traverse('levelorder'):
# large enough and not subsumed by already accepted outer node
if len(node) >= args.first_min_presence and len(set(seen_leaves) & set([leaf.name for leaf in node])) == 0:
leaves = [leaf.name for leaf in node]
#Creating a record of selected subtrees, and all of the leaves in those subtrees
selected_nodes = []; seen_leaves = []
# children_keep logic but for first_target
children_keep = 0
for child in node.children:
taken = False
for code in first_target_codes:
for leaf in child:
if leaf.name.startswith(code):
children_keep += 1
taken = True
break
if taken:
break
if children_keep != len(node.children):
continue
#Iterating through all nodes in tree, starting at "root" then working towards leaves
for node in tree.traverse('levelorder'):
#If a node is large enough and is not contained in an already selected clade
# count first-target hits (use [:10] uniqueness like original)
first_hits = set()
for code in first_target_codes:
for leaf in leaves[::-1]:
if leaf.startswith(code):
first_hits.add(leaf[:10])
leaves.remove(leaf)
if len(node) >= args.min_presence and len(list(set(seen_leaves) & set([leaf.name for leaf in node]))) == 0:
leaves = [leaf.name for leaf in node]
# contaminants logic applied to FIRST-STAGE (reuse args.contaminants)
passes_contam = ((args.contaminants < 1 and len(leaves) <= args.contaminants * len(first_hits)) or
(args.contaminants >= 1 and len(leaves) <= args.contaminants))
#Accounting for cases where e.g. one child is a contaminant, and the other child is a good clade with 1 fewer than the max number of contaminants
children_keep = 0
for child in node.children:
for code in target_codes:
taken = False
for leaf in child:
if leaf.name.startswith(code):
children_keep += 1
taken = True
break
if taken:
break
if len(first_hits) >= args.first_min_presence and passes_contam:
outer_sets.append(set(leaf.name for leaf in node))
seen_leaves.extend([leaf.name for leaf in node])
if children_keep == len(node.children):
target_leaves = set(); required_taxa_leaves = set()
for code in target_codes:
for leaf in leaves[::-1]:
#print(leaf)
if leaf.startswith(code):
target_leaves.add(leaf[:10])
return outer_sets
for req in required_taxa_codes:
if leaf.startswith(req):
required_taxa_leaves.add(leaf[:10])
break
leaves.remove(leaf)
# Build outer sets; if user supplied first-stage args, we'll restrict inner search to these
using_first = bool(args.first_target) and args.first_min_presence > 0
outer_leafsets = get_outer_leafsets()
# --------------------------------
# ORIGINAL INNER FILTER (unchanged)
# --------------------------------
# Getting a clean list of all target taxa
if '.' in args.target:
try:
target_codes = [l.strip() for l in open(args.target, 'r').readlines() if l.strip() != '']
except AttributeError:
print('\n\nError: invalid "target" argument. This must be a comma-separated list of any number of digits/characters to describe focal taxa (e.g. Sr_ci_S OR Am_t), or a file with the extension .txt containing a list of complete or partial taxon codes. All sequences containing the complete/partial code will be identified as belonging to target taxa.\n\n')
else:
target_codes = [code.strip() for code in args.target.split(',') if code.strip() != '']
# Getting a clean list of all "at least" taxa
if '.' in args.required_taxa:
try:
required_taxa_codes = [l.strip() for l in open(args.required_taxa, 'r').readlines() if l.strip() != '']
except AttributeError:
print('\n\nError: invalid "required_taxa" argument. This must be a comma-separated list of any number of digits/characters (e.g. Sr_ci_S OR Am_t), or a file with the extension .txt containing a list of complete or partial taxon codes, to describe taxa that MUST be present in a clade for it to be selected (e.g. you may want at least one whole genome).\n\n')
else:
required_taxa_codes = [code.strip() for code in args.required_taxa.split(',') if code.strip() != '']
#Grab a clade as a subtree if 1) it has enough target taxa; 2) it has enough "at least" taxa; 3) it does not have too many contaminants
if len(target_leaves) >= args.min_presence and len(required_taxa_leaves) >= args.required_taxa_num and ((args.contaminants < 1 and len(leaves) < args.contaminants * len(target_leaves)) or len(leaves) < args.contaminants):
selected_nodes.append(node)
seen_leaves.extend([leaf.name for leaf in node])
#Write the subtrees to output .tre files
for i, node in enumerate(selected_nodes[::-1]):
with open('Subtrees/' + '.'.join(file.split('.')[:-1]) + '_' + str(i) + '.tre', 'w') as o:
o.write(node.write())
target_codes = list(dict.fromkeys(target_codes + required_taxa_codes))
# Creating a record of selected subtrees, and all of the leaves in those subtrees
selected_nodes = []; seen_leaves = []
# Iterating through all nodes in tree, starting at "root" then working towards leaves
for node in tree.traverse('levelorder'):
# If using first-stage filter, only consider nodes fully inside some outer clade
if using_first:
node_leafs = set(leaf.name for leaf in node)
# require subset (node fully contained in an accepted outer clade)
if not any(node_leafs.issubset(S) for S in outer_leafsets):
continue
# If a node is large enough and is not contained in an already selected clade
if len(node) >= args.min_presence and len(list(set(seen_leaves) & set([leaf.name for leaf in node]))) == 0:
leaves = [leaf.name for leaf in node]
# Accounting for cases where e.g. one child is a contaminant, and the other child is a good clade
children_keep = 0
for child in node.children:
for code in target_codes:
taken = False
for leaf in child:
if leaf.name.startswith(code):
children_keep += 1
taken = True
break
if taken:
break
if children_keep == len(node.children):
target_leaves = set(); required_taxa_leaves = set()
for code in target_codes:
for leaf in leaves[::-1]:
if leaf.startswith(code):
target_leaves.add(leaf[:10])
for req in required_taxa_codes:
if leaf.startswith(req):
required_taxa_leaves.add(leaf[:10])
break
leaves.remove(leaf)
# Grab a clade as a subtree if it passes all filters
if len(target_leaves) >= args.min_presence and len(required_taxa_leaves) >= args.required_taxa_num and ((args.contaminants < 1 and len(leaves) <= args.contaminants * len(target_leaves)) or len(leaves) <= args.contaminants):
selected_nodes.append(node)
seen_leaves.extend([leaf.name for leaf in node])
# Write the subtrees to output .tre files
for i, node in enumerate(selected_nodes[::-1]):
with open('Subtrees/' + '.'.join(file.split('.')[:-1]) + '_' + str(i) + '.tre', 'w') as o:
o.write(node.write())
def make_new_unaligned(args):

View File

@ -1,4 +1,4 @@
#Author, date: Auden Cote-L'Heureux, last updated Dec 18th 2023
#Author, date: Auden Cote-L'Heureux, last updated Nov 11th 2024 by Adri Grow
#Motivation: Understand the topology of trees
#Intent: Describe clade sizes for different taxonomic groups
#Dependencies: Python3, ete3
@ -236,9 +236,10 @@ if __name__ == '__main__':
for tree_file in tqdm(os.listdir(args.input)):
if tree_file.split('.')[-1] in ('tre', 'tree', 'treefile', 'nex'):
clades_per_tax, majs_per_clade, mins_per_clade = get_clades(args.input + '/' + tree_file, args)
clades_per_tax_per_file.update({ tree_file.split('.')[0] : clades_per_tax })
majs_per_clade_per_file.update({ tree_file.split('.')[0] : majs_per_clade })
mins_per_clade_per_file.update({ tree_file.split('.')[0] : mins_per_clade })
base_filename = os.path.splitext(tree_file)[0]
clades_per_tax_per_file.update({ base_filename : clades_per_tax })
majs_per_clade_per_file.update({ base_filename : majs_per_clade })
mins_per_clade_per_file.update({ base_filename : mins_per_clade })
write_output(clades_per_tax_per_file, args, majs_per_clade = majs_per_clade_per_file, mins_per_clade = mins_per_clade_per_file)

View File

@ -151,7 +151,7 @@ def reroot(tree):
return best_clade
#Get the biggest clade for each taxonomic group (stops once it finds one)
for taxon in [('Ba', 'Za'), ('Op'), ('Pl'), ('Am'), ('Ex'), ('Sr')]:
for taxon in [('Ba'), ('Za'), ('Op'), ('Pl'), ('Am'), ('Ex'), ('Sr')]:
clade = get_best_clade(taxon)
if len([leaf for leaf in clade if leaf.name[:2] in taxon]) > 3:

View File

@ -0,0 +1,50 @@
''' Author, Date : Godwin Ani, 10 - July - 2024.
Motivation : To make phylogenetic trees more presentable.
Intent : Shorten the tip labels of phylogenetic trees.
Dependencies : Python3, ete3
Inputs : A folder containing trees
Outputs : A folder of trees with shortened tips.
Usage : python3 RenameTips_v1.0.py -i path_to_folder_of_trees
'''
import os, re, sys, argparse, string
import ete3
# Command-line interface: one option naming the folder that holds the trees.
parser = argparse.ArgumentParser()
# required = True makes a missing -i stop with a usage message instead of a
# TypeError when None is concatenated with a string below.
parser.add_argument('-i', '--input', required = True, help = 'Path to a folder containing trees (with the file extension .tree, .tre, .treefile, or .nex)')
args = parser.parse_args()

# Renamed trees are written to a "renamed" subfolder inside the input folder.
os.makedirs(args.input + '/renamed', exist_ok = True)
def get_newick(fname):
    """Return the Newick string stored in a tree file.

    Each line is reduced to its last space-separated token. A token that
    starts with '(' or 'tree1=' is taken as the tree, with any 'tree1='
    prefix, single quotes and backslashes removed. If several lines match,
    the last one wins.

    fname -- path to the tree file.

    Returns the Newick text (including the line's trailing newline, if it
    had one), or '' when no line matches.
    """
    newick = ''
    # The context manager closes the handle as soon as reading is done;
    # previously the open file was left for the garbage collector.
    with open(fname) as handle:
        for line in handle:
            token = line.split(' ')[-1]
            if token.startswith(('(', 'tree1=')):
                newick = token.split('tree1=')[-1].replace("'", '').replace('\\', '')
    return newick
def tree_formatting_wrapper(file):
    """Shorten the tip labels of one tree and save the result.

    file -- path to the tree file. The output is written to the "renamed"
            subfolder of args.input, named after the input file's last
            '/'-separated component with '.tree' appended.
    """
    tree = ete3.Tree(get_newick(file))
    letters = tuple(string.ascii_letters)

    for tip in tree:
        # Only labels beginning with an ASCII letter are rewritten.
        if not tip.name.startswith(letters):
            continue
        label = str(tip.name).split('_Len')[0]  # drop '_Len' and everything after it
        label = label.replace('Contig_', 'Ct')
        tip.name = label.replace('_XX_0', '')

    out_name = file.split('/')[-1] + '.tree'
    tree.write(format=1, outfile=args.input + '/renamed/' + out_name)
# Rename the tips of every tree file found directly inside the input folder.
for fname in os.listdir(args.input):
    extension = fname.rsplit('.', 1)[-1]
    # Skip anything whose final dot-separated piece is not a tree extension.
    if extension not in ('tree', 'tre', 'treefile', 'nex'):
        continue
    tree_formatting_wrapper(args.input + '/' + fname)