Updating headers in CheckSetup.py

This commit is contained in:
Auden Cote-L'Heureux 2024-01-26 11:47:22 -05:00 committed by GitHub
parent bf669310fd
commit 835104592f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -1,3 +1,12 @@
# Last updated Nov 2023
# Author: Auden Cote-L'Heureux
# This script is run as the first step of the PhyloToL 6 Part 1 TRANSCRIPTOMES pipeline,
# before any sequence data are actually processed. It checks that the input assembled
# transcript files, databases, genetic codes, and conspecific names files (the latter
# used only by the cross-plate contamination script, 1b) are properly located and formatted.
# Dependencies
import os, sys, re
from Bio import SeqIO
@ -53,6 +62,11 @@ def check_databases(params):
fasta = [file for file in os.listdir(params.databases + '/db_OG') if file.endswith('.fasta')]
dmnd = [file for file in os.listdir(params.databases + '/db_OG') if file.endswith('.dmnd')]
if len(fasta) == 1 and len(dmnd) == 1:
if fasta[0].split('.fasta')[0] != dmnd[0].split('.dmnd')[0]:
print('\nERROR: The file names (except for the extensions) of the OG reference (Hook) database .fasta and .dmnd databases must match!\n')
exit()
if len(fasta) == 0:
print('\nERROR: No Hook fasta file found in the Databases/db_OG folder\n')
exit()