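Allow the backtranslating script to handle nucleotide sequences whose length is not exactly 3x that of the corresponding (ungapped) amino acid sequence: nucleotides extending past the end of the amino acid sequence are trimmed from the 3' end, and backtranslation is attempted instead of skipping the sequence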

This commit is contained in:
Auden Cote-L'Heureux 2023-08-10 10:40:19 -04:00 committed by GitHub
parent 29daa4932b
commit 943bc3461d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -58,7 +58,10 @@ def backtranslate(aa, nucls, output):
for rec in aa:
if(rec.id in nucls):
if len(nucls[rec.id]) == len(str(rec.seq).replace('-', '')) * 3:
if len(nucls[rec.id]) != len(str(rec.seq).replace('-', '')) * 3:
print('\nWARNING: The nucleotide sequence ' + rec.id + ' is not 3x the length of the corresponding amino acid sequence. Trying to translate this sequence by starting at the beginning and working forward until the amino acid sequence ends.\n')
nucls[rec.id] = nucls[rec.id][:len(str(rec.seq).replace('-', '')) * 3]
running_seq = ''; c = 0; fail = False
for i, char in enumerate(str(rec.seq)):
if(char == '-'):
@ -76,8 +79,6 @@ def backtranslate(aa, nucls, output):
else:
o.write('>' + rec.id + '\n')
o.write(running_seq + '\n\n')
else:
print('\nWARNING: The nucleotide sequence ' + rec.id + ' is not 3x the length of the corresponding amino acid sequence. This sequence will be missing from the alignment.\n')
else:
print('\nWARNING: There is no nucleotide sequence for the amino acid sequence ' + rec.id + '. This sequence will be missing from the alignment.\n')