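Allow the backtranslation script to proceed when a nucleotide sequence is not exactly 3x the length of its amino acid sequence, by truncating the nucleotide sequence at the 3' end to 3x the ungapped amino acid length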

This commit is contained in:
Auden Cote-L'Heureux 2023-08-10 10:40:19 -04:00 committed by GitHub
parent 29daa4932b
commit 943bc3461d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -58,26 +58,27 @@ def backtranslate(aa, nucls, output):
for rec in aa: for rec in aa:
if(rec.id in nucls): if(rec.id in nucls):
if len(nucls[rec.id]) == len(str(rec.seq).replace('-', '')) * 3: if len(nucls[rec.id]) != len(str(rec.seq).replace('-', '')) * 3:
running_seq = ''; c = 0; fail = False print('\nWARNING: The nucleotide sequence ' + rec.id + ' is not 3x the length of the corresponding amino acid sequence. Trying to translate this sequence by starting at the beginning and working forward until the amino acid sequence ends.\n')
for i, char in enumerate(str(rec.seq)): nucls[rec.id] = nucls[rec.id][:len(str(rec.seq).replace('-', '')) * 3]
if(char == '-'):
running_seq += '---'
else:
codon = nucls[rec.id][c:c+3]
if char == 'X' or codon in codons_per_aa[char]:
running_seq += codon
c += 3
else:
fail = True
if fail: running_seq = ''; c = 0; fail = False
print('\nWARNING: The nucleotide sequence ' + rec.id + ' does not match the corresponding amino acid sequence. This sequence will be missing from the alignment.\n') for i, char in enumerate(str(rec.seq)):
if(char == '-'):
running_seq += '---'
else: else:
o.write('>' + rec.id + '\n') codon = nucls[rec.id][c:c+3]
o.write(running_seq + '\n\n') if char == 'X' or codon in codons_per_aa[char]:
running_seq += codon
c += 3
else:
fail = True
if fail:
print('\nWARNING: The nucleotide sequence ' + rec.id + ' does not match the corresponding amino acid sequence. This sequence will be missing from the alignment.\n')
else: else:
print('\nWARNING: The nucleotide sequence ' + rec.id + ' is not 3x the length of the corresponding amino acid sequence. This sequence will be missing from the alignment.\n') o.write('>' + rec.id + '\n')
o.write(running_seq + '\n\n')
else: else:
print('\nWARNING: There is no nucleotide sequence for the amino acid sequence ' + rec.id + '. This sequence will be missing from the alignment.\n') print('\nWARNING: There is no nucleotide sequence for the amino acid sequence ' + rec.id + '. This sequence will be missing from the alignment.\n')