mirror of
http://43.156.76.180:8026/YuuMJ/EukPhylo.git
synced 2025-12-27 03:40:24 +08:00
SeqLengthToCsv
Seq_length_to_csv is a program that exports the length of DNA sequences excluding gaps and missing data to a csv file.
This commit is contained in:
parent
341c9a487b
commit
9f86dbe040
48
Utilities/for_fastas/SeqLengthToCsv_Feb9.py
Normal file
48
Utilities/for_fastas/SeqLengthToCsv_Feb9.py
Normal file
@ -0,0 +1,48 @@
|
||||
'''
|
||||
Professor L. Katz and Godwin Ani
|
||||
9th-Feb-2023
|
||||
Seq_length_to_csv is a program that exports the length of DNA sequences excluding gaps and missing data to a csv file.
|
||||
'''
|
||||
|
||||
import os, sys, re
|
||||
from Bio import SeqIO
|
||||
from Bio.Seq import Seq
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from tqdm import tqdm
|
||||
|
||||
'''Place the DNA fasta files (with a ".fasta" extension) in a folder named "Seq_length" inside the current working directory. This folder is created automatically if it does not exist.
|
||||
The results are saved, one csv file per fasta file, in a sub-folder named "csv" inside the "Seq_length" folder; this sub-folder is created automatically if it does not exist. '''
|
||||
|
||||
|
||||
def Seq_length_to_csv():
    '''Export the ungapped length of every sequence in each fasta file to csv.

    Reads every file ending in ".fasta" in the "Seq_length" folder (relative
    to the current working directory) and writes one csv per input file to
    "Seq_length/csv/<input name without extension>.csv".

    Each csv has one row per sequence with three columns:
      * 'Name of sequence'        - the record id.
      * 'Length of sequence'      - number of A, C, G and T characters (either
                                    case); gaps, N and any other symbols are
                                    not counted.
      * 'Average sequence length' - mean of the lengths in that file, rounded
                                    to 2 decimals and repeated on every row.

    Both folders are created if they do not exist. Takes no arguments and
    returns None.
    '''
    input_dir = 'Seq_length'
    output_dir = os.path.join(input_dir, 'csv')
    # makedirs also creates the parent folder; exist_ok removes the need for
    # a separate isdir() check before creating.
    os.makedirs(output_dir, exist_ok=True)

    fasta_files = [entry for entry in os.listdir(input_dir) if entry.endswith('.fasta')]

    for fasta_file in tqdm(fasta_files):
        # splitext strips only the trailing ".fasta". Splitting on 'fasta'
        # left a stray dot ("genes..csv") and truncated names that contain
        # "fasta" elsewhere ("my_fasta_set.fasta" -> "my_.csv").
        csv_name = os.path.splitext(fasta_file)[0] + '.csv'

        names = []
        seq_lengths = []
        for record in SeqIO.parse(os.path.join(input_dir, fasta_file), "fasta"):
            names.append(record.id)
            # Upper-case once so each base needs a single count.
            bases = str(record.seq).upper()
            seq_lengths.append(sum(bases.count(base) for base in 'ACGT'))

        # A fasta file without records has no mean; use NaN directly rather
        # than letting np.mean warn about an empty input.
        mean_length = round(np.mean(seq_lengths), 2) if seq_lengths else np.nan

        data = {
            'Name of sequence': names,
            'Length of sequence': seq_lengths,
            'Average sequence length': mean_length,
        }
        df = pd.DataFrame(data)
        df.index += 1  # number the rows from 1 instead of 0 in the csv
        df.to_csv(os.path.join(output_dir, csv_name))
|
||||
|
||||
|
||||
# Entry point: run the export when this file is executed. The call is
# unguarded, so importing this module runs it as well.
Seq_length_to_csv()
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user