Update ReadMapping.py

Making things more universally applicable rather than keeping the hardcoded index range
This commit is contained in:
Adri K. Grow 2025-08-18 14:13:11 -04:00 committed by GitHub
parent 5fcd3b937e
commit c92cfb1b19
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -1,34 +1,35 @@
'''
#Author, date: ?
#Uploaded: updated by Adri Grow, 2024
#Intent: map a group of trimmed reads to a reference.
#Uploaded: updated by Adri Grow, Aug 2025
#Intent: map a group of trimmed reads to a reference
#Dependencies: Python3, hisat2, samtools, sambamba
#EDIT LINES: 18 & 32
#Inputs: Folder named 'TrimmedReads' containing all the trimmed reads.
#Outputs: Folders with the names of the LKHs containing the sam/bam files.
#EDIT LINES: 18 & 33
#Inputs: Folder named 'TrimmedReads' containing all the trimmed reads
#Outputs: Folders with the names of the LKHs containing the sam/bam files
#Example: python ReadMapping.py
'''
import os
from Bio import SeqIO
#this first command builds your reference with Hisat.
#If you've already done this, DON'T run this command! Instead, comment it out (use a # in front of it).
#It will output several files. Don't worry about them, Hisat will know what to do.
os.system("hisat2-build Foram_reference.fasta Foram_Index") #change to your reference.fasta and rename the index
#this first command builds your reference with HISAT
#If you've already done this, DON'T run this command! Instead, comment it out (use a # in front of it)
#It will output several files. Don't worry about them, HISAT will know what to do
os.system("hisat2-build Foram_reference.fasta Foram_Index") #change to your reference.fasta and (optionally) rename the index
folder = os.listdir("TrimmedReads") #Insert the name of the folder which has your trimmed reads inside the quotes
folder.sort() #This sorts the folder so that all the LKHs are in order.
folder.sort() #This sorts the folder so that all the LKHs are in order
for x in folder:
if "LKH" in x and "FPE" in x: #assigning a variable to forward reads. Make sure you have both forward and reverse reads for each cell!
FPE = x
if "LKH" in x and "RPE" in x: #assigning a variable to reverse reads.
sample_id = FPE.split("_FPE")[0]
if "LKH" in x and "RPE" in x: #assigning a variable to reverse reads
RPE = x
if(FPE[:7] == RPE[:7]):
#The next few lines are several Hisat commands that will create new files.
#EDIT the name of the index and the name of the trimmed reads folder in the first command below
if FPE.split("_FPE")[0] == RPE.split("_RPE")[0]: # Match sample IDs dynamically
#The next few lines are several HISAT commands that will create new files
#EDIT the name of the index and the name of the trimmed reads folder in the first command below (line 33) if necessary
os.system("hisat2 -x Foram_Index -1 TrimmedReads/" +FPE+ " -2 TrimmedReads/" +RPE+ " -S sample.sam")
os.system("samtools view -bS sample.sam > sample.bam")
os.system("samtools fixmate -O bam sample.bam fixmate_sample.bam")
@ -38,12 +39,12 @@ for x in folder:
os.system("samtools view -h -b -q 20 sorted_sample.dedup.bam > sorted_sample.q20.bam")
os.system("samtools view -h -F 4 -b sorted_sample.dedup.bam > defaultparameters_sample.bam")
if not os.path.isdir(x[:7]):
os.mkdir(x[0:7]) #making folders with the names of the LKHs
if not os.path.isdir(sample_id):
os.mkdir(sample_id) #making folders with the names of the LKHs
for file in os.listdir('.'): #These lines move the sam/bam files that Hisat creates into the new LKH folders.
if(file.endswith('.sam') or file.endswith('.bam')):
os.rename(file,x[:7] + '/' + file)
os.rename(file, f"{sample_id}/{file}")
print("~~~~~~~~~~~:>~") #When the snake appears in terminal, your script has finished running!