Add files via upload

This commit is contained in:
Adri K. Grow 2023-09-01 14:35:08 -04:00 committed by GitHub
parent a7832842ae
commit e502d2638f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -1,30 +1,35 @@
 #The purpose of this script is to make a simple jellyfish plot from a dataset like the one
-#produced by the CUB.py script, also in the Github Utilities folder. This script was written
-#by Auden Cote-L'Heureux and last updated in May 2023
+#produced by the CUB_v2.1.py script, also in the Github Utilities folder. This script was written
+#by Auden Cote-L'Heureux and last updated in September 2023 by Adri
+#load necessary packages
 library(tidyverse)
-#Change this path
-setwd('/Your/Working/Directory')
-#You will need to change name of data frame below
-#if you used the CUB.py script, it should be in the spreadsheets folder
-#in the output and end in CompTrans.ENc.Raw.tsv
-gc3 <- data.frame(read_tsv('CompTrans.ENc.Raw.tsv'))
-gc3$GC3.Degen <- as.numeric(gc3$GC3.Degen)
-gc3$ObsWrightENc_6Fold <- as.numeric(gc3$ObsWrightENc_6Fold)
+#Change to the path of the directory you're working from
+#Use "getwd()" in console below to get path
+setwd("/Users/agrow/Desktop/NewCUB_meta/CUBOutput/SpreadSheets")
+#You may need to change name of data frame below
+#if you used the CUB_v2.1.py script from Github, it should be in the
+#CUBOutput folder and then inside the SpreadSheets folder
+#you are looking for the CompTrans.ENc.Raw.tsv
+gc3 <- data.frame(read_tsv('CompTrans.ENc.Raw.tsv'))%>%
+mutate(taxon = paste(substr(SequenceID, 1, 4), substr(SequenceID,6,10), sep = '')) #this line reads in your 10-digit codes to a column in the data frame called taxon
+#This .tsv is generated by the CUB script and will be in the same folder as the .tsv above
+#This generates the null expectation line
+#The data for the null expectation curve will be in the same folder as above
 enc_null <- data.frame(read_tsv('ENc.Null.tsv'))
-gc3_plot <- ggplot(data = gc3, mapping = aes(as.numeric(GC3.Degen), as.numeric(ObsWrightENc_6Fold))) +
-geom_point() +
-geom_line(data = enc_null, aes(GC3, ENc)) +
-theme_classic() +
-labs(x = '%GC at 3rd-pos 4-fold sites', y = 'Observed Wright ENc (6Fold)') +
-theme(
-legend.position = 'none'
-)
+#change data in first line here to what you want plotted
+#you need as.numeric to ensure R is reading the variable correctly
+gc3_plot <- ggplot(gc3, aes(as.numeric(GC3.Degen), as.numeric(ObsWrightENc_No6Fold)))+
+geom_point(size = 0.1)+
+geom_line(data = enc_null, aes(GC3, ENc))+
+theme_classic()+
+labs(x = 'GC3 Degen', y = 'ObsWrightENc_No6fold')+
+theme(legend.position = 'none')+
+ggtitle("Metatranscriptomics R2G NTD files")+
+theme(axis.text.x=element_text(colour="black"))+
+theme(axis.text.y=element_text(colour="black"))+
+facet_wrap(vars(taxon), scales = 'free_x')
 gc3_plot