Add files via upload

This commit is contained in:
Adri K. Grow 2023-09-01 14:35:08 -04:00 committed by GitHub
parent a7832842ae
commit e502d2638f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -1,30 +1,35 @@
#The purpose of this script is to make a simple jellyfish plot from a dataset like the one
#produced by the CUB.py script, also in the Github Utilities folder. This script was written
#by Auden Cote-L'Heureux and last updated in May 2023
#produced by the CUB_v2.1.py script, also in the Github Utilities folder. This script was written
#by Auden Cote-L'Heureux and last updated in September 2023 by Adri
#Load required packages (read_tsv, mutate, %>% and ggplot are all used below)
library(tidyverse)
#Placeholder working directory - replace with your own path
setwd('/Your/Working/Directory')
#Set the working directory to the folder you are working from
#(the SpreadSheets folder of the CUB output, judging by the path below)
#Run getwd() in the console to see what the current path is
#NOTE(review): the path below is specific to one machine - edit before running
setwd("/Users/agrow/Desktop/NewCUB_meta/CUBOutput/SpreadSheets")
#Read the CUB spreadsheet into a plain data frame
#Change the file name below if yours differs; if you used the CUB.py script
#it is in the spreadsheets folder of the output and ends in
#CompTrans.ENc.Raw.tsv
gc3 <- read_tsv('CompTrans.ENc.Raw.tsv') %>%
  data.frame()
#Read the CUB spreadsheet; change the file name below if yours differs
#If you used the CUB_v2.1.py script from Github, look for
#CompTrans.ENc.Raw.tsv inside CUBOutput/SpreadSheets
gc3 <- read_tsv('CompTrans.ENc.Raw.tsv') %>%
  data.frame() %>%
  mutate(
    #taxon is characters 1-4 followed by characters 6-10 of SequenceID
    #(character 5 is skipped); intended to hold the 10-digit code per taxon
    taxon = paste0(substr(SequenceID, 1, 4), substr(SequenceID, 6, 10)),
    #coerce these two columns to numeric
    GC3.Degen = as.numeric(GC3.Degen),
    ObsWrightENc_6Fold = as.numeric(ObsWrightENc_6Fold)
  )
#ENc.Null.tsv is generated by the CUB script and sits in the same folder as
#the CompTrans.ENc.Raw.tsv spreadsheet
#Its GC3 and ENc columns are drawn as the null expectation line in the plot
enc_null <- read_tsv('ENc.Null.tsv') %>%
  data.frame()
#Scatter of observed ENc (6Fold) against GC3 with the null expectation curve
#as.numeric() inside aes() makes sure both axes are treated as continuous
gc3_plot <- ggplot(
  gc3,
  aes(x = as.numeric(GC3.Degen), y = as.numeric(ObsWrightENc_6Fold))
) +
  geom_point() +
  #the null curve comes from its own data frame, so it carries its own aes()
  geom_line(mapping = aes(x = GC3, y = ENc), data = enc_null) +
  labs(x = '%GC at 3rd-pos 4-fold sites', y = 'Observed Wright ENc (6Fold)') +
  theme_classic() +
  #must follow theme_classic(), which would otherwise reset this setting
  theme(legend.position = 'none')
#Change the columns inside aes() on the first line to whatever you want plotted
#as.numeric() is needed so that R reads each variable as a number
gc3_plot <- ggplot(
  data = gc3,
  mapping = aes(x = as.numeric(GC3.Degen), y = as.numeric(ObsWrightENc_No6Fold))
) +
  geom_point(size = 0.1) +
  #null expectation curve, drawn from the separate enc_null data frame
  geom_line(mapping = aes(x = GC3, y = ENc), data = enc_null) +
  theme_classic() +
  labs(
    x = 'GC3 Degen',
    y = 'ObsWrightENc_No6fold',
    title = "Metatranscriptomics R2G NTD files"
  ) +
  #all theme tweaks in one call, placed after theme_classic() so they stick
  theme(
    legend.position = 'none',
    axis.text.x = element_text(colour = "black"),
    axis.text.y = element_text(colour = "black")
  ) +
  #one panel per taxon, each with its own x-axis range
  facet_wrap(facets = vars(taxon), scales = 'free_x')
#display the plot
gc3_plot