From e502d2638f988a569aa01524cc3044636a494d49 Mon Sep 17 00:00:00 2001
From: "Adri K. Grow" <42044618+adriannagrow@users.noreply.github.com>
Date: Fri, 1 Sep 2023 14:35:08 -0400
Subject: [PATCH] Add files via upload

---
Review notes (free text after the '---' separator; not part of the commit
message, and the hunk below is reproduced unchanged):
* The added setwd() line replaces the '/Your/Working/Directory' placeholder
  with an absolute path under /Users/agrow, so the script will fail at that
  line on any machine without that directory until the path is edited.
* The added mutate() builds `taxon` from characters 1-4 and 6-10 of
  SequenceID (9 characters, skipping character 5), while its trailing comment
  describes a 10-digit code. NOTE(review): confirm whether
  substr(SequenceID, 1, 10) was intended.
* The plotted column changes from ObsWrightENc_6Fold to ObsWrightENc_No6Fold,
  and the two stand-alone as.numeric() assignments are removed; numeric
  conversion now happens only inside aes().

 Utilities/for_fastas/PlotComps.r | 47 ++++++++++++++++++--------------
 1 file changed, 26 insertions(+), 21 deletions(-)

diff --git a/Utilities/for_fastas/PlotComps.r b/Utilities/for_fastas/PlotComps.r
index 2a1107d..b3c443a 100644
--- a/Utilities/for_fastas/PlotComps.r
+++ b/Utilities/for_fastas/PlotComps.r
@@ -1,30 +1,35 @@
 #The purpose of this script is to make a simple jellyfish plot from a dataset like the one
-#produced by the CUB.py script, also in the Github Utilities folder. This script was written
-#by Auden Cote-L'Heureux and last updated in May 2023
+#produced by the CUB_v2.1.py script, also in the Github Utilities folder. This script was written
+#by Auden Cote-L'Heureux and last updated in September 2023 by Adri
 
+#load necessary packages
 library(tidyverse)
 
-#Change this path
-setwd('/Your/Working/Directory')
+#Change to the path of the directory you're working from
+#Use "getwd()" in console below to get path
+setwd("/Users/agrow/Desktop/NewCUB_meta/CUBOutput/SpreadSheets")
 
-#You will need to change name of data frame below
-#if you used the CUB.py script, it should be in the spreadsheets folder
-#in the output and end in CompTrans.ENc.Raw.tsv
-gc3 <- data.frame(read_tsv('CompTrans.ENc.Raw.tsv'))
+#You may need to change name of data frame below
+#if you used the CUB_v2.1.py script from Github, it should be in the
+#CUBOutput folder and then inside the SpreadSheets folder
+#you are looking for the CompTrans.ENc.Raw.tsv
+gc3 <- data.frame(read_tsv('CompTrans.ENc.Raw.tsv'))%>%
+  mutate(taxon = paste(substr(SequenceID, 1, 4), substr(SequenceID,6,10), sep = '')) #this line reads in your 10-digit codes to a column in the data frame called taxon
 
-gc3$GC3.Degen <- as.numeric(gc3$GC3.Degen)
-gc3$ObsWrightENc_6Fold <- as.numeric(gc3$ObsWrightENc_6Fold)
-
-#The data for the null expectation curve will be in the same folder as above
+#This .tsv is generated by the CUB script and will be in the same folder as the .tsv above
+#This generates the null expectation line
 enc_null <- data.frame(read_tsv('ENc.Null.tsv'))
 
-gc3_plot <- ggplot(data = gc3, mapping = aes(as.numeric(GC3.Degen), as.numeric(ObsWrightENc_6Fold))) +
-  geom_point() +
-  geom_line(data = enc_null, aes(GC3, ENc)) +
-  theme_classic() +
-  labs(x = '%GC at 3rd-pos 4-fold sites', y = 'Observed Wright ENc (6Fold)') +
-  theme(
-    legend.position = 'none'
-  )
-
+#change data in first line here to what you want plotted
+#you need as.numeric to ensure R is reading the variable correctly
+gc3_plot <- ggplot(gc3, aes(as.numeric(GC3.Degen), as.numeric(ObsWrightENc_No6Fold)))+
+  geom_point(size = 0.1)+
+  geom_line(data = enc_null, aes(GC3, ENc))+
+  theme_classic()+
+  labs(x = 'GC3 Degen', y = 'ObsWrightENc_No6fold')+
+  theme(legend.position = 'none')+
+  ggtitle("Metatranscriptomics R2G NTD files")+
+  theme(axis.text.x=element_text(colour="black"))+
+  theme(axis.text.y=element_text(colour="black"))+
+  facet_wrap(vars(taxon), scales = 'free_x')
 gc3_plot