# Utilities/for_fastas/Plotcomps_speciesName.R
#
# Draws one scatter panel per taxon of ObsWrightENc_No6Fold against GC3.Degen,
# overlays the null-expectation curve, and labels each panel with the species
# name, the 10-character taxon code and the number of sequences for that taxon.

# Load necessary packages ----
library(tidyverse)

# Input data ----
# Run this script from the directory that holds the input files (use
# `getwd()` in the console to see the current path).
# You may need to change the file names below. If you used the CUB_v2.1.py
# script from GitHub, the files should be in the CUBOutput folder, inside the
# SpreadSheets folder; the raw table is the one named CompTrans.ENc.Raw.tsv.
#
# NOTE(review): data.frame() is kept on purpose. It rewrites non-syntactic
# column headers into syntactic names, and the `GC3.Degen` column used in the
# plot presumably depends on that -- confirm against the .tsv header before
# replacing it with as.data.frame() or a bare tibble.
gc3 <- data.frame(read_tsv("ENc.Raw.tsv")) |>
  # The first 10 characters of SequenceID are the taxon code.
  mutate(taxon = substr(SequenceID, 1, 10))

# Lookup table mapping `ten_digit_code` to `Species`.
spp <- data.frame(read_tsv("Species_name.tsv"))
stopifnot(all(c("ten_digit_code", "Species") %in% names(spp)))

# Shorten long species names ----
# Set the maximum text length you want (characters, spaces not counted).
max_name_chars <- 20

# Names longer than the limit get their second word replaced by "spp".
# vapply() guarantees a character vector even when `spp` has no rows.
spp$Species <- vapply(
  strsplit(as.character(spp$Species), " ", fixed = TRUE),
  function(words) {
    if (anyNA(words)) {
      # Keep missing names missing instead of failing in `if` below.
      return(NA_character_)
    }
    if (sum(nchar(words)) > max_name_chars) {
      words[2] <- "spp"
    }
    paste(words, collapse = " ")
  },
  character(1)
)

# Replace taxon codes with species names ----
codes <- as.character(spp$ten_digit_code)
# Skip unusable rows: an NA pattern would turn every value into NA, and an
# empty pattern matches everywhere.
usable_rows <- which(!is.na(codes) & nzchar(codes) & !is.na(spp$Species))

gc3$taxon_a <- gc3$taxon
for (i in usable_rows) {
  # fixed = TRUE: codes are literal text, not regular expressions.
  gc3$taxon_a <- gsub(codes[i], spp$Species[i], gc3$taxon_a, fixed = TRUE)
}

# Facet label: "<species>\n<taxon code>, <number of rows for that taxon>".
gc3 <- gc3 |>
  group_by(taxon) |>
  mutate(taxon_c = paste0(taxon_a, "\n", taxon, ", ", n())) |>
  ungroup()

# Null expectation ----
# This .tsv is generated by the CUB script and will be in the same folder as
# the .tsv above. It supplies the null-expectation line.
enc_null <- data.frame(read_tsv("ENc.Null.tsv"))

# Plot ----
# Change the columns in the first aes() to whatever you want plotted.
# as.numeric() ensures R treats the variables as numbers.
gc3_plot <- ggplot(
  gc3,
  aes(as.numeric(GC3.Degen), as.numeric(ObsWrightENc_No6Fold))
) +
  geom_point(size = 0.1) +
  # Assuming enc_null has no `taxon_c` column, this layer is repeated in
  # every facet.
  geom_line(data = enc_null, aes(GC3, ENc)) +
  theme_classic() +
  labs(x = "GC3 Degen", y = "ObsWrightENc_No6fold") +
  ggtitle("R2G NTD files") +
  theme(
    legend.position = "none",
    strip.text = element_text(size = 7),
    axis.text.x = element_text(colour = "black"),
    axis.text.y = element_text(colour = "black")
  ) +
  facet_wrap(~taxon_c)
gc3_plot