mirror of
http://43.156.76.180:8026/YuuMJ/EukPhylo.git
synced 2025-12-27 05:50:24 +08:00
48 lines
1.8 KiB
R
48 lines
1.8 KiB
R
#load necessary packages
|
|
library(tidyverse)
|
|
|
|
# Run this script from the directory that holds the input tables, or edit the
# file paths below. Use getwd() in the console to check the current directory.
# You may need to change the file name below: if you used the CUB_v2.1.py
# script from GitHub, the table should be in the CUBOutput folder, inside the
# SpreadSheets folder, and is named like CompTrans.ENc.Raw.tsv.
#
# NOTE(review): data.frame() is kept around read_tsv() on purpose. It rewrites
# column names into syntactic ones (check.names = TRUE), which is presumably
# why the plot refers to a column as GC3.Degen -- confirm against the header of
# your table before removing it.
#
# taxon holds the first 10 characters of SequenceID (the 10-digit taxon code).
# A missing SequenceID stays NA instead of becoming the literal text "NANA".
gc3 <- data.frame(read_tsv("ENc.Raw.tsv")) |>
  mutate(taxon = substr(SequenceID, 1, 10))
|
|
|
|
# Lookup table of species names; the script uses its Species and
# ten_digit_code columns.
spp <- data.frame(read_tsv("Species_name.tsv"))

# Set the maximum text length you want: a species name whose words add up to
# more than this many characters (spaces not counted) is shortened by
# replacing its second word with "spp".
max_species_chars <- 20

# vapply() guarantees a character vector even when the table has no rows
# (sapply() would hand back an empty list in that case).
spp$Species <- vapply(
  strsplit(as.character(spp$Species), " ", fixed = TRUE),
  function(words) {
    # A missing species name stays missing instead of aborting the script
    # with "missing value where TRUE/FALSE needed".
    if (anyNA(words)) {
      return(NA_character_)
    }
    if (sum(nchar(words)) > max_species_chars) {
      words[2] <- "spp"
    }
    paste(words, collapse = " ")
  },
  character(1)
)
|
|
# taxon_a starts as a copy of the taxon code; every code listed in spp is then
# replaced by its species name. Codes without an entry in spp are left as-is.
gc3$taxon_a <- gc3$taxon

for (i in seq_len(nrow(spp))) {
  code <- spp$ten_digit_code[i]
  species <- spp$Species[i]

  # Skip incomplete rows: an NA pattern would turn every label into NA, an
  # empty pattern would match everywhere, and an NA species would blank out
  # the labels it matches.
  if (is.na(code) || !nzchar(code) || is.na(species)) {
    next
  }

  # fixed = TRUE: the code is matched as literal text, not as a regular
  # expression, so characters such as "." in a code cannot over-match.
  gc3$taxon_a <- gsub(code, species, gc3$taxon_a, fixed = TRUE)
}
|
|
|
|
# Build the facet label for every row: the species name on the first line,
# then the taxon code and the number of rows that share that code.
# ungroup() drops the grouping again so that later steps do not silently run
# per taxon.
gc3 <- gc3 |>
  group_by(taxon) |>
  mutate(taxon_c = paste0(taxon_a, "\n", taxon, ", ", n())) |>
  ungroup()
|
|
|
|
# Table used to draw the null-expectation line in the plot. It is generated by
# the CUB script and sits in the same folder as the raw ENc table read above.
enc_null <- read_tsv("ENc.Null.tsv") |>
  data.frame()
|
|
|
|
# Change the two columns inside aes() below to plot different data.
# as.numeric() makes sure R treats both columns as numbers.
#
# One panel per taxon label (taxon_c): the observed values are drawn as small
# points and the null-expectation curve from enc_null as a line.
gc3_plot <- ggplot(
  gc3,
  aes(x = as.numeric(GC3.Degen), y = as.numeric(ObsWrightENc_No6Fold))
) +
  geom_point(size = 0.1) +
  geom_line(data = enc_null, mapping = aes(x = GC3, y = ENc)) +
  facet_wrap(~taxon_c) +
  labs(x = "GC3 Degen", y = "ObsWrightENc_No6fold") +
  ggtitle("R2G NTD files") +
  theme_classic() +
  # Tweaks applied on top of theme_classic(): no legend, small facet titles,
  # black axis tick labels.
  theme(
    legend.position = "none",
    strip.text = element_text(size = 7),
    axis.text.x = element_text(colour = "black"),
    axis.text.y = element_text(colour = "black")
  )

# Draw the plot.
gc3_plot
|