# Install packages # devtools::install_github('cafferychen777/ggpicrust2') # install.packages("ggh4x") # Notes about the code: # Run line by line; don't highlight and run many lines at once. # If you do highlight and run multiple analysis functions in a row, # it will stop if there are no significant pathways # There is no output if there are no significant differential pathways # Load packages library(readr) library(ggpicrust2) library(tibble) library(tidyverse) library(ggprism) library(patchwork) library(ggh4x) # set the working directory setwd("/Users/shendricks/Desktop/Schumacher/") ################################################################################ # Load in meta and abundance data # For meta data, I had to remove columns: ForwardFastqFile and ReverseFastqFile metadata_EWU2021 <- read_xlsx(path = "/Users/shendricks/Desktop/Schumacher/outputs/Mapping file_EWU2021.xlsx") %>% select(.,!c(ForwardFastqFile,ReverseFastqFile)) %>% rename(., "SampleID" = "#SampleID") # Load KEGG pathway abundance file = "PICRUSt2_outputs/picrust2_result/KO_metagenome_out/pred_metagenome_unstrat.tsv" abundance_data <- read_delim(file, delim = "\t", col_names = TRUE, trim_ws = TRUE) kegg_abundance <- ko2kegg_abundance(file) # This code takes a min or two ################################################################################ # This is optional code that calculates differential pathways based on different statistics (methods). # You can then compare results across different methods and see that different methods # produce highly variable number of differential pathways! # You may want to subset your data prior to running the daa_results_list function # Example code to subset data prior to running the daa_results_list function kegg_abundance_D0 = kegg_abundance %>% select(.,contains("d0")) metadata_EWU2021_D0 = metadata_EWU2021 %>% select(SampleID, TreatmentGroup) %>% filter(., grepl("d0",SampleID)) methods <- c("ALDEx2", "DESeq2", "edgeR","LinDA","Maaslin2","metagenomeSeq","limma voom") daa_results_list <- lapply(methods, function(method) { pathway_daa(abundance = kegg_abundance_D0 , #%>% column_to_rownames("function") metadata = metadata_EWU2021_D0, group = "TreatmentGroup", daa_method = method) }) # Be careful with the method names. Some are not the same as the method (as above) # For example, ALDEx2 is "ALDEx2_Welch's t test"; I believe all others are named # exactly the same between method and method_name. Check the unique(daa_results_df$method) for names method_names <- c("ALDEx2_Welch's t test","DESeq2", "edgeR","LinDA","Maaslin2","metagenomeSeq","limma voom") comparison_results <- compare_daa_results(daa_results_list = daa_results_list, method_names = method_names) ################################################################################ ################ function to run analyses without time comparison analysis <- function(input, meta, ContrastName1, ContrastName2, Test, TestName) { if (!dir.exists("Pathway_DE_analysis")){ dir.create("Pathway_DE_analysis") }else{ print("Pathway_DE_analysis dir exists") } kegg_abundance = input %>% select(.,contains(ContrastName1)) %>% select(.,contains(ContrastName2)) metadata = meta %>% select(SampleID, TreatmentGroup) # Perform pathway differential abundance analysis (DAA) using XXX test method daa_results_df <- pathway_daa(abundance = kegg_abundance, metadata = metadata, group = "TreatmentGroup", daa_method = Test, select = NULL, reference = NULL) # Filter results for XXX test method daa_sub_method_results_df <- daa_results_df[daa_results_df$method == TestName, ] # Filter results for significant p_adjust values and then chose top 60 daa_sub_method_results_df1 <- daa_sub_method_results_df %>% filter(p_adjust < 0.05) %>% arrange(p_adjust,.by_group=TRUE) %>% top_n(60,p_adjust) # Annotate pathway results using KO to KEGG conversion daa_annotated_sub_method_results_df1 <- pathway_annotation(pathway = "KO", daa_results_df = daa_sub_method_results_df1, ko_to_kegg = TRUE) write.csv(daa_annotated_sub_method_results_df1, paste0("./Pathway_DE_analysis/",Test,"_",ContrastName1,"_",ContrastName2,"_Top60_Significant_Pathway.csv"), row.names=FALSE) # Chose the top 15 significant pathways daa_annotated_sub_method_results_df1_15 = daa_annotated_sub_method_results_df1 %>% drop_na(.,pathway_name) %>% arrange(p_adjust,.by_group=TRUE) %>% top_n(15,p_adjust) # Generate pathway error bar plot p <- pathway_errorbar(abundance = kegg_abundance, daa_results_df = daa_annotated_sub_method_results_df1_15, Group = metadata$TreatmentGroup, p_values_threshold = 0.05, order = "pathway_class", select = NULL, # this is where you can add specific pathways ko_to_kegg = TRUE, p_value_bar = TRUE, colors = NULL, x_lab = "pathway_name") ggsave(paste0("./Pathway_DE_analysis/",Test,"_",ContrastName1,"_",ContrastName2,"_Error_plot.pdf"), width = 18, height = 5) # Generate pca plot metadata_rename = metadata %>% rename(., sample_name = SampleID) kegg_abundance_v2= kegg_abundance[rowSums(kegg_abundance[])>0,] p1 <- pathway_pca(kegg_abundance_v2, metadata_rename, "TreatmentGroup") + ggtitle(paste0(" ", TestName," - ",ContrastName1,"_",ContrastName2)) ggsave(paste0("./Pathway_DE_analysis/",Test,"_",ContrastName1,"_",ContrastName2,"_PCA_plot.pdf")) # Generate heatmap kegg_abundance_sig = kegg_abundance %>% rownames_to_column("feature") %>% filter(feature %in% daa_annotated_sub_method_results_df1$feature) %>% full_join(.,daa_annotated_sub_method_results_df1, by="feature") %>% unite(feature_path, feature, pathway_name, sep = "_", remove = FALSE) %>% select(-c(feature,method,group1,group2,p_values,adj_method,p_adjust,pathway_description,pathway_class,pathway_map,pathway_name)) %>% column_to_rownames("feature_path") p2 <- pathway_heatmap(kegg_abundance_sig, metadata, "TreatmentGroup", colors = NULL) + ggtitle(paste0(TestName," - ",ContrastName1,"_",ContrastName2)) ggsave(paste0("./Pathway_DE_analysis/",Test,"_",ContrastName1,"_",ContrastName2,"_Heatmap_plot.pdf"), width = 12, height = 10) } ################################################################################ # Subset data for times metadata_Naive_D0 = metadata_EWU2021 %>% filter(., MetaData1 == "Naive" & MetaData2 == "D0") metadata_Naive_D14 = metadata_EWU2021 %>% filter(., MetaData1 == "Naive" & MetaData2 == "D14") metadata_Naive_D21 = metadata_EWU2021 %>% filter(., MetaData1 == "Naive" & MetaData2 == "D21") abundance_data_D0 = abundance_data %>% select(.,"function" | contains("d0")) # function needs to be first column abundance_data_D14 = abundance_data %>% select(.,"function" | contains("d14")) # function needs to be first column abundance_data_D21 = abundance_data %>% select(.,"function" | contains("d21")) # function needs to be first column # Run analyses analysis(kegg_abundance,metadata_Naive_D0,"Naive","D0","DESeq2","DESeq2") analysis(kegg_abundance,metadata_Naive_D14,"Naive","D14","DESeq2","DESeq2") analysis(kegg_abundance,metadata_Naive_D21,"Naive","D21","DESeq2","DESeq2") ################################################################################ ################ function to run analyses with time comparison analysis2 <- function(input, meta, ContrastName, Test, TestName) { if (!dir.exists("Pathway_DE_analysis")){ dir.create("Pathway_DE_analysis") }else{ print("Pathway_DE_analysis dir exists") } kegg_abundance = input metadata = meta %>% select(SampleID, MetaData2) # Perform pathway differential abundance analysis (DAA) using XXX test method daa_results_df <- pathway_daa(abundance = kegg_abundance, metadata = metadata, group = "MetaData2", daa_method = Test, select = NULL, reference = NULL) # Filter results for XXX test method daa_sub_method_results_df <- daa_results_df[daa_results_df$method == TestName, ] daa_sub_method_results_df1 <- daa_sub_method_results_df %>% filter(p_adjust < 0.05) %>% arrange(p_adjust,.by_group=TRUE) %>% top_n(60,p_adjust) # Annotate pathway results using KO to KEGG conversion daa_annotated_sub_method_results_df1 <- pathway_annotation(pathway = "KO", daa_results_df = daa_sub_method_results_df1, ko_to_kegg = TRUE) write.csv(daa_annotated_sub_method_results_df1, paste0("./Pathway_DE_analysis/",Test,"_",ContrastName,"_Top60_Significant_Pathway.csv"), row.names=FALSE) # Generate pathway error bar plot daa_annotated_sub_method_results_df1_15 = daa_annotated_sub_method_results_df1 %>% drop_na(.,pathway_name) %>% arrange(p_adjust,.by_group=TRUE) %>% top_n(15,p_adjust) p <- pathway_errorbar(abundance = kegg_abundance, daa_results_df = daa_annotated_sub_method_results_df1_15, Group = metadata$MetaData2, p_values_threshold = 0.05, order = "pathway_class", select = NULL, # this is where you can add specific pathways ko_to_kegg = TRUE, p_value_bar = TRUE, colors = NULL, x_lab = "pathway_name") ggsave(paste0("./Pathway_DE_analysis/",Test,"_",ContrastName,"_Error_plot.pdf"), width = 18, height = 5) # Generate pca plot metadata_rename = metadata %>% rename(., sample_name = SampleID) kegg_abundance_v2= kegg_abundance[rowSums(kegg_abundance[])>0,] p1 <- pathway_pca(kegg_abundance_v2, metadata_rename, "MetaData2") + ggtitle(paste0(" ", TestName," - ",ContrastName)) ggsave(paste0("./Pathway_DE_analysis/",Test,"_",ContrastName,"_PCA_plot.pdf")) # Generate heatmap kegg_abundance_sig = kegg_abundance %>% rownames_to_column("feature") %>% filter(feature %in% daa_annotated_sub_method_results_df1$feature) %>% full_join(.,daa_annotated_sub_method_results_df1, by="feature") %>% unite(feature_path, feature, pathway_name, sep = "_", remove = FALSE) %>% select(-c(feature,method,group1,group2,p_values,adj_method,p_adjust,pathway_description,pathway_class,pathway_map,pathway_name)) %>% column_to_rownames("feature_path") p2 <- pathway_heatmap(kegg_abundance_sig, metadata, "MetaData2", colors = NULL) + ggtitle(paste0(TestName," - ",ContrastName)) ggsave(paste0("./Pathway_DE_analysis/",Test,"_",ContrastName,"_Heatmap_plot.pdf"), width = 12, height = 10) } ################################################################################ ################### Subset data for Env Naive #### metadata_Env_Naive_D0_D14 = metadata_EWU2021 %>% filter(., TreatmentGroup == "Envigo" & MetaData1 == "Naive" & MetaData2 != "D21") metadata_Env_Naive_D14_21 = metadata_EWU2021 %>% filter(., TreatmentGroup == "Envigo" & MetaData1 == "Naive" & MetaData2 != "D0") metadata_Env_Naive_D0_D21 = metadata_EWU2021 %>% filter(., TreatmentGroup == "Envigo" & MetaData1 == "Naive" & MetaData2 != "D14") kegg_abundance_Env_Naive_D0_D14 = kegg_abundance %>% select(.,contains("En")) %>% select(.,contains("Naive")) %>% select(.,!contains("d21")) kegg_abundance_Env_Naive_D14_D21 = kegg_abundance %>% select(.,contains("En")) %>% select(.,contains("Naive")) %>% select(.,!contains("d0")) kegg_abundance_Env_Naive_D0_D21 = kegg_abundance %>% select(.,contains("En")) %>% select(.,contains("Naive")) %>% select(.,!contains("d14")) # Run analyses analysis2(kegg_abundance_Env_Naive_D0_D14,metadata_Env_Naive_D0_D14,"Envigo_Naive_D0_D14","DESeq2","DESeq2") # need to add the other comparison analysis2(kegg_abundance_Env_Naive_D14_D21,metadata_Env_Naive_D14_21,"Envigo_Naive_D14_D21","DESeq2","DESeq2") analysis2(kegg_abundance_Env_Naive_D0_D21,metadata_Env_Naive_D0_D21,"Envigo_Naive_D0_D21","DESeq2","DESeq2") ################### Subset data for Env EAE #### metadata_Env_EAE_D0_D14 = metadata_EWU2021 %>% filter(., TreatmentGroup == "Envigo" & MetaData1 == "EAE" & MetaData2 != "D21") metadata_Env_EAE_D14_21 = metadata_EWU2021 %>% filter(., TreatmentGroup == "Envigo" & MetaData1 == "EAE" & MetaData2 != "D0") metadata_Env_EAE_D0_D21 = metadata_EWU2021 %>% filter(., TreatmentGroup == "Envigo" & MetaData1 == "EAE" & MetaData2 != "D14") kegg_abundance_Env_EAE_D0_D14 = kegg_abundance %>% select(.,contains("En")) %>% select(.,contains("EAE")) %>% select(.,!contains("d21")) kegg_abundance_Env_EAE_D14_D21 = kegg_abundance %>% select(.,contains("En")) %>% select(.,contains("EAE")) %>% select(.,!contains("d0")) kegg_abundance_Env_EAE_D0_D21 = kegg_abundance %>% select(.,contains("En")) %>% select(.,contains("EAE")) %>% select(.,!contains("d14")) # Run analyses analysis2(kegg_abundance_Env_EAE_D0_D14,metadata_Env_EAE_D0_D14,"Envigo_EAE_D0_D14","DESeq2","DESeq2") # need to add the other comparison analysis2(kegg_abundance_Env_EAE_D14_D21,metadata_Env_EAE_D14_21,"Envigo_EAE_D14_D21","DESeq2","DESeq2") analysis2(kegg_abundance_Env_EAE_D0_D21,metadata_Env_EAE_D0_D21,"Envigo_EAE_D0_D21","DESeq2","DESeq2") ################### Subset data for Jax Naive #### metadata_Jax_Naive_D0_D14 = metadata_EWU2021 %>% filter(., TreatmentGroup == "Jackson" & MetaData1 == "Naive" & MetaData2 != "D21") metadata_Jax_Naive_D14_21 = metadata_EWU2021 %>% filter(., TreatmentGroup == "Jackson" & MetaData1 == "Naive" & MetaData2 != "D0") metadata_Jax_Naive_D0_D21 = metadata_EWU2021 %>% filter(., TreatmentGroup == "Jackson" & MetaData1 == "Naive" & MetaData2 != "D14") kegg_abundance_Jax_Naive_D0_D14 = kegg_abundance %>% select(.,contains("Jx")) %>% select(.,contains("Naive")) %>% select(.,!contains("d21")) kegg_abundance_Jax_Naive_D14_D21 = kegg_abundance %>% select(.,contains("Jx")) %>% select(.,contains("Naive")) %>% select(.,!contains("d0")) kegg_abundance_Jax_Naive_D0_D21 = kegg_abundance %>% select(.,contains("Jx")) %>% select(.,contains("Naive")) %>% select(.,!contains("d14")) # Run analyses analysis2(kegg_abundance_Jax_Naive_D0_D14,metadata_Jax_Naive_D0_D14,"Jackson_Naive_D0_D14","DESeq2","DESeq2") # need to add the other comparison analysis2(kegg_abundance_Jax_Naive_D14_D21,metadata_Jax_Naive_D14_21,"Jackson_Naive_D14_D21","DESeq2","DESeq2") analysis2(kegg_abundance_Jax_Naive_D0_D21,metadata_Jax_Naive_D0_D21,"Jackson_Naive_D0_D21","DESeq2","DESeq2") ################### Subset data for Jax EAE #### metadata_Jax_EAE_D0_D14 = metadata_EWU2021 %>% filter(., TreatmentGroup == "Jackson" & MetaData1 == "EAE" & MetaData2 != "D21") metadata_Jax_EAE_D14_21 = metadata_EWU2021 %>% filter(., TreatmentGroup == "Jackson" & MetaData1 == "EAE" & MetaData2 != "D0") metadata_Jax_EAE_D0_D21 = metadata_EWU2021 %>% filter(., TreatmentGroup == "Jackson" & MetaData1 == "EAE" & MetaData2 != "D14") kegg_abundance_Jax_EAE_D0_D14 = kegg_abundance %>% select(.,contains("Jx")) %>% select(.,contains("EAE")) %>% select(.,!contains("d21")) kegg_abundance_Jax_EAE_D14_D21 = kegg_abundance %>% select(.,contains("Jx")) %>% select(.,contains("EAE")) %>% select(.,!contains("d0")) kegg_abundance_Jax_EAE_D0_D21 = kegg_abundance %>% select(.,contains("Jx")) %>% select(.,contains("EAE")) %>% select(.,!contains("d14")) # Run analyses analysis2(kegg_abundance_Jax_EAE_D0_D14,metadata_Jax_EAE_D0_D14,"Jackson_EAE_D0_D14","DESeq2","DESeq2") # need to add the other comparison analysis2(kegg_abundance_Jax_EAE_D14_D21,metadata_Jax_EAE_D14_21,"Jackson_EAE_D14_D21","DESeq2","DESeq2") analysis2(kegg_abundance_Jax_EAE_D0_D21,metadata_Jax_EAE_D0_D21,"Jackson_EAE_D0_D21","DESeq2","DESeq2") ################################################################################ # Test code that is not within a function for trial runs # # I didn't retry this code, so it may have errors # # # Set set data # kegg_abundance_D0 = kegg_abundance %>% select(.,contains("d0")) # metadata_EWU2021_D0 = metadata_EWU2021_D0 %>% select(SampleID, TreatmentGroup) %>% filter(., grepl("d0",SampleID)) # # # Perform pathway differential abundance analysis (DAA) using DESeq2 method # daa_results_df <- pathway_daa(abundance = kegg_abundance_D0, # metadata = metadata_EWU2021_D0, # group = "MetaData2", # daa_method = "DESeq2", # select = NULL, # reference = NULL) # # # Filter results for DESeq2 method # # Please check the unique(daa_results_df$method) and choose one # daa_sub_method_results_df <- daa_results_df[daa_results_df$method == "DESeq2", ] # # daa_sub_method_results_df1 <- daa_sub_method_results_df %>% filter(p_adjust < 0.01) %>% #slice(1:29) # arrange(p_adjust,.by_group=TRUE) # # # Annotate pathway results using KO to KEGG conversion # daa_annotated_sub_method_results_df1 <- pathway_annotation(pathway = "KO", # daa_results_df = daa_sub_method_results_df1, # ko_to_kegg = TRUE) # # write.csv(daa_annotated_sub_method_results_df1, paste0("./Pathway_DE_analysis/DESeq2_D0_Significant_Pathway.csv"), row.names=FALSE) # # kegg_abundance_sig = kegg_abundance_D0 %>% rownames_to_column("feature") %>% # filter(feature %in% daa_annotated_sub_method_results_df1$feature) %>% # full_join(.,daa_annotated_sub_method_results_df1, by="feature") %>% # unite(feature_path, feature, pathway_name, sep = "_", remove = FALSE) %>% # select(-c(feature,method,group1,group2,p_values,adj_method,p_adjust,pathway_description,pathway_class,pathway_map,pathway_name)) %>% # column_to_rownames("feature_path") # # pathway_heatmap(kegg_abundance_sig, metadata_EWU2021_D0, "MetaData2", # colors = NULL) # # Generate pathway error bar plot # daa_annotated_sub_method_results_df1_60 = daa_annotated_sub_method_results_df1 %>% # drop_na(.,pathway_name) %>% # arrange(p_adjust,.by_group=TRUE) %>% # top_n(30,p_adjust) # # p <- pathway_errorbar(abundance = kegg_abundance_D0, # daa_results_df = daa_annotated_sub_method_results_df1_60, # Group = metadata_EWU2021_D0$MetaData2, # p_values_threshold = 0.01, # order = "pathway_class", # select = NULL, ko_to_kegg = TRUE, # p_value_bar = TRUE, colors = NULL, # x_lab = "pathway_name") # ggsave(paste0("./Pathway_DE_analysis/DESeq2_D0_Error_plot.pdf"), width = 18, height = 5) # # # # Generate pca plot # metadata_EWU2021_D0_rename = metadata_EWU2021_D0 %>% rename(., sample_name = SampleID) # kegg_abundance_D0_v2= kegg_abundance_D0[rowSums(kegg_abundance_D0[])>0,] # # p1 <- pathway_pca(kegg_abundance_D0, metadata_EWU2021_D0, "MetaData2") # # # Generate heatmap # # Please change your sample id's column name to sample_name # p2 <- pathway_heatmap(kegg_abundance_D0 %>% rownames_to_column("feature") %>% # filter(feature %in% daa_annotated_sub_method_results_df1$feature) %>% # column_to_rownames("feature"), metadata_EWU2021_D0, "MetaData2", # colors = NULL)