--- title: "(20231027) Circadian_Rhythms_DE" output: pdf_document --- ```{r} # load libraries library(DESeq2) library(apeglm) library(EnhancedVolcano) # BiocManager::install("DESeq2") # BiocManager::install("apeglm") # BiocManager::install("EnhancedVolcano") ``` Also, try to get the correlation between normalized gene exprs data and the myogenic tone data. ```{r} # Load the filtered, normalized counts male_counts <- read.csv("DESeq2_normalized_male_counts_20231106.csv") female_counts <- read.csv("DESeq2_normalized_female_counts_20231106.csv") # Remember that these are normalized counts! # Continue to load the myogenic data male_myogenic_data <- read.csv("~/Circadian_Rhythms/Myogenic_Data/Final_data_males.csv") female_myogenic_data <- read.csv("~/Circadian_Rhythms/Myogenic_Data/Final_data_females.csv") ``` We will focus our analysis first on the common rhythmic genes for both males and females, from the MetaCycle results ```{r} male_metaCycle <- read.csv("~/Circadian_Rhythms/metaout_20231106/meta2d_DESeq2_normalized_male_counts_20231106.csv") female_metaCycle <- read.csv("~/Circadian_Rhythms/metaout_20231106/meta2d_DESeq2_normalized_female_counts_20231106.csv") male_cycling <- male_metaCycle %>% filter(meta2d_pvalue < 0.05) %>% pull(CycID) female_cycling <- female_metaCycle %>% filter(meta2d_pvalue < 0.05) %>% pull(CycID) # Find the common genes that cycle between males and females common_cycling <- intersect(male_cycling, female_cycling) # Filter out only the common_cycling genes in males cycling_m <- male_counts %>% dplyr::filter(gene %in% common_cycling) %>% column_to_rownames("gene") average_per_gene_M <- sapply(seq(1, ncol(cycling_m), by = 6), function(start_col) { end_col <- min(start_col + 5, ncol(cycling_m)) # Determine the end column for the group avg_exprs <- rowSums(cycling_m[, start_col:end_col])/6 return(avg_exprs) }) %>% data.frame() colnames(average_per_gene_M) <- c("ZT3", "ZT7", "ZT11", "ZT15", "ZT19", "ZT23") # Filter out only the common_cycling genes in females cycling_f <- female_counts %>% dplyr::filter(gene %in% common_cycling) %>% column_to_rownames("gene") average_per_gene_F <- sapply(seq(1, ncol(cycling_f), by = 6), function(start_col) { end_col <- min(start_col + 5, ncol(cycling_f)) # Determine the end column for the group avg_exprs <- rowSums(cycling_f[, start_col:end_col])/6 return(avg_exprs) }) %>% data.frame() colnames(average_per_gene_F) <- c("ZT3", "ZT7", "ZT11", "ZT15", "ZT19", "ZT23") ``` Now we also need to do some data wrangling for the myogenic data too. ```{r} # Overview of the data, and filter out the Myo60 data male_myogenic_data %>% head() female_myogenic_data %>% head() M_Myo60 <- male_myogenic_data %>% filter(ID == "M Myo 60") %>% dplyr::select(-ID) # Assuming df is your data frame # Define the list of group sizes group_sizes <- c(4, 7, 6, 4, 5, 5) # Initialize an empty list to store group means group_means_list <- list() # Track the starting column index start_col <- 1 # Loop through each group size for (group_size in group_sizes) { end_col <- min(start_col + group_size - 1, ncol(M_Myo60)) # Determine the end column for the group averages <- rowSums(M_Myo60[, start_col:end_col])/group_size # Calculate the mean for the current group group_means_list[[length(group_means_list) + 1]] <- averages # Store the mean in the list start_col <- end_col + 1 # Move to the next group } # Convert the list of group means to a data frame averages_df_m <- data.frame(do.call(cbind, group_means_list)) colnames(averages_df_m) <- c("ZT3", "ZT7", "ZT11", "ZT15", "ZT19", "ZT23") F_Myo60 <- female_myogenic_data %>% filter(ID == "F Myo 60") %>% dplyr::select(-ID) # Assuming df is your data frame # Define the list of group sizes group_sizes <- c(4, 7, 5, 6, 8, 6) # Initialize an empty list to store group means group_means_list <- list() # Track the starting column index start_col <- 1 # Loop through each group size for (group_size in group_sizes) { end_col <- min(start_col + group_size - 1, ncol(F_Myo60)) # Determine the end column for the group averages <- rowSums(F_Myo60[, start_col:end_col])/group_size # Calculate the mean for the current group group_means_list[[length(group_means_list) + 1]] <- averages # Store the mean in the list start_col <- end_col + 1 # Move to the next group } # Convert the list of group means to a data frame averages_df_f <- data.frame(do.call(cbind, group_means_list)) colnames(averages_df_f) <- c("ZT3", "ZT7", "ZT11", "ZT15", "ZT19", "ZT23") v1 <- averages_df_m %>% t() v2 <- averages_df_f %>% t() cor(v1,v2) ``` At this point, we can calculate the correlations between male or female myogenic data and the expression data for each rhythmic gene. ```{r} # Take an example of one gene for males example <- average_per_gene_M[6,] %>% t() cor(example, v1) # Let's make a for loop to get the correlations male_tibble <- tibble() for (i in 1:dim(average_per_gene_M)[1]){ example <- average_per_gene_M[i,] %>% t() gene <- rownames(average_per_gene_M)[i] correlation <- data.frame(gene, abs(cor(example, v1))) male_tibble <- bind_rows(male_tibble, correlation) } male_tibble %>% dplyr::rename(abs_corr = abs.cor.example..v1..) %>% pull(abs_corr) %>% mean() female_tibble <- tibble() for (i in 1:dim(average_per_gene_F)[1]){ example <- average_per_gene_F[i,] %>% t() gene <- rownames(average_per_gene_F)[i] correlation <- data.frame(abs(cor(example, v2))) female_tibble <- bind_rows(female_tibble, correlation) } female_tibble %>% dplyr::rename(abs_corr = abs.cor.example..v2..) %>% pull(abs_corr) %>% mean() pdf(file = "Correlations_p0.05_20231107.pdf") male_tibble %>% dplyr::rename(abs_corr = abs.cor.example..v1..) %>% pull(abs_corr) %>% hist(col = "lightblue", main = "Distributions of Correlations (Male)", breaks = 10) female_tibble %>% dplyr::rename(abs_corr = abs.cor.example..v2..) %>% pull(abs_corr) %>% hist(col = "lightblue", main = "Distributions of Correlations (Female)", breaks = 10) dev.off() ``` ```{r} # Test for spearman correlation male_tibble <- tibble() for (i in 1:dim(average_per_gene_M)[1]){ example <- average_per_gene_M[i,] %>% t() correlation <- data.frame(abs(cor(example, v1, method = "spearman"))) male_tibble <- bind_rows(male_tibble, correlation) } male_tibble %>% dplyr::rename(abs_corr = abs.cor.example..v1..method....spearman...) %>% pull(abs_corr) %>% hist() female_tibble <- tibble() for (i in 1:dim(average_per_gene_F)[1]){ example <- average_per_gene_F[i,] %>% t() correlation <- data.frame(abs(cor(example, v2, method = "spearman"))) female_tibble <- bind_rows(female_tibble, correlation) } female_tibble %>% dplyr::rename(abs_corr = abs.cor.example..v2..method....spearman...) %>% pull(abs_corr) %>% mean() ``` Let's use a more stringent p-value (the adjusted version instead). ```{r} male_cycling_BH.Q <- male_metaCycle %>% filter(meta2d_BH.Q < 0.05) %>% pull(CycID) female_cycling_BH.Q <- female_metaCycle %>% filter(meta2d_BH.Q < 0.05) %>% pull(CycID) # Find the common genes that cycle between males and females common_cycling_BH.Q <- intersect(male_cycling_BH.Q, female_cycling_BH.Q) # Filter out only the common_cycling genes in males cycling_m_BH.Q <- male_counts %>% dplyr::filter(gene %in% common_cycling_BH.Q) %>% column_to_rownames("gene") average_per_gene_M_BH.Q <- sapply(seq(1, ncol(cycling_m_BH.Q), by = 6), function(start_col) { end_col <- min(start_col + 5, ncol(cycling_m)) # Determine the end column for the group avg_exprs <- rowSums(cycling_m_BH.Q[, start_col:end_col])/6 return(avg_exprs) }) %>% data.frame() colnames(average_per_gene_M_BH.Q) <- c("ZT3", "ZT7", "ZT11", "ZT15", "ZT19", "ZT23") # Filter out only the common_cycling genes in females cycling_f_BH.Q <- female_counts %>% dplyr::filter(gene %in% common_cycling_BH.Q) %>% column_to_rownames("gene") average_per_gene_F_BH.Q <- sapply(seq(1, ncol(cycling_f_BH.Q), by = 6), function(start_col) { end_col <- min(start_col + 5, ncol(cycling_f_BH.Q)) # Determine the end column for the group avg_exprs <- rowSums(cycling_f_BH.Q[, start_col:end_col])/6 return(avg_exprs) }) %>% data.frame() colnames(average_per_gene_F_BH.Q) <- c("ZT3", "ZT7", "ZT11", "ZT15", "ZT19", "ZT23") # Try to recalculate the correlations # Let's make a for loop to get the correlations male_tibble_BH.Q <- tibble() for (i in 1:dim(average_per_gene_M_BH.Q)[1]){ example <- average_per_gene_M_BH.Q[i,] %>% t() correlation <- data.frame(abs(cor(example, v1))) male_tibble_BH.Q <- bind_rows(male_tibble_BH.Q, correlation) } male_tibble_BH.Q %>% dplyr::rename(abs_corr = abs.cor.example..v1..) %>% pull(abs_corr) %>% mean() female_tibble_BH.Q <- tibble() for (i in 1:dim(average_per_gene_F_BH.Q)[1]){ example <- average_per_gene_F_BH.Q[i,] %>% t() correlation <- data.frame(abs(cor(example, v2))) female_tibble_BH.Q <- bind_rows(female_tibble_BH.Q, correlation) } female_tibble_BH.Q %>% dplyr::rename(abs_corr = abs.cor.example..v2..) %>% pull(abs_corr) %>% mean() pdf(file = "Correlations_Q0.05_20231107.pdf") male_tibble_BH.Q %>% dplyr::rename(abs_corr = abs.cor.example..v1..) %>% pull(abs_corr) %>% hist(col = "lightblue", main = "Distributions of Correlations (Male)", breaks = 10) female_tibble_BH.Q %>% dplyr::rename(abs_corr = abs.cor.example..v2..) %>% pull(abs_corr) %>% hist(col = "lightblue", main = "Distributions of Correlations (Female)", breaks = 10) dev.off() # Test out for spearman correlations male_tibble_BH.Q <- tibble() for (i in 1:dim(average_per_gene_M_BH.Q)[1]){ example <- average_per_gene_M_BH.Q[i,] %>% t() correlation <- data.frame(abs(cor(example, v1, method = "spearman"))) male_tibble_BH.Q <- bind_rows(male_tibble_BH.Q, correlation) } male_tibble_BH.Q %>% dplyr::rename(abs_corr = abs.cor.example..v1..method....spearman...) %>% pull(abs_corr) %>% mean() female_tibble_BH.Q <- tibble() for (i in 1:dim(average_per_gene_F_BH.Q)[1]){ example <- average_per_gene_F_BH.Q[i,] %>% t() correlation <- data.frame(abs(cor(example, v2, method = "spearman"))) female_tibble_BH.Q <- bind_rows(female_tibble_BH.Q, correlation) } female_tibble_BH.Q %>% dplyr::rename(abs_corr = abs.cor.example..v2..method....spearman...) %>% pull(abs_corr) %>% mean() # For curiosity, let's test MvsF malesVSfemales <- tibble() for (i in 1:length(common_cycling)){ males <- average_per_gene_M[i,] %>% t() females <- average_per_gene_F[i,] %>% t() correlation_value <- data.frame(cor(males, females)) %>% pull() correlation <- data.frame(gene = colnames(males), corr = correlation_value) malesVSfemales <- bind_rows(malesVSfemales, correlation) } malesVSfemales$corr %>% hist(breaks = length(common_cycling)) malesVSfemales_BH.Q <- tibble() for (i in 1:length(common_cycling_BH.Q)){ males <- average_per_gene_M_BH.Q[i,] %>% t() females <- average_per_gene_F_BH.Q[i,] %>% t() correlation_value <- data.frame(cor(males, females)) %>% pull() correlation <- data.frame(gene = colnames(males), corr = correlation_value) malesVSfemales_BH.Q <- bind_rows(malesVSfemales_BH.Q, correlation) } malesVSfemales_BH.Q$corr %>% mean() ```