---
title: "(20231027) Circadian_Rhythms_DE"
output: pdf_document
---

```{r}
# load libraries
library(DESeq2)
library(apeglm)
library(EnhancedVolcano)
# BiocManager::install("DESeq2")
# BiocManager::install("apeglm")
# BiocManager::install("EnhancedVolcano")
```

Also, try to get the correlation between normalized gene exprs data and the myogenic tone data.

```{r}
# Load the filtered, normalized counts
male_counts <- read.csv("DESeq2_normalized_male_counts_20231106.csv")
female_counts <- read.csv("DESeq2_normalized_female_counts_20231106.csv")
  
# Remember that these are normalized counts!
# Continue to load the myogenic data
male_myogenic_data <- read.csv("~/Circadian_Rhythms/Myogenic_Data/Final_data_males.csv")
female_myogenic_data <- read.csv("~/Circadian_Rhythms/Myogenic_Data/Final_data_females.csv")
```

We will focus our analysis first on the common rhythmic genes for both males and females, from the MetaCycle results

```{r}
male_metaCycle <- read.csv("~/Circadian_Rhythms/metaout_20231106/meta2d_DESeq2_normalized_male_counts_20231106.csv")
female_metaCycle <- read.csv("~/Circadian_Rhythms/metaout_20231106/meta2d_DESeq2_normalized_female_counts_20231106.csv")

male_cycling <- male_metaCycle %>% 
  filter(meta2d_pvalue < 0.05) %>% pull(CycID)
female_cycling <- female_metaCycle %>% 
  filter(meta2d_pvalue < 0.05) %>% pull(CycID)
# Find the common genes that cycle between males and females
common_cycling <- intersect(male_cycling, female_cycling)

# Filter out only the common_cycling genes in males
cycling_m <- male_counts %>% 
  dplyr::filter(gene %in% common_cycling) %>% 
  column_to_rownames("gene")

average_per_gene_M <- sapply(seq(1, ncol(cycling_m), by = 6), function(start_col) {
  end_col <- min(start_col + 5, ncol(cycling_m))  # Determine the end column for the group
  avg_exprs <- rowSums(cycling_m[, start_col:end_col])/6
  return(avg_exprs)
}) %>% data.frame()

colnames(average_per_gene_M) <- c("ZT3", "ZT7", "ZT11", "ZT15", "ZT19", "ZT23")

# Filter out only the common_cycling genes in females
cycling_f <- female_counts %>% 
  dplyr::filter(gene %in% common_cycling) %>% 
  column_to_rownames("gene")

average_per_gene_F <- sapply(seq(1, ncol(cycling_f), by = 6), function(start_col) {
  end_col <- min(start_col + 5, ncol(cycling_f))  # Determine the end column for the group
  avg_exprs <- rowSums(cycling_f[, start_col:end_col])/6
  return(avg_exprs)
}) %>% data.frame()

colnames(average_per_gene_F) <- c("ZT3", "ZT7", "ZT11", "ZT15", "ZT19", "ZT23")
```

Now we also need to do some data wrangling for the myogenic data too.

```{r}
# Overview of the data, and filter out the Myo60 data
male_myogenic_data %>% head()
female_myogenic_data %>% head()

M_Myo60 <- male_myogenic_data %>% 
  filter(ID == "M Myo 60") %>% 
  dplyr::select(-ID)

# Assuming df is your data frame
# Define the list of group sizes
group_sizes <- c(4, 7, 6, 4, 5, 5)

# Initialize an empty list to store group means
group_means_list <- list()

# Track the starting column index
start_col <- 1

# Loop through each group size
for (group_size in group_sizes) {
  end_col <- min(start_col + group_size - 1, ncol(M_Myo60))  # Determine the end column for the group
  averages <- rowSums(M_Myo60[, start_col:end_col])/group_size  # Calculate the mean for the current group
  group_means_list[[length(group_means_list) + 1]] <- averages  # Store the mean in the list
  start_col <- end_col + 1  # Move to the next group
}

# Convert the list of group means to a data frame
averages_df_m <- data.frame(do.call(cbind, group_means_list))
colnames(averages_df_m) <- c("ZT3", "ZT7", "ZT11", "ZT15", "ZT19", "ZT23")


F_Myo60 <- female_myogenic_data %>% 
  filter(ID == "F Myo 60") %>% 
  dplyr::select(-ID)

# Assuming df is your data frame
# Define the list of group sizes
group_sizes <- c(4, 7, 5, 6, 8, 6)

# Initialize an empty list to store group means
group_means_list <- list()

# Track the starting column index
start_col <- 1

# Loop through each group size
for (group_size in group_sizes) {
  end_col <- min(start_col + group_size - 1, ncol(F_Myo60))  # Determine the end column for the group
  averages <- rowSums(F_Myo60[, start_col:end_col])/group_size  # Calculate the mean for the current group
  group_means_list[[length(group_means_list) + 1]] <- averages  # Store the mean in the list
  start_col <- end_col + 1  # Move to the next group
}

# Convert the list of group means to a data frame
averages_df_f <- data.frame(do.call(cbind, group_means_list))
colnames(averages_df_f) <- c("ZT3", "ZT7", "ZT11", "ZT15", "ZT19", "ZT23")

v1 <- averages_df_m %>% t()
v2 <- averages_df_f %>% t()
cor(v1,v2)
```

At this point, we can calculate the correlations between male or female myogenic data and the expression data for each rhythmic gene.

```{r}
# Take an example of one gene for males
example <- average_per_gene_M[6,] %>% 
  t()
cor(example, v1)

# Let's make a for loop to get the correlations
male_tibble <- tibble()
for (i in 1:dim(average_per_gene_M)[1]){
  example <- average_per_gene_M[i,] %>% t()
  gene <- rownames(average_per_gene_M)[i]
  correlation <- data.frame(gene, abs(cor(example, v1)))
  male_tibble <- bind_rows(male_tibble, correlation)
}
male_tibble %>% 
  dplyr::rename(abs_corr = abs.cor.example..v1..) %>% 
  pull(abs_corr) %>% 
  mean()

female_tibble <- tibble()
for (i in 1:dim(average_per_gene_F)[1]){
  example <- average_per_gene_F[i,] %>% t()
  gene <- rownames(average_per_gene_F)[i]
  correlation <- data.frame(abs(cor(example, v2)))
  female_tibble <- bind_rows(female_tibble, correlation)
}
female_tibble %>% 
  dplyr::rename(abs_corr = abs.cor.example..v2..) %>% 
  pull(abs_corr) %>% 
  mean()

pdf(file = "Correlations_p0.05_20231107.pdf")
male_tibble %>% 
  dplyr::rename(abs_corr = abs.cor.example..v1..) %>% 
  pull(abs_corr) %>% 
  hist(col = "lightblue",
       main = "Distributions of Correlations (Male)",
       breaks = 10)
 female_tibble %>% 
  dplyr::rename(abs_corr = abs.cor.example..v2..) %>% 
  pull(abs_corr) %>% 
  hist(col = "lightblue",
       main = "Distributions of Correlations (Female)",
       breaks = 10)
dev.off()
```

```{r}

# Test for spearman correlation
male_tibble <- tibble()
for (i in 1:dim(average_per_gene_M)[1]){
  example <- average_per_gene_M[i,] %>% t()
  correlation <- data.frame(abs(cor(example, v1, method = "spearman")))
  male_tibble <- bind_rows(male_tibble, correlation)
}
male_tibble %>% 
  dplyr::rename(abs_corr = abs.cor.example..v1..method....spearman...) %>% 
  pull(abs_corr) %>% 
  hist()

female_tibble <- tibble()
for (i in 1:dim(average_per_gene_F)[1]){
  example <- average_per_gene_F[i,] %>% t()
  correlation <- data.frame(abs(cor(example, v2, method = "spearman")))
  female_tibble <- bind_rows(female_tibble, correlation)
}
female_tibble %>% 
  dplyr::rename(abs_corr = abs.cor.example..v2..method....spearman...) %>% 
  pull(abs_corr) %>% 
  mean()
```

Let's use a more stringent p-value (the adjusted version instead).

```{r}
male_cycling_BH.Q <- male_metaCycle %>% 
  filter(meta2d_BH.Q < 0.05) %>% pull(CycID)
female_cycling_BH.Q <- female_metaCycle %>% 
  filter(meta2d_BH.Q < 0.05) %>% pull(CycID)
# Find the common genes that cycle between males and females
common_cycling_BH.Q <- intersect(male_cycling_BH.Q, female_cycling_BH.Q)

# Filter out only the common_cycling genes in males
cycling_m_BH.Q <- male_counts %>% 
  dplyr::filter(gene %in% common_cycling_BH.Q) %>% 
  column_to_rownames("gene")

average_per_gene_M_BH.Q <- sapply(seq(1, ncol(cycling_m_BH.Q), by = 6), function(start_col) {
  end_col <- min(start_col + 5, ncol(cycling_m))  # Determine the end column for the group
  avg_exprs <- rowSums(cycling_m_BH.Q[, start_col:end_col])/6
  return(avg_exprs)
}) %>% data.frame()

colnames(average_per_gene_M_BH.Q) <- c("ZT3", "ZT7", "ZT11", "ZT15", "ZT19", "ZT23")

# Filter out only the common_cycling genes in females
cycling_f_BH.Q <- female_counts %>% 
  dplyr::filter(gene %in% common_cycling_BH.Q) %>% 
  column_to_rownames("gene")

average_per_gene_F_BH.Q <- sapply(seq(1, ncol(cycling_f_BH.Q), by = 6), function(start_col) {
  end_col <- min(start_col + 5, ncol(cycling_f_BH.Q))  # Determine the end column for the group
  avg_exprs <- rowSums(cycling_f_BH.Q[, start_col:end_col])/6
  return(avg_exprs)
}) %>% data.frame()

colnames(average_per_gene_F_BH.Q) <- c("ZT3", "ZT7", "ZT11", "ZT15", "ZT19", "ZT23")

# Try to recalculate the correlations
# Let's make a for loop to get the correlations
male_tibble_BH.Q <- tibble()
for (i in 1:dim(average_per_gene_M_BH.Q)[1]){
  example <- average_per_gene_M_BH.Q[i,] %>% t()
  correlation <- data.frame(abs(cor(example, v1)))
  male_tibble_BH.Q <- bind_rows(male_tibble_BH.Q, correlation)
}
male_tibble_BH.Q %>% 
  dplyr::rename(abs_corr = abs.cor.example..v1..) %>% 
  pull(abs_corr) %>% 
  mean()

female_tibble_BH.Q <- tibble()
for (i in 1:dim(average_per_gene_F_BH.Q)[1]){
  example <- average_per_gene_F_BH.Q[i,] %>% t()
  correlation <- data.frame(abs(cor(example, v2)))
  female_tibble_BH.Q <- bind_rows(female_tibble_BH.Q, correlation)
}
female_tibble_BH.Q %>% 
  dplyr::rename(abs_corr = abs.cor.example..v2..) %>% 
  pull(abs_corr) %>% 
  mean()

pdf(file = "Correlations_Q0.05_20231107.pdf")
male_tibble_BH.Q %>% 
  dplyr::rename(abs_corr = abs.cor.example..v1..) %>% 
  pull(abs_corr) %>% 
  hist(col = "lightblue",
       main = "Distributions of Correlations (Male)",
       breaks = 10)
female_tibble_BH.Q %>% 
  dplyr::rename(abs_corr = abs.cor.example..v2..) %>% 
  pull(abs_corr) %>% 
  hist(col = "lightblue",
       main = "Distributions of Correlations (Female)",
       breaks = 10)
dev.off()

# Test out for spearman correlations
male_tibble_BH.Q <- tibble()
for (i in 1:dim(average_per_gene_M_BH.Q)[1]){
  example <- average_per_gene_M_BH.Q[i,] %>% t()
  correlation <- data.frame(abs(cor(example, v1, method = "spearman")))
  male_tibble_BH.Q <- bind_rows(male_tibble_BH.Q, correlation)
}
male_tibble_BH.Q %>% 
  dplyr::rename(abs_corr = abs.cor.example..v1..method....spearman...) %>% 
  pull(abs_corr) %>% 
  mean()

female_tibble_BH.Q <- tibble()
for (i in 1:dim(average_per_gene_F_BH.Q)[1]){
  example <- average_per_gene_F_BH.Q[i,] %>% t()
  correlation <- data.frame(abs(cor(example, v2, method = "spearman")))
  female_tibble_BH.Q <- bind_rows(female_tibble_BH.Q, correlation)
}
female_tibble_BH.Q %>% 
  dplyr::rename(abs_corr = abs.cor.example..v2..method....spearman...) %>% 
  pull(abs_corr) %>% 
  mean()


# For curiosity, let's test MvsF
malesVSfemales <- tibble()
for (i in 1:length(common_cycling)){
  males <- average_per_gene_M[i,] %>% t()
  females <- average_per_gene_F[i,] %>% t()
  correlation_value <- data.frame(cor(males, females)) %>% pull()
  correlation <- data.frame(gene = colnames(males),
                            corr = correlation_value)
  malesVSfemales <- bind_rows(malesVSfemales, correlation)
}

malesVSfemales$corr %>% hist(breaks = length(common_cycling))

malesVSfemales_BH.Q <- tibble()
for (i in 1:length(common_cycling_BH.Q)){
  males <- average_per_gene_M_BH.Q[i,] %>% t()
  females <- average_per_gene_F_BH.Q[i,] %>% t()
  correlation_value <- data.frame(cor(males, females)) %>% pull()
  correlation <- data.frame(gene = colnames(males),
                            corr = correlation_value)
  malesVSfemales_BH.Q <- bind_rows(malesVSfemales_BH.Q, correlation)
}

malesVSfemales_BH.Q$corr %>% mean()

```