---
title: "(20231027) Circadian_Rhythms_DE"
output: pdf_document
---

```{r}
# load libraries
library(DESeq2)
library(apeglm)
library(EnhancedVolcano)
library(tidyverse)
library(org.Mm.eg.db)
library(ComplexHeatmap)

# BiocManager::install("DESeq2")
# BiocManager::install("apeglm")
# BiocManager::install("EnhancedVolcano")
```

Also, try to get the correlation between normalized gene exprs data and the myogenic tone data.

```{r}
# Load the filtered, normalized counts
male_counts <- read.csv("DESeq2_normalized_male_counts_20231106.csv")
female_counts <- read.csv("DESeq2_normalized_female_counts_20231106.csv")
  
head(male_counts)
# Remember that these are normalized counts!
# Continue to load the myogenic data
male_myogenic_data <- read.csv("~/Circadian_Rhythms/Myogenic_Data/Final_data_males.csv")
female_myogenic_data <- read.csv("~/Circadian_Rhythms/Myogenic_Data/Final_data_females.csv")
```

We will focus our analysis first on the common rhythmic genes for both males and females, from the MetaCycle results

```{r}
male_metaCycle <- read.csv("~/Circadian_Rhythms/metaout_20231106/meta2d_DESeq2_normalized_male_counts_20231106.csv")
female_metaCycle <- read.csv("~/Circadian_Rhythms/metaout_20231106/meta2d_DESeq2_normalized_female_counts_20231106.csv")

male_cycling <- male_metaCycle %>% 
  filter(meta2d_pvalue < 0.05) %>% pull(CycID)
female_cycling <- female_metaCycle %>% 
  filter(meta2d_pvalue < 0.05) %>% pull(CycID)
# Find the common genes that cycle between males and females
common_cycling <- intersect(male_cycling, female_cycling)

# Filter out only the common_cycling genes in males
cycling_m <- male_counts %>% 
  dplyr::filter(gene %in% common_cycling) %>% 
  column_to_rownames("gene")

average_per_gene_M <- sapply(seq(1, ncol(cycling_m), by = 6), function(start_col) {
  end_col <- min(start_col + 5, ncol(cycling_m))  # Determine the end column for the group
  avg_exprs <- rowSums(cycling_m[, start_col:end_col])/6
  return(avg_exprs)
}) %>% data.frame()

colnames(average_per_gene_M) <- c("ZT3", "ZT7", "ZT11", "ZT15", "ZT19", "ZT23")

# Filter out only the common_cycling genes in females
cycling_f <- female_counts %>% 
  dplyr::filter(gene %in% common_cycling) %>% 
  column_to_rownames("gene")

average_per_gene_F <- sapply(seq(1, ncol(cycling_f), by = 6), function(start_col) {
  end_col <- min(start_col + 5, ncol(cycling_f))  # Determine the end column for the group
  avg_exprs <- rowSums(cycling_f[, start_col:end_col])/6
  return(avg_exprs)
}) %>% data.frame()

colnames(average_per_gene_F) <- c("ZT3", "ZT7", "ZT11", "ZT15", "ZT19", "ZT23")


```

Now we also need to do some data wrangling for the myogenic data too.

```{r}
# Overview of the data, and filter out the Myo60 data
male_myogenic_data %>% head()
female_myogenic_data %>% head()

M_Myo60 <- male_myogenic_data %>% 
  filter(ID == "M Myo 60") %>% 
  dplyr::select(-ID)

# Assuming df is your data frame
# Define the list of group sizes
group_sizes <- c(4, 7, 6, 4, 5, 5)

# Initialize an empty list to store group means
group_means_list <- list()

# Track the starting column index
start_col <- 1

# Loop through each group size
for (group_size in group_sizes) {
  end_col <- min(start_col + group_size - 1, ncol(M_Myo60))  # Determine the end column for the group
  averages <- rowSums(M_Myo60[, start_col:end_col])/group_size  # Calculate the mean for the current group
  group_means_list[[length(group_means_list) + 1]] <- averages  # Store the mean in the list
  start_col <- end_col + 1  # Move to the next group
}

# Convert the list of group means to a data frame
averages_df_m <- data.frame(do.call(cbind, group_means_list))
colnames(averages_df_m) <- c("ZT3", "ZT7", "ZT11", "ZT15", "ZT19", "ZT23")


F_Myo60 <- female_myogenic_data %>% 
  filter(ID == "F Myo 60") %>% 
  dplyr::select(-ID)

# Assuming df is your data frame
# Define the list of group sizes
group_sizes <- c(4, 7, 5, 6, 8, 6)

# Initialize an empty list to store group means
group_means_list <- list()

# Track the starting column index
start_col <- 1

# Loop through each group size
for (group_size in group_sizes) {
  end_col <- min(start_col + group_size - 1, ncol(F_Myo60))  # Determine the end column for the group
  averages <- rowSums(F_Myo60[, start_col:end_col])/group_size  # Calculate the mean for the current group
  group_means_list[[length(group_means_list) + 1]] <- averages  # Store the mean in the list
  start_col <- end_col + 1  # Move to the next group
}

# Convert the list of group means to a data frame
averages_df_f <- data.frame(do.call(cbind, group_means_list))
colnames(averages_df_f) <- c("ZT3", "ZT7", "ZT11", "ZT15", "ZT19", "ZT23")

v1 <- averages_df_m %>% t()
v2 <- averages_df_f %>% t()
cor(v1,v2)
```

At this point, we can calculate the correlations between male or female myogenic data and the expression data for each rhythmic gene.

```{r}
# Take an example of one gene for males
example <- average_per_gene_M[6,] %>% 
  t()
cor(example, v1)

# Let's make a for loop to get the correlations
male_tibble <- tibble()
for (i in 1:dim(average_per_gene_M)[1]){
  example <- average_per_gene_M[i,] %>% t()
  gene <- rownames(average_per_gene_M)[i]
  correlation <- data.frame(gene, abs(cor(example, v1)))
  male_tibble <- bind_rows(male_tibble, correlation)
}
male_tibble %>% 
  dplyr::rename(abs_corr = abs.cor.example..v1..) %>% 
  pull(abs_corr) %>% 
  mean()

female_tibble <- tibble()
for (i in 1:dim(average_per_gene_F)[1]){
  example <- average_per_gene_F[i,] %>% t()
  gene <- rownames(average_per_gene_F)[i]
  correlation <- data.frame(abs(cor(example, v2)))
  female_tibble <- bind_rows(female_tibble, correlation)
}
female_tibble %>% 
  dplyr::rename(abs_corr = abs.cor.example..v2..) %>% 
  pull(abs_corr) %>% 
  mean()

pdf(file = "Correlations_p0.05_20231107.pdf")
male_tibble %>% 
  dplyr::rename(abs_corr = abs.cor.example..v1..) %>% 
  pull(abs_corr) %>% 
  hist(col = "lightblue",
       main = "Distributions of Correlations (Male)",
       breaks = 10)
 female_tibble %>% 
  dplyr::rename(abs_corr = abs.cor.example..v2..) %>% 
  pull(abs_corr) %>% 
  hist(col = "lightblue",
       main = "Distributions of Correlations (Female)",
       breaks = 10)
dev.off()
```

```{r}

# Test for spearman correlation
male_tibble <- tibble()
for (i in 1:dim(average_per_gene_M)[1]){
  example <- average_per_gene_M[i,] %>% t()
  gene <- rownames(average_per_gene_M)[i]
  correlation <- data.frame(gene, abs(cor(example, v1, method = "spearman")))
  male_tibble <- bind_rows(male_tibble, correlation)
}
male_tibble %>% 
  dplyr::rename(abs_corr = abs.cor.example..v1..method....spearman...) %>% 
  pull(abs_corr) %>% 
  hist()

female_tibble <- tibble()
for (i in 1:dim(average_per_gene_F)[1]){
  example <- average_per_gene_F[i,] %>% t()
  gene <- rownames(average_per_gene_M)[i]
  correlation <- data.frame(gene, abs(cor(example, v2, method = "spearman")))
  female_tibble <- bind_rows(female_tibble, correlation)
}
female_tibble %>% 
  dplyr::rename(abs_corr = abs.cor.example..v2..method....spearman...) %>% 
  pull(abs_corr) %>% 
  hist()
```

Let's use a more stringent p-value (the adjusted version instead).

```{r}
male_cycling_BH.Q <- male_metaCycle %>% 
  filter(meta2d_BH.Q < 0.05) %>% pull(CycID)
female_cycling_BH.Q <- female_metaCycle %>% 
  filter(meta2d_BH.Q < 0.05) %>% pull(CycID)
# Find the common genes that cycle between males and females
common_cycling_BH.Q <- intersect(male_cycling_BH.Q, female_cycling_BH.Q)

# Filter out only the common_cycling genes in males
cycling_m_BH.Q <- male_counts %>% 
  dplyr::filter(gene %in% common_cycling_BH.Q) %>% 
  column_to_rownames("gene")

average_per_gene_M_BH.Q <- sapply(seq(1, ncol(cycling_m_BH.Q), by = 6), function(start_col) {
  end_col <- min(start_col + 5, ncol(cycling_m_BH.Q))  # Determine the end column for the group
  avg_exprs <- rowSums(cycling_m_BH.Q[, start_col:end_col])/6
  return(avg_exprs)
}) %>% data.frame()

colnames(average_per_gene_M_BH.Q) <- c("ZT3", "ZT7", "ZT11", "ZT15", "ZT19", "ZT23")

# Filter out only the common_cycling genes in females
cycling_f_BH.Q <- female_counts %>% 
  dplyr::filter(gene %in% common_cycling_BH.Q) %>% 
  column_to_rownames("gene")

average_per_gene_F_BH.Q <- sapply(seq(1, ncol(cycling_f_BH.Q), by = 6), function(start_col) {
  end_col <- min(start_col + 5, ncol(cycling_f_BH.Q))  # Determine the end column for the group
  avg_exprs <- rowSums(cycling_f_BH.Q[, start_col:end_col])/6
  return(avg_exprs)
}) %>% data.frame()

colnames(average_per_gene_F_BH.Q) <- c("ZT3", "ZT7", "ZT11", "ZT15", "ZT19", "ZT23")

a <- male_metaCycle %>% 
  filter(CycID %in% common_cycling_BH.Q) %>% 
  dplyr::select(CycID, meta2d_BH.Q, meta2d_period, meta2d_phase, meta2d_rAMP) %>% 
  dplyr::rename(m.p_value = meta2d_BH.Q, m.period = meta2d_period, m.phase = meta2d_phase, m.amplitude = meta2d_rAMP)
b <- female_metaCycle %>% 
  filter(CycID %in% common_cycling_BH.Q) %>% 
  dplyr::select(CycID, meta2d_BH.Q, meta2d_period, meta2d_phase, meta2d_rAMP) %>% 
  dplyr::rename(f.p_value = meta2d_BH.Q, f.period = meta2d_period, f.phase = meta2d_phase, f.amplitude = meta2d_rAMP)
common_genes_BH <- inner_join(a, b, by = "CycID")
```

Before we run dryR, let's select out the genes that have similar rhythmicity between males and females

```{r}
common_genes_BH <- common_genes_BH %>% 
  mutate(diff_amp = abs(m.amplitude - f.amplitude)) %>% 
  # I converted the phase back to polar coordinates since the period of the same gene can be different between males and females. 
  mutate(m.polar = 2*pi*m.phase/m.period) %>% 
  mutate(f.polar = 2*pi*f.phase/f.period) %>% 
  mutate(diff_polar = abs(m.polar - f.polar) / (2 * pi))

# Generate the numbers for the Venn diagram
# Left circle will be genes that have difference in amplitude > 0.1
# Right circle will be genes that have difference in polar coordinates > 0.1
common_genes_BH %>% 
  filter(diff_amp > 0.1) %>% 
  pull(CycID) %>% 
  length()

common_genes_BH %>% 
  filter(diff_polar > 0.1 & diff_polar < 0.9)

common_genes_BH %>% 
  filter(diff_polar > 0.1 & diff_polar < 0.9) %>% 
  filter(diff_amp > 0.1)

# Let's save common_genes_BH as another .csv file to make the analysis easier.
# write.csv(common_genes_BH, "common_genes_BH_corrected_20231213.csv")

common_genes_BH <- read.csv("common_genes_BH_corrected_20231213.csv")
a <- common_genes_BH %>% 
  filter(diff_amp > 0.1)
b <- common_genes_BH %>% 
  filter(diff_polar > 0.1 & diff_polar < 0.9) 

dplyr::union(a,b) %>% 
  write_csv("common_genes_BH_model5_20231214.csv")
write.csv(a, "diff_amp_20231214.csv")
write.csv(b, "diff_polar_20231214.csv")

42 + 79 - 13
intersect(a$CycID, b$CycID)

pdf(file = "Histograms_MetaCycle_20231210.pdf")
common_genes_BH %>% 
  pull(diff_amp) %>% 
  hist(col = "lightblue",
       main = "Histogram of diff_rAMP of Common Cycling Genes",
       breaks = 50)

common_genes_BH %>% 
  pull(diff_polar) %>% 
  hist(col = "lightblue",
       main = "Histogram of diff_polar of Common Cycling Genes",
       breaks = 100)
dev.off()
```


```{r}
# Let's implement dryR and check if we get the same results

# devtools::install_github("naef-lab/dryR")
library(dryR)
?dryseq
# The following is simulated data
countData = simData[["countData"]]
group     = simData[["group"]]
time      = simData[["time"]]

# Let's generate the same formatted datasets
# Recall that we have saved count matrix with the important sample info as colnames
seq_countData <- read.csv("Labelled_counts.csv")
gene_symbols <- read.csv("mm_gene_symbols.csv")
seq_countData <- seq_countData %>% 
  column_to_rownames("X")
dim(seq_countData)

# The code below is for data wrangling!
sample_info <- colnames(seq_countData)
group_mydata <- c()
time_mydata <- c()
for(i in 1:72){
  a <- strsplit(sample_info, "_")[[i]]
  a_1 <- a[1]
  a_2 <- a[2] %>% 
    substr(start = 3, stop = 4)
  group_mydata <- append(group_mydata, a_1)
  time_mydata <- append(time_mydata, a_2)
}

length(group)
class(time)
length(group_mydata)

time_mydata <- as.numeric(time_mydata)
class(time_mydata)

# prefilter
keep <- rowSums(countData) > 0
countData <- countData[keep,]

# Do the same for our data
keep_mydata <- rowSums(seq_countData) > 0
seq_countData <- seq_countData[keep_mydata,]

```


(20231114) At this point, let's check for the genes that have correlation > 0.7 for both males and females.

```{r}

# Get a newer version where we get the raw values, not the absolute values

male_tibble_BH.Q <- tibble()
for (i in 1:dim(average_per_gene_M_BH.Q)[1]){
  example <- average_per_gene_M_BH.Q[i,] %>% t()
  gene <- rownames(average_per_gene_M_BH.Q)[i]
  correlation <- data.frame(gene, cor(example, v1))
  male_tibble_BH.Q <- bind_rows(male_tibble_BH.Q, correlation)
}
male_tibble_BH.Q <- male_tibble_BH.Q %>% 
  dplyr::rename(male_corr = cor.example..v1.)

male_tibble_BH.Q$male_corr %>% 
  hist()

female_tibble_BH.Q <- tibble()
for (i in 1:dim(average_per_gene_F_BH.Q)[1]){
  example <- average_per_gene_F_BH.Q[i,] %>% t()
  gene <- rownames(average_per_gene_F_BH.Q)[i]
  correlation <- data.frame(gene, cor(example, v2))
  female_tibble_BH.Q <- bind_rows(female_tibble_BH.Q, correlation)
}
female_tibble_BH.Q <- female_tibble_BH.Q %>% 
  dplyr::rename(female_corr = cor.example..v2.)

female_tibble_BH.Q$female_corr %>% 
  hist()

male_tibble_BH.Q %>% 
  data.frame() %>% 
  write.csv("male_raw_correlations.csv")
female_tibble_BH.Q %>%
  data.frame() %>% 
  write.csv("female_raw_correlations.csv")

dim(female_tibble_BH.Q)
dim(female_tibble_BH.Q)

highCorr_M %>%
  data.frame() %>% 
  write.csv("Correlations>0.7_Male.csv")
highCorr_F %>%
  data.frame() %>% 
  write.csv("Correlations>0.7_Female.csv")


```

(20231121) Let's try to get the correlations between the expression data and myogenic data for all genes

```{r}
# Check the columns of male_counts
colnames(male_counts)

rownames(male_counts) <- NULL
male_counts <- male_counts %>% 
  column_to_rownames("gene")

average_per_gene_M_all <- sapply(seq(1, ncol(male_counts), by = 6), function(start_col) {
  end_col <- min(start_col + 5, ncol(male_counts))  # Determine the end column for the group
  avg_exprs <- rowSums(male_counts[, start_col:end_col])/6
  return(avg_exprs)
}) %>% data.frame()

dim(average_per_gene_M_all)

colnames(average_per_gene_M_all) <- c("ZT3", "ZT7", "ZT11", "ZT15", "ZT19", "ZT23")

# Perform the same thing for the females
colnames(female_counts)

rownames(female_counts) <- NULL
female_counts <- female_counts %>% 
  column_to_rownames("gene")
average_per_gene_F_all <- sapply(seq(1, ncol(female_counts), by = 6), function(start_col) {
  end_col <- min(start_col + 5, ncol(female_counts))  # Determine the end column for the group
  avg_exprs <- rowSums(female_counts[, start_col:end_col])/6
  return(avg_exprs)
}) %>% data.frame()

dim(average_per_gene_F_all)

colnames(average_per_gene_F_all) <- c("ZT3", "ZT7", "ZT11", "ZT15", "ZT19", "ZT23")
```

Similarly, we create the for-loop to get the correlations between the two types of data for each gene.

```{r}
# Let's make a for loop to get the correlations
male_tibble <- tibble()
for (i in 1:dim(average_per_gene_M_all)[1]){
  example <- average_per_gene_M_all[i,] %>% t()
  gene <- rownames(average_per_gene_M_all)[i]
  correlation <- data.frame(gene, cor(example, v1))
  male_tibble <- bind_rows(male_tibble, correlation)
}
male_tibble <- male_tibble %>% 
  dplyr::rename(male_corr = cor.example..v1.)

female_tibble <- tibble()
for (i in 1:dim(average_per_gene_F_all)[1]){
  example <- average_per_gene_F_all[i,] %>% t()
  gene <- rownames(average_per_gene_F_all)[i]
  correlation <- data.frame(gene, cor(example, v2))
  female_tibble <- bind_rows(female_tibble, correlation)
}
female_tibble <- female_tibble %>% 
  dplyr::rename(female_corr = cor.example..v2.) 

# Save the correlations histograms
pdf(file = "Correlations_all_genes_20231121.pdf")
male_tibble %>% 
  dplyr::rename(male_corr = male_corr) %>% 
  pull(male_corr) %>% 
  hist(col = "lightblue",
       main = "Distributions of Correlations (Male)",
       breaks = 10)
female_tibble %>% 
  dplyr::rename(female_corr = female_corr) %>% 
  pull(female_corr) %>% 
  hist(col = "lightblue",
       main = "Distributions of Correlations (Female)",
       breaks = 10)
dev.off()
```

Let's also perform Fisher's transformation at this step

```{r}
#install.packages("DescTools")
library(DescTools)
fisherZ_male <- FisherZ(male_tibble$male_corr)

male_tibble <- male_tibble %>% 
  mutate(After_FT = FisherZ(male_tibble$male_corr)) %>% 
  mutate(p_value = 2 * (1 - pnorm(abs(After_FT)))) %>% 
  mutate(adjusted_p_fdr = p.adjust(p_value, method = "fdr"))

# compare results with another library that also has fisher's transformation function
# install.packages("psych")
library(psych)

male_tibble <- male_tibble %>% 
  mutate(After_FT_2 = psych::fisherz(male_tibble$male_corr))
all(male_tibble$After_FT == male_tibble$After_FT_2)

female_tibble <- female_tibble %>% 
  mutate(After_FT = FisherZ(female_tibble$female_corr)) %>% 
  mutate(p_value = 2 * (1 - pnorm(abs(After_FT)))) %>% 
  mutate(adjusted_p_fdr = p.adjust(p_value, method = "fdr"))

# No genes remain after adjusting p-values for multiple hypothesis correction
afterFT_male_adj <- male_tibble[male_tibble$adjusted_p_fdr < 0.05,] %>% pull(gene)
afterFT_female_adj <- female_tibble[female_tibble$adjusted_p_fdr < 0.05,] %>% pull(gene)

# No genes remain after adjusting p-values for multiple hypothesis correction
afterFT_male <- male_tibble[male_tibble$p_value < 0.05,] %>% nrow()
afterFT_female <- female_tibble[female_tibble$p_value < 0.05,] %>% nrow()

# Check for the distributions of adjusted_p_fdr, and p-values
pdf(file = "Correlations_p-values_20240105.pdf")
male_tibble$p_value %>% 
  hist(col = "lightblue",
       main = "Distributions of p-values (Male)",
       breaks = 20)
female_tibble$p_value %>% 
  hist(col = "lightblue",
       main = "Distributions of p-values (Female)",
       breaks = 20)
dev.off()
```

Let's revisit the volcano plots again.
This time, we will remove the Y chromosome genes to start off.

```{r}
# Let's recall our dds object
dds <- readRDS("DESeq2_object_20231106.rds")
# check results
resultsNames(dds)
res <- results(dds, name = "sex_male_vs_female")

# To remove Y chromosome genes, load the mouse gtf file, containing information on each transcript
gtf_file <- "/home/youkim/Genome_indexing/Mus_musculus.GRCm39.110.gtf"
txdb <- makeTxDbFromGFF(gtf_file, format = "gtf")

# Select out the Y chromosome
chromosome_name <- "Y"

#Extract the Y chromosome Ensembl IDs
genes_on_Y <- genes(txdb, filter = list(tx_chrom = "Y"))
ensembl_gene_ids <- genes_on_Y@ranges@NAMES

# This function checks for the gene symbols that match the particular ENSEMBL ID.
gene_symbols <- mapIds(org.Mm.eg.db, 
                       keys = ensembl_gene_ids , 
                       column = "SYMBOL",
                       keytype = "ENSEMBL")

# how many gene symbols?
length(gene_symbols)

gene_symbols %<>% 
  as_tibble(rownames = "ENSEMBL") %>% 
  dplyr::rename(gene = value)

gene_symbols %>% 
  group_by(gene) %>% 
  dplyr::count() %>% 
  filter(n != 1)

# Filter out the genes where it doesn't have a gene symbol
gene_symbols %<>% dplyr::filter(!is.na(gene))

# Save the genes that are NAs (invalid since we already filtered them out)
# gene_symbols_NA <- gene_symbols %>% dplyr::filter(is.na(gene))

#check for duplicate ENSEMBL IDs
gene_symbols %>% group_by(ENSEMBL) %>% dplyr::count() %>% filter(n != 1)

#check for duplicate gene symbols
gene_symbols %>% group_by(gene) %>% dplyr::count() %>% filter(n != 1)
dim(gene_symbols)

# Now remove the genes from our results that are located in the Y chromosome.
Y_genes <- intersect(gene_symbols$gene, rownames(res))
res <- res[!(rownames(res) %in% Y_genes),]
plotMA(res, ylim = c(-2,2))

male_counts["Prl",]
female_counts["Prl",]

res_filtered <- res[!(rownames(res) %in% c("Xist")),]

pdf(file = "Volcanoplot_20240105.pdf",
    height = 8,
    width = 8)
EnhancedVolcano(res,
                lab = rownames(res),
                x = "log2FoldChange",
                pCutoff = 1.3,
                FCcutoff = 1.0,
                y= "pvalue"
                )
EnhancedVolcano(res_filtered,
                lab = rownames(res_filtered),
                x = "log2FoldChange",
                pCutoff = 1.3,
                FCcutoff = 1.0,
                y= "pvalue"
                )
dev.off()
```

At this point, we want to get the heatmaps for Model 2 to 5 genes.
We will extract and rename the normalized expression matrix for each model.

```{r}
# Recall the male_cycling_BH.Q object
length(male_cycling_BH.Q)
length(female_cycling_BH.Q)

# Define function
averaging <- function(model2_m){
  a <- sapply(seq(1, ncol(model2_m), by = 6), function(start_col) {
  end_col <- min(start_col + 5, ncol(model2_m))  # Determine the end column for the group
  avg_exprs <- rowSums(model2_m[, start_col:end_col])/6
  return(avg_exprs)
}) %>% data.frame()
  colnames(a) <- c("ZT3", "ZT7", "ZT11", "ZT15", "ZT19", "ZT23")
  a
}

# Define the objects
model2_genes <- setdiff(male_cycling_BH.Q, female_cycling_BH.Q)
model2_m <- male_counts[model2_genes,]
model2_m <- averaging(model2_m)
model2_m <- scale(t(model2_m)) %>% t()
model2_m_res <- Heatmap(model2_m,
                        cluster_rows = TRUE,
                        cluster_columns = FALSE,
                        row_names_gp = gpar(fontsize = 6),
                        name = "male_expression")

model2_f <- female_counts[model2_genes,]
model2_f <- averaging(model2_f)
model2_f <- scale(t(model2_f)) %>% t()
model2_f_res <- Heatmap(model2_f,
                        cluster_rows = TRUE,  # Cluster rows (genes) if needed
                        cluster_columns = FALSE,
                        row_names_gp = gpar(fontsize = 6),
                        name = "female_expression")

m2_hm_list <- model2_m_res + model2_f_res

# Display both heatmaps side by side
pdf("Model2_heatmaps.pdf", height = 15, width = 12)
draw(m2_hm_list,
     column_title = "Model 2 Genes, Males vs Females", 
     column_title_gp = gpar(fontsize = 16))
dev.off()

```

Perform the same task for other models

```{r}
model3_genes <- setdiff(female_cycling_BH.Q, male_cycling_BH.Q)
model3_m <- male_counts[model3_genes,]
model3_m <- averaging(model3_m)
model3_m <- scale(t(model3_m)) %>% t()
model3_m_res <- Heatmap(model3_m,
                        cluster_rows = TRUE,
                        cluster_columns = FALSE,
                        row_names_gp = gpar(fontsize = 6),
                        name = "male_expression")

model3_f <- female_counts[model3_genes,]
model3_f <- averaging(model3_f)
model3_f <- scale(t(model3_f)) %>% t()
model3_f_res <- Heatmap(model3_f,
                        cluster_rows = TRUE,  # Cluster rows (genes) if needed
                        cluster_columns = FALSE,
                        row_names_gp = gpar(fontsize = 6),
                        name = "female_expression")

m3_hm_list <- model3_m_res + model3_f_res

# Display both heatmaps side by side
pdf("Model3_heatmaps.pdf", height = 15, width = 12)
draw(m3_hm_list,
     column_title = "Model 3 Genes, Males vs Females", 
     column_title_gp = gpar(fontsize = 16),
     main_heatmap = "female_expression",
     row_dend_side = "right")
dev.off()
```

```{r}
model4_genes <- common_cycling_BH.Q
model4_m <- male_counts[model4_genes,]
model4_m <- averaging(model4_m)
model4_m <- scale(t(model4_m)) %>% t()
model4_m_res <- Heatmap(model4_m,
                        cluster_rows = TRUE,
                        cluster_columns = FALSE,
                        row_names_gp = gpar(fontsize = 5),
                        name = "male_expression")

model4_f <- female_counts[model4_genes,]
model4_f <- averaging(model4_f)
model4_f <- scale(t(model4_f)) %>% t()
model4_f_res <- Heatmap(model4_f,
                        cluster_rows = TRUE,  # Cluster rows (genes) if needed
                        cluster_columns = FALSE,
                        row_names_gp = gpar(fontsize = 5),
                        name = "female_expression")

m4_hm_list <- model4_m_res + model4_f_res

# Display both heatmaps side by side
pdf("Model4_heatmaps.pdf", height = 15, width = 12)
draw(m4_hm_list,
     column_title = "Model 4 Genes, Males vs Females", 
     column_title_gp = gpar(fontsize = 16))
dev.off()

model5_genes <- read_csv("common_genes_BH_model5_20231214.csv")
model5_genes <- model5_genes$CycID
model5_m <- male_counts[model5_genes,]
model5_m <- averaging(model5_m)
model5_m <- scale(t(model5_m)) %>% t()
model5_m_res <- Heatmap(model5_m,
                        cluster_rows = TRUE,
                        cluster_columns = FALSE,
                        row_names_gp = gpar(fontsize = 5),
                        name = "male_expression")

model5_f <- female_counts[model5_genes,]
model5_f <- averaging(model5_f)
model5_f <- scale(t(model5_f)) %>% t()
model5_f_res <- Heatmap(model5_f,
                        cluster_rows = TRUE,  # Cluster rows (genes) if needed
                        cluster_columns = FALSE,
                        row_names_gp = gpar(fontsize = 5),
                        name = "female_expression")

m5_hm_list <- model5_m_res + model5_f_res
pdf("Model5_heatmaps.pdf", height = 15, width = 12)
draw(m5_hm_list,
     column_title = "Model 5 Genes, Males vs Females", 
     column_title_gp = gpar(fontsize = 16))
dev.off()

```