---
title: "Meta-analysis Testing"
output: html_document
---
```{r}
# Load required libraries
library(SingleCellExperiment)
library(Seurat)
library(dplyr)
library(tidyverse)
# BiocManager::install("MetaNeighbor")
library(MetaNeighbor)
library(magrittr)
```
```{r eval = FALSE}
# Load the Seurat objects
hdataset <- readRDS("/home/youkim/MetaMarkers/4-dataset-test/hSeurat_annotated.rds")
vtdataset <- readRDS("/home/youkim/MetaMarkers/4-dataset-test/vtSeurat2_annotated.rds")
ldataset <- readRDS("/home/youkim/MetaMarkers/4-dataset-test/lSeurat_annotated.rds")
sdataset <- readRDS("/home/youkim/MetaMarkers/4-dataset-test/sSeurat_20230515.rds")
adataset <- readRDS("/home/youkim/Meta-analysis/Arutyanyan_et_al.2023/aSeurat_analyzed.rds")
# Collapse the cell types in Liu dataset
lmeta_copy <- ldataset@meta.data %>%
dplyr::select(Annotation)
dim(ldataset@meta.data)
# read the cell types data frame
l_cell_types <- read.csv("/home/youkim/MetaMarkers/4-dataset-test/Liu Cell Types (T vs NT).csv") %>%
dplyr::rename(Annotation = Annotations)
# use inner_join() on hmeta_copy and h_cell_types
df <- inner_join(lmeta_copy, l_cell_types)
# Check if the order is maintained
summary(df$Annotation == lmeta_copy$Annotation)
# add to meta.data
ldataset$merged_annotation <- df$Merged
# Clear unnecessary data
rm(lmeta_copy)
rm(l_cell_types)
rm(df)
# Collapse the cell types in VT dataset
vtmeta_copy <- vtdataset@meta.data %>%
dplyr::select(Annotation)
# Do the same thing for the vtSeurat
vt_cell_types <- read.csv("/home/youkim/MetaMarkers/4-dataset-test/VT Cell Types (T vs NT).csv") %>%
dplyr::rename(Annotation = Annotations)
# use inner_join() on vtmeta_copy and vt_cell_types
df_vt <- inner_join(vtmeta_copy, vt_cell_types)
# Check if the order is maintained
summary(df_vt$Annotation == vtmeta_copy$Annotation)
# add to meta.data
vtdataset$merged_annotation <- df_vt$Merged
# Clear unnecessary data
rm(vtmeta_copy)
rm(vt_cell_types)
rm(df_vt)
# Collapse the cell types in Suryawanshi dataset
smeta_copy <- sdataset@meta.data %>%
dplyr::select(CellType) %>%
mutate(Annotation = CellType)
sdataset@meta.data$CellType %>%
table()
# read the cell types data frame
s_cell_types <- read.csv("/home/youkim/MetaMarkers/4-dataset-test/Suryawanshi Cell Types (T vs NT).csv") %>%
dplyr::rename(Annotation = Annotations)
# use inner_join() on smeta_copy and s_cell_types
df_s <- inner_join(smeta_copy, s_cell_types)
# Check if the order is maintained
summary(df_s$Annotation == smeta_copy$Annotation)
# add to meta.data
sdataset$merged_annotation <- df_s$Merged
# Remove unnecessary data
rm(smeta_copy)
rm(s_cell_types)
rm(df_s)
# Collapse the cell types in Han dataset
hmeta_copy <- hdataset@meta.data %>%
dplyr::select(Annotation)
dim(hdataset@meta.data)
# read the cell types data frame
h_cell_types <- read.csv("/home/youkim/MetaMarkers/4-dataset-test/Han Cell Types (T vs NT).csv") %>%
dplyr::rename(Annotation = Annotations)
# use inner_join() on hmeta_copy and h_cell_types
df <- inner_join(hmeta_copy, h_cell_types)
# Check if the order is maintained
summary(df$Annotation == hmeta_copy$Annotation)
# add to meta.data
hdataset$merged_annotation <- df$Merged
# Remove unnecessary data
rm(hmeta_copy)
rm(h_cell_types)
rm(df)
# Collapse the cell types in Arutyanyan dataset
ameta_copy <- adataset@meta.data %>%
dplyr::select(cell_type) %>%
mutate(Annotation = cell_type)
ameta_copy %<>%
mutate(Annotation = as.data.frame(Idents(adataset))[,1])
# Check if order is preserved
summary(ameta_copy$Annotation == Idents(adataset))
dim(adataset@meta.data)
# read the cell types data frame
a_cell_types <- read.csv("/home/youkim/Meta-analysis/Arutyanyan_et_al.2023/Arutyanyan Cell Types (T vs NT).csv") %>%
dplyr::rename(Annotation = Annotations)
# use inner_join() on lmeta_copy and l_cell_types
df_a <- inner_join(ameta_copy, a_cell_types)
# Check if the order is maintained
summary(df_a$Annotation == ameta_copy$Annotation)
# add to meta.data
adataset$merged_annotation <- df_a$Merged
# Remove unnecessary data
rm(ameta_copy)
rm(a_cell_types)
rm(df_a)
# Change the Idents of the Seurat Objects
Idents(object = vtdataset) <- "merged_annotation"
Idents(object = hdataset) <- "merged_annotation"
Idents(object = ldataset) <- "merged_annotation"
Idents(object = sdataset) <- "merged_annotation"
Idents(object = adataset) <- "merged_annotation"
```
```{r eval = FALSE}
# Convert to SCE objects
hSCE <- as.SingleCellExperiment(hdataset)
vtSCE <- as.SingleCellExperiment(vtdataset)
lSCE <- as.SingleCellExperiment(ldataset)
sSCE <- as.SingleCellExperiment(sdataset)
aSCE <- as.SingleCellExperiment(adataset)
# Create my_data
mydata <- list(hSCE = hSCE,
vtSCE = vtSCE,
lSCE = lSCE,
sSCE = sSCE,
aSCE = aSCE)
# Check whether the gene information align between the five datasets
lapply(mydata, function(x) head(rownames(x), 3))
# cell-type information is labeled identically in all datasets
lapply(mydata, function(x) colnames(colData(x)))
# add the cell type information onto a new column name called "cell type"
mydata$hSCE$"cell type" <- mydata$hSCE$ident
mydata$vtSCE$"cell type" <- mydata$vtSCE$ident
mydata$lSCE$"cell type" <- mydata$lSCE$ident
mydata$sSCE$"cell type" <- mydata$sSCE$ident
mydata$shSCE$"cell type" <- mydata$shSCE$ident
# Check that count matrices stored in the assays slot, have identical names
lapply(mydata, function(x) names(assays(x)))
# Create a merged dataset by using mergeSCE, which takes a list of SCE objects
# as an input and outputs a single SCE object
fused_data <- mergeSCE(mydata)
dim(fused_data)
# dim(hSCE)
# dim(vtSCE)
# dim(lSCE)
# dim(sSCE)
# dim(aSCE)
head(colData(fused_data))
# To avoid having to recreate the merged object, save the R object to a file
# by using R's RDS format.
# saveRDS(fused_data, "merged_5_datasets(20230614).rds")
```
We notice that the fused data contains the cells from both datasets, as the # of columns is the sum of the two datasets.
```{r}
# Load RDS file
# five_datasets <- readRDS("merged_5_datasets(20230614).rds")
# To obtain a cursory overview of cell-type composition by study, we cross-tabulate cell-type annotations by study IDs
# For this file, we will continue to work with the fused_data object.
copy <- fused_data
table(fused_data$merged_annotation, fused_data$study_id)
# It is okay to have identical cell type names at this stage.
```
Since there were some problems in the legends for the heatmap, we use this function.
```{r}
my_heatmap <- function (aurocs, cex = 1, margins = c(8, 8), ...)
{
auroc_cols <- rev((grDevices::colorRampPalette(RColorBrewer::brewer.pal(11,
"RdYlBu")))(100))
breaks <- seq(0, 1, length = 101)
ordering <- stats::as.dendrogram(orderCellTypes(aurocs))
arg_list <- list(x = aurocs, margins = margins, key = TRUE,
keysize = 1, key.xlab = "AUROC", key.title = "",
offsetRow = 0.1, offsetCol = 0.1, trace = "none", density.info = "none",
Rowv = ordering, Colv = ordering, col = auroc_cols, breaks = breaks,
na.color = grDevices::gray(0.95), cexRow = cex, cexCol = cex)
additional_args <- list(...)
arg_list[names(additional_args)] <- additional_args
do.call(gplots::heatmap.2, arg_list)
}
```
: Hierarchical cell-type replicability analysis
```{r, fig.height = 8, fig.width = 10}
# Picking the highly variable genes
global_hvgs <- variableGenes(dat = fused_data,
exp_labels = fused_data$study_id)
length(global_hvgs)
# MetaNeighborUS returns a cell-type-by-cell-type matrix containing cell-type similarities.
# This fast version should always be used on large datasets but can also be run on smaller datasets.
aurocs <- MetaNeighborUS(var_genes = global_hvgs,
dat = fused_data,
study_id = fused_data$study_id,
cell_type = fused_data$merged_annotation,
fast_version = TRUE)
study_colors <- tibble(study = getStudyId(rownames(aurocs)),
cell_type = getCellType(rownames(aurocs)))
study_colors %<>% mutate(color = recode(study,
hSCE = "black",
vtSCE = "purple",
lSCE = "orange",
sSCE = "red",
aSCE = "green"),
color2 = recode(cell_type,
`Non-Trophoblast` = "blue",
`Trophoblast` = "magenta"))
# Apparently, we can use this line of code as well.
pdf(file = "Trophoblast_vs_NT.pdf")
my_heatmap(aurocs,
cex = 1,
margins = c(10,10),
ColSideColors = study_colors %>% pull(color),
RowSideColors = study_colors %>% pull(color2))
dev.off()
# To identify pairs of replicable cell types, run the following function
top_hits <- topHitsByStudy(aurocs, threshold = 0.9)
# Using the topHitsByStudy function is more desirable.
# top_hits_before <- topHits(aurocs,
# dat = four_datasets,
# study_id = four_datasets$study_id,
# cell_type = four_datasets$"cell type", threshold = 0.9)
top_hits %>%
filter(Match_type != "Reciprocal_top_hit")
# top_hits_before %>%
# filter(Match_type != "Reciprocal_top_hit")
```
```{r fig.height = 8, fig.width = 10}
# When there is a clear structure in the data, we can refine AUROCs by
# splitting the data.
level1_split <- splitClusters(aurocs, k = 2)
level1_split
first_split <- level1_split[[1]]
first_split
# Repeat the MetaNeighbor analysis on immune cells only
full_labels <- makeClusterName(five_datasets$study_id,
five_datasets$merged_annotation)
subdata <- five_datasets[, full_labels %in% first_split]
dim(subdata)
# To focus on variability specific to endocrine cells, re-pick highly variable genes
var_genes <- variableGenes(dat = subdata, exp_labels = subdata$study_id)
# recompute cell-type similarities and visualize AUROCs
aurocs <- MetaNeighborUS(var_genes = var_genes,
dat = subdata, fast_version = TRUE,
study_id = subdata$study_id,
cell_type = subdata$merged_annotation)
my_heatmap(aurocs,
cex = 0.5,
margins = c(10,10))
pdf("all-vs-all_split1(20230614).pdf")
my_heatmap(aurocs,
label_size = 10,
cex = 0.3,
margins = c(15,15))
while (!is.null(dev.list())) dev.off()
# Continue to zoom in as long as there are at least two cell types per dataset.
# Repeat steps 13-16
level2_split <- splitClusters(aurocs, k = 2)
level2_split
my_split <- level2_split[[1]]
my_split
subdata <- five_datasets[, full_labels %in% my_split]
var_genes <- variableGenes(dat = subdata, exp_labels = subdata$study_id)
length(var_genes)
aurocs <- MetaNeighborUS(var_genes = var_genes, dat = subdata,
fast_version = TRUE,
study_id = subdata$study_id,
cell_type = subdata$merged_annotation)
my_heatmap(aurocs,
cex = 1,
margins = c(15,15))
# Define the rows to exclude based on conditions
rows_to_exclude <- c("hSCE|Neutrophil", "hSCE|Epithelial cell") # Replace with actual row names or indices
# Find the row indices that match the conditions
rows_to_exclude_indices <- which(rownames(aurocs) %in% rows_to_exclude)
# Exclude the identified rows from the matrix
aurocs_filtered <- aurocs[-rows_to_exclude_indices,-rows_to_exclude_indices, drop = FALSE]
study_colors <- tibble(study = getStudyId(rownames(aurocs_filtered)),
cell_type = getCellType(rownames(aurocs_filtered)))
study_colors %<>% mutate(color = recode(study,
hSCE = "black",
vtSCE = "purple",
lSCE = "orange",
sSCE = "red",
aSCE = "green"),
color2 = recode(cell_type,
`Extravillous trophoblast` = "green",
`Syncytiotrophoblast` = "blue",
`Cytotrophoblast` = "orange"))
pdf("all-vs-all_split2(20231202).pdf")
my_heatmap(aurocs_filtered,
cex = 1,
margins = c(15,15),
ColSideColors = study_colors %>% pull(color),
RowSideColors = study_colors %>% pull(color2))
while (!is.null(dev.list())) dev.off()
```
However, if we use one-vs-best AUROCs, MetaNeighbor can automatically identify the two closest matching cell types in each target dataset.
```{r fig.height = 10, fig.width = 10}
best_hits <- MetaNeighborUS(var_genes = var_genes,
dat = five_datasets,
study_id = five_datasets$study_id,
cell_type = five_datasets$merged_annotation,
fast_version = TRUE,
one_vs_best = TRUE, symmetric_output = FALSE)
my_heatmap(best_hits,
label_size = 1,
margins = c(12,12)
)
pdf("1-vs-best(20230614).pdf")
my_heatmap(best_hits,
label_size = 1,
cex = 0.5,
margins = c(12,12),
ColSideColors = study_colors %>% pull(color),
RowSideColors = study_colors %>% pull(color))
while (!is.null(dev.list())) dev.off()
```
```{r}
# Extracting cell types as meta-clusters
mclusters <- extractMetaClusters(best_hits, threshold = 0.7)
mcsummary <- scoreMetaClusters(mclusters, best_hits)
# scoreMetaClusters() function provides a good summary of meta-clusters
# Visualize the number of robust cell types, the replicability structure
# can be summarized as an Upset plot with plotUpset()
plotUpset(mclusters)
# Save the output as pdf.
pdf("meta_clusters.pdf")
plotMetaClusters(mclusters, best_hits)
while (!is.null(dev.list())) dev.off()
```
```{r fig.height = 6, fig.width = 8}
cluster_graph <- makeClusterGraph(best_hits, low_threshold = 0.3)
plotClusterGraph(cluster_graph, five_datasets$study_id,
five_datasets$merged_annotation, size_factor = 3)
# We can focus on a cluster of interest (coi).
# Take a closer look at 'hSCE|Dendritic cell', query its closest neighbors in the graph with extendClusterSet() and then zoom in on its subgraph with subsetClusterGraph()
coi <- "hSCE|Extravillous trophoblast"
coi <- extendClusterSet(cluster_graph, initial_set = coi,
max_neighbor_distance = 2)
subgraph <- subsetClusterGraph(cluster_graph, coi)
pdf(file = "Extracted_network_20231202.pdf")
plotClusterGraph(subgraph, five_datasets$study_id,
five_datasets$merged_annotation, size_factor = 4)
dev.off()
```