---
title: "Meta-analysis Testing"
output: html_document
---
```{r}
# Load required libraries
library(SingleCellExperiment)
library(Seurat)
library(tidyverse)
library(MetaNeighbor)
# Load the Seurat objects
hSeurat <- readRDS("hSeurat_annotated.rds")
vtSeurat <- readRDS("vtSeurat_annotated.rds")
# Convert to SCE objects
hSCE <- as.SingleCellExperiment(hSeurat)
vtSCE <- as.SingleCellExperiment(vtSeurat)
# Create my_data
mydata <- list(hSCE = hSCE,
vtSCE = vtSCE)
# Check whether the gene information align between the two datasets
lapply(mydata, function(x) head(rownames(x), 3))
# cell-type information is labeled identically in all datasets
lapply(mydata, function(x) colnames(colData(x)))
# add the cell type information onto a new column name called "cell type"
mydata$hSCE$"cell type" <- mydata$hSCE$ident
mydata$vtSCE$"cell type" <- mydata$vtSCE$ident
# Check that count matrices stored in the assays slot, have identical names
lapply(mydata, function(x) names(assays(x)))
# Create a merged dataset by using mergeSCE, which takes a list of SCE objects
# as an input and outputs a single SCE object
fused_data <- mergeSCE(mydata)
dim(fused_data)
dim(hSCE)
dim(vtSCE)
head(colData(fused_data))
# To obtain a cursory overview of cell-type composition by study, we cross-tabulate
# cell-type annotations by study IDs
table(fused_data$"cell type", fused_data$study_id)
# It is okay to have identical cell type names (e.g., ductal/Ductal) at this stage.
# To avoid having to recreate the merged object, save the R object to a file
# by using R's RDS format.
saveRDS(fused_data, "merged_2_datasets.rds")
```
: Hierarchical cell-type replicability analysis
```{r, fig.height = 8, fig.width = 10}
# Load required libraries
two_datasets <- readRDS("merged_2_datasets.rds")
# Picking the highly variable genes
global_hvgs <- variableGenes(dat = two_datasets,
exp_labels = two_datasets$study_id)
length(global_hvgs)
# MetaNeighborUS returns a cell-type-by-cell-type matrix containing cell-type similarities.
# This fast version should always be used on large datasets but can also be run on smaller datasets.
aurocs <- MetaNeighborUS(var_genes = global_hvgs,
dat = two_datasets,
study_id = two_datasets$study_id,
cell_type = two_datasets$"cell type",
fast_version = TRUE)
# Apparently, we can use this line of code as well.
MetaNeighbor::ggPlotHeatmap(aurocs, label_size = 10)
# To identify pairs of replicable cell types, run the following function
top_hits <- topHits(aurocs,
dat = two_datasets,
study_id = two_datasets$study_id,
cell_type = two_datasets$"cell type", threshold = 0.9)
top_hits
# When there is a clear structure in the data, we can refine AUROCs by
# splitting the data.
level1_split <- splitClusters(aurocs, k = 2)
level1_split
first_split <- level1_split[[1]]
first_split
# Repeat the MetaNeighbor analysis on immune cells only
full_labels <- makeClusterName(two_datasets$study_id,
two_datasets$`cell type`)
subdata <- two_datasets[, full_labels %in% first_split]
dim(subdata)
# To focus on variability specific to endocrine cells, re-pick highly variable genes
var_genes <- variableGenes(dat = subdata, exp_labels = subdata$study_id)
# recompute cell-type similarities and visualize AUROCs
aurocs <- MetaNeighborUS(var_genes = var_genes,
dat = subdata, fast_version = TRUE,
study_id = subdata$study_id,
cell_type = subdata$"cell type")
plotHeatmap(aurocs, cex = 0.7)
# Continue to zoom in as long as there are at least two cell types per dataset.
# Repeat steps 13-16
level2_split <- splitClusters(aurocs, k = 3)
level2_split
my_split <- level2_split[[1]]
my_split
subdata <- two_datasets[, full_labels %in% my_split]
var_genes <- variableGenes(dat = subdata, exp_labels = subdata$study_id)
length(var_genes)
aurocs <- MetaNeighborUS(var_genes = var_genes, dat = subdata,
fast_version = TRUE,
study_id = subdata$study_id,
cell_type = subdata$"cell type")
plotHeatmap(aurocs, cex = 1)
```