--- title: "Meta-analysis Testing" output: html_document --- ```{r} # Load required libraries library(SingleCellExperiment) library(Seurat) library(dplyr) library(tidyverse) # BiocManager::install("MetaNeighbor") library(MetaNeighbor) library(magrittr) ``` ```{r eval = FALSE} # Load the Seurat objects hSeurat <- readRDS("/home/youkim/MetaMarkers/4-dataset-test/hSeurat_annotated.rds") vtSeurat <- readRDS("/home/youkim/MetaMarkers/4-dataset-test/vtSeurat2_annotated.rds") lSeurat <- readRDS("/home/youkim/MetaMarkers/4-dataset-test/lSeurat_annotated.rds") sSeurat <- readRDS("/home/youkim/MetaMarkers/4-dataset-test/sSeurat_20230515.rds") aSeurat <- readRDS("/home/youkim/Meta-analysis/Arutyanyan_et_al.2023/aSeurat_analyzed.rds") (aSeurat@assays$RNA$counts != 0) %>% colSums() %>% mean() (vtSeurat@assays$RNA$counts != 0) %>% colSums() %>% mean() ``` ```{r} # Collapse the cell types in Han dataset hmeta_copy <- hSeurat@meta.data %>% select(Annotation) dim(hSeurat@meta.data) # read the cell types data frame h_cell_types <- read.csv("/home/youkim/MetaMarkers/4-dataset-test/Han Cell Types.csv") %>% dplyr::rename(Annotation = Annotations) # use inner_join() on hmeta_copy and h_cell_types df <- inner_join(hmeta_copy, h_cell_types) # Check if the order is maintained summary(df$Annotation == hmeta_copy$Annotation) # add to meta.data hSeurat$merged_annotation <- df$Merged # Collapse the cell types in VT dataset vtmeta_copy <- vtSeurat@meta.data %>% select(Annotation) # Do the same thing for the vtSeurat vt_cell_types <- read.csv("/home/youkim/MetaMarkers/4-dataset-test/VT Cell Types.csv") %>% dplyr::rename(Annotation = Annotations) # use inner_join() on vtmeta_copy and vt_cell_types df_vt <- inner_join(vtmeta_copy, vt_cell_types) # Check if the order is maintained summary(df_vt$Annotation == vtmeta_copy$Annotation) # add to meta.data vtSeurat$merged_annotation <- df_vt$Merged # Collapse the cell types in Liu dataset lmeta_copy <- lSeurat@meta.data %>% select(Annotation) dim(lSeurat@meta.data) # read the cell types data frame l_cell_types <- read.csv("/home/youkim/MetaMarkers/4-dataset-test/Liu Cell Types.csv") %>% dplyr::rename(Annotation = Annotations) # use inner_join() on lmeta_copy and l_cell_types df_l <- inner_join(lmeta_copy, l_cell_types) # Check if the order is maintained summary(df_l$Annotation == lmeta_copy$Annotation) # add to meta.data lSeurat$merged_annotation <- df_l$Merged # Collapse the cell types in Suryawanshi dataset smeta_copy <- sSeurat@meta.data %>% select(CellType) %>% mutate(Annotation = CellType) sSeurat@meta.data$CellType %>% table() dim(lSeurat@meta.data) # read the cell types data frame s_cell_types <- read.csv("/home/youkim/MetaMarkers/4-dataset-test/Suryawanshi Cell Types.csv") %>% dplyr::rename(Annotation = Annotations) # use inner_join() on smeta_copy and s_cell_types df_s <- inner_join(smeta_copy, s_cell_types) # Check if the order is maintained summary(df_s$Annotation == smeta_copy$Annotation) # add to meta.data sSeurat$merged_annotation <- df_s$Merged # Collapse the cell types in Arutyanyan dataset ameta_copy <- aSeurat@meta.data %>% select(cell_type) %>% mutate(Annotation = cell_type) ameta_copy %<>% mutate(Annotation = as.data.frame(Idents(aSeurat))[,1]) # Check if order is preserved summary(ameta_copy$Annotation == Idents(aSeurat)) dim(aSeurat@meta.data) # read the cell types data frame a_cell_types <- read.csv("/home/youkim/Meta-analysis/Arutyanyan_et_al.2023/Arutyanyan Cell Types.csv") %>% dplyr::rename(Annotation = Annotations) # use inner_join() on lmeta_copy and l_cell_types df_a <- inner_join(ameta_copy, a_cell_types) # Check if the order is maintained summary(df_a$Annotation == ameta_copy$Annotation) # add to meta.data aSeurat$merged_annotation <- df_a$Merged aSeurat@meta.data %>% group_by(sample) %>% count() # Change the Idents of the Seurat Objects Idents(object = vtSeurat) <- "merged_annotation" Idents(object = hSeurat) <- "merged_annotation" Idents(object = lSeurat) <- "merged_annotation" Idents(object = sSeurat) <- "merged_annotation" Idents(object = aSeurat) <- "merged_annotation" ``` ```{r eval = FALSE} # Convert to SCE objects hSCE <- as.SingleCellExperiment(hSeurat) vtSCE <- as.SingleCellExperiment(vtSeurat) lSCE <- as.SingleCellExperiment(lSeurat) sSCE <- as.SingleCellExperiment(sSeurat) aSCE <- as.SingleCellExperiment(aSeurat) # Create my_data mydata <- list(hSCE = hSCE, vtSCE = vtSCE, lSCE = lSCE, sSCE = sSCE, aSCE = aSCE) # Check whether the gene information align between the five datasets lapply(mydata, function(x) head(rownames(x), 3)) # cell-type information is labeled identically in all datasets lapply(mydata, function(x) colnames(colData(x))) # add the cell type information onto a new column name called "cell type" mydata$hSCE$"cell type" <- mydata$hSCE$ident mydata$vtSCE$"cell type" <- mydata$vtSCE$ident mydata$lSCE$"cell type" <- mydata$lSCE$ident mydata$sSCE$"cell type" <- mydata$sSCE$ident mydata$shSCE$"cell type" <- mydata$shSCE$ident # Check that count matrices stored in the assays slot, have identical names lapply(mydata, function(x) names(assays(x))) # Create a merged dataset by using mergeSCE, which takes a list of SCE objects # as an input and outputs a single SCE object fused_data <- mergeSCE(mydata) dim(fused_data) dim(hSCE) dim(vtSCE) dim(lSCE) dim(sSCE) dim(aSCE) head(colData(fused_data)) # To avoid having to recreate the merged object, save the R object to a file # by using R's RDS format. saveRDS(fused_data, "merged_5_datasets(20230614).rds") ``` We notice that the fused data contains the cells from both datasets, as the # of columns is the sum of the two datasets. ```{r} # Load RDS file five_datasets <- readRDS("merged_5_datasets(20230614).rds") # To obtain a cursory overview of cell-type composition by study, we cross-tabulate cell-type annotations by study IDs table(five_datasets$merged_annotation, five_datasets$study_id) # It is okay to have identical cell type names at this stage. ``` Since there were some problems in the legends for the heatmap, we use this function. ```{r} my_heatmap <- function (aurocs, cex = 1, margins = c(8, 8), ...) { auroc_cols <- rev((grDevices::colorRampPalette(RColorBrewer::brewer.pal(11, "RdYlBu")))(100)) breaks <- seq(0, 1, length = 101) ordering <- stats::as.dendrogram(orderCellTypes(aurocs)) arg_list <- list(x = aurocs, margins = margins, key = TRUE, keysize = 1, key.xlab = "AUROC", key.title = "", offsetRow = 0.1, offsetCol = 0.1, trace = "none", density.info = "none", Rowv = ordering, Colv = ordering, col = auroc_cols, breaks = breaks, na.color = grDevices::gray(0.95), cexRow = cex, cexCol = cex) additional_args <- list(...) arg_list[names(additional_args)] <- additional_args do.call(gplots::heatmap.2, arg_list) } ``` : Hierarchical cell-type replicability analysis ```{r, fig.height = 8, fig.width = 10} # Picking the highly variable genes global_hvgs <- variableGenes(dat = five_datasets, exp_labels = five_datasets$study_id) length(global_hvgs) # MetaNeighborUS returns a cell-type-by-cell-type matrix containing cell-type similarities. # This fast version should always be used on large datasets but can also be run on smaller datasets. aurocs <- MetaNeighborUS(var_genes = global_hvgs, dat = five_datasets, study_id = five_datasets$study_id, cell_type = five_datasets$merged_annotation, fast_version = TRUE) plotHeatmap(aurocs, labRow = FALSE, labCol = FALSE, ColSideColors = study_colors %>% pull(color), RowSideColors = study_colors %>% pull(color)) # Apparently, we can use this line of code as well. my_heatmap(aurocs, cex = 1, margins = c(10,10), ColSideColors = study_colors %>% pull(color), RowSideColors = study_colors %>% pull(color)) # To identify pairs of replicable cell types, run the following function top_hits <- topHitsByStudy(aurocs, threshold = 0.9) # Using the topHitsByStudy function is more desirable. # top_hits_before <- topHits(aurocs, # dat = four_datasets, # study_id = four_datasets$study_id, # cell_type = four_datasets$"cell type", threshold = 0.9) top_hits %>% filter(Match_type != "Reciprocal_top_hit") # top_hits_before %>% # filter(Match_type != "Reciprocal_top_hit") ``` ```{r fig.height = 8, fig.width = 10} # When there is a clear structure in the data, we can refine AUROCs by # splitting the data. level1_split <- splitClusters(aurocs, k = 2) level1_split first_split <- level1_split[[1]] first_split # Repeat the MetaNeighbor analysis on immune cells only full_labels <- makeClusterName(five_datasets$study_id, five_datasets$merged_annotation) subdata <- five_datasets[, full_labels %in% first_split] dim(subdata) # To focus on variability specific to endocrine cells, re-pick highly variable genes var_genes <- variableGenes(dat = subdata, exp_labels = subdata$study_id) # recompute cell-type similarities and visualize AUROCs aurocs <- MetaNeighborUS(var_genes = var_genes, dat = subdata, fast_version = TRUE, study_id = subdata$study_id, cell_type = subdata$merged_annotation) my_heatmap(aurocs, cex = 0.5, margins = c(10,10)) pdf("all-vs-all_split1(20230614).pdf") my_heatmap(aurocs, label_size = 10, cex = 0.3, margins = c(15,15)) while (!is.null(dev.list())) dev.off() # Continue to zoom in as long as there are at least two cell types per dataset. # Repeat steps 13-16 level2_split <- splitClusters(aurocs, k = 2) level2_split my_split <- level2_split[[1]] my_split subdata <- five_datasets[, full_labels %in% my_split] var_genes <- variableGenes(dat = subdata, exp_labels = subdata$study_id) length(var_genes) aurocs <- MetaNeighborUS(var_genes = var_genes, dat = subdata, fast_version = TRUE, study_id = subdata$study_id, cell_type = subdata$merged_annotation) my_heatmap(aurocs, cex = 1, margins = c(15,15)) # Define the rows to exclude based on conditions rows_to_exclude <- c("hSCE|Neutrophil", "hSCE|Epithelial cell") # Replace with actual row names or indices # Find the row indices that match the conditions rows_to_exclude_indices <- which(rownames(aurocs) %in% rows_to_exclude) # Exclude the identified rows from the matrix aurocs_filtered <- aurocs[-rows_to_exclude_indices,-rows_to_exclude_indices, drop = FALSE] study_colors <- tibble(study = getStudyId(rownames(aurocs_filtered)), cell_type = getCellType(rownames(aurocs_filtered))) study_colors %<>% mutate(color = recode(study, hSCE = "black", vtSCE = "purple", lSCE = "orange", sSCE = "red", aSCE = "green"), color2 = recode(cell_type, `Extravillous trophoblast` = "blue", `Syncytiotrophoblast` = "grey", `Cytotrophoblast` = "brown")) pdf("all-vs-all_split2(20231203).pdf") my_heatmap(aurocs_filtered, cex = 1, margins = c(15,15), ColSideColors = study_colors %>% pull(color), RowSideColors = study_colors %>% pull(color2)) while (!is.null(dev.list())) dev.off() ``` However, if we use one-vs-best AUROCs, MetaNeighbor can automatically identify the two closest matching cell types in each target dataset. ```{r fig.height = 10, fig.width = 10} best_hits <- MetaNeighborUS(var_genes = var_genes, dat = five_datasets, study_id = five_datasets$study_id, cell_type = five_datasets$merged_annotation, fast_version = TRUE, one_vs_best = TRUE, symmetric_output = FALSE) my_heatmap(best_hits, label_size = 1, margins = c(12,12) ) pdf("1-vs-best(20230614).pdf") my_heatmap(best_hits, label_size = 1, cex = 0.5, margins = c(12,12), ColSideColors = study_colors %>% pull(color), RowSideColors = study_colors %>% pull(color)) while (!is.null(dev.list())) dev.off() ``` ```{r} # Extracting cell types as meta-clusters mclusters <- extractMetaClusters(best_hits, threshold = 0.7) mcsummary <- scoreMetaClusters(mclusters, best_hits) # scoreMetaClusters() function provides a good summary of meta-clusters # Visualize the number of robust cell types, the replicability structure # can be summarized as an Upset plot with plotUpset() plotUpset(mclusters) # Save the output as pdf. pdf("meta_clusters.pdf") plotMetaClusters(mclusters, best_hits) while (!is.null(dev.list())) dev.off() ``` ```{r fig.height = 6, fig.width = 8} cluster_graph <- makeClusterGraph(best_hits, low_threshold = 0.3) plotClusterGraph(cluster_graph, five_datasets$study_id, five_datasets$merged_annotation, size_factor = 3) # We can focus on a cluster of interest (coi). # Take a closer look at 'hSCE|Dendritic cell', query its closest neighbors in the graph with extendClusterSet() and then zoom in on its subgraph with subsetClusterGraph() coi <- "hSCE|Extravillous trophoblast" coi <- extendClusterSet(cluster_graph, initial_set = coi, max_neighbor_distance = 2) subgraph <- subsetClusterGraph(cluster_graph, coi) pdf(file = "Extracted_network_20231202.pdf") plotClusterGraph(subgraph, five_datasets$study_id, five_datasets$merged_annotation, size_factor = 4) dev.off() ```