---
title: "Integrated_Single_Cell"
output: html_document
---

The purpose of integrating the four single-cell studies is to get the trophoblast markers from the initial single cell data, not by running Metamarkers from the markers derived from the four individual datasets.

We will adhere to the pipleine written in the Seurat website, using FindIntegrationAnchors(). For our first step, we will load the required libraries and upload the Seurat objects.

```{r}
# Load libraries
library(tidyverse)
library(Seurat)
library(SeuratDisk)
library(patchwork)
library(SeuratData)

```

```{r}
InstallData("ifnb")
```
```{r}
# load dataset
vtdataset <-  readRDS("vtSeurat2_annotated.rds")
sdataset <-  readRDS("sSeurat2_annotated.rds")
ldataset <- readRDS("lSeurat_annotated.rds")
hdataset <- readRDS("hSeurat_annotated.rds")

vtdataset@meta.data$dataset <- "VT"
sdataset@meta.data$dataset <- "S"
ldataset@meta.data$dataset <- "L"
hdataset@meta.data$dataset <- "H"

# Collapse the cell types in Liu dataset
lmeta_copy <- ldataset@meta.data %>% 
  select(Annotation)

dim(ldataset@meta.data)

# read the cell types data frame
l_cell_types <- read.csv("Liu Cell Types (T vs NT).csv") %>% 
  rename(Annotation = Annotations)

# use inner_join() on hmeta_copy and h_cell_types
df <- inner_join(lmeta_copy, l_cell_types)

# Check if the order is maintained
summary(df$Annotation == lmeta_copy$Annotation)

# add to meta.data
ldataset$merged_annotation <- df$Merged
```

```{r}
# Collapse the cell types in VT dataset
vtmeta_copy <- vtdataset@meta.data %>% 
  select(Annotation)

# Do the same thing for the vtSeurat
vt_cell_types <- read.csv("VT Cell Types (T vs NT).csv") %>% 
  rename(Annotation = Annotations)

# use inner_join() on vtmeta_copy and vt_cell_types
df_vt <- inner_join(vtmeta_copy, vt_cell_types)

# Check if the order is maintained
summary(df_vt$Annotation == vtmeta_copy$Annotation)

# add to meta.data
vtdataset$merged_annotation <- df_vt$Merged
```


```{r}
# Collapse the cell types in Suryawanshi dataset
smeta_copy <- sdataset@meta.data %>% 
  select(CellType) %>% 
  mutate(Annotation = CellType)

sdataset@meta.data$CellType %>% 
  table()

# read the cell types data frame
s_cell_types <- read.csv("Suryawanshi Cell Types (T vs NT).csv") %>% 
  rename(Annotation = Annotations)

# use inner_join() on smeta_copy and s_cell_types
df_s <- inner_join(smeta_copy, s_cell_types)

# Check if the order is maintained
summary(df_s$Annotation == smeta_copy$Annotation)

# add to meta.data
sdataset$merged_annotation <- df_s$Merged
```

```{r}
# Collapse the cell types in Han dataset
hmeta_copy <- hdataset@meta.data %>% 
  select(Annotation)

dim(hdataset@meta.data)

# read the cell types data frame
h_cell_types <- read.csv("Han Cell Types (T vs NT).csv") %>% 
  rename(Annotation = Annotations)

# use inner_join() on hmeta_copy and h_cell_types
df <- inner_join(hmeta_copy, h_cell_types)

# Check if the order is maintained
summary(df$Annotation == hmeta_copy$Annotation)

# add to meta.data
hdataset$merged_annotation <- df$Merged
```


```{r}
# Create a list of the single cell datasets
plac.list <- list(vtdataset,
                  sdataset,
                  ldataset,
                  hdataset)

# normalize and identify variable features for each dataset independently
plac.list <- lapply(X = plac.list, FUN = function(x) {
    x <- NormalizeData(x)
    x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 2000)
})

# select features that are repeatedly variable across datasets for integration
features <- SelectIntegrationFeatures(object.list = plac.list)
```

We then identify anchors using the FindIntegrationAnchors() function, which takes a list of Seurat objects as input, and use these anchors to integrate the two datasets together with IntegrateData().

```{r}
plac.anchors <- FindIntegrationAnchors(object.list = plac.list,
                                         anchor.features = features)
# this command creates an 'integrated' data assay
plac.combined <- IntegrateData(anchorset = plac.anchors)
"dataset" %in% (plac.combined@meta.data %>% colnames())
```

Now we can run a single integrated analysis on all cells!

```{r}
# specify that we will perform downstream analysis on the corrected data note that the
# original unmodified data still resides in the 'RNA' assay
DefaultAssay(plac.combined) <- "integrated"
Idents(object = plac.combined) <- "merged_annotation"

# Run the standard workflow for visualization and clustering
plac.combined <- ScaleData(plac.combined, verbose = FALSE)
plac.combined <- RunPCA(plac.combined, npcs = 30, verbose = FALSE)
plac.combined <- RunUMAP(plac.combined, reduction = "pca", dims = 1:30)
plac.combined <- FindNeighbors(plac.combined, reduction = "pca", dims = 1:30)
plac.combined <- FindClusters(plac.combined, resolution = 0.5)

# Visualization
p1 <- DimPlot(plac.combined, reduction = "umap", group.by = "dataset")
p2 <- DimPlot(plac.combined, reduction = "umap", label = TRUE, repel = TRUE)
p1 + p2
```

Now, based on the .Rmd file for MetaMarkers, let's derive the marker set for trophoblasts from our integrated scRNA-seq data.
```{r}
# Convert to SCE objects
placSCE <- as.SingleCellExperiment(plac.combined)

# Display # of cells per cell type
table(placSCE@colData$merged_annotation)
```

The last step before computing markers is data normalization. The only requirement here is that the normalization is uniform across datasets for the meta-analytic stats to be meaningful. Any normalization procedure may be used, the MetaMarkers package offer a simple library size normalization procedure:

```{r compute_cpm}
assay(placSCE, "cpm") <- convert_to_cpm(assay(placSCE))
```

We can now proceed to marker selection (based on the ROC test). In our experience, it is best to *not* use logarithmic scaling as it will provide better fold change evaluation.

```{r compute_markers}
markers_placSCE <- compute_markers(assay(placSCE, "cpm"),
                                 placSCE@colData$merged_annotation)
```

```{r show_Trophoblast_markers}
markers_placSCE %>%
    filter(cell_type == "Trophoblast") %>%
    head(50)
```

Individual dataset markers can be saved to file for later use, which is particularly useful to accumulate datasets over time and regenerate more comprehensive meta-marker lists. 

```{r export_markers}
export_markers(markers_placSCE, "Integrated_markers_(TvsNT).csv")
```