# Load libraries
library(SingleCellExperiment)
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::collapse() masks IRanges::collapse()
## ✖ dplyr::combine() masks Biobase::combine(), BiocGenerics::combine()
## ✖ dplyr::count() masks matrixStats::count()
## ✖ dplyr::desc() masks IRanges::desc()
## ✖ tidyr::expand() masks S4Vectors::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first() masks S4Vectors::first()
## ✖ dplyr::lag() masks stats::lag()
## ✖ ggplot2::Position() masks BiocGenerics::Position(), base::Position()
## ✖ purrr::reduce() masks GenomicRanges::reduce(), IRanges::reduce()
## ✖ dplyr::rename() masks S4Vectors::rename()
## ✖ dplyr::slice() masks IRanges::slice()
library(Seurat)
## Attaching SeuratObject
## Attaching sp
##
## Attaching package: 'Seurat'
##
## The following object is masked from 'package:SummarizedExperiment':
##
## Assays
library(RColorBrewer)
library(scDblFinder)
library(magrittr)
##
## Attaching package: 'magrittr'
##
## The following object is masked from 'package:purrr':
##
## set_names
##
## The following object is masked from 'package:tidyr':
##
## extract
##
## The following object is masked from 'package:GenomicRanges':
##
## subtract
library(pheatmap)
library(GEOquery)
## Setting options('download.file.method.GEOquery'='auto')
## Setting options('GEOquery.inmemory.gpl'=FALSE)
library(SeuratDisk)
## Registered S3 method overwritten by 'SeuratDisk':
## method from
## as.sparse.H5Group Seurat
library(loomR)
## Loading required package: R6
## Loading required package: hdf5r
##
## Attaching package: 'hdf5r'
##
## The following object is masked from 'package:purrr':
##
## flatten_df
##
## The following object is masked from 'package:SummarizedExperiment':
##
## values
##
## The following object is masked from 'package:GenomicRanges':
##
## values
##
## The following object is masked from 'package:S4Vectors':
##
## values
##
##
## Attaching package: 'loomR'
##
## The following object is masked from 'package:SeuratDisk':
##
## loom
##
## The following object is masked from 'package:dplyr':
##
## combine
##
## The following object is masked from 'package:Biobase':
##
## combine
##
## The following object is masked from 'package:BiocGenerics':
##
## combine
library(R.utils)
## Loading required package: R.oo
## Loading required package: R.methodsS3
## R.methodsS3 v1.8.2 (2022-06-13 22:00:14 UTC) successfully loaded. See ?R.methodsS3 for help.
## R.oo v1.25.0 (2022-06-12 02:20:02 UTC) successfully loaded. See ?R.oo for help.
##
## Attaching package: 'R.oo'
##
## The following object is masked from 'package:R.methodsS3':
##
## throw
##
## The following object is masked from 'package:magrittr':
##
## equals
##
## The following object is masked from 'package:SummarizedExperiment':
##
## trim
##
## The following object is masked from 'package:GenomicRanges':
##
## trim
##
## The following object is masked from 'package:IRanges':
##
## trim
##
## The following objects are masked from 'package:methods':
##
## getClasses, getMethods
##
## The following objects are masked from 'package:base':
##
## attach, detach, load, save
##
## R.utils v2.12.2 (2022-11-11 22:00:03 UTC) successfully loaded. See ?R.utils for help.
##
## Attaching package: 'R.utils'
##
## The following object is masked from 'package:GEOquery':
##
## gunzip
##
## The following object is masked from 'package:magrittr':
##
## extract
##
## The following object is masked from 'package:tidyr':
##
## extract
##
## The following object is masked from 'package:utils':
##
## timestamp
##
## The following objects are masked from 'package:base':
##
## cat, commandArgs, getOption, isOpen, nullfile, parse, warnings
# Define color palettes
WtOrRd_pal <- colorRampPalette(c("#FFFFFF", brewer.pal(9, "OrRd")))(100)
# Data Wrangling Processes
metadata.genes <- read.csv("GSE156793_S2_Metadata_genes.txt")
metadata.placentacells <- readRDS("Placenta cells.rds")
tissue <- read.csv("GSE156793_S4_gene_expression_tissue.txt")
fraction_tissue <- read.csv("GSE156793_S5_gene_fraction_tissue.txt")
expression_celltype <- read.csv("GSE156793_S6_gene_expression_celltype.txt")
fraction_celltype <- read.csv("GSE156793_S7_gene_fraction_celltype.txt")
loom_obj <- connect(filename = "GSE156793_S3_gene_count.loom", mode = 'r+')
metadata.placentacells %<>%
filter(Development_day == 89 | Development_day == 90 | Development_day == 94)
length(metadata.placentacells$sample)
# Let's call for the loom file
# gunzip("GSE156793_S3_gene_count.loom.gz", remove=FALSE)
# loom_obj$add.row.attribute(list(Gene = loom_obj$row.attrs$gene_id[]))
# loom_obj$add.col.attribute(list(CellID = loom_obj$col.attrs$sample[]))
loom_obj$add.col.attribute(list(tokeep_FT = loom_obj$col.attrs$sample[] %in% metadata.placentacells$sample))
filtered <- loom_obj[["matrix"]][which(loom_obj$col.attrs$tokeep_FT[]),]
dim(filtered)
counts <- t(filtered)
# loom_obj[["col_attrs"]][,]
coldf <- loom_obj$get.attribute.df(MARGIN = 2)
coldf <- coldf %>%
filter(tokeep_FT)
colnames(coldf)
rowdf <- loom_obj$get.attribute.df(MARGIN = 1)
# loom_obj$row.attrs$Gene = loom_obj$row.attrs$gene_id
# loom_obj[["row_attrs"]]$Gene = loom_obj[["row_attrs"]]$gene_id
#
# loom_obj$col.attrs$sample[] %>%
# unique() %>%
# length()
# loom_obj$close_all()
colnames(counts) <- coldf$CellID
rownames(counts) <- rowdf$gene_short_name
# the whole thing is too big to convert to Seurat - filter the loom first!
# seurat_loom <- as.Seurat(loom_obj)
cSeurat <- CreateSeuratObject(
counts,
project = "Placenta_Seurat",
assay = "RNA",
names.field = 1,
names.delim = "_",
meta.data = coldf,
)
# Check if the metadata looks okay
cSeurat@meta.data
# loom_obj$version()
# Even running the following lines of code yields the same type of error.
# loom_obj$version <- function(){
# return("0.2.1.9000")
# }
# detach("package:SeuratDisk", unload = TRUE)
# detach("package:Seurat", unload = TRUE)
#
# library(Seurat)
saveRDS(cSeurat, "cSeurat.rds")
For convenience, we can start running from this chunk of code, since we have already saved the Seurat object as a file in our directory.
# Start running from this line of code
cSeurat <- readRDS("cSeurat.rds")
dim(cSeurat)
## [1] 63561 12267
# Add in the Mitochondrial PCT% information
cSeurat$percent.mt <- PercentageFeatureSet(cSeurat, pattern = "^MT-")
# nCount_RNA is the number of UMI counts in a cell
hist(cSeurat$nCount_RNA)
summary(cSeurat@meta.data$nCount_RNA)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 251.0 301.0 380.0 484.5 528.5 8156.0
# nFeature_RNA is the number of different genes that had any reads
hist(cSeurat$nFeature_RNA)
summary(cSeurat@meta.data$nFeature_RNA)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 120.0 249.0 307.0 368.8 413.0 3392.0
# percent.mt is the percent mitochondrial reads
summary(cSeurat$percent.mt)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 0 0 0 0 0
# Later, we will subset the Seurat object based on a percent.mt cutoff.
# Check for the percentage of ribosomal genes
cSeurat[["percent.rb"]] <- PercentageFeatureSet(cSeurat, pattern = "^RP[SL]")
summary(cSeurat$percent.rb)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.3584 0.7937 1.2704 1.7391 14.1479
Another important part for QC is removing doublets.
# Find for doublets
cSeurat <- as.SingleCellExperiment(cSeurat)
cSeurat <- scDblFinder(cSeurat)
## Creating ~9814 artificial doublets...
## Dimensional reduction
## Evaluating kNN...
## Training model...
## iter=0, 2127 cells excluded from training.
## iter=1, 2217 cells excluded from training.
## iter=2, 2274 cells excluded from training.
## Threshold found:0.499
## 1178 (9.6%) doublets called
# Convert back to Seurat object.
cSeurat <- as.Seurat(cSeurat)
# Notice that our identifiers have changed.
levels(Idents(object = cSeurat))
## [1] "SingleCellExperiment"
# We want to specify the identifier whether the cell is a singlet or doublet.
table(cSeurat@meta.data$scDblFinder.class)
##
## singlet doublet
## 11089 1178
Idents(object = cSeurat) <- "scDblFinder.class"
cSeurat@meta.data$scDblFinder.class <- as.character(cSeurat@meta.data$scDblFinder.class)
# Subset the Seurat object to only include singlets.
cSeurat <- subset(cSeurat, idents = "singlet")
levels(as.factor(cSeurat@meta.data$scDblFinder.class))
## [1] "singlet"
# convert column names to make the protocol coherent
colnames(cSeurat@meta.data)[2] <- "nCount_RNA"
colnames(cSeurat@meta.data)[3] <- "nFeature_RNA"
# We draw a violin plot to explore the values in meta data
VlnPlot(cSeurat,
features = c("nFeature_RNA","nCount_RNA","percent.mt","percent.rb"),
ncol = 4,
pt.size = 0.1) &
theme(plot.title = element_text(size=10))
## Warning in SingleExIPlot(type = type, data = data[, x, drop = FALSE], idents =
## idents, : All cells have the same value of percent.mt.
# The following four lines of code are for scatterplots addressing the correlations of every 2 pairs
FeatureScatter(cSeurat, feature1 = "nCount_RNA", feature2 = "percent.mt")
## Warning in cor(x = data[, 1], y = data[, 2]): the standard deviation is zero
FeatureScatter(cSeurat, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
FeatureScatter(cSeurat, feature1 = "nCount_RNA", feature2 = "percent.rb")
FeatureScatter(cSeurat, feature1 = "percent.rb", feature2 = "percent.mt")
## Warning in cor(x = data[, 1], y = data[, 2]): the standard deviation is zero
# We also check if there is correlation between number of features and the doublet score
FeatureScatter(cSeurat, feature1 = "nFeature_RNA", feature2 = "scDblFinder.score")
# Since the typical cutoff for percent.mt is 5%, we will try out < 5.
# Last week we used 10%, and couldn't see much difference.
# Also, let't filter out cells that have less than 5% ribosomal content.
# (11/29 Update) Removed the ribosomal filter due to insufficient evidence
# cSeurat <- subset(cSeurat, subset = percent.mt < 5)
dim(cSeurat)
## [1] 63561 11089
Now, we have a reduced version of the Seurat object, where the cells contain no genes.
# Next, we normalize the data
cSeurat <- NormalizeData(cSeurat)
# Explore the most variable features, let's use the default version first.
cSeurat <- FindVariableFeatures(cSeurat)
# cSeurat <- FindVariableFeatures(cSeurat, selection.method = "vst", nfeatures = 2000)
# Identify the 10 most highly variable genes
top10 <- head(VariableFeatures(cSeurat), 10)
top10
## [1] "ALB" "PAEP" "IGFBP1" "CSH1" "AFP" "CSH2"
## [7] "TFPI2" "IGFBP5" "KISS1" "SERPINA1"
# plot variable features with and without labels
plot2 <- VariableFeaturePlot(cSeurat)
plot2 <- LabelPoints(plot = plot2, points = top10, repel = TRUE)
## When using repel, set xnudge and ynudge to 0 for optimal results
plot2
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 34795 rows containing missing values (geom_point).
# Convert normalized gene expression to Z-score
all.genes <- rownames(cSeurat)
cSeurat <- ScaleData(cSeurat, features = all.genes)
## Centering and scaling data matrix
# Run PCA analysis
cSeurat <- RunPCA(cSeurat, features = VariableFeatures(object = cSeurat))
## PC_ 1
## Positive: TACC2, CTD-3006G17.2, LDB2, RP11-548L20.1, ADAMTS6, MIR181A1HG, EXOC6, RP11-40F8.2, RP11-179A16.1, LINC00669
## GULP1, PAGE4, RALGAPA2, PLCB1, RIMKLB, CYP19A1, SLC26A2, RP11-305B6.3, ANK2, MEOX2
## RAPGEF5, RP11-608O21.1, EGFL7, COL1A1, RP11-454C18.2, CASS4, AC079610.1, EXPH5, RP11-770E5.1, RP11-510J16.5
## Negative: PAPPA, NOTUM, PRG2, QSOX1, TIMP2, TIMP3, IGFBP1, FSTL3, PAPPA2, SCARA5
## ITGA5, HSPG2, EBI3, SYN3, HTRA4, AQPEP, HLA-G, ITM2B, AOC1, SERPINE1
## HPGD, FN1, IGFBP4, SERPING1, KRT19, CHRDL1, RORB, DKK1, APOD, MMP2
## PC_ 2
## Positive: SCARA5, IGFBP1, IGFBP4, DKK1, CHRDL1, APOD, RORB, RBP1, LUM, CD248
## HSPB6, GAS1, F2R, ISLR, DCN, C3, CD82, IGFBP2, IGFBP5, SPOCK1
## PRDM1, SPARCL1, IGFBP6, OMD, RP1-78O14.1, THY1, LGALS1, VIM, ADRA2C, WNT5A
## Negative: NOTUM, PAPPA2, PRG2, HPGD, AQPEP, EBI3, HTRA4, QSOX1, ISM2, GPR78
## FSTL3, PTPRF, HLA-G, KRT19, AF127936.7, AOC1, ADAM19, HSPG2, MFAP5, FLT1
## AC083884.8, KRT8, MGAT5, CSH1, ASCL2, TNFSF10, CPM, DIO2, FN1, ITGA5
## PC_ 3
## Positive: PAEP, AC073218.2, CATSPERB, PAX8, PTPRR, CP, C1orf186, MECOM, AC012593.1, MAP3K5
## ATP2C2, TC2N, FAM155A, SLC1A1, RP1-60O19.1, MACC1, RXFP1, RIMKLB, SLC7A2, GPX3
## UBE2D2, SLC6A6, PKHD1, IRX3, BICD1, DNER, DEPTOR, ZBTB20, NNMT, PRUNE2
## Negative: COL1A1, DCN, COL1A2, TIMP3, SERPINE2, LUM, ABI3BP, CDKN1C, ADAMTS1, CD248
## MMP2, ADAMTS5, MFAP4, HSPB6, FN1, SCARA5, DKK1, APOD, IGFBP2, RBP1
## FSTL1, LGALS1, THY1, S100A6, PDGFRB, GAS1, CFD, MYL9, SYN3, ISLR
## PC_ 4
## Positive: PLAC4, CYP19A1, CSH2, KISS1, PSG1, AFF1, CGA, ANGPT2, HOPX, PSG4
## PSG5, RP11-266L9.2, PSG9, CHODL, DEPDC1B, CSH1, TACC2, GDF15, PSG7, PSG2
## GULP1, HSD3B1, TFPI2, RP11-390D11.1, SH3TC2, PSG6, DAB2, AC003051.1, AC008394.1, SDC1
## Negative: FN1, SERPINE2, COL1A1, LHFP, COL1A2, HIP1, LDB2, NOTUM, CXCL14, HSPG2
## BICD1, RASGRF2, PRG2, MECOM, FSTL3, MIR181A1HG, CTD-3006G17.2, MCAM, HLA-G, PLCB1
## TPM1, DLGAP1, MEOX2, FLNB, PLCB4, RAPGEF5, KLF6, C10orf11, FLT1, GUCY1A2
## PC_ 5
## Positive: SERPINA1, APOA2, AFP, APOA1, APOB, AHSG, FGB, MT1G, APOC3, AMBP
## ALB, FGA, ITIH2, TTR, VTN, MT1H, FABP1, APOH, AGT, APOC1
## CYP3A7, ALDOB, RBP4, FGG, TF, SCD, SERPINF2, AL139819.1, CLU, FTL
## Negative: LDB2, MEOX2, EXOC6, CTD-3006G17.2, BICD1, EGFL7, DACH1, ZEB1, C10orf11, FCGR2B
## RAPGEF5, ZBTB20, MIR181A1HG, RALGAPA2, FLI1, PLCB4, JAM2, DOCK4, MECOM, ZNF521
## CACHD1, KDR, CDH13, ERG, PTPRB, FUT9, PLCB1, SPARCL1, LHFP, IGFBP5
# Plot the PCA
DimPlot(cSeurat, reduction = "pca")
# Plot the loadings
VizDimLoadings(cSeurat, dims = 1:2, reduction = "pca") &
theme(axis.text=element_text(size=8), axis.title=element_text(size = 8,face="bold"))
# Check the elbow plot to see how much variation is covered for each PC
ElbowPlot(cSeurat)
# FeaturePlot of PC1 top features
FeaturePlot(cSeurat, features = c("PAPPA",
"NOTUM"))
# FeaturePlot of PC2 top features
FeaturePlot(cSeurat, features = c("SCARA5",
"IGFBP1"))
# Now perform clustering
cSeurat <- FindNeighbors(cSeurat, dims = 1:10)
## Computing nearest neighbor graph
## Computing SNN
cSeurat <- FindClusters(cSeurat, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 11089
## Number of edges: 336645
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8972
## Number of communities: 12
## Elapsed time: 1 seconds
# For visualization purposes, we need to generate UMAP reduced dimensionality representation
cSeurat <- RunUMAP(cSeurat, dims = 1:10, verbose = F)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
# Plot the UMAP with clustering
DimPlot(cSeurat, reduction = "umap")
# Get the TSNE embedding
cSeurat <- RunTSNE(cSeurat, dims = 1:10)
# Plot the t-SNE with clustering
DimPlot(cSeurat, reduction = "tsne")
# Let's look at cluster sizes
table(cSeurat@meta.data$seurat_clusters)
##
## 0 1 2 3 4 5 6 7 8 9 10 11
## 3344 3063 1450 758 599 461 382 334 226 218 182 72
# Let's plot those clusters!
DimPlot(cSeurat,label.size = 4,repel = T,label = T)
FeaturePlot(cSeurat, features = c("PAPPA",
"IGFBP1",
"CSH1",
"ERVW-1"))
Let’s calculate the cell cycle scores at this point.
cSeurat <- SetIdent(cSeurat, value = "Main_cluster_name")
# Calculate the cell cycle scores
s.genes <- cc.genes.updated.2019$s.genes
g2m.genes <- cc.genes.updated.2019$g2m.genes
# Somehow this code does not work.
cSeurat <- CellCycleScoring(cSeurat,
search = TRUE,
s.features = s.genes,
g2m.features = g2m.genes)
## Warning: The following features are not present in the object: CENPU, attempting
## to find updated synonyms
## Warning: No updated symbols found
## Warning: The following features are still not present in the object: CENPU
## Warning: The following features are not present in the object: PIMREG, JPT1,
## attempting to find updated synonyms
## Warning: No updated symbols found
## Warning: The following features are still not present in the object: PIMREG,
## JPT1
table(cSeurat[[]]$Phase)
##
## G1 G2M S
## 5878 2401 2810
Around 1/4 of the cells are in the S phase.
# Check if mitochondrial genes show dependency on cell type
VlnPlot(cSeurat,
features = "percent.mt") &
theme(plot.title = element_text(size = 10),
legend.position = 'none')
## Warning in SingleExIPlot(type = type, data = data[, x, drop = FALSE], idents =
## idents, : All cells have the same value of percent.mt.
# Check if ribosomal genes show dependency on cell type
VlnPlot(cSeurat,
features = "percent.rb") &
theme(plot.title = element_text(size=10),
legend.position = 'none')
# Check # of genes and # of transcripts per cluster
VlnPlot(cSeurat,features =
c("nCount_RNA","nFeature_RNA")) &
theme(plot.title = element_text(size=10))
# Cell cycle score does not seem to depend on the cell type much
FeaturePlot(cSeurat,
features = c("S.Score","G2M.Score"),
label.size = 4,
repel = T,
label = T) &
theme(plot.title = element_text(size = 10))
VlnPlot(cSeurat,features = c("S.Score","G2M.Score")) &
theme(plot.title = element_text(size = 10),
legend.position = 'none')
In this case, it looks like most of the cell types show low cell cycle scores, except for the proliferating cells.
# Plot in UMAP
DimPlot(cSeurat,
group.by = "Phase")
# Most of the clusters contain all three cell cycle states.
# Plot with clusters
compTable.int <- table(cSeurat$Main_cluster_name, cSeurat$Phase)
compTable.int <- (compTable.int / rowSums(compTable.int)) * 100
pheatmap(compTable.int, color = WtOrRd_pal, breaks = 1:100)
# FeaturePlot for showing mod score
FeaturePlot(cSeurat,
features = c("S.Score", "G2M.Score"))
This heatmap is interesting, as most syncytiotrophoblasts are in the G1 phase, and almost all endothelial cells are in the S phase.
# This part is replaced when using the SCTransform
DefaultAssay(cSeurat) <- "RNA"
# Perform log-normalization
cSeurat <- NormalizeData(cSeurat)
# Find variable features
cSeurat <- FindVariableFeatures(cSeurat,
selection.method = "vst",
nfeatures = 2000)
all.genes <- rownames(cSeurat)
cSeurat <- ScaleData(cSeurat, features = all.genes)
## Centering and scaling data matrix
# Designate the markers
all.markers <- FindAllMarkers(cSeurat,
only.pos = T,
min.pct = 0.5,
logfc.threshold = 0.5)
## Calculating cluster Syncytiotrophoblasts and villous cytotrophoblasts
## Calculating cluster AFP_ALB positive cells
## Calculating cluster Stromal cells
## Calculating cluster Smooth muscle cells
## Calculating cluster Vascular endothelial cells
## Calculating cluster Myeloid cells
## Calculating cluster Extravillous trophoblasts
## Calculating cluster PAEP_MECOM positive cells
## Calculating cluster Trophoblast giant cells
## Calculating cluster Megakaryocytes
## Calculating cluster IGFBP1_DKK1 positive cells
## Calculating cluster Lymphoid cells
dim(all.markers)
## [1] 340 7
table(all.markers$cluster)
##
## Syncytiotrophoblasts and villous cytotrophoblasts
## 5
## AFP_ALB positive cells
## 31
## Stromal cells
## 17
## Smooth muscle cells
## 25
## Vascular endothelial cells
## 15
## Myeloid cells
## 8
## Extravillous trophoblasts
## 38
## PAEP_MECOM positive cells
## 28
## Trophoblast giant cells
## 32
## Megakaryocytes
## 78
## IGFBP1_DKK1 positive cells
## 49
## Lymphoid cells
## 14
top3_markers <- as.data.frame(all.markers %>%
group_by(cluster) %>%
top_n(n = 3, wt = avg_log2FC))
# We can check the dataframe briefly
head(top3_markers)
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## 1 0.000000e+00 4.233187 0.760 0.080 0.000000e+00
## 2 0.000000e+00 3.953669 0.661 0.055 0.000000e+00
## 3 0.000000e+00 3.603971 0.569 0.054 0.000000e+00
## 4 0.000000e+00 7.378025 0.972 0.017 0.000000e+00
## 5 0.000000e+00 7.170272 1.000 0.014 0.000000e+00
## 6 1.650927e-187 8.536737 1.000 0.086 1.049346e-182
## cluster gene
## 1 Syncytiotrophoblasts and villous cytotrophoblasts TENM3
## 2 Syncytiotrophoblasts and villous cytotrophoblasts FBN2
## 3 Syncytiotrophoblasts and villous cytotrophoblasts KANK1
## 4 AFP_ALB positive cells AFP
## 5 AFP_ALB positive cells SERPINA1
## 6 AFP_ALB positive cells ALB
Clustering based on Cell Type annotation
options(ggrepel.max.overlaps = Inf)
DimPlot(cSeurat, label = T , repel = T, label.size = 3) +
NoLegend()
# Syncytin-1 is a gene that allows the differentiation from cytotrophoblasts to SCTs.
FeaturePlot(cSeurat, features = "ERVW-1")
DimPlot(cSeurat, reduction = "pca")
DimPlot(cSeurat, reduction = "tsne")
DimPlot(cSeurat, reduction = "umap")
# ggsave(filename = "Check.png", plot = p1, width = 30, height = 10)
saveRDS(cSeurat, "cSeurat_annotated.rds")
Luckily, it seems that the cell types are preserved even after changing the mitochondrial gene percentage to 5%. Another difference is that the plots show more clustering compared to the 10% threshold, which could be promising in terms of clarity.
# Pseudotime
# Convert to monocle3 cell_data_set
hSeurat.cds <- as.cell_data_set(hSeurat)
class(hSeurat.cds)
# Re-cluster (required for trajectories in monocle3)
hSeurat.cds <- cluster_cells(hSeurat.cds)
# Learn the trajectory
hSeurat.cds <- learn_graph(hSeurat.cds)
# Plot the trajectory
plot_cells(hSeurat.cds, color_cells_by = "seurat_clusters", show_trajectory_graph = TRUE)
# Find the pseudotime order
DimPlot(hSeurat, group.by = "Phase")
hSeurat.cds <- order_cells(hSeurat.cds)
# Plot pseudotime
plot_cells(hSeurat.cds, color_cells_by = "pseudotime")
# Add pseudotime score back into the dataset
hSeurat$pseudotime_score <- hSeurat.cds@principal_graph_aux@listData$UMAP$pseudotime
hSeurat$pseudotime_score[hSeurat$pseudotime_score == Inf] <- NA
# FeaturePlot with Pseudotime
FeaturePlot(hSeurat, features = c("pseudotime_score"))
DimPlot(hSeurat, group.by = "Annotation")
FeaturePlot(hSeurat, features = c("pseudotime_score"))