# Load required libraries
library(SingleCellExperiment)
## Loading required package: SummarizedExperiment
## Loading required package: methods
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## 
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: utils
## Loading required package: graphics
## 
## Attaching package: 'graphics'
## The following object is masked from 'package:stats4':
## 
##     plot
## Loading required package: stats
## 
## Attaching package: 'stats'
## The following objects are masked from 'package:stats4':
## 
##     AIC, BIC, coef, confint, logLik, nobs, profile, update, vcov
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
## 
##     rowMedians
## The following objects are masked from 'package:matrixStats':
## 
##     anyMissing, rowMedians
library(Seurat)
## Attaching SeuratObject
## 
## Attaching package: 'Seurat'
## The following object is masked from 'package:SummarizedExperiment':
## 
##     Assays
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::%within%() masks IRanges::%within%()
## ✖ dplyr::collapse()     masks IRanges::collapse()
## ✖ dplyr::combine()      masks Biobase::combine(), BiocGenerics::combine()
## ✖ dplyr::count()        masks matrixStats::count()
## ✖ dplyr::desc()         masks IRanges::desc()
## ✖ tidyr::expand()       masks S4Vectors::expand()
## ✖ dplyr::filter()       masks stats::filter()
## ✖ dplyr::first()        masks S4Vectors::first()
## ✖ dplyr::lag()          masks stats::lag()
## ✖ ggplot2::Position()   masks BiocGenerics::Position(), base::Position()
## ✖ purrr::reduce()       masks GenomicRanges::reduce(), IRanges::reduce()
## ✖ dplyr::rename()       masks S4Vectors::rename()
## ✖ lubridate::second()   masks S4Vectors::second()
## ✖ lubridate::second<-() masks S4Vectors::second<-()
## ✖ dplyr::slice()        masks IRanges::slice()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(MetaMarkers)
library(scRNAseq)
library(dplyr)

Introduction

Single-cell RNA-sequencing technologies have enabled the discovery and characterization of an incredible number of novel cell types and batches. However, cell types are usually measured in only one biological condition and subject to technical variability, making it difficult to identify robust markers, particularly for rare populations. MetaMarkers proposed a simple methodology to pool marker information across datasets while keeping datasets independent, identifying robust marker signatures and

Here are some of the reasons why you may want to use MetaMarkers: - the pipeline is fast and easy to use, making it possible to aggregate a large number of datasets. - the more datasets you provide, the more technical and biological variability you sample, progressively increasing marker robustness. - MetaMarkers creates marker signatures that offer redundant information about cell types. For example, these robust signatures enable rapid cell type identification at the individual cell level, even if the main markers have not been measured in a particular cell due to dropout. - MetaMarkers offers the possibility to stratify marker selection by grouping cell types into broader classes, enabling combinatorial identification of cell types.

What is required from the user’s perspective are: - a list of annotated datasets. - matching cell type names across datasets.

To obtain matching annotation across datasets in an automated way, we suggest using Seurat to perform integrative clustering or MetaNeighbor to match highly replicable cell types.

We illustrate how to create meta-marker lists by using 4 placenta datasets. We show how to create meta-marker lists, visualize and pick the best meta-markers. We then show how to create hierarchical lists by grouping cell types into trophoblast and non-trophoblast classes.

Create marker lists for individual placental datasets

Here, we will load the the datasets in the SingleCellExperiment format.

Then, we need to make sure that gene names are identical across datasets. Here, the two datasets share the same format of gene names. Note that MetaMarkers will automatically remove duplicate gene names, so we don’t need to worry.

For this trial, our objective is to create two marker sets for trophoblasts and non-trophoblasts. We will collapse the cell type information across the four datasets, and run MetaMarkers to get the marker sets that define trophoblasts vs non-trophoblasts.

vtdataset <-  readRDS("vtSeurat2_annotated.rds")
sdataset <-  readRDS("sSeurat_20230514.rds")
ldataset <- readRDS("lSeurat_annotated.rds")
hdataset <- readRDS("hSeurat_annotated.rds")

# Collapse the cell types in Liu dataset
lmeta_copy <- ldataset@meta.data %>% 
  select(Annotation)

dim(ldataset@meta.data)
#> [1] 1332   16

# read the cell types data frame
l_cell_types <- read.csv("Liu Cell Types.csv") %>% 
  dplyr::rename(Annotation = Annotations)

# use inner_join() on hmeta_copy and h_cell_types
df <- inner_join(lmeta_copy, l_cell_types)

# Check if the order is maintained
summary(df$Annotation == lmeta_copy$Annotation)
#>    Mode    TRUE 
#> logical    1332

# add to meta.data
ldataset$merged_annotation <- df$Merged
# Collapse the cell types in VT dataset
vtmeta_copy <- vtdataset@meta.data %>% 
  select(Annotation)

# Do the same thing for the vtSeurat
vt_cell_types <- read.csv("VT Cell Types.csv") %>% 
  dplyr::rename(Annotation = Annotations)

# use inner_join() on vtmeta_copy and vt_cell_types
df_vt <- inner_join(vtmeta_copy, vt_cell_types)
#> Joining with `by = join_by(Annotation)`

# Check if the order is maintained
summary(df_vt$Annotation == vtmeta_copy$Annotation)
#>    Mode    TRUE 
#> logical   16818

# add to meta.data
vtdataset$merged_annotation <- df_vt$Merged
# Collapse the cell types in Suryawanshi dataset
smeta_copy <- sdataset@meta.data %>% 
  select(CellType) %>% 
  mutate(Annotation = CellType)

sdataset@meta.data$CellType %>% 
  table()
#> .
#>              Cytotrophoblast     Extravillous trophoblast 
#>                         1530                           65 
#> Hematopoietic (Erythroblast)          Immune (Macrophage) 
#>                          778                          671 
#>             Immune (Myeloid)        Stromal (Endothelial) 
#>                            1                         1028 
#>         Stromal (Fibroblast)          Syncytiotrophoblast 
#>                         3278                          784

# read the cell types data frame
s_cell_types <- read.csv("Suryawanshi Cell Types.csv") %>% 
  dplyr::rename(Annotation = Annotations)

# use inner_join() on smeta_copy and s_cell_types
df_s <- inner_join(smeta_copy, s_cell_types)
#> Joining with `by = join_by(Annotation)`

# Check if the order is maintained
summary(df_s$Annotation == smeta_copy$Annotation)
#>    Mode    TRUE 
#> logical    8135

# add to meta.data
sdataset$merged_annotation <- df_s$Merged
# Collapse the cell types in Han dataset
hmeta_copy <- hdataset@meta.data %>% 
  select(Annotation)

dim(hdataset@meta.data)
#> [1] 4383   17

# read the cell types data frame
h_cell_types <- read.csv("Han Cell Types.csv") %>% 
  dplyr::rename(Annotation = Annotations)

# use inner_join() on hmeta_copy and h_cell_types
df <- inner_join(hmeta_copy, h_cell_types)
#> Joining with `by = join_by(Annotation)`

# Check if the order is maintained
summary(df$Annotation == hmeta_copy$Annotation)
#>    Mode    TRUE 
#> logical    4383

# add to meta.data
hdataset$merged_annotation <- df$Merged

# (I think we can use an integrated dataset at this point.)

# Change the Idents of the Seurat Objects
Idents(object = ldataset) <- "merged_annotation"
Idents(object = vtdataset) <- "merged_annotation"
Idents(object = sdataset) <- "merged_annotation"
Idents(object = hdataset) <- "merged_annotation"


# Convert to SCE objects
lSCE <- as.SingleCellExperiment(ldataset)
vtSCE <- as.SingleCellExperiment(vtdataset)
sSCE <- as.SingleCellExperiment(sdataset)
hSCE <- as.SingleCellExperiment(hdataset)

Next we need to match cell type names across datasets. Cell types are contained in the “label” column of the datasets (part of the colData slot, but can be accessed directly as shown here).

table(lSCE@colData$merged_annotation)
#> 
#>          Cytotrophoblast Extravillous trophoblast             Stromal cell 
#>                      245                      437                      587 
#>      Syncytiotrophoblast 
#>                       63
table(vtSCE@colData$merged_annotation)
#> 
#>          Cytotrophoblast           Dendritic cell         Endothelial cell 
#>                     8346                       11                      143 
#>          Epithelial cell Extravillous trophoblast               Fibroblast 
#>                       37                     3037                     2118 
#>              Granulocyte            Hofbauer cell                     ILC3 
#>                       37                     1200                        1 
#>               Macrophage                 Monocyte      Natural killer cell 
#>                      829                       16                        9 
#>                   Plasma             Stromal cell      Syncytiotrophoblast 
#>                        6                        1                     1023 
#>                  T cells 
#>                        4
table(sSCE@colData$merged_annotation)
#> 
#>          Cytotrophoblast         Endothelial cell             Erythroblast 
#>                     1530                     1028                      778 
#> Extravillous trophoblast               Fibroblast               Macrophage 
#>                       65                     3278                      671 
#>             Myeloid cell      Syncytiotrophoblast 
#>                        1                      784
table(hSCE@colData$merged_annotation)
#> 
#>          Cytotrophoblast           Dendritic cell         Endothelial cell 
#>                       31                      526                        8 
#>          Epithelial cell           Erythroid cell Extravillous trophoblast 
#>                       12                      507                      584 
#>            Hofbauer cell               Macrophage               Neutrophil 
#>                      181                      102                       61 
#>       Proliferating cell       Smooth muscle cell             Stromal cell 
#>                       47                      805                     1500 
#>      Syncytiotrophoblast 
#>                       19

Some cell types are only present in one dataset, but there is a good overlap for the main cell types. We update names of cell types that overlap but have different names.


common_labels <-  Reduce(intersect, 
                         list(unique(lSCE@colData$merged_annotation),
                            unique(vtSCE@colData$merged_annotation),
                            unique(sSCE@colData$merged_annotation),
                            unique(hSCE@colData$merged_annotation)))

common_labels
#> [1] "Cytotrophoblast"          "Extravillous trophoblast"
#> [3] "Syncytiotrophoblast"

We only keep cell types that are common to both datasets. Note that this step is not strictly necessary: if we kept cell types present in only one dataset, the pipeline would run normally but the “meta-markers” for these cell types would be based on a single dataset.

lSCE <- lSCE[, lSCE@colData$merged_annotation %in% common_labels]
vtSCE <- vtSCE[, vtSCE@colData$merged_annotation %in% common_labels]
sSCE <- sSCE[, sSCE@colData$merged_annotation %in% common_labels]
hSCE <- hSCE[, hSCE@colData$merged_annotation %in% common_labels]

The last step before computing markers is data normalization. The only requirement here is that the normalization is uniform across datasets for the meta-analytic stats to be meaningful. Any normalization procedure may be used, the MetaMarkers package offer a simple library size normalization procedure:

assay(lSCE, "cpm") <- convert_to_cpm(assay(lSCE))
assay(vtSCE, "cpm") <-  convert_to_cpm(assay(vtSCE))
assay(sSCE, "cpm") = convert_to_cpm(assay(sSCE))
assay(hSCE, "cpm") = convert_to_cpm(assay(hSCE))

We can now proceed to marker selection (based on the ROC test). In our experience, it is best to not use logarithmic scaling as it will provide better fold change evaluation.

markers_lSCE <- compute_markers(assay(lSCE, "cpm"),
                                 lSCE@colData$merged_annotation)
markers_vtSCE <- compute_markers(assay(vtSCE, "cpm"),
                                 vtSCE@colData$merged_annotation)
markers_sSCE <- compute_markers(assay(sSCE, "cpm"),
                                sSCE@colData$merged_annotation)
markers_hSCE <- compute_markers(assay(hSCE, "cpm"),
                                hSCE@colData$merged_annotation)

These functions provide marker lists for all cell types in a given dataset (ranked by AUROC). For example, we can visualize the best markers for the Trophoblast:

markers_sSCE %>%
    filter(cell_type == "Cytotrophoblast") %>%
    head(10)
#> # A tibble: 10 × 14
#>    group cell_type       gene   fold_change auroc log_fdr population_size
#>    <chr> <chr>           <chr>        <dbl> <dbl>   <dbl>           <dbl>
#>  1 all   Cytotrophoblast RPL41         1.86 0.832   -354.            1530
#>  2 all   Cytotrophoblast RPS10         2.75 0.821   -332.            1530
#>  3 all   Cytotrophoblast RPL36A        2.98 0.802   -292.            1530
#>  4 all   Cytotrophoblast IFI6          2.61 0.796   -282.            1530
#>  5 all   Cytotrophoblast RPL39         2.38 0.795   -279.            1530
#>  6 all   Cytotrophoblast EEF1A1        2.34 0.764   -223.            1530
#>  7 all   Cytotrophoblast RPS27         1.38 0.742   -189.            1530
#>  8 all   Cytotrophoblast RPS28         1.61 0.742   -188.            1530
#>  9 all   Cytotrophoblast RPS29         1.60 0.740   -186.            1530
#> 10 all   Cytotrophoblast RPL36         1.47 0.717   -151.            1530
#> # ℹ 7 more variables: population_fraction <dbl>, average_expression <dbl>,
#> #   se_expression <dbl>, detection_rate <dbl>, fold_change_detection <dbl>,
#> #   precision <dbl>, recall <dbl>

sdataset@meta.data %>% 
  filter(merged_annotation == "Cytotrophoblast") %>% 
  select(percent.rb) %>% 
  summary()
#>    percent.rb   
#>  Min.   :10.59  
#>  1st Qu.:31.33  
#>  Median :34.71  
#>  Mean   :34.57  
#>  3rd Qu.:37.97  
#>  Max.   :55.89

sdataset@meta.data %>% 
  select(percent.rb) %>% 
  summary()
#>    percent.rb     
#>  Min.   : 0.8677  
#>  1st Qu.:22.0196  
#>  Median :28.4647  
#>  Mean   :26.8642  
#>  3rd Qu.:33.2340  
#>  Max.   :58.2871

Individual dataset markers can be saved to file for later use, which is particularly useful to accumulate datasets over time and regenerate more comprehensive meta-marker lists.

export_markers(markers_lSCE, "Liu_markers_20230513.csv")
export_markers(markers_vtSCE, "VT_markers_20230513.csv")
export_markers(markers_sSCE, "Suryawanshi_markers_20230514.csv")
export_markers(markers_hSCE, "Han_markers_20230513.csv")

Build list of meta-analytic markers

We now build meta-markers from individual dataset marker lists. First we load the markers we previously exported. Note that the export function compresses marker files by default, so the extension has changed.

markers = list(
    lmarkers = read_markers("Liu_markers_20230513.csv.gz"),
    vtmarkers = read_markers("VT_markers_20230513.csv.gz"),
    smarkers = read_markers("Suryawanshi_markers_20230514.csv.gz"),
    hmarkers = read_markers("Han_markers_20230513.csv.gz")
)

We create meta-markers by calling the following function. By default, the function only returns a ranked list of markers, but can also provide more detailed information about markers by setting “detailed_stats = TRUE”.

meta_markers <-  make_meta_markers(markers, detailed_stats = TRUE)

We can look at meta-analytic markers for the Cytotrophoblast cell type again.

meta_markers %>%
    filter(cell_type == "Cytotrophoblast") %>%
    head(100)
#> # A tibble: 100 × 17
#>    group cell_type        rank gene   recurrence auroc fold_change
#>    <chr> <chr>           <int> <chr>       <int> <dbl>       <dbl>
#>  1 all   Cytotrophoblast     1 PAGE4           3 0.815       22.3 
#>  2 all   Cytotrophoblast     2 MEST            3 0.764        5.77
#>  3 all   Cytotrophoblast     3 ADRB1           3 0.637        6.16
#>  4 all   Cytotrophoblast     4 LDHB            2 0.787        4.22
#>  5 all   Cytotrophoblast     5 SMAGP           2 0.749        8.46
#>  6 all   Cytotrophoblast     6 PEG10           2 0.743        7.67
#>  7 all   Cytotrophoblast     7 FXYD3           2 0.733        4.55
#>  8 all   Cytotrophoblast     8 JUN             2 0.726        4.15
#>  9 all   Cytotrophoblast     9 ISYNA1          2 0.726        6.62
#> 10 all   Cytotrophoblast    10 CYSTM1          2 0.722        6.79
#> # ℹ 90 more rows
#> # ℹ 10 more variables: fold_change_detection <dbl>, expression <dbl>,
#> #   precision <dbl>, recall <dbl>, population_size <dbl>, n_datasets <int>,
#> #   lmarkers <lgl>, vtmarkers <lgl>, smarkers <lgl>, hmarkers <lgl>

The PAGE4 gene is a member of the GAGE family. The GAGE genes are expressed in a variety of tumors and in some fetal and reproductive tissues.

The “recurrence” column shows how often (in how many datasets) a gene was detected as “strongly” differentially expressed (by default FC>4, FDR<0.05, these parameters can be changed when calling “make_meta_markers”).

To better appreciate the trade-off between AUROC and fold change of detection, we can look at genes that offer optimal AUROC performance vs genes that offer optimal detection performance by looking at the Pareto front of markers in the (AUROC, detection) plot.

plot_pareto_markers(meta_markers, "Cytotrophoblast", min_recurrence = 0)

In this plot, dotted lines indicate high AUROC performance (sensitive marker) and high detection performance (specific marker, binary-like behavior).

To better appreciate what the above explanation means, we can look at the expression of Pareto markers in one of the datasets.

pareto_markers = get_pareto_markers(meta_markers, "Cytotrophoblast", min_recurrence=1)
plot_marker_expression(assay(lSCE, "cpm"), 
                       pareto_markers, 
                       lSCE@colData$merged_annotation)
#> Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
#> of ggplot2 3.3.4.
#> ℹ The deprecated feature was likely used in the MetaMarkers package.
#>   Please report the issue to the authors.
#> This warning is displayed once every 8 hours.
#> Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
#> generated.

plot_marker_expression(assay(vtSCE, "cpm"), 
                       pareto_markers, 
                       vtSCE@colData$merged_annotation)

plot_marker_expression(assay(sSCE, "cpm"), 
                       pareto_markers, 
                       sSCE@colData$merged_annotation)

plot_marker_expression(assay(hSCE, "cpm"), 
                       pareto_markers, 
                       hSCE@colData$merged_annotation)

To obtain an overview of the strength of markers we can obtain for all cell types, we can make a summary plot that contains Pareto markers for all cell types.

library(ggrepel)
is_pareto_front = function(x, y, tolerance = 0) {
    ids = seq_along(x)
    result = data.frame(x, y, ids) %>%
        dplyr::arrange(dplyr::desc(.data$x), dplyr::desc(.data$y)) %>%
        dplyr::mutate(max_y = cummax(.data$y), is_pareto = .data$y >= .data$max_y - tolerance) %>%
        dplyr::select(.data$ids, .data$is_pareto) %>%
        dplyr::arrange(.data$ids) %>%
        dplyr::pull(.data$is_pareto)
    return(result)
}

pdf("pareto_front2.pdf")
new_plot <- function (meta_markers, min_recurrence = 1) {
    meta_markers %>% dplyr::filter(.data$recurrence >= min_recurrence) %>% 
        dplyr::group_by(.data$cell_type) %>% dplyr::filter(is_pareto_front(.data$auroc, 
        .data$fold_change_detection)) %>% ggplot2::ggplot(ggplot2::aes(x = log2(.data$fold_change_detection), 
        y = .data$auroc, col = .data$cell_type, label = .data$gene)) + 
        ggplot2::geom_line() + geom_text_repel(show.legend = FALSE) + 
        ggplot2::geom_hline(yintercept = 0.8, linetype = "dashed") + 
        ggplot2::geom_vline(xintercept = 3, linetype = "dashed") + 
        ggplot2::labs(x = "log2(Fold change) of detection rate", 
            y = "AUROC") + ggplot2::theme(legend.title = ggplot2::element_blank()) + theme_bw()
}
new_plot(meta_markers, min_recurrence=0)
while (!is.null(dev.list()))  dev.off()

We see that all cell types have highly sensitive markers, but the specificity (background expression) for these markers vary considerably. For example, cytotrophoblasts and syncytiotrophoblasts don’t have any marker that is both highly specific and sensitive.

plot_pareto_markers(meta_markers, "Syncytiotrophoblast", min_recurrence=0)

pareto_markers = get_pareto_markers(meta_markers, "Syncytiotrophoblast", min_recurrence=0)
plot_marker_expression(assay(lSCE, "cpm"), 
                       pareto_markers, 
                       lSCE@colData$merged_annotation)

plot_marker_expression(assay(vtSCE, "cpm"), 
                       pareto_markers, 
                       vtSCE@colData$merged_annotation)

plot_marker_expression(assay(sSCE, "cpm"), 
                       pareto_markers, 
                       sSCE@colData$merged_annotation)

plot_marker_expression(assay(hSCE, "cpm"), 
                       pareto_markers, 
                       hSCE@colData$merged_annotation)

The violin plot makes it clear how background expression is distributed across cell types.

We strongly recommend saving the meta-markers to a file for later use.

export_meta_markers(meta_markers, "meta_markers_(4_datasets)_20230514.csv", names(markers))
meta_markers %>% # Top N highest values by group
  arrange(desc(auroc)) %>% 
  group_by(cell_type) %>%
  dplyr::slice(1:30)
#> # A tibble: 90 × 17
#> # Groups:   cell_type [3]
#>    group cell_type        rank gene    recurrence auroc fold_change
#>    <chr> <chr>           <int> <chr>        <int> <dbl>       <dbl>
#>  1 all   Cytotrophoblast     1 PAGE4            3 0.815       22.3 
#>  2 all   Cytotrophoblast     4 LDHB             2 0.787        4.22
#>  3 all   Cytotrophoblast     2 MEST             3 0.764        5.77
#>  4 all   Cytotrophoblast   809 COX7C            0 0.751        1.99
#>  5 all   Cytotrophoblast   810 RPS25            0 0.750        1.60
#>  6 all   Cytotrophoblast     5 SMAGP            2 0.749        8.46
#>  7 all   Cytotrophoblast     6 PEG10            2 0.743        7.67
#>  8 all   Cytotrophoblast   215 SLC25A5          1 0.736        2.72
#>  9 all   Cytotrophoblast   216 APLP2            1 0.735        3.24
#> 10 all   Cytotrophoblast   217 SPINT2           1 0.735        2.82
#> # ℹ 80 more rows
#> # ℹ 10 more variables: fold_change_detection <dbl>, expression <dbl>,
#> #   precision <dbl>, recall <dbl>, population_size <dbl>, n_datasets <int>,
#> #   lmarkers <lgl>, vtmarkers <lgl>, smarkers <lgl>, hmarkers <lgl>

plot_pareto_summary(meta_markers, min_recurrence=0)

The list can be retrieved later by loading the MetaMarkers package and running the following (again, note that the export function will compress the file by default, changing the file extension in the process).