In [5]:
import cellxgene_census
import pandas as pd

In [4]:
census_datasets = (
    census["census_info"]["datasets"]
    .read(column_names=["collection_name", "dataset_title", "dataset_id", "soma_joinid"])
    .concat()
    .to_pandas()
)
census_datasets = census_datasets.set_index("dataset_id")
census_datasets

Unnamed: 0_level_0,collection_name,dataset_title,soma_joinid
dataset_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0895c838-e550-48a3-a777-dbcd35d30272,"Single-Cell, Single-Nucleus, and Spatial RNA S...",Healthy human liver: B cells,0
00ff600e-6e2e-4d76-846f-0eec4f0ae417,Single-cell analysis of human B cell maturatio...,Human tonsil nonlymphoid cells scRNA,1
bdacc907-7c26-419f-8808-969eab3ca2e8,Molecular characterization of selectively vuln...,Molecular characterization of selectively vuln...,2
a5d95a42-0137-496f-8a60-101e17f263c8,Single-cell Atlas of common variable immunodef...,Steady-state B cells - scRNA-seq,3
d3566d6a-a455-4a15-980f-45eb29114cab,Single-cell proteo-genomic reference maps of t...,blood and bone marrow from a healthy young donor,4
...,...,...,...
0bce33ed-455c-4e12-93f8-b7b04a2de4a1,A single-cell transcriptional timelapse of mou...,Whole dataset: Normalized subset 2,807
c2876b1b-06d8-4d96-a56b-5304f815b99a,SEA-AD: Seattle Alzheimer’s Disease Brain Cell...,Whole Taxonomy - MTG: Seattle Alzheimer's Dise...,808
6f7fd0f1-a2ed-4ff1-80d3-33dde731cbc3,SEA-AD: Seattle Alzheimer’s Disease Brain Cell...,Whole Taxonomy - DLPFC: Seattle Alzheimer's Di...,809
dcfa2614-7ca7-4d82-814c-350626eccb26,A single-cell transcriptional timelapse of mou...,Major cell cluster: Mesoderm,810


In [6]:
# save table of datasets
census_datasets.to_csv('cellxgene_datasets.txt', sep = '\t')

In [7]:
with cellxgene_census.open_soma() as census:
    adata = cellxgene_census.get_anndata(
        census = census,
        organism = "Mus musculus",
        var_value_filter = "feature_id in ['ENSMUSG00000027967', 'ENSMUSG00000020052', 'ENSMUSG00000048904']",
        obs_value_filter = "tissue_general in ['brain']",
        column_names = {"obs": ["dataset_id", "assay", "cell_type", "tissue", "tissue_general", "suspension_type", "disease"]},
    )

    print(adata)

The "stable" release is currently 2024-07-01. Specify 'census_version="2024-07-01"' in future calls to open_soma() to ensure data consistency.
  adata = cellxgene_census.get_anndata(


AnnData object with n_obs × n_vars = 3792093 × 3
    obs: 'dataset_id', 'assay', 'cell_type', 'tissue', 'tissue_general', 'suspension_type', 'disease'
    var: 'soma_joinid', 'feature_id', 'feature_name', 'feature_length', 'nnz', 'n_measured_obs'


In [8]:
adata

AnnData object with n_obs × n_vars = 3792093 × 3
    obs: 'dataset_id', 'assay', 'cell_type', 'tissue', 'tissue_general', 'suspension_type', 'disease'
    var: 'soma_joinid', 'feature_id', 'feature_name', 'feature_length', 'nnz', 'n_measured_obs'

In [9]:
adata.obs.head()

Unnamed: 0,dataset_id,assay,cell_type,tissue,tissue_general,suspension_type,disease
0,6de332e1-465e-4243-9412-6fdc7497e99d,10x 3' v3,neural progenitor cell,diencephalon,brain,cell,normal
1,6de332e1-465e-4243-9412-6fdc7497e99d,10x 3' v3,neural progenitor cell,diencephalon,brain,cell,normal
2,6de332e1-465e-4243-9412-6fdc7497e99d,10x 3' v3,neural progenitor cell,diencephalon,brain,cell,normal
3,6de332e1-465e-4243-9412-6fdc7497e99d,10x 3' v3,neural progenitor cell,diencephalon,brain,cell,normal
4,6de332e1-465e-4243-9412-6fdc7497e99d,10x 3' v3,neural progenitor cell,diencephalon,brain,cell,normal


In [10]:
adata.write_h5ad('neurog2_ascl1_neurog1_census.h5ad')