#!/usr/bin/env python # # Copyright (c) 2019 10X Genomics, Inc. All rights reserved. # """ Tool for converting feature-barcode matrices from sparse format to dense CSV format, for use by external programs. The commands below should be preceded by '{cmd}': Usage: mat2csv [--genome=GENOME] mat2csv -h | --help | --version Arguments: input_path Path to a {product} feature-barcode matrix. Can be either a feature-barcode h5 file (recommended) or a path to a MEX {product} output folder. output_csv Output CSV file. Options: --genome=GENOME Specify which genome to extract. This only applies to multi-genome h5 input files. -h --help Show this message. --version Show version. """ from __future__ import absolute_import, division, print_function import os import sys import pathlib import docopt from cellranger.matrix import CountMatrix import cellranger.io as cr_io from cellranger.products import get_cmd_names def _parse_args(product_name): product, cmd = get_cmd_names(product_name) version = "%s %s %s\n%s" % ( product_name, os.getenv("TENX_SUBCMD", ""), os.getenv("TENX_VERSION", ""), os.getenv("TENX_COPYRIGHT", ""), ) return docopt.docopt(__doc__.format(cmd=cmd, product=product), version=version) def main(): args = _parse_args(os.getenv("TENX_PRODUCT", "")) output_csv = pathlib.Path(cr_io.get_output_path(args[""])) input_path = args[""] genome = args["--genome"] if input_path.endswith(".h5"): input_path = cr_io.get_input_path(input_path) gbm = CountMatrix.load_h5_file(input_path) else: input_path = pathlib.Path(cr_io.get_input_path(input_path, is_dir=True)) gbm = CountMatrix.load_mtx(input_path) if genome is not None: sys.exit( "The '--genome' argument can only be use with .h5 input files, " "not with MEX directories" ) if genome is None: matrix = gbm else: genomes = gbm.get_genomes() if genome not in genomes: sys.exit("Genome '%s' not found (genomes available: %s)" % (genome, genomes)) matrix = gbm.select_features_by_genome(genome) num_features, num_barcodes, num_entries = ( matrix.features_dim, matrix.bcs_dim, matrix.get_num_nonzero(), ) dense_size = num_features * num_barcodes zero_frac = float(dense_size - num_entries) * 100.0 / float(dense_size) print( """ WARNING: this matrix has %d x %d (%d total) elements, %f%% of which are zero. Converting it to dense CSV format may be very slow and memory intensive. Moreover, other programs (e.g. Excel) may be unable to load it due to its size. To cancel this command, press + C. If you need to inspect the data, we recommend using Loupe Browser. """ % (num_features, num_barcodes, dense_size, zero_frac) ) sys.stdout.flush() try: matrix.save_dense_csv(output_csv) except KeyboardInterrupt: if output_csv.exists(): output_csv.unlink() if __name__ == "__main__": main()