#
# Version 2
# Pandas/DataFrame/TSV-oriented, rather than FASTA-oriented.
# Used in HTNA 2023.
#
# Layout: one "key = value" pair per line, full-line "#" comments only.
#

[barcodes]
# Path to the barcode (SSI) definition file.
# Plan: eventually call these "SSI codes" to distinguish them from viral
# barcodes; the commented-out line below is the intended future filename.
# ssifile = ~/git/mapseq-processing/etc/ssi_v2.txt
ssifile = ~/git/mapseq-processing/etc/barcode_v2.txt

[fastq]
# NOTE(review): presumably start/end offsets applied to read 1 and read 2;
# whether the end offset is inclusive is not shown here -- confirm against
# the consuming code.
r1start = 0
r1end = 32
r2start = 0
r2end = 20
# Used to remove suspect sequences with long homopolymer runs.
# Original note: "Should be < 1%?" (presumably the fraction of sequences
# removed -- TODO confirm).
max_repeats = 7
# Used to remove sequences with ambiguous (N) bases.
max_n_bases = 0
# Reporting interval for verbose output; defines how often to print.
seqhandled_interval = 1000000

[collapse]
tool = bowtie2
max_mismatch = 3
seq_length = 30

[bowtie2]
threads = 10

[readtable]
# YY = T or C: last 2 nt of spike-ins AND reals.
# RR = A or G: last 2 nt of L1 controls.
# NOTE(review): the regex values below end in a literal "$". If this file is
# ever read with configparser ExtendedInterpolation, a literal "$" must be
# written "$$" -- confirm which interpolation mode the consumer uses.
spikeseq = CGTCAGTC
realregex = [TC][TC]$
loneregex = [AG][AG]$

[vbctable]
inj_min_reads = 2
target_min_reads = 2

[matrices]
# NOTE(review): the commented-out key below looks like a superseded
# alternative to clustermap_scale -- confirm before removing.
# clustermap_logscale=log10
clustermap_scale = log2
# Whether to require that target VBCs be present in the injection area to
# count. Default should be True, unless the dataset doesn't have any
# injection areas analyzed.
# require_injection=True
require_injection = False
# Per brain, threshold the UMI count in target areas to only those greater
# than the largest target-negative total in that brain.
use_target_negative = False
# NOTE(review): presumably the analogous threshold using target water
# controls -- confirm against the consuming code.
use_target_water_control = False

# Minimum unique molecules (UMIs) in injection to be used.
# Default: 30
inj_min_umi = 10
# Alternatively, explicit minimum unique molecules (UMIs) in *any* target
# area to retain a VBC.
# Default: 10
target_min_umi = 2