#
#
# Version 2
# Pandas/DataFrame/TSV-oriented, rather than FASTA-oriented. 
# Used in HTNA 2023
#

[barcodes]
# Path to the barcode definition file.
# TODO: eventually rename these to "SSI codes" (see the commented-out
# ssi_v2.txt alternative below), to distinguish them from viral barcodes.
# NOTE(review): the path starts with '~'; INI parsers do not expand it, so
# the consuming code presumably expands the home directory itself -- confirm.
# ssifile = ~/git/mapseq-processing/etc/ssi_v2.txt
ssifile = ~/git/mapseq-processing/etc/barcode_v2.txt

[fastq]
# Start/end positions of the region taken from each read of a pair.
# NOTE(review): presumably r1/r2 are read 1/read 2 and the bounds are
# 0-based, end-exclusive (i.e. 32 nt and 20 nt) -- confirm against the
# consuming code.
r1start = 0
r1end = 32
r2start = 0
r2end = 20

# Homopolymer-run limit, used to remove suspect sequences with long
# homopolymer runs. Should be < 1%?
# NOTE(review): whether a run of exactly this length is kept or removed is
# not visible here -- confirm.
max_repeats = 7
# Remove sequences with ambiguous (N) bases; 0 means none are tolerated.
max_n_bases = 0
# Reporting interval for verbose output: defines how often to print.
# NOTE(review): presumably counted in sequences handled -- confirm.
seqhandled_interval = 1000000


[collapse]
# Tool used for the collapse step; bowtie2-specific settings live in the
# [bowtie2] section.
tool = bowtie2
# NOTE(review): presumably the maximum number of mismatches at which two
# sequences are still collapsed together -- confirm against the consuming code.
max_mismatch = 3
# NOTE(review): presumably the sequence length (nt) considered when
# collapsing -- confirm.
seq_length = 30

[bowtie2]
# Thread count for bowtie2.
# NOTE(review): presumably passed through as bowtie2's threads option --
# confirm; tune to the cores available on the host.
threads = 10

[readtable]
# NOTE(review): presumably the sequence that marks a read as a spike-in;
# where in the read it is matched is not visible here -- confirm.
spikeseq = CGTCAGTC
# YY (Y = T or C): last 2 nt of spike-ins AND reals.
realregex = [TC][TC]$
# RR (R = A or G): last 2 nt of L1 controls.
loneregex = [AG][AG]$
# NOTE(review): both regexes end in a literal '$'. That is fine with
# configparser's default (basic) interpolation, but under
# ExtendedInterpolation a literal '$' must be written '$$' -- confirm which
# interpolation mode the reader uses before changing it.

[vbctable]
# Minimum read counts.
# NOTE(review): presumably the per-VBC read-count threshold applied in
# injection (inj) and target areas respectively -- confirm whether the
# comparison is inclusive.
inj_min_reads = 2
target_min_reads = 2


[matrices]
# Scale used for the clustermap.
# NOTE(review): the commented-out alternative uses a different key name
# (clustermap_logscale) than the active key (clustermap_scale) -- confirm
# which one the code reads before re-enabling it.
# clustermap_logscale = log10
clustermap_scale = log2

# Whether to require that target VBCs also be present in the injection area
# in order to count.
# Default should be True, unless the dataset doesn't have any injection areas
# analyzed.
# require_injection = True
require_injection = False

# Per brain: threshold the UMI counts in target areas, keeping only those
# greater than the largest target-negative total in that brain.
use_target_negative = False
# NOTE(review): presumably the same thresholding, but based on the
# water-control samples -- confirm.
use_target_water_control = False

# Minimum number of unique molecules (UMIs) in the injection area for a VBC
# to be used. Documented default: 30; set to 10 here.
inj_min_umi = 10

# Alternatively, an explicit minimum number of unique molecules (UMIs) in
# *any* target area for a VBC to be retained.
# Documented default: 10; set to 2 here.
target_min_umi = 2