import re import sys import random import string import operator import copy def main(): number_of_randomizations = int(sys.argv[1]) ORIGIN = open("/mnt/wigstore3/user/kendall/stillman05/wthu.sc3.replication.origins.txt", "r") CHROMINFO = open("/mnt/wigstore3/user/kendall/stillman04/sc3.chromInfo.txt", "r") roman = {'1': "I", '2': "II", '3': "III", '4': "IV", '5': "V", '6': "VI", '7': "VII", '8': "VIII", '9': "IX", '10': "X", '11': "XI", '12': "XII", '13': "XIII", '14': "XIV", '15': "XV", '16': "XVI"} chrominfo = dict() for x in CHROMINFO: arow = x.rstrip().split("\t") thisChrom = arow[0] thisChromLength = int(arow[1]) chrominfo[thisChrom] = thisChromLength origin = dict() x = ORIGIN.readline() for x in ORIGIN: arow = x.rstrip().split("\t") thisChromNumber = arow[0] thisStart = arow[1] thisEnd = arow[2] thisStatus = arow[5] # This widden the range of origin chromosome locus to check - up to BOTH upstream and downstream 4750bp. Making the total range maybe around 10kb to count reads. # From the previous EdU pulldown data, under HU45 or HU90 condition, WT strain has early origin peaks ranging from around 5kb to 15kb. However, the confirmed origin range from OriDB only has around maybe 250bp. Therefore, the idea is to widden the range and count the reads that in the range. rangeStart = int(thisStart) - 4750 rangeEnd = int(thisEnd) + 4750 if rangeStart < 1: rangeStart = 1 else: pass if rangeEnd > int(chrominfo[thisChrom]): rangeEnd = int(chrominfo[thisChrom]) else: pass #if thisStatus == "Confirmed": # pass #else: # continue thisChrom = "XXX" if roman.has_key(thisChromNumber): thisChrom = "chr" + roman[thisChromNumber] if thisChrom == "XXX": continue ##### We're going to pretend chrXII:430000-500000 doesn't exist for origins and for reads. if thisChrom == "chrXII": if rangeStart >= 430000 or rangeStart <= 500000: continue if rangeEnd >= 430000 or rangeEnd <= 500000: continue ##### End of code passing over this region on chrXII if origin.has_key(thisChrom): pass else: origin[thisChrom] = [] origin[thisChrom].append([thisChrom, int(rangeStart), int(rangeEnd)]) randomOriginList = [] for i in range(number_of_randomizations): randomOrigin = dict() for thisChrom in origin.keys(): for v in origin[thisChrom]: thisLength = v[2] - v[1] randomStart = random.randint(1, int(chrominfo[thisChrom]) - thisLength) if v[0] == "chrXII": #excludeStart = 450000 - thisLength #excludeEnd = 490500 excludeStart = 430000 excludeEnd = 500000 while True: if randomStart >= excludeStart and randomStart <= excludeEnd: randomStart = random.randint(1, int(chrominfo[thisChrom]) - thisLength) else: break # This code below might create a new randomStart in the exclude region # Have to keep trying until outside exclude region #exclude = range(450000 - thisLength,490500) #if randomStart in exclude: # randomStart = random.randint(1, int(chrominfo[thisChrom]) - thisLength) #else: # pass else: pass if randomOrigin.has_key(thisChrom): pass else: randomOrigin[thisChrom] = [] randomOrigin[thisChrom].append([thisChrom, randomStart, randomStart + thisLength]) randomOriginList.append(copy.deepcopy(randomOrigin)) totalReads = [] for i in range(number_of_randomizations + 1): totalReads.append(0) for x in sys.stdin: arow = x.rstrip().split("\t") thisChrom = arow[0] thisPosStart = int(arow[1]) thisPosEnd = int(arow[2]) if origin.has_key(thisChrom): pass else: continue if chrominfo.has_key(thisChrom): pass else: continue ##### We're going to pretend chrXII:430000-500000 doesn't exist for origins and for reads. if thisChrom == "chrXII": if thisPosStart >= 430000 or thisPosStart <= 500000: continue if thisPosEnd >= 430000 or thisPosEnd <= 500000: continue ##### End of code passing over this region on chrXII for r in origin[thisChrom]: if thisPosStart >= r[1] and thisPosStart <= r[2]: totalReads[0] += 1 else: if thisPosEnd >= r[1] and thisPosEnd <= r[2]: totalReads[0] += 1 else: continue for i in range(1, number_of_randomizations + 1): for r in randomOriginList[i - 1][thisChrom]: if thisPosStart >= r[1] and thisPosStart <= r[2]: totalReads[i] += 1 else: if thisPosEnd >= r[1] and thisPosEnd <= r[2]: totalReads[i] += 1 else: continue realCount = totalReads[0] higherThanRealCount = 0 for i in range(number_of_randomizations + 1): if i > 0: if totalReads[i] > realCount: higherThanRealCount += 1 print totalReads[i] print print realCount, higherThanRealCount if __name__ == "__main__": main()