import re import sys import random import string import operator import copy def main(): ORIGIN = open("/Users/Elaine/Coding/TReSeq-test/wthu.sc3.replication.origins.txt", "r") CHROMINFO = open("/Users/Elaine/Coding/TReSeq-test/sc3.chromInfo.txt", "r") roman = {'1': "I", '2': "II", '3': "III", '4': "IV", '5': "V", '6': "VI", '7': "VII", '8': "VIII", '9': "IX", '10': "X", '11': "XI", '12': "XII", '13': "XIII", '14': "XIV", '15': "XV", '16': "XVI"} chrominfo = dict() for x in CHROMINFO: arow = x.rstrip().split("\t") thisChrom = arow[0] thisChromLength = int(arow[1]) chrominfo[thisChrom] = thisChromLength origin = dict() x = ORIGIN.readline() for x in ORIGIN: arow = x.rstrip().split("\t") thisChromNumber = arow[0] thisStart = arow[1] thisEnd = arow[2] thisStatus = arow[5] # This widden the range of origin chromosome locus to check - up to BOTH upstream and downstream 4750bp. Making the total range maybe around 10kb to count reads. # From the previous EdU pulldown data, under HU45 or HU90 condition, WT strain has early origin peaks ranging from around 5kb to 15kb. However, the confirmed origin range from OriDB only has around maybe 250bp. Therefore, the idea is to widden the range and count the reads that in the range. rangeStart = int(thisStart) - 4750 rangeEnd = int(thisEnd) + 4750 if rangeStart < 1: rangeStart = 1 else: pass if rangeEnd > int(chrominfo[thisChrom]): rangeEnd = int(chrominfo[thisChrom]) else: pass #if thisStatus == "Confirmed": # pass #else: # continue thisChrom = "XXX" if roman.has_key(thisChromNumber): thisChrom = "chr" + roman[thisChromNumber] if thisChrom == "XXX": continue if origin.has_key(thisChrom): pass else: origin[thisChrom] = [] origin[thisChrom].append([thisChrom, int(rangeStart), int(rangeEnd)]) randomOriginList = [] for i in range(1000): randomOrigin = dict() for thisChrom in origin.keys(): for v in origin[thisChrom]: thisLength = v[2] - v[1] randomStart = random.randint(1, int(chrominfo[thisChrom]) - thisLength) if v[0] == "chrXII": exclude = range(450000 - thisLength,490500) if randomStart in exclude: randomStart = random.randint(1, int(chrominfo[thisChrom]) - thisLength) else: pass else: pass if randomOrigin.has_key(thisChrom): pass else: randomOrigin[thisChrom] = [] randomOrigin[thisChrom].append([thisChrom, randomStart, randomStart + thisLength]) randomOriginList.append(copy.deepcopy(randomOrigin)) totalReads = [] for i in range(1001): totalReads.append(0) for x in sys.stdin: arow = x.rstrip().split("\t") thisChrom = arow[0] thisPosStart = int(arow[1]) thisPosEnd = int(arow[2]) if origin.has_key(thisChrom): pass else: continue if chrominfo.has_key(thisChrom): pass else: continue for r in origin[thisChrom]: if thisPosStart >= r[1] and thisPosStart <= r[2]: totalReads[0] += 1 else: if thisPosEnd >= r[1] and thisPosEnd <= r[2]: totalReads[0] += 1 else: continue for i in range(1, 1001): for r in randomOriginList[i - 1][thisChrom]: if thisPosStart >= r[1] and thisPosStart <= r[2]: totalReads[i] += 1 else: if thisPosEnd >= r[1] and thisPosEnd <= r[2]: totalReads[i] += 1 else: continue realCount = totalReads[0] higherThanRealCount = 0 for i in range(1001): if i > 0: if totalReads[i] > realCount: higherThanRealCount += 1 print totalReads[i] print print realCount, higherThanRealCount if __name__ == "__main__": main()