#!/usr/bin/env Rscript dat=read.table("hg19_stats", header=FALSE, as.is=TRUE) args<-commandArgs(TRUE) #initialize variables CN_cnt = as.numeric(args[[1]]) CN_size = as.numeric(args[[2]]) CN_mean = as.numeric(args[[3]]) #CN_cnt = 80 #CN_size = 5000000 #CN_mean = 2.5 n=dim(dat)[1] cnts = array(0, n) #n-1 to ignore y chromosome bounds = matrix(0, CN_cnt, 4) #Determine the number of copy number changes per chromosome odds=runif(CN_cnt,0,1) cnts[1] = length(which(odds < dat[1,4])) for (i in 2:n) { cnts[i] = length(which((odds >= dat[(i-1),4]) & (odds < dat[i,4]))) } spacers = matrix(0, ( CN_cnt + length(which(cnts > 0)) ), 3) step = 0 step2 = 0 for (i in 1:n) { if (cnts[i] > 0) { #Generate length of CN segments from exponential distribution size = 999999999 while (sum(size) > dat[i,2]) { size = round(rexp(1000,1/CN_size)) size = head(size[which(size>200000 & size<20000000)], cnts[i]) } #Generate spacing between CN segments from uniform random distribution space = runif((cnts[i] + 1),0,1) space = floor( (dat[i,2] - sum(size))*space/sum(space) ) #Generate integer CN value for each segment from poisson distribution CN = rpois(1000, CN_mean) CN = head(CN[which(CN != 2)], cnts[i]) pos=space[1] spacers[step2,] = c(dat[i,1], 0, pos) for (j in 1:cnts[i]) { step = step + 1 step2 = step2 + 1 bounds[step,] = c(dat[i,1], pos, pos+size[j], CN[j]) spacers[step2,] = c(dat[i,1], (pos + size[j] + 1), (pos + size[j] + space[j+1])) pos = pos + size[j] + space[j+1] } step2 = step2 + 1 spacers[step2,] = c(dat[i,1], (as.numeric(bounds[step,3]) + 1), dat[i,2]) } } #bounds #cnts write.table(bounds, file=paste("out"), row.names=FALSE, col.names=FALSE, quote=FALSE, sep="\t") write.table(spacers, file=paste("spacers"), row.names=FALSE, col.names=FALSE, quote=FALSE, sep="\t")