# Parallel processing simulations ################################################################################ # Simulate data options(width = 60) #Change width of output in R to help format my notes #Settings mu <- 2.713333 sigma <- 2.195581 sigma^2 n <- 9 num.sim <- 500 #Number of simulations - do not use R here because R is number of resamples in boot section #Simulate data all at once set.seed(9811) y.sim <- matrix(data = rnorm(n = n*num.sim, mean = mu, sd = sigma), nrow = num.sim, ncol = n) y.sim[1,] #Sample #1 var(y.sim[1,]) #t_1 ################################################################################ # Testing per.core2 <- function(my.data, alpha, R) { setwd(dir = "/work/stattools/bilder/simpp2") source(file = "Run_on_each_core.R") save.int<-t(apply(X = my.data, MARGIN = 1, FUN = sim.func, alpha = alpha, R = R)) #Oddly, this code will not work if it is in the source program save.int } #Test it per.core2(my.data = y.sim[1:10,], alpha = 0.05, R = 1999) library(parallel) #TESTING start.time<-proc.time() cl <- makeCluster(2) clusterSetRNGStream(cl = cl, 8881) sim.intervals.PP <- parLapply(cl = cl, X = list(y.sim[1:2,], y.sim[3:4,]), fun = per.core2, alpha = 0.05, R = 1999) save.all<-do.call(what = rbind, args = sim.intervals.PP) head(save.all) stopCluster(cl) end.time<-proc.time() save.time<-end.time-start.time cat("\n Number of minutes running:", save.time[3]/60, "\n \n") ################################################################################ #ALL 500 #1 core start.time<-proc.time() cl <- makeCluster(1) clusterSetRNGStream(cl = cl, 8881) #One should normally change the seed number for each run sim.intervals.PP <- parLapply(cl = cl, X = list(y.sim), fun = per.core2, alpha = 0.05, R = 1999) save.all<-do.call(what = rbind, args = sim.intervals.PP) head(save.all) stopCluster(cl) end.time<-proc.time() save.time1<-end.time-start.time cat("\n Number of minutes running:", save.time1[3]/60, "\n \n") #2 cores start.time<-proc.time() cl <- makeCluster(2) clusterSetRNGStream(cl = cl, 8881) sim.intervals.PP <- parLapply(cl = cl, X = list(y.sim[1:250,], y.sim[251:500,]), fun = per.core2, alpha = 0.05, R = 1999) save.all<-do.call(what = rbind, args = sim.intervals.PP) head(save.all) stopCluster(cl) end.time<-proc.time() save.time2<-end.time-start.time cat("\n Number of minutes running:", save.time2[3]/60, "\n \n") #4 cores start.time<-proc.time() cl <- makeCluster(4) clusterSetRNGStream(cl = cl, 8881) sim.intervals.PP <- parLapply(cl = cl, X = list(y.sim[1:125,], y.sim[126:250,], y.sim[251:375,], y.sim[376:500,]), fun = per.core2, alpha = 0.05, R = 1999) save.all<-do.call(what = rbind, args = sim.intervals.PP) head(save.all) stopCluster(cl) end.time<-proc.time() save.time4<-end.time-start.time cat("\n Number of minutes running:", save.time4[3]/60, "\n \n") #5 cores start.time<-proc.time() cl <- makeCluster(5) clusterSetRNGStream(cl = cl, 8881) sim.intervals.PP <- parLapply(cl = cl, X = list(y.sim[1:100,], y.sim[101:200,], y.sim[201:300,], y.sim[301:400,], y.sim[401:500,]), fun = per.core2, alpha = 0.05, R = 1999) save.all<-do.call(what = rbind, args = sim.intervals.PP) head(save.all) stopCluster(cl) end.time<-proc.time() save.time5<-end.time-start.time cat("\n Number of minutes running:", save.time5[3]/60, "\n \n") #This will help with the larger number of cores cases split.up.data <- function(data, numb.cores, numb.sim) { #This function can not handle "remainder" data sets to choose numb.cores wisely temp <- list() for (i in 1:numb.cores) { print((1 + (i-1)*num.sim/numb.cores)) #Use as a check print(i*num.sim/numb.cores) #Use as a check temp[[i]] <- data[(1 + (i-1)*num.sim/numb.cores):(i*num.sim/numb.cores),] } temp } #10 cores start.time<-proc.time() numb.cores <- 10 cl <- makeCluster(numb.cores) clusterSetRNGStream(cl = cl, 8881) save.split <- split.up.data(data = y.sim, numb.cores = numb.cores, numb.sim = numb.sim) sim.intervals.PP <- parLapply(cl = cl, X = save.split, fun = per.core2, alpha = 0.05, R = 1999) save.all<-do.call(what = rbind, args = sim.intervals.PP) head(save.all) stopCluster(cl) end.time<-proc.time() save.time10<-end.time-start.time cat("\n Number of minutes running:", save.time10[3]/60, "\n \n") #20 cores start.time<-proc.time() numb.cores <- 20 cl <- makeCluster(numb.cores) clusterSetRNGStream(cl = cl, 8881) save.split <- split.up.data(data = y.sim, numb.cores = numb.cores, numb.sim = numb.sim) sim.intervals.PP <- parLapply(cl = cl, X = save.split, fun = per.core2, alpha = 0.05, R = 1999) save.all<-do.call(what = rbind, args = sim.intervals.PP) head(save.all) stopCluster(cl) end.time<-proc.time() save.time20<-end.time-start.time cat("\n Number of minutes running:", save.time20[3]/60, "\n \n") #50 cores start.time<-proc.time() numb.cores <- 50 cl <- makeCluster(numb.cores) clusterSetRNGStream(cl = cl, 8881) save.split <- split.up.data(data = y.sim, numb.cores = numb.cores, numb.sim = numb.sim) sim.intervals.PP <- parLapply(cl = cl, X = save.split, fun = per.core2, alpha = 0.05, R = 1999) save.all<-do.call(what = rbind, args = sim.intervals.PP) head(save.all) stopCluster(cl) end.time<-proc.time() save.time50<-end.time-start.time cat("\n Number of minutes running:", save.time50[3]/60, "\n \n") ################################################################################ # Summarize summarize <- function(low.up, sigma.sq) { true.conf <- mean(ifelse(test = sigma.sq > low.up[,1], yes = ifelse(test = sigma.sq < low.up[,2], yes = 1, no = 0), no = 0), na.rm = TRUE) exp.length <- mean(low.up[,2] - low.up[,1], na.rm = TRUE) exclude <- sum(is.na(low.up[,1])|is.na(low.up[,2])) data.frame(true.conf, exp.length, exclude) } normal.based <- summarize(low.up = save.all[,1:2], sigma.sq = sigma^2) asym <- summarize(low.up = save.all[,3:4], sigma.sq = sigma^2) basic <- summarize(low.up = save.all[,5:6], sigma.sq = sigma^2) percentile <- summarize(low.up = save.all[,7:8], sigma.sq = sigma^2) bca <- summarize(low.up = save.all[,9:10], sigma.sq = sigma^2) student <- summarize(low.up = save.all[,11:12], sigma.sq = sigma^2) interval.names <- c("Normal", "Asymptotic", "Basic", "Percentile", "BCa", "Student") data.frame(interval = interval.names, rbind(normal.based, asym, basic, percentile, bca, student)) ################################################################################ # Time run vs. core time.run <- c(save.time1, save.time2, save.time4, save.time5, save.time10, save.time20, save.time50) time.run <- c(16.73073, 8.6087, 4.331067, 3.451517, 1.939717, 1.31935, 1.570917) cores <- c(1, 2, 4, 5, 10, 20, 50) pdf(file = "time.run.pdf") plot(x = cores, y = time.run, main = "Comparisons of time", type = "o", col = "red", ylim = c(0, max(time.run)), ylab = "Time (minutes)", xlab = "Number of cores requested", panel.first = grid()) dev.off() #