############################################################################ # NAME: Chris Bilder # # DATE: 3-24-16 # # PURPOSE: Estimated true confidence level for # # ybar +- t(1-alpha/2, n-1)*s/sqrt(n) # # # # NOTES: # # # ############################################################################ ############################################################################ # Settings and simulate data # Stated confidence level is 1-alpha alpha <- 0.05 # Simulate the data num.samples <- 1000 sample.size <- 10 mu <- 2 sigma <- 3 ############################################################################ # Method #1 - Very inefficient # Simulate one data set set.seed(4778) y <- rnorm(n = sample.size, mean = mu, sd = sigma) lower <- mean(y) + qt(p = alpha/2, df = sample.size - 1) * sd(y) / sqrt(sample.size) upper <- mean(y) + qt(p = 1-alpha/2, df = sample.size - 1) * sd(y) / sqrt(sample.size) c(lower, upper) # Check if mu is interval - multiple ways ifelse(test = lower < mu, yes = ifelse(test = upper > mu, yes = 1, no = 0), no = 0) ifelse(test = lower < mu & upper > mu, yes = 1, no = 0) lower < mu & upper > mu # Complete same type of calculations for all samples set.seed(4778) save.results1 <- matrix(data = NA, nrow = num.samples, ncol = 3) for(i in 1:num.samples) { y <- rnorm(n = sample.size, mean = mu, sd = sigma) lower <- mean(y) + qt(p = alpha/2, df = sample.size - 1) * sd(y) / sqrt(sample.size) upper <- mean(y) + qt(p = 1-alpha/2, df = sample.size - 1) * sd(y) / sqrt(sample.size) check <- lower < mu & upper > mu save.results1[i,] <- c(check, lower, upper) } # Estimated true confidence level - two ways sum(save.results1[,1])/num.samples mean(save.results1[,1]) ############################################################################ # Method #2 - better, not as efficient as it could be set.seed(4778) y <- matrix(data = rnorm(n = num.samples * sample.size, mean = mu, sd = sigma) , nrow = num.samples, ncol = sample.size, byrow = TRUE) head(y, n = 3) tail(y, n = 3) # Calculate interval for one data set interval <- mean(y[1,]) + qt(p = c(alpha/2, 1-alpha/2), df = sample.size - 1) * sd(y[1,]) / sqrt(sample.size) interval interval[1] < mu & interval[2] > mu # Complete same type of calculations for all samples save.results2 <- matrix(data = NA, nrow = num.samples, ncol = 3) for(i in 1:num.samples) { interval <- mean(y[i,]) + qt(p = c(alpha/2, 1-alpha/2), df = sample.size - 1) * sd(y[i,]) / sqrt(sample.size) check <- interval[1] < mu & interval[2] > mu save.results2[i,] <- c(check, interval[1], interval[2]) } # Estimated true confidence level mean(save.results2[,1]) ############################################################################ # Method #3 - even better # Complete same type of calculations for all intervals save.results3 <- matrix(data = NA, nrow = num.samples, ncol = 2) for(i in 1:num.samples) { interval <- mean(y[i,]) + qt(p = c(alpha/2, 1-alpha/2), df = sample.size - 1) * sd(y[i,]) / sqrt(sample.size) save.results3[i,] <- c(interval[1], interval[2]) } # Estimated true confidence level check <- save.results3[,1] < mu & save.results3[,2] > mu mean(check) ############################################################################ # Method #4 - Helpful to know when data is in a data frame structure set.seed(4778) y <- matrix(data = rnorm(n = num.samples * sample.size, mean = mu, sd = sigma) , nrow = num.samples * sample.size, ncol = 1, byrow = TRUE) head(y) y.df <- as.data.frame(y) head(y.df) tail(y.df) save.results4 <- matrix(data = NA, nrow = num.samples, ncol = 2) for(i in 1:num.samples) { indices <- (sample.size*(i-1)+1):(i*sample.size) interval <- mean(y[indices,1]) + qt(p = c(alpha/2, 1-alpha/2), df = sample.size - 1) * sd(y[indices,1]) / sqrt(sample.size) save.results4[i,] <- c(interval[1], interval[2]) } # Estimated true confidence level check <- save.results4[,1] < mu & save.results4[,2] > mu mean(check) save.results5 <- matrix(data = NA, nrow = num.samples, ncol = 2) for(i in 1:num.samples) { indices <- (sample.size*(i-1)+1):(i*sample.size) interval <- mean(y.df[indices,]) + qt(p = c(alpha/2, 1-alpha/2), df = sample.size - 1) * sd(y.df[indices,]) / sqrt(sample.size) save.results5[i,] <- c(interval[1], interval[2]) } check <- save.results5[,1] < mu & save.results5[,2] > mu mean(check) #Debugging example: Limit to the first 3 data sets save.results4 <- matrix(data = NA, nrow = num.samples, ncol = 2) for(i in 1:3) { indices <- (sample.size*(i-1)+1):(i*sample.size) cat("Iteration #", i, "where indices =", indices, "\n") interval <- mean(y[indices,1]) + qt(p = c(alpha/2, 1-alpha/2), df = sample.size - 1) * sd(y[indices,1]) / sqrt(sample.size) print(interval) save.results4[i,] <- c(interval[1], interval[2]) } ############################################################################ # Method #5 - good too set.seed(4778) y <- matrix(data = rnorm(n = num.samples * sample.size, mean = mu, sd = sigma) , nrow = num.samples, ncol = sample.size, byrow = TRUE) head(y, n = 3) # tail(y, n = 3) calc.interval <- function(y, alpha, sample.size) { mean(y) + qt(p = c(alpha/2, 1-alpha/2), df = sample.size - 1) * sd(y) / sqrt(sample.size) } # apply() will return the results for each data set in a column save.results5 <- apply(X = y, MARGIN = 1, FUN = calc.interval, alpha = alpha, sample.size = sample.size) # Estimated true confidence level check <- save.results5[1,] < mu & save.results5[2,] > mu mean(check) ############################################################################ # Method #6 - good ybar <- rowMeans(y) # Could use head(ybar) sd.y <- apply(X = y, MARGIN = 1, FUN = sd) head(sd.y) lower <- ybar + qt(p = alpha/2, df = sample.size - 1)*sd.y/sqrt(sample.size) upper <- ybar + qt(p = 1-alpha/2, df = sample.size - 1)*sd.y/sqrt(sample.size) check <- lower < mu & upper > mu # Estimated true confidence level mean(check) #