######################################################################
# Show CI works #
# #
######################################################################
#Plot of distribution for Y_i
# Let X = 4*W where W~beta(5,2) (for those who have had a mathematical statistics class)
curve(expr = 15/2048 * x^4 * (4 - x), from = 0, to = 4, col = "red",
main = "Probability distribution for GPAs", ylab = "f(y)", xlab = "y")
abline(h = 0)
#Mean and variance
mu <- 2.8571 #4 * 5/(5+2)
sigma.sq <- 0.4082 #See formula for Var(W) from a book like Casella and Berger (2001) and then multiply it by 4^2
#####################################################################
# Simulate the samples
options(width = 60) # Helps with copy/paste into notes
n <- 20
set.seed(8129)
#Simulated samples from a beta probability distribution (4*W where W~beta(5,2))
# Students are not responsible for how I simulated data.
set1 <- 4*matrix(data = rbeta(n = 1000*n, shape1 = 5, shape2 = 2), nrow = 1000, ncol = n)
head(round(set1,2))
ybar <- rowSums(set1)/n
head(round(ybar,2))
###################################################################
# Coverage
alpha <- 0.05 # Find 95% CIs
means <- rowMeans(set1)
# means <- apply(X = set1, MARGIN = 1, FUN = mean)
SDs <- apply(X = set1, MARGIN = 1, FUN = sd)
lower <- means - qt(p = 1-alpha/2, df = n - 1) * SDs/sqrt(n)
upper <- means + qt(p = 1-alpha/2, df = n - 1) * SDs/sqrt(n)
head(data.frame(lower, upper))
coverage <- mean(lower < mu & upper > mu)
coverage
library(gplots)
# Plot the CIs and mu
hwidth <- qt(p = 1 - alpha/2, df = n - 1) * SDs/sqrt(n)
plotCI(x = means, uiw = hwidth, col = "black", barcol = "blue", lwd = 0.1,
cex = 0.8, ylab = "", xlab = "", main = "Confidence intervals of mu for n = 20")
abline(h = mu, col = "red")
# First 50 instead
plotCI(x = means[1:50], uiw = hwidth[1:50], col = "black", barcol = "blue", lwd = 0.1,
cex = 0.8, ylab = "", main = "Confidence intervals of mu for n = 20",
xlab = "Interval number")
abline(h = mu, col = "red")
# First 50 instea - different y-axis scale to allow for comparisons with n = 5
plotCI(x = means[1:50], uiw = hwidth[1:50], col = "black", barcol = "blue", lwd = 0.1,
cex = 0.8, ylab = "", main = "Confidence intervals of mu for n = 20",
xlab = "Interval number", ylim = c(0.75, 4.25))
abline(h = mu, col = "red")
###################################################################
# Other sample sizes
n <- 30
set.seed(3478)
#Simulated samples from a beta probability distribution (4*W where W~beta(5,2))
# Students are not responsible for how I simulated data.
set1 <- 4*matrix(data = rbeta(n = 1000*n, shape1 = 5, shape2 = 2), nrow = 1000, ncol = n)
means <- apply(X = set1, MARGIN = 1, FUN = mean)
SDs <- apply(X = set1, MARGIN = 1, FUN = sd)
lower <- means - qt(p = 1-alpha/2, df = n - 1) * SDs/sqrt(n)
upper <- means + qt(p = 1-alpha/2, df = n - 1) * SDs/sqrt(n)
coverage <- mean(lower < mu & upper > mu)
coverage
# First 50
hwidth <- qt(p = 1 - alpha/2, df = n - 1) * SDs/sqrt(n)
plotCI(x = means[1:50], uiw = hwidth[1:50], col = "black", barcol = "blue", lwd = 0.1,
cex = 0.8, ylab = "", main = "Confidence intervals of mu for n = 30",
xlab = "Interval number")
abline(h = mu, col = "red")
n <- 10
set.seed(1992)
#Simulated samples from a beta probability distribution (4*W where W~beta(5,2))
# Students are not responsible for how I simulated data.
set1 <- 4*matrix(data = rbeta(n = 1000*n, shape1 = 5, shape2 = 2), nrow = 1000, ncol = n)
means <- apply(X = set1, MARGIN = 1, FUN = mean)
SDs <- apply(X = set1, MARGIN = 1, FUN = sd)
lower <- means - qt(p = 1-alpha/2, df = n - 1) * SDs/sqrt(n)
upper <- means + qt(p = 1-alpha/2, df = n - 1) * SDs/sqrt(n)
coverage <- mean(lower < mu & upper > mu)
coverage
# First 50
hwidth <- qt(p = 1 - alpha/2, df = n - 1) * SDs/sqrt(n)
plotCI(x = means[1:50], uiw = hwidth[1:50], col = "black", barcol = "blue", lwd = 0.1,
cex = 0.8, ylab = "", main = "Confidence intervals of mu for n = 10",
xlab = "Interval number")
abline(h = mu, col = "red")
n <- 5
set.seed(1349)
#Simulated samples from a beta probability distribution (4*W where W~beta(5,2))
# Students are not responsible for how I simulated data.
set1 <- 4*matrix(data = rbeta(n = 1000*n, shape1 = 5, shape2 = 2), nrow = 1000, ncol = n)
means <- apply(X = set1, MARGIN = 1, FUN = mean)
SDs <- apply(X = set1, MARGIN = 1, FUN = sd)
lower <- means - qt(p = 1-alpha/2, df = n - 1) * SDs/sqrt(n)
upper <- means + qt(p = 1-alpha/2, df = n - 1) * SDs/sqrt(n)
coverage <- mean(lower < mu & upper > mu)
coverage
# First 50
hwidth <- qt(p = 1 - alpha/2, df = n - 1) * SDs/sqrt(n)
plotCI(x = means[1:50], uiw = hwidth[1:50], col = "black", barcol = "blue", lwd = 0.1,
cex = 0.8, ylab = "", main = "Confidence intervals of mu for n = 5",
xlab = "Interval number")
abline(h = mu, col = "red")
n <- 2
set.seed(5365)
#Simulated samples from a beta probability distribution (4*W where W~beta(5,2))
# Students are not responsible for how I simulated data.
set1 <- 4*matrix(data = rbeta(n = 1000*n, shape1 = 5, shape2 = 2), nrow = 1000, ncol = n)
means <- apply(X = set1, MARGIN = 1, FUN = mean)
SDs <- apply(X = set1, MARGIN = 1, FUN = sd)
lower <- means - qt(p = 1-alpha/2, df = n - 1) * SDs/sqrt(n)
upper <- means + qt(p = 1-alpha/2, df = n - 1) * SDs/sqrt(n)
coverage <- mean(lower < mu & upper > mu)
coverage
# First 50
hwidth <- qt(p = 1 - alpha/2, df = n - 1) * SDs/sqrt(n)
plotCI(x = means[1:50], uiw = hwidth[1:50], col = "black", barcol = "blue", lwd = 0.1,
cex = 0.8, ylab = "", main = "Confidence intervals of mu for n = 2",
xlab = "Interval number")
abline(h = mu, col = "red")
#