#AC data

################################################################################
# Initial look at data

  y <- c(3,5,7,18,43,85,91,98,100,130,230,487)
  t <- mean(y)
  t

  #EDF
  par(pty = "s", mfrow = c(1,2), lend = "square")
  plot.ecdf(x = y, verticals = TRUE, do.p = FALSE, main = "EDF for AC failure times", lwd = 2, xlim = c(0,600),
            panel.first = grid(), ylab = expression(hat(F)), xlab = "y")
  #Notice the "rate" parameter is 1/scale parameter - I am using the MLE (sample mean) in it
  curve(expr = pexp(q = x, rate = 1/t), xlim = c(0, 600), col = "red", add = TRUE)

  #QQ-Plot
  exp.quant <- qexp(p = seq(from = 1/(length(y)+1), to = 1-1/(length(y)+1), by = 1/(length(y)+1)), rate = 1/t)
  plot(y = sort(y), x = exp.quant, main = "QQ-Plot for AC failure times", ylab = "y", xlab = "Exp. quantiles",
       panel.first = grid(), ylim = c(0,600))
  data.frame(exp.quant, y)
  abline(a = 0, b = 1, col = "red")


################################################################################
# Method #1: Take resamples and calculate interval

  #One resample
  set.seed(8912)
  y.star <- sample(x = y, replace = TRUE)
  y.star
  table(y.star)
  mean(y.star) # t*

  #Another way to take a resample
  set.seed(8912)
  index.star <- sample(x = 1:12, replace = TRUE)
  y[index.star]


  #Large number of resamples and calculate t* for each
  R <- 4999
  t.star <- numeric(R)
  set.seed(8912)
  for (r in 1:R) {
    y.star <- sample(x = y, replace = TRUE)
    t.star[r] <- mean(y.star)
  }

  #EDF
  par(pty = "s", mfrow = c(1,2), lend = "square")
  #par(mar = c(5, 5, 4, 2) + 0.1)
  plot.ecdf(x = t.star - t, verticals = TRUE, do.p = FALSE, main = "EDF for t* - t", lwd = 2,
            panel.first = grid(), ylab = "EDF", xlab = "t* - t")
  hist(x = t.star - t, xlab = "t* - t", main = "Histogram for t* - t")

  alpha <- 0.025
  order.val <- c((R+1)*alpha, (R+1)*(1-alpha))
  sort(t.star)[order.val]
  quantile(x = t.star, type = 1, probs = c(alpha, 1-alpha)) #type = 1 is inverse of EDF

  #(1-2*alpha)100% basic bootstrap interval
  low <- 2*t - sort(t.star)[(R+1)*(1-alpha)]
  up <- 2*t - sort(t.star)[(R+1)*alpha]
  data.frame(low, up)


################################################################################
# Method #2: Take resamples and calculate interval

  library(boot)

  #Function for the statistic
  #  First element is the original or resampled data.
  #  Second element represents the indices of the data. For example, the indices will be 1:length(y) for the observed
  #    data.
  calc.t <- function(data, i) {
     d <- data[i]
     mean(d)
  }

  #Try it
  calc.t(data = y, i = 1:length(y))

  #Try it again
  set.seed(8912)
  calc.t(data = y, i = sample(x = 1:length(y), size = 12, replace = TRUE))

  #Do bootstrap
  set.seed(8912)
  boot.res <- boot(data = y, statistic = calc.t, R = 4999, sim = "ordinary")
  boot.res

  names(boot.res)
  boot.res$t0 # t
  head(boot.res$t) # t*
  boot.res$statistic


  class(boot.res)
  methods(class = boot)

  plot(boot.res)

  #List of indices from the resamples
  options(width = 60)
  save.ind <- boot.array(boot.out = boot.res, indices = TRUE)
  head(save.ind)

  #CI - Different resamples were taken than with my for() function
  # implementation, so that's why the interval is different than earlier.
  boot.ci(boot.out = boot.res, conf = 0.95, type = "basic")


################################################################################
# Usual t-distribution based interval

  n <- length(y)
  alpha <- 0.025
  mean(y) + qt(p = c(alpha, 1 - alpha), df = n - 1) * sd(y)/sqrt(n)


################################################################################
# Bias and variance

  #Bias-corrected estimate
  b <- mean(boot.res$t[,1]) - boot.res$t0 # mean(boot.res$t) also works for first part because only one column
  b
  boot.res$t0  - b

  #bootstrap estimated variance
  var(boot.res$t[,1])
  sd(boot.res$t[,1])

  # s/sqrt(n)
  sd(y)/sqrt(length(y))
  #s^2 / n
  var(y)/length(y)

  #jackknife
  l.jack <- empinf(data = y, statistic = calc.t, stype = "i", type = "jack")
  var.linear(l.jack)
  sum(l.jack^2)/length(y)^2

  #regression estimates - Equation 2.46 in Davison and Hinkley (1996)
  l.reg <- empinf(boot.out = boot.res)
  var.linear(l.reg)


################################################################################
# Studentized interval

  ##################
  #Jackknife version
  calc.t2 <- function(data, i) {
    d <- data[i]
    mean(d)
  }

  calc.t <- function(data, i) {
    d <- data[i]
    l.jack <- empinf(data = d, statistic = calc.t2, stype = "i", type = "jack")
    v.jack <- var.linear(L = l.jack)
    c(mean(d), v.jack)
  }

  #Try it
  calc.t(data = y, i = 1:length(y))

  #Do bootstrap
  set.seed(8912)
  boot.res <- boot(data = y, statistic = calc.t, R = 4999, sim = "ordinary")
  boot.res

  head(boot.res$t)
  boot.res$t0

  z.star <- (boot.res$t[,1] - boot.res$t0[1])/sqrt(boot.res$t[,2])
  alpha <- 0.025
  z.quant <- quantile(x = z.star, type = 1, probs = c(alpha, 1-alpha))
  z.quant
  qt(p = c(alpha, 1 - alpha), df = length(y) - 1)

  low <- boot.res$t0[1] - z.quant[2]*sqrt(boot.res$t0[2])
  up <- boot.res$t0[1] - z.quant[1]*sqrt(boot.res$t0[2])
  data.frame(low, up)

  boot.ci(boot.out = boot.res, conf = 0.95, type = "stud")

  #Safer way to use boot.ci()
  boot.ci(boot.out = boot.res, conf = 0.95, type = "stud", var.t0 = boot.res$t0[2],
        var.t = boot.res$t[,2], t0 = boot.res$t0[1], t = boot.res$t[,1])

  #Plot Z* and t-distribution
  par(pty = "s", mfrow = c(1,2), lend = "square")
  #par(mar = c(5, 5, 4, 2) + 0.1)
  plot.ecdf(x = z.star, verticals = TRUE, do.p = FALSE, main = "EDF for z*", lwd = 2,
            panel.first = grid(), ylab = "EDF", xlab = "z*")
  curve(expr = pt(q = x, df = length(y) - 1), xlim = c(min(z.star), max(z.star)), col = "red", add = TRUE)

  hist(x = z.star, xlab = "z*", main = "Histogram for z*", freq = FALSE, ylim = c(0, 0.3+0.1))
  curve(expr = dt(x = x, df = length(y) - 1), xlim = c(min(z.star), max(z.star)), col = "red", add = TRUE)


  ####################
  #Double boot version

  calc.t2<-function(data, i) {
    d2 <- data[i]
    mean(d2)
  }

  calc.t<-function(data, i, M) {
    d <- data[i]
    boot.res.M <- boot(data = d, statistic = calc.t2, R = M, sim = "ordinary")
    v.boot <- var(boot.res.M$t)
    c(mean(d), v.boot)
  }

  #Testing
  calc.t(data = y, i = 1:length(y), M = 999)

  #Find start time
  start.time <- proc.time()

  set.seed(8912)
  boot.res2 <- boot(data = y, statistic = calc.t, R = 4999, sim="ordinary", M = 999)

  end.time<-proc.time()
  save.time<-end.time-start.time
  cat("\n Number of minutes running:", save.time[3]/60, "\n \n")

  boot.res2
  z.star2 <- (boot.res2$t[,1] - boot.res2$t0[1])/sqrt(boot.res2$t[,2])
  z.quant2 <- quantile(x = z.star2, type = 1, probs = c(alpha, 1-alpha))
  z.quant2

  boot.ci(boot.out = boot.res2, conf = 0.95, type = "stud")

  #Examine the standard deviations produced by the double boot
  par(mfrow = c(1,1))
  summary(sqrt(boot.res$t[,2]))
  hist(x = sqrt(boot.res$t[,2]), main = "S.D. produced by double bootstrap", xlab = "sqrt(v*)")
  abline(h=0)


################################################################################
# Percentile interval

  boot.ci(boot.out = boot.res, conf = 0.95, type = c("basic", "perc"))

  alpha <- 0.025
  quantile(x = t.star, type = 1, probs = c(alpha, 1-alpha)) #type = 1 is inverse of EDF

  order.val <- c((R+1)*alpha, (R+1)*(1-alpha))
  sort(t.star)[order.val]


################################################################################
# BCa interval

  #BCa
  boot.ci(boot.out = boot.res, conf = 0.95, type = "bca")

  #Empirical influence values estimated by the jackknife
  l.jack <- empinf(data = y, statistic = calc.t, stype = "i", type = "jack")
  # l.reg <- empinf(boot.out = boot.res)  #Boot estimated
  # l.j <- y - mean(y)  #Actual values

  #Acceleration
  a.jack < -1/6 * sum(l.jack^3)/sum(l.jack^2)^(3/2)
  # a.reg<-1/6 * sum(l.reg^3)/sum(l.reg^2)^(3/2)
  # a<-1/6 * sum(l.j^3)/sum(l.j^2)^(3/2)

  #Bias correction
  sum(boot.res$t[,1]<=boot.res$t0[1])/(R+1)
  sum(boot.res$t[,1]<=boot.res$t0[1])/R
  w <- qnorm(p = sum(boot.res$t[,1]<=boot.res$t0[1])/(R+1)) #This is how Davison and Hinkley (1997) does the calculation
  w <- qnorm(p = sum(boot.res$t[,1]<=boot.res$t0[1])/R) #This is how bca.ci() does the calculation
  w

  #BCa calculations Davison and Hinkley (1997, Table 5.4)
  alpha <- c(0.025, 0.975)
  z.tilde <- w + qnorm(p = alpha)
  alpha.tilde <- pnorm(q = w + z.tilde/(1-a.jack*z.tilde))
  d <- (R+1)*alpha.tilde
  d

  #Note that r will need to be an integer or the quantile function can be used as below
  #quantile(x = boot.res$t[,1], probs = alpha.tilde, type = 1)
  limit.ceil <- sort(boot.res$t[,1])[ceiling(d)]
  limit.floor <- sort(boot.res$t[,1])[floor(d)]
  #Alternatively, interpolation could be used as outlined in Davison and Hinkley (1997, p. 195)

  data.frame(alpha, z.tilde, alpha.tilde, d,  limit.ceil, limit.floor)


################################################################################
# Additional aspects of boot.ci()

  #All intervals
  boot.ci(boot.out = boot.res2, conf = 0.95, type = "all")

  lower <- t - b - qnorm(0.975, mean = 0, sd = 1)*sd(boot.res$t[,1])
  upper <- t - b - qnorm(0.025, mean = 0, sd = 1)*sd(boot.res$t[,1])
  data.frame(name = "Normal interval calc in boot.ci", lower, upper)


  #Basic and studentized using log transformation
  hdot.func <- function(u) {
    1/u
  }
  save.tran.ci <- boot.ci(boot.out = boot.res2, conf = 0.95, type = c("basic", "stud"), h = log, hinv = exp, hdot = hdot.func)
  save.tran.ci


#