# Examine models for placekicking data more closely

options(width = 70)

placekick <- read.csv(file = "C:\\data\\placekick.csv")
head(placekick)
tail(placekick)


#####################################################################
# Aggregate data according to variables we plan to put into the model

  # Find the observed proportion of successes at each distance
  w <- aggregate(x = good ~ distance, data = placekick, FUN = sum)
  n <- aggregate(x = good ~ distance, data = placekick, FUN = length)
  w.n <- data.frame(distance = w$distance, success = w$good, trials = n$good, prop = round(w$good/n$good,4))
  head(w.n)
  tail(w.n)

  # Fit model using aggregated data
  mod.fit.bin <- glm(formula = success/trials ~ distance, weights = trials, family = binomial(link = logit), data = w.n)
  summary(mod.fit.bin)

  # Show how to find the hat matrix using matrix algebra
  X <- model.matrix(mod.fit.bin)
  # mod.fit.bin$weights is a shortcut for n*hat(pi)*(1-hat(pi))
  # Compare: w.n$trials*mod.fit.bin$fitted.values*(1-mod.fit.bin$fitted.values)
  V <- diag(mod.fit.bin$weights)
  H <- sqrt(V)%*%X%*%solve(t(X)%*%V%*%X)%*%t(X)%*%sqrt(V)
  head(diag(H))
  head(hatvalues(mod.fit.bin))  # Matches



#####################################################################
# Individual diagnostic measures

  pi.hat <- predict(mod.fit.bin, type = "response")
  s.res <- rstandard(mod.fit.bin, type = "pearson")
  # If want Pearson residuals
  # p.res <- residuals(mod.fit.bin, type = "pearson")
  prob.smaller <- pbinom(q = w.n$success, size = w.n$trials, prob = pi.hat, lower.tail = TRUE)
  prob.higher <- pbinom(q = w.n$success, size = w.n$trials, prob = pi.hat, lower.tail = FALSE) +  dbinom(x = w.n$success, size = w.n$trials, prob = pi.hat)
  # Mininum of P(W <= w_m) and P(W >= w_m)
  tail.prob <- apply(X = cbind(prob.smaller, prob.higher), MARGIN = 1, FUN = min)

  lin.pred <- mod.fit.bin$linear.predictors
  w.n <- data.frame(w.n, pi.hat, s.res, tail.prob, lin.pred)
  round(head(w.n), digits = 3)

  round(w.n[abs(w.n$s.res) > 2, ], digits = 3)
  round(w.n[w.n$tail.prob < 0.025, ], digits = 3)


####################################################################
# Residual plots

  dev.new(height = 7, width = 13, pointsize=20)
  par(mfrow = c(1,3))

  # Standardized Pearson residual vs x plot
  plot(x = w.n$distance, y = w.n$s.res, xlab = "Distance", ylab = "Standardized Pearson residuals",  main =
    "Standardized residuals vs. \n X")
  abline(h = c(3, 2, 0, -2, -3), lty = 3, col = "blue")

  # Add loess model to help visualize trend
  smooth.stand <- loess(formula = s.res ~ distance, data = w.n, weights = trials)
  # Make sure that loess estimates are ordered by "x" for the plots, so that they are displayed properly
  order.dist <- order(w.n$distance)
  lines(x = w.n$distance[order.dist], y = predict(smooth.stand)[order.dist], lty = 3, col =
    "red", lwd = 3)


  # Standardized Pearson residual vs pi plot
  plot(x = w.n$pi.hat, y = w.n$s.res, xlab = "Estimated probability of success", ylab = "Standardized Pearson residuals",
    main = "Standardized residuals vs. \n pi.hat")
  abline(h = c(3, 2, 0, -2, -3), lty = "dotted", col = "blue")
  smooth.stand <- loess(formula = s.res ~ pi.hat, data = w.n, weights = trials)
  # Make sure that loess estimates are ordered by "X" for the plots, so that they are displayed properly
  order.pi.hat <- order(w.n$pi.hat)
  lines(x = w.n$pi.hat[order.pi.hat], y = predict(smooth.stand)[order.pi.hat], lty = 3, col = "red", lwd = 3)


  # Standardized Pearson residual vs linear predictor plot
  plot(x = w.n$lin.pred, y = w.n$s.res, xlab = "Linear predictor", ylab = "Standardized Pearson residuals",
    main = "Standardized residuals vs. \n Linear predictor")
  abline(h = c(3, 2, 0, -2, -3), lty = "dotted", col = "blue")
  smooth.stand <- loess(formula = s.res ~ lin.pred, data = w.n, weights = trials)
  # Make sure that loess estimates are ordered by "X" for the plots, so that they are displayed properly
  order.lin.pred <- order(w.n$lin.pred)
  lines(x = w.n$lin.pred[order.lin.pred], y = predict(smooth.stand)[order.lin.pred], lty = 3, col = "red", lwd = 3)
  # dev.off()  # Create plot for book


################################################################################
#  Use Examine.logistic.reg.R

  source(file = "C:\\Rprograms\\Examine.logistic.reg.R")
  save.info1 <- examine.logistic.reg(mod.fit.obj = mod.fit.bin, identify.points = TRUE)
  names(save.info1)
  data.frame(w.n[,-4], prop = round(w.n[,4],2), pi.hat = round(mod.fit.bin$fitted.values,2),
    std.Pe = round(save.info1$stand.resid,2), tail.p = round(save.info1$tail.prob,2),
    dXsq = round(save.info1$deltaXsq,2), cook = round(save.info1$cookd,2))

  save.info2 <- examine.logistic.reg(mod.fit.obj = mod.fit.bin, identify.points = TRUE,
    scale.n = sqrt, scale.cookd = sqrt)

  one.fourth.root <- function(x) {
    x^0.25
  }
  one.fourth.root(16)  # Example

  save.info3 <- examine.logistic.reg(mod.fit.obj = mod.fit.bin, identify.points = TRUE,
    scale.n = one.fourth.root, scale.cookd = one.fourth.root)


  # Now include distance and PAT
  w2 <- aggregate(x = good ~ distance + PAT, data = placekick, FUN = sum)
  n2 <- aggregate(x = good ~ distance + PAT, data = placekick, FUN = length)
  w.n2 <- data.frame(distance = w2$distance, PAT = w2$PAT, success = w2$good, trials = n2$good,
    prop = round(w2$good/n2$good,4))
  head(w.n2)
  tail(w.n2)

  mod.fit.bin2 <- glm(formula = success/trials ~ distance + PAT, weights = trials,
    family = binomial(link = logit), data = w.n2)
  summary(mod.fit.bin2)

  # Note that this function will not work because the 44th EVP is the only one with
  #  PAT = 1. The standardized residual value is given as "NaN" due to the perfect fit
  save.info.dist.PAT <- examine.logistic.reg(mod.fit.obj = mod.fit.bin2, identify.points = TRUE)





























  #