###################################################################### # NAME: Chris Bilder # # DATE: 7-26-06 # # UPDATE: # # Purpose: Automate part of the simple linear regression diagnostic # # procedures into one function. # # # # NOTES: Based on examine.mod.simple.R used in Chapter 3. I have # # generalized it to allow for more than one predictor variable# # This function needs to be run just once before it is used. # # One needs to call the function with model fit object only. # # For example, # # # # examine.mod.simple(mod.fit.obj = mod.fit) # # # # The seed for jittering can be changed with the # # seed option. The Levene and Breusch-Pagan tests can be # # performed if the var.test option is change to TRUE. Note # # that the car and lmtest packages need to be loaded for these# # tests. The Y optional values can be used if a # # transformation of Y was used for the model. For # # example, if log(Y) was used, then fit the model with log(Y) # # and include the Y = log(original Y variable) when invoking # # this function. Unlike examine.mod.simple.R, there is not a # # similar X option. The boxcox.find = TRUE option produces # # an estimate for lambda. # # # ###################################################################### examine.mod.multiple<-function(mod.fit.obj, seed = 9180, const.var.test = FALSE, Y = NULL, boxcox.find = FALSE) { n<-length(mod.fit.obj$residuals) pred.var.numb<-length(mod.fit.obj$coefficients)-1 #X<-mod.fit.obj$model[,2:(2+pred.var.numb-1)] X<-model.matrix(mod.fit.obj)[,2:(2+pred.var.numb-1)] #NOTE: This does not containing the first column of 1's in X! if (is.null(Y)) { Y<-mod.fit.obj$model[,1]} ############################################################################## #Graph window set #1 #Open a new plotting window 1x2 win.graph(width = 6, height = 6, pointsize = 10) par(mfrow = c(1,2)) set.seed(seed) boxplot(x = Y, col = "lightblue", main = "Box plot", ylab = "Response variable", xlab = " ") stripchart(x = Y, method = "jitter", vertical = TRUE, pch = 1, main = "Dot plot", ylab = "Response variable") counter<-1 #Need for opening new plot windows below win.graph(width = 6, height = 6, pointsize = 10) par(mfrow = c(2,2)) #X.ij box and dot plots for(j in 1:pred.var.numb) { if (counter/2 > 1) { win.graph(width = 6, height = 6, pointsize = 10) par(mfrow = c(2,2)) counter<-1 } boxplot(x = X[,j], col = "lightblue", main = "Box plot", ylab = paste("Predictor variable", j), xlab = " ") stripchart(x = X[,j], method = "jitter", vertical = TRUE, pch = 1, main = "Dot plot", ylab = paste("Predictor variable", j)) counter<-counter + 1 } #Initial summary statistics summary.data<-summary(data.frame(Y,X)) ############################################################################## #Graph window set #2 #Open a new plotting window 2x2 win.graph(width = 6, height = 6, pointsize = 10) par(mfrow = c(1,2), pty = "s") sum.fit<-summary(mod.fit.obj) semi.stud.resid<-mod.fit.obj$residuals/sum.fit$sigma #e.i vs. Y.hat.i plot(x = mod.fit.obj$fitted.values, y = mod.fit.obj$residuals, xlab = "Estimated mean response", ylab = "Residuals", main = "Residuals vs. estimated mean response", panel.first = grid(col = "gray", lty = "dotted")) abline(h = 0, col = "red") #e.i.star vs. Y.hat.i plot(x = mod.fit.obj$fitted.values, y = semi.stud.resid, xlab = "Estimated mean response", ylab = "Semistud. residuals", main = expression(paste(e[i]^{"*"}, " vs. estimated mean response")), panel.first = grid(col = "gray", lty = "dotted"), ylim = c(min(semi.stud.resid,-3), max(semi.stud.resid,3))) abline(h = 0, col = "red") abline(h = c(-3,3), col = "red", lwd = 2) identify(x = mod.fit.obj$fitted.values, y = semi.stud.resid) ############################################################################## #Graph window set #3 win.graph(width = 6, height = 6, pointsize = 10) par(mfrow = c(2,2), pty = "m") counter<-1 #Need for opening new plot windows below #e.i vs. X.ij for(j in 1:pred.var.numb) { if (counter/4 > 1) { win.graph(width = 6, height = 6, pointsize = 10) par(mfrow = c(2,2)) counter<-1 } plot(x = X[,j], y = mod.fit.obj$residuals, ylab = "Residuals", main = paste("Residuals vs. predictor",j), xlab = paste("Predictor variable",j), panel.first = grid(col = "gray", lty = "dotted")) abline(h = 0, col = "red") counter<-counter + 1 } ############################################################################## #Graph window set #4 #Open a new plotting window 2x2 win.graph(width = 6, height = 6, pointsize = 10) par(mfrow = c(2,2)) #(1,1) - e.i vs. obs. number plot(x = 1:n,, y = mod.fit.obj$residuals, xlab = "Observation number", ylab = "Residuals", type = "o", main = "Residuals vs. observation number", panel.first = grid(col = "gray", lty = "dotted")) abline(h = 0, col = "red") #(1,2) - Histogram of the semi-studentized residuals with normal distribution overlay hist(x = semi.stud.resid, main = "Histogram of semistud. residuals", xlab = "Semistud. residuals", freq = FALSE) curve(expr = dnorm(x, mean = mean(semi.stud.resid), sd = sd(semi.stud.resid)), col = "red", add = TRUE) #(2,1) - QQ-plot done by R qqnorm(y = semi.stud.resid, ylab = "Semistud. residuals", panel.first = grid(col = "gray", lty = "dotted")) qqline(y = semi.stud.resid, col = "red") #Save all results so far save.res<-list(sum.data = summary.data, semi.stud.resid = round(semi.stud.resid,2)) ############################################################################## #Levene and BP tests - Examine the residuals for normality if (const.var.test) { library(car) #The Levene's Test function is in the package for Fox's book save.levene<-matrix(NA, nrow = pred.var.numb, ncol = 3) for(j in 1:pred.var.numb) { #This code will not work for indicator variables so this is a quick fix to skip over them. if(length(table(X[,j])) != 2) { group<-ifelse(X[,j] < median(X[,j]), 1, 2) save.levene.test<-leveneTest(y = mod.fit.obj$residuals, group = factor(group)) save.levene[j,]<-c(j, round(save.levene.test$"F value"[1],4), save.levene.test$"Pr(>F)"[1]) } } library(lmtest) #Location of BP test function save.bp<-bptest(mod.fit.obj, studentize = FALSE) #KNN Version of the test save.res$levene<-save.levene save.res$bp<-save.bp } ############################################################################## #Box-cox transformation if (boxcox.find) { library(MASS) #Function is in the MASS library save.bc<-boxcox(object = mod.fit.obj, lambda = seq(from = -2, to = 2, by = 0.01)) title(main = "Box-Cox transformation plot") lambda.hat<-save.bc$x[save.bc$y == max(save.bc$y)] save.res$lambda.hat<-lambda.hat } ############################################################################## #Return results save.res }