############################################################################ # NAME: Chris Bilder # # DATE: 7-19-06 # # PURPOSE: Work with the power load data set # # # # NOTES: 1) # # # ############################################################################ #Read in the data power.load<-read.table(file = "C:\\chris\\UNL\\STAT870\\Chapter3\\power.txt", header=TRUE, sep = "") head(power.load) tail(power.load) par(mfrow = c(1,1)) plot(x = power.load$temp, y = power.load$load, xlab = "Temperature", ylab = "Power load", main = "Power load vs. Temperature", col = "black", pch = 1, panel.first = grid(col = "gray", lty = "dotted")) ############################################################################# # First order model mod.fit<-lm(formula = load ~ temp, data = power.load) sum.fit<-summary(mod.fit) sum.fit #e.i vs. X.i plot(x = power.load$temp, y = mod.fit$residuals, xlab = "Temperature", ylab = "Residuals", main = "Residuals vs. Temperature", col = "black", pch = 1, panel.first = grid(col = "gray", lty = "dotted")) abline(h = 0, col = "red") #This function can be useful when you want to identify particular observations on the plot #identify(x = power.load$temp, y = mod.fit$residuals) #labels option can be used as well #Scatter plot with sample model plot(x = power.load$temp, y = power.load$load, xlab = "Temperature", ylab = "Power load", main = "Power load vs. Temperature", col = "black", pch = 1, panel.first = grid(col = "gray", lty = "dotted")) curve(expr = mod.fit$coefficients[1] + mod.fit$coefficients[2]*x, col = "red", lty = "solid", lwd = 1, add = TRUE, xlim = c(min(power.load$temp), max(power.load$temp))) #Additional plots automatically created - plot.lm() also par(mfrow=c(2,2)) plot(mod.fit) ############################################################################# # Second order model mod.fit2<-lm(formula = load ~ temp + I(temp^2), data = power.load) sum.fit2<-summary(mod.fit2) sum.fit2 #e.i vs. X.i par(mfrow=c(1,1)) plot(x = power.load$temp, y = mod.fit2$residuals, xlab = "Temperature", ylab = "Residuals", main = "Residuals vs. Temperature", col = "black", pch = 1, panel.first = grid(col = "gray", lty = "dotted")) abline(h = 0, col = "red") #Scatter plot with sample model plot(x = power.load$temp, y = power.load$load, xlab = "Temperature", ylab = "Power load", main = "Power load vs. Temperature", col = "black", pch = 1, panel.first = grid(col = "gray", lty = "dotted")) curve(expr = mod.fit2$coefficients[1] + mod.fit2$coefficients[2]*x + mod.fit2$coefficients[3]*x^2, col = "red", lty = "solid", lwd = 1, add = TRUE, xlim = c(min(power.load$temp), max(power.load$temp))) #Another way to get the regression model on the plot curve(expr = predict(object = mod.fit2, newdata = data.frame(temp = x)), col = "blue", lty = "solid", lwd = 1, add = TRUE, xlim = c(min(power.load$temp), max(power.load$temp))) ############################################################################# # Examine semi-studentized residuals semi.stud.resid<-mod.fit$residuals/sum.fit$sigma #e.i.star vs. Yhat.i par(mfrow = c(1,1)) plot(x = mod.fit$fitted.values, y = semi.stud.resid, xlab = expression(hat(Y)), ylab = "Semi-studentized residuals", main = expression(paste("Semi-studentized residuals vs. ", hat(Y))), panel.first = grid(col = "gray", lty = "dotted"), ylim = c(-3,3)) abline(h = 0, col = "red") abline(h = c(-3,3), col = "red", lwd = 2) #e.i.star vs. Yhat.i with different y-axis label par(mfrow = c(1,1), pty = "s") #pty = "s" makes the plot Square plot(x = mod.fit$fitted.values, y = semi.stud.resid, xlab = expression(hat(Y)), ylab = expression(paste(e[i]^{"*"})), main = expression(paste("Semi-studentized residuals vs.", hat(Y))), panel.first = grid(col = "gray", lty = "dotted"), ylim = c(-3,3)) abline(h = 0, col = "red") abline(h = c(-3,3), col = "red", lwd = 2) ############################################################################# # Examine normality par(pty = "m") #pty = "m" makes the plot Maximize the plot window (this is the default) ############## #HISTOGRAM #Regular histogram of the semi-studentized residuals hist(x = semi.stud.resid, main = "Histogram of semistudentized residuals", xlab = "Semistudentized residuals") #Histogram of the semi-studentized residuals with normal distribution overlay hist(x = semi.stud.resid, main = "Histogram of semistudentized residuals", xlab = "Semistudentized residuals", freq = FALSE) curve(expr = dnorm(x, mean = mean(semi.stud.resid), sd = sd(semi.stud.resid)), col = "red", add = TRUE) #Extra code to show the heights on the above histogram save.hist<-hist(x = semi.stud.resid, plot = FALSE) n<-length(semi.stud.resid) save.hist$counts/(n*(save.hist$breaks[2]-save.hist$breaks[1])) ############# #QQ-PLOT #QQ-plot done by R qqnorm(y = semi.stud.resid) qqline(y = semi.stud.resid, col = "red") #What are the qqnorm() and qqline() functions plotting exactly? save.qqnorm<-qqnorm(y = semi.stud.resid, plot = FALSE) n<-length(semi.stud.resid) save.it<-data.frame(y.axis = round(sort(save.qqnorm$y),2), semi.stud.res = round(sort(semi.stud.resid),2), prob1 = ppoints(semi.stud.resid), prob2 = (1:n - 1/2)/n, x.axis = round(sort(save.qqnorm$x),2), quant.norm = round(qnorm(p = (1:n - 1/2)/n, mean = 0, sd = 1),2)) save.it #Another QQ-plot qqnorm(y = semi.stud.resid) qqline(y = semi.stud.resid) points(x = qnorm(p = c(0.25, 0.75), mean = 0, sd = 1), y = quantile(x = semi.stud.resid, probs = c(0.25, 0.75)), pch = 2, lwd = 2, col = "darkgreen") #Line is drawn through 25th an 75th percentiles abline(a = 0, b = sd(semi.stud.resid), col = "blue") #Using Z = (X-mu)/sigma implies X = mu + sigma*Z line #My version of a QQ-plot win.graph(width = 6, height = 6, pointsize = 12) #Open a new graphics window n<-length(semi.stud.resid) norm.quant<-qnorm(p = seq(from = 1/(n+1), to = 1-1/(n+1), by = 1/(n+1)), mean = mean(semi.stud.resid), sd = sd(semi.stud.resid)) plot(y = sort(semi.stud.resid), x = norm.quant, main = "QQ-Plot for semi-studentized residuals", ylab = "Semi-studentized residuals", xlab = "Normal quantiles", panel.first = grid(nx = NULL, ny = NULL, col="gray", lty="dotted")) abline(a = 0, b = 1, col = "red") #What is being plotted above? probs<-round(seq(from = 1/(length(semi.stud.resid)+1), to = 1-1/(length(semi.stud.resid)+1), by = 1/(length(semi.stud.resid)+1)),4) data.frame(sort.semi.stud.res = round(sort(semi.stud.resid),2), norm.quant = round(norm.quant,2), probs) #Empirical cumulative distribution function (ECDF) plot plot.ecdf(semi.stud.resid, verticals = TRUE, do.p = FALSE, main = "ECDF plot of semi-studentized residuals", lwd = 2, panel.first = grid(nx = NULL, ny = NULL, col="gray", lty="dotted"), ylab = "ECDF", xlab = "Semi-studentized residuals") curve(expr = pnorm(x, mean = mean(semi.stud.resid), sd = sd(semi.stud.resid)), col = "red", add = TRUE) #