############################################################################ # NAME: Chris Bilder # # DATE: 3-10-16 # # PURPOSE: Focus on graphics with the gpa data set # # # # NOTES: # # # ############################################################################ ############################################################################ # Data management # Read in the data gpa <- read.table(file = "C:\\data\\GPA.txt", header = TRUE, sep = "") # Print part of the data head(gpa) ######################################################################## # Plots of the model and intervals # Estimate the simple linear regression model and save the results in mod.fit mod.fit <- lm(formula = College.GPA ~ HS.GPA, data = gpa) summary(object = mod.fit) x11(width = 6, height = 6, pointsize = 10) plot(x = gpa$HS.GPA, y = gpa$College.GPA, xlab = "HS GPA", ylab = "College GPA", main = "College GPA vs. HS GPA", xlim = c(1.5,4.5), ylim = c(0.5,4.5), col = "black", lwd = 1, panel.first = grid()) curve(expr = predict(object = mod.fit, newdata = data.frame(HS.GPA = x)), col= "red", add = TRUE, lwd = 2, xlim = c(min(gpa$HS.GPA),max(gpa$HS.GPA))) curve(expr = predict(object = mod.fit, newdata = data.frame(HS.GPA = x), interval = "confidence", level = 0.95)[,2], col= "blue", add = TRUE, lwd = 2, xlim = c(min(gpa$HS.GPA),max(gpa$HS.GPA)), lty = "dashed") curve(expr = predict(object = mod.fit, newdata = data.frame(HS.GPA = x), interval = "confidence", level = 0.95)[,3], col= "blue", add = TRUE, lwd = 2, xlim = c(min(gpa$HS.GPA),max(gpa$HS.GPA)), lty = "dashed") curve(expr = predict(object = mod.fit, newdata = data.frame(HS.GPA = x), interval = "prediction", level = 0.95)[,2], col= "darkgreen", add = TRUE, lwd = 2, xlim = c(min(gpa$HS.GPA),max(gpa$HS.GPA)), lty = "dotdash") curve(expr = predict(object = mod.fit, newdata = data.frame(HS.GPA = x), interval = "prediction", level = 0.95)[,3], col= "darkgreen", add = TRUE, lwd = 2, xlim = c(min(gpa$HS.GPA),max(gpa$HS.GPA)), lty = "dotdash") legend(x = 2, y = 1.25, legend = c("Estimated response", "95% confidence intervals", "95% prediction intervals"), bty = "n", col = c("black", "blue", "darkgreen"), lty = c("solid", "dashed", "dotdash")) # Can also allow for interactive placement of the legend legend(locator(1), legend = c("Estimated response", "95% confidence intervals", "95% prediction intervals"), bty = "n", col = c("black", "blue", "darkgreen"), lty = c("solid", "dashed", "dotdash")) # Example of longer line lengths in the legend legend(locator(1), legend = c("Estimated response", "95% confidence intervals", "95% prediction intervals"), bty = "n", col = c("black", "blue", "darkgreen"), lty = c("solid", "dashed", "dotdash"), seg.len = 4) # Identify observations text(x = gpa$HS.GPA, y = gpa$College.GPA+0.1, labels = 1:nrow(gpa)) # Identify observations interactively identify(x = gpa$HS.GPA, y = gpa$College.GPA, labels = 1:nrow(gpa)) # Put legend outside of main plot area par(mar = c(8, 4, 4, 2)) # Put larger margin at bottom; default is mar = c(5, 4, 4, 2) + 0.1 inches plot(x = gpa$HS.GPA, y = gpa$College.GPA, xlab = "HS GPA", ylab = "College GPA", main = "College GPA vs. HS GPA", xlim = c(1.5,4.5), ylim = c(0.5,4.5), col = "black", lwd = 1, panel.first = grid()) curve(expr = predict(object = mod.fit, newdata = data.frame(HS.GPA = x)), col= "red", add = TRUE, lwd = 2, xlim = c(min(gpa$HS.GPA),max(gpa$HS.GPA))) curve(expr = predict(object = mod.fit, newdata = data.frame(HS.GPA = x), interval = "confidence", level = 0.95)[,2], col= "blue", add = TRUE, lwd = 2, xlim = c(min(gpa$HS.GPA),max(gpa$HS.GPA)), lty = "dashed") curve(expr = predict(object = mod.fit, newdata = data.frame(HS.GPA = x), interval = "confidence", level = 0.95)[,3], col= "blue", add = TRUE, lwd = 2, xlim = c(min(gpa$HS.GPA),max(gpa$HS.GPA)), lty = "dashed") curve(expr = predict(object = mod.fit, newdata = data.frame(HS.GPA = x), interval = "prediction", level = 0.95)[,2], col= "darkgreen", add = TRUE, lwd = 2, xlim = c(min(gpa$HS.GPA),max(gpa$HS.GPA)), lty = "dotdash") curve(expr = predict(object = mod.fit, newdata = data.frame(HS.GPA = x), interval = "prediction", level = 0.95)[,3], col= "darkgreen", add = TRUE, lwd = 2, xlim = c(min(gpa$HS.GPA),max(gpa$HS.GPA)), lty = "dotdash") par(mar = c(1, 4, 4, 2)) # Change bottom margin again so that I can put the legend there par(xpd = TRUE) # Allows plotting to be outside of main plot area - see help(par) legend(x = 1, y = -0.5, legend = c("Estimated response", "95% confidence intervals", "95% prediction intervals"), bty = "n", col = c("black", "blue", "darkgreen"), lty = c("solid", "dashed", "dotdash"), horiz = TRUE ) par(xpd = FALSE) # For more examples, see http://stackoverflow.com/questions/3932038/plot-a-legend-outside-of-the-plotting-area-in-base-graphics # Show how to use abline() and mtext() plot(x = gpa$HS.GPA, y = gpa$College.GPA, xlab = "HS GPA", ylab = "College GPA", main = "College GPA vs. HS GPA", xlim = c(1.5,4.5), ylim = c(0.5,4.5), col = "black", lwd = 1, panel.first = grid()) curve(expr = predict(object = mod.fit, newdata = data.frame(HS.GPA = x)), col= "red", add = TRUE, lwd = 2, xlim = c(min(gpa$HS.GPA),max(gpa$HS.GPA))) abline(h = 2.75, lty = "dashed", col = "blue", lwd = 2) mtext(text = "2.75 ", side = 2, at = 2.75, las = 1, adj = 1) # Show how to change axis tick mark locations and make plot square par(pty = "s") plot(x = gpa$HS.GPA, y = gpa$College.GPA, xlab = "HS GPA", ylab = "College GPA", main = "College GPA vs. HS GPA", xlim = c(1.5,4.5), ylim = c(1.5,4.5), col = "black", lwd = 1, panel.first = grid(), yaxt = "n") axis(side = 2, at = seq(from = 1.5, to = 4.5, by = 0.5)) curve(expr = predict(object = mod.fit, newdata = data.frame(HS.GPA = x)), col= "red", add = TRUE, lwd = 2, xlim = c(min(gpa$HS.GPA), max(gpa$HS.GPA))) par(pty = "m") # Add minor tick marks axis(side = 2, at = seq(from = 1.0, to = 5.0, by = 0.1), tck = -0.01, labels = FALSE) axis(side = 1, at = seq(from = 1.0, to = 5.0, by = 0.1), tck = -0.01, labels = FALSE) # Mathematical expressions and Greek letters plot(x = gpa$HS.GPA, y = gpa$College.GPA, xlab = "HS GPA", ylab = "College GPA", main = expression(hat(Y) == hat(beta)[0] + hat(beta)[1]*x), xlim = c(1.5,4.5), ylim = c(0.5,4.5), col = "black", lwd = 1, panel.first = grid()) curve(expr = predict(object = mod.fit, newdata = data.frame(HS.GPA = x)), col= "red", add = TRUE, lwd = 2, xlim = c(min(gpa$HS.GPA),max(gpa$HS.GPA))) text(x = 1.5, y = 1.5, label = expression(paste("Example of combining text and math expressions: ", hat(Y) == hat(beta)[0] + hat(beta)[1]*x)), pos = 4) text(x = 1.5, y = 1.25, label = substitute(paste("Example of including numerical values directly from mod.fit: ", hat(Y) == betahat0 + betahat1*x), list(betahat0 = round(mod.fit$coefficients[1],2), betahat1 = round(mod.fit$coefficients[2],2))), pos = 4) # Run this demo to see the various options for mathematical expressions and Greek letters demo(plotmath) # Show how to change x-axis range from default 4% expansion par(xaxs = "i") plot(x = gpa$HS.GPA, y = gpa$College.GPA, xlab = "HS GPA", ylab = "College GPA", main = "College GPA vs. HS GPA", xlim = c(1.5,4.5), ylim = c(1.5,4.5), col = "black", lwd = 1, panel.first = grid()) par(xaxs = "r") # Simple of example using \n plot(x = gpa$HS.GPA, y = gpa$College.GPA, xlab = "HS GPA", ylab = "College GPA", main = "College GPA vs. \n HS GPA", xlim = c(1.5,4.5), ylim = c(0.5,4.5), col = "black", lwd = 1, panel.first = grid()) # Font name change windowsFonts(A = windowsFont("Arial")) #A is a name I gave it par(family = "A") plot(x = gpa$HS.GPA, y = gpa$College.GPA, xlab = "HS GPA", ylab = "College GPA", main = "College GPA vs. \n HS GPA", xlim = c(1.5,4.5), ylim = c(0.5,4.5), col = "black", lwd = 1, panel.first = grid()) par(family = "sans") # Default ########################################################################## # Useful single variable plots par(mfrow = c(2,1)) # hist(x = gpa$HS.GPA, xlab = "HS GPA", main = "HS GPA") hist(x = gpa$HS.GPA, xlab = "HS GPA", main = "HS GPA", breaks = c(0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5)) hist(x = gpa$College.GPA, xlab = "College GPA", main = "College GPA", breaks = seq(from = 0, to = 4.5, by = 0.5)) # Do not use breaks argument to have R choose the breaks itself #Normal distribution approximation hist(x = gpa$HS.GPA, xlab = "HS GPA", main = "HS GPA", breaks = c(0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5), ylim = c(0, 0.8), freq = FALSE) curve(expr = dnorm(x = x, mean = mean(gpa$HS.GPA), sd = sd(gpa$HS.GPA)), col = "red", add = TRUE) hist(x = gpa$College.GPA, xlab = "College GPA", main = "College GPA", breaks = seq(from = 0, to = 4.5, by = 0.5), ylim = c(0, 0.8), freq = FALSE) curve(expr = dnorm(x = x, mean = mean(gpa$College.GPA), sd = sd(gpa$College.GPA)), col = "red", add = TRUE) par(mfrow = c(1,1)) # Box plots are drawn in order of variables in data frame boxplot(x = gpa, col = "lightblue", main = "Box and dot plots", ylab = "GPA", xlab = "", names = c("HS GPA", "College GPA"), pars = list(outpch = NA)) set.seed(7128) # Reproduce same jittering each time stripchart(x = gpa, lwd = 2, col = "red", method = "jitter", vertical = TRUE, pch = 1, add = TRUE) # Example to showing how the whiskers work with boxplot() set1 <- rbind(gpa, data.frame(HS.GPA = 10, College.GPA = 10)) # Add an outlier boxplot(x = set1, col = "lightblue", main = "Box and dot plots", ylab = "GPA", xlab = "", names = c("HS GPA", "College GPA")) boxplot(x = set1, col = "lightblue", main = "Box and dot plots", ylab = "GPA", xlab = "", names = c("HS GPA", "College GPA"), range = 0) # Below is a different form of the data which requires a different way to specify what to plot HS.only <- data.frame(school = "HS", gpa = gpa$HS.GPA) College.only <- data.frame(school = "College", gpa = gpa$College.GPA) HS.college <- rbind(HS.only, College.only) head(HS.college) tail(HS.college) boxplot(formula = gpa ~ school, data = HS.college, col = "lightblue", main = "Box and dot plots", ylab = "GPA", xlab = "") # Unfortunately, there is no data argument for stripchart()! set.seed(8812) # Reproduce same jittering each time stripchart(x = HS.college$gpa ~ HS.college$school, lwd = 2, col = "red", method = "jitter", vertical = TRUE, pch = 1, add = TRUE) ########################################################################## # Control layout of plots par(mfrow = c(1,2)) # Make sure plots have same y-axis scales min.y <- min(gpa$HS.GPA, gpa$College.GPA) max.y <- max(gpa$HS.GPA, gpa$College.GPA) # Note: removed pars = list(outpch = NA) boxplot(x = gpa, col = "lightblue", ylab = "GPA", xlab = "", ylim = c(min.y, max.y), names = c("HS GPA", "College GPA")) title(main = "Box and dot plots", outer = TRUE, line = -3) set.seed(6162) # Reproduce same jittering each time stripchart(x = gpa, lwd = 2, col = "red", method = "jitter", vertical = TRUE, pch = 1, group.names = c("HS GPA", "College GPA"), ylim = c(min.y, max.y)) par(mfrow = c(1,2)) par(mar = c(5,4,4,0.5)) # Default is mar = c(5, 4, 4, 2) + 0.1 boxplot(x = gpa, col = "lightblue", ylab = "GPA", xlab = "", ylim = c(min.y, max.y), names = c("HS GPA", "College GPA")) title(main = "Box and dot plots", outer = TRUE, line = -3) set.seed(6162) # Reproduce same jittering each time par(mar = c(5,0.5,4,4)) # Default is mar = c(5, 4, 4, 2) + 0.1 stripchart(x = gpa, lwd = 2, col = "red", method = "jitter", vertical = TRUE, pch = 1, group.names = c("HS GPA", "College GPA"), ylim = c(min.y, max.y), yaxt = "n") axis(side = 4) #Reset to default values par(mfrow = c(1,1), mar = c(5, 4, 4, 2) + 0.1) ###################### # Using layout() #Simple example showing how to create a matrix in R X <- matrix(data = c(1,2,3,4), nrow = 2, ncol = 2, byrow = TRUE) X save.layout1 <- layout(mat = matrix(data = c(1, 2), nrow = 1, ncol = 2, byrow = TRUE)) layout.show(save.layout1) save.layout2 <- layout(mat = matrix(data = c(1, 1, 2, 3), nrow = 2, ncol = 2, byrow = TRUE), heights = c(1, 5)) save.layout3 <- layout(mat = matrix(data = c(2, 0, 1, 3), nrow = 2, ncol = 2, byrow = TRUE), heights = c(1,3), widths = c(3,1)) layout.show(save.layout3) par(mar = c(5, 4, 4, 2) + 0.1) # Default values plot(x = gpa$HS.GPA, y = gpa$College.GPA, xlab = "HS GPA", ylab = "College GPA", xlim = c(1.5,4.5), ylim = c(0.5,4.5), col = "black", lwd = 1, panel.first = grid()) par(mar = c(0, 4, 4, 2) + 0.1) boxplot(x = gpa$HS.GPA, xlab = NA, ylab = NA, main = NA, horizontal = TRUE, ylim = c(1.5, 4.5)) # Notice ylim is really for x-axis here due to horizontal par(mar = c(5, 4, 4, 2) + 0.1) boxplot(x = gpa$College.GPA, xlab = NA, ylab = NA, main = NA, horizontal = FALSE, ylim = c(0.5, 4.5)) # Without axes for box plots - should only do this when no everything lines up # correctly with the scatter plot par(mar = c(5, 4, 0, 0) + 0.1) plot(x = gpa$HS.GPA, y = gpa$College.GPA, xlab = "HS GPA", ylab = "College GPA", xlim = c(1.5,4.5), ylim = c(0.5,4.5), col = "black", lwd = 1, panel.first = grid()) par(mar = c(0, 4, 4, 0) + 0.1) boxplot(x = gpa$HS.GPA, xlab = NA, ylab = NA, main = NA, horizontal = TRUE, ylim = c(1.5, 4.5), axes = FALSE) par(mar = c(5, 0, 0, 2) + 0.1) boxplot(x = gpa$College.GPA, xlab = NA, ylab = NA, main = NA, horizontal = FALSE, ylim = c(0.5, 4.5), axes = FALSE) ########################################################################## ########################################################################## ########################################################################## # ggplot2 library(package = "ggplot2") save.plot <- ggplot(data = gpa, mapping = aes(x = HS.GPA, y = College.GPA)) save.plot + geom_point(color = "red") # A further look at how ggplot2 does the plotting temp1 <- save.plot + geom_point(color = "red") class(temp1) methods(class = "gg") methods(class = "ggplot") getAnywhere(print.ggplot) getAnywhere(summary.ggplot) # temp1 # Show plot print(temp1) # Show plot summary(temp1) names(temp1) temp1$mapping temp1$layers temp1$labels # Same plot, different code ggplot(data = gpa, mapping = aes(x = HS.GPA, y = College.GPA)) + geom_point(color = "red") # geom = "auto" is default - qplot() tries to guess what type of plot you want qplot(x = HS.GPA, y = College.GPA, data = gpa, geom = "auto", color = "red") #Also produces as legend qplot(x = HS.GPA, y = College.GPA, data = gpa, geom = "auto", color = I("red")) qplot(x = HS.GPA, y = College.GPA, data = gpa, geom = "point", color = I("red")) # Removes awful gray background! theme_set(new = theme_bw()) # Can also set add theme_bw() to save.plot() if the theme change is only for current plot save.plot + geom_point(color = "red", shape = 1) + xlim(0, 4.5) + ylim(0,4.5) + ggtitle(label = "College GPA vs. HS GPA") + xlab(label = "HS GPA") + ylab(label = "College GPA") # Can also use for plotting labels: labs(title = "College GPA vs. HS GPA", x = "HS GPA", ylab = "College GPA") # title is no longer centered with ggplot2 2.2.0 and higher; can center it with + theme(plot.title = element_text(hjust = 0.5)) # Y^ and CIs - remember that ggplot2 needs a data frame format save.pred <- as.data.frame(predict(object = mod.fit, interval = "confidence", level = 0.95)) save.pred$HS.GPA <- gpa$HS.GPA save.pred$College.GPA <- gpa$College.GPA # Scatter plot with confidence interval bands save.plot2 <- save.plot + geom_point(color = "red", shape = 1) + xlim(0, 4.5) + ylim(0,4.5) + ggtitle(label = "College GPA vs. HS GPA") + xlab(label = "HS GPA") + ylab(label = "College GPA") save.plot2 + geom_line(data = save.pred, mapping = aes(y = fit)) + geom_ribbon(data = save.pred, mapping = aes(ymin = lwr, ymax = upr), alpha = 0.3) # Plot like above except the alpha does not give the same amount of shading! save.plot2 + geom_smooth(formula = y ~ x, level = 0.95, method = "lm", color = "black", alpha = 0.3) # Plot like above but no control ovel color and alpha (perhaps this is within the theme?) save.plot2 + stat_smooth(formula = y ~ x, level = 0.95, method = "lm") # Same as above but no control ovel color and alpha ######################## # Obtain a plot similar to what I had with the graphics package save.plot3 <- save.plot + geom_point(color = "black", shape = 1) + ggtitle(label = "College GPA vs. HS GPA") + xlab(label = "HS GPA") + ylab(label = "College GPA") # Functions used in drawing Y^ and confidence interval bands yhat <- function(x, mod.fit) { mod.fit$coefficients[1] + mod.fit$coefficients[2]*x } lower.bound <- function(x, mod.fit) { predict(object = mod.fit, newdata = data.frame(HS.GPA = x), interval = "confidence", level = 0.95)[,2] } upper.bound <- function(x, mod.fit) { predict(object = mod.fit, newdata = data.frame(HS.GPA = x), interval = "confidence", level = 0.95)[,3] } # Data frame containing legend location and labels legend.df <- data.frame(x = c(2.5,2.5), y = c(1.25, 1.1), name = c("Estimated response", "95% confidence interval"), color.line = c("red", "blue"), linetype = c("solid", "dashed")) legend.df # Change some aspects of the theme to obtain different grid lines theme_bw()$panel.grid.major # Current http://docs.ggplot2.org/dev/vignettes/themes.html chris.theme.changes <- theme(panel.grid.major = element_line(color = "gray", linetype = "dotted")) + theme(panel.grid.minor = element_blank()) chris.theme.changes$panel.grid.major # New save.plot3 + stat_function(fun = yhat, args = list(mod.fit = mod.fit), color = "red") + stat_function(fun = lower.bound, args = list(mod.fit = mod.fit), color = "blue", linetype = "dashed") + stat_function(fun = upper.bound, args = list(mod.fit = mod.fit), color = "blue", linetype = "dashed") + coord_cartesian(ylim = c(0,4.5), xlim = c(0,4.5)) + chris.theme.changes + geom_text(mapping = aes(y = College.GPA+0.1, label = 1:20), size = 4) + geom_text(data = legend.df, mapping = aes(x = x, y = y, label = name), size = 4, hjust = 0) + geom_segment(data = legend.df, mapping = aes(x = x-0.5, y = y, xend = x-0.1, yend = y), color = c("red", "blue"), linetype = c("solid", "dashed")) #Maybe should do 1:20 in the data frame instead for label = 1:20? Would be more general... # Second attempt at the legend which produces an INCORRECT legend legend.df2 <- data.frame(x = c(2.5,2.5), y = c(1.25, 1.1), name = c("Estimated response", "95% confidence interval"), color = c("red", "blue"), linetype = c("solid", "dashed")) legend.df2 save.plot3 + stat_function(fun = yhat, args = list(mod.fit = mod.fit), color = "red") + stat_function(fun = lower.bound, args = list(mod.fit = mod.fit), color = "blue", linetype = "dashed") + stat_function(fun = upper.bound, args = list(mod.fit = mod.fit), color = "blue", linetype = "dashed") + coord_cartesian(ylim = c(0,4.5), xlim = c(0,4.5)) + geom_text(mapping = aes(y = College.GPA+0.1, label = 1:20), size = 4) + geom_text(data = legend.df2, mapping = aes(x = x, y = y, label = name, color = color, linetype = linetype)) # Another way to obtain a legend is to following the recommendations at # http://zevross.com/blog/2014/08/04/beautiful-plotting-in-r-a-ggplot2-cheatsheet-3/#limit-an-axis-to-a-range-ylim-scale_x_continuous-coord_cartesian # in its "Manually adding legend items" section. This described a way to still use the meant for # mechanisms to have a legend drawn, but eventually edit the result. I think my way shown above is better! # A further look how ggplot2 does the plotting temp2 <- save.plot3 + stat_function(fun = yhat, args = list(mod.fit = mod.fit), color = "red") + stat_function(fun = lower.bound, args = list(mod.fit = mod.fit), color = "blue", linetype = "dashed") + stat_function(fun = upper.bound, args = list(mod.fit = mod.fit), color = "blue", linetype = "dashed") + coord_cartesian(ylim = c(0,4.5), xlim = c(0,4.5)) + chris.theme.changes + geom_text(mapping = aes(y = College.GPA+0.1, label = 1:20), size = 4) + geom_text(data = legend.df, mapping = aes(x = x, y = y, label = name), size = 4, hjust = 0) + geom_segment(data = legend.df, mapping = aes(x = x-0.5, y = y, xend = x-0.1, yend = y), color = c("red", "blue"), linetype = c("solid", "dashed")) class(temp2) plot(temp2) # Same as just submitting "temp2" at command prompt summary(temp2) names(temp2) temp2$mapping temp2$layers temp2$labels # Example showing a case where an automatically generated legend works (mostly) # Add a variable denoting an in or out-of-state student state <- c(rep(x = "out", times = 7), rep(x = "in", times = 13)) # Create a new data frame that adds the new variable to it. Alternatively, # one could have kept the old data frame and used gpa$state <- state gpa2 <- data.frame(gpa, state) save.plot4 <- ggplot(data = gpa2, mapping = aes(x = HS.GPA, y = College.GPA, color = state, shape = state, group = state)) save.plot4 + geom_point() # Control color of plotting point save.plot5 <- ggplot(data = gpa2, mapping = aes(x = HS.GPA, y = College.GPA, color = state)) save.plot5 + geom_point() + scale_color_manual(values = c("out" = "red", "in" = "blue")) + guides(color = guide_legend(title = "Residency status")) # Control style of plotting point save.plot6 <- ggplot(data = gpa2, mapping = aes(x = HS.GPA, y = College.GPA, shape = state)) save.plot6 + geom_point() + scale_shape(solid = FALSE) + guides(shape = guide_legend(title = "Residency status")) # Control color and style does not work well save.plot7 <- ggplot(data = gpa2, mapping = aes(x = HS.GPA, y = College.GPA, shape = state, color = state)) save.plot7 + geom_point() + scale_shape(solid = FALSE) + scale_color_manual(values = c("out" = "red", "in" = "blue")) + guides(shape = guide_legend(title = "Residency status")) # Change location of legend # theme(legend.position = "bottom") # theme(legend.position = c(0.5, 0.5)) ############################### # Histograms ggplot(data = gpa, mapping = aes(x = HS.GPA)) + geom_histogram() # By default, \code{stat_bin} uses 30 bins - this is not a good default chris.theme.changes2 <- theme(panel.grid.major = element_blank()) + theme(panel.grid.minor = element_blank()) ggplot(data = gpa, mapping = aes(x = HS.GPA)) + xlab("HS GPA") + ylab("Density") + chris.theme.changes2 + geom_histogram(aes(y = ..density..), fill = NA, color = "black", binwidth = 0.5) + xlim(0,5) + stat_function(fun = dnorm, args = list(mean = mean(gpa$HS.GPA), sd = sd(gpa$HS.GPA)), color = "red", n = 1000) # This does not work - x does not pass in to args! ggplot(data = gpa, mapping = aes(x = HS.GPA)) + xlab("HS GPA") + ylab("") + chris.theme.changes2 + geom_histogram(aes(y = ..density..), binwidth = 0.5) + xlim(0,5) + stat_function(fun = dnorm, args = list(mean = mean(x), sd = sd(x)), color = "red") ############################### # Box and dot plots # Use the alternative data format for the plot head(HS.college, n = 2) tail(HS.college, n = 2) #Need additional theme change because do not want an x-axis title chris.theme.changes3 <- theme(panel.grid.major = element_line(color = "gray", linetype = "dotted")) + theme(panel.grid.minor = element_blank(), axis.title.x = element_blank()) set.seed(8912) ggplot(data = HS.college, mapping = aes(x = school, y = gpa)) + ylab(label = "GPA") + chris.theme.changes3 + geom_boxplot(outlier.shape = NA) + geom_point(position = position_jitter(height = 0.0, width = 0.1), shape = 1, color = "red") # I do not think there is a way to turn of grid lines for one of the axes - looked in the settings # for theme. Could manually turn off all grid lines and then # draw in the lines with + geom_hline(yintercept = 2:4, color = "gray", lty = "dotted") ############################### # Multiple plot in one graphics window - using grid package directly plot1 <- ggplot(data = gpa, mapping = aes(x = HS.GPA)) + xlab("HS GPA") + ylab("Density") + chris.theme.changes2 + geom_histogram(aes(y = ..density..), fill = NA, color = "black", binwidth = 0.5) + xlim(0,5) + stat_function(fun = dnorm, args = list(mean = mean(gpa$HS.GPA), sd = sd(gpa$HS.GPA)), color = "red", n = 1000) plot2 <- ggplot(data = gpa, mapping = aes(x = College.GPA)) + xlab("College GPA") + ylab("Density") + chris.theme.changes2 + geom_histogram(aes(y = ..density..), fill = NA, color = "black", binwidth = 0.5) + xlim(0,5) + stat_function(fun = dnorm, args = list(mean = mean(gpa$College.GPA), sd = sd(gpa$College.GPA)), color = "red", n = 1000) library(package = "grid") # Surprised that ggplot2 does not automatically load it? # Removes any previous graphics window settings grid.newpage() layout2x1 <- grid.layout(nrow = 2, ncol = 1) # Push layout onto graphics window pushViewport(viewport(layout = layout2x1)) # Show layout - null units because did not define using exact measurements grid.show.layout(l = layout2x1) pushViewport(viewport(layout = layout2x1)) print(plot1, vp = viewport(layout.pos.row = 1, layout.pos.col = 1)) print(plot2, vp = viewport(layout.pos.row = 2, layout.pos.col = 1)) ############################### # Multiple plot in one graphics window - using Trellis plots ggplot(data = HS.college, mapping = aes(x = gpa)) + chris.theme.changes2 + geom_histogram(aes(y = ..density..), fill = NA, color = "black", binwidth = 0.5) + xlim(0,5) + ylab("Density") + xlab("GPA") + facet_wrap(~ school, nrow = 2) # Because stat_function() will not work properly, one could use geom_density() for a smoothed density estimate ggplot(data = HS.college, mapping = aes(x = gpa)) + chris.theme.changes2 + geom_histogram(aes(y = ..density..), fill = NA, color = "black", binwidth = 0.5) + xlim(0,5) + ylab("Density") + xlab("GPA") + facet_wrap(~ school, nrow = 2) + geom_density(color = "red") # Example using facet_grid() ggplot(data = HS.college, mapping = aes(x = gpa)) + chris.theme.changes2 + geom_histogram(aes(y = ..density..), fill = NA, color = "black", binwidth = 0.5) + xlim(0,5) + ylab("Density") + xlab("GPA") + facet_grid(.~ school) ggplot(data = HS.college, mapping = aes(x = gpa)) + chris.theme.changes2 + geom_histogram(aes(y = ..density..), fill = NA, color = "black", binwidth = 0.5) + xlim(0,5) + ylab("Density") + xlab("GPA") + facet_grid(school~.) # Below is an attempt to "trick" R into giving a normal distribution overlay # I use geom_density() with a larger adjust value with simulated data set.seed(1213) set1 <- rbind(data.frame(x = rnorm(100, 0, 1), var = "first"), data.frame(x = rnorm(100, 5, 1), var = "second")) head(set1) tail(set1) ggplot(data = set1, aes(x = x)) + chris.theme.changes2 + facet_wrap(~ var, nrow = 2) + geom_histogram(aes(y = ..density..), fill = NA, color = "black", binwidth = 0.5) + geom_density(color = "red", args = list(kernel = "gaussian"), adjust = 5) # If you do not like the given names used for panels, here's how you could # change the names. # Make school names more formal - could have done this originally #levels(HS.college$school) #levels(HS.college$school)[1] <- "High school" #levels(HS.college$school) # Could do this with factor() as well #HS.college$school <- factor(HS.college$school, levels = c("High school", "College")) ##################################################### # A function to put more than one plot in a graphics window # This uses some of the same grid concepts above #Source: http://www.cookbook-r.com/Graphs/Multiple_graphs_on_one_page_(ggplot2)/ # Multiple plot function # # ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects) # - cols: Number of columns in layout # - layout: A matrix specifying the layout. If present, 'cols' is ignored. # # If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE), # then plot 1 will go in the upper left, 2 will go in the upper right, and # 3 will go all the way across the bottom. # multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) { library(grid) # Make a list from the ... arguments and plotlist plots <- c(list(...), plotlist) numPlots = length(plots) # If layout is NULL, then use 'cols' to determine layout if (is.null(layout)) { # Make the panel # ncol: Number of columns of plots # nrow: Number of rows needed, calculated from # of cols layout <- matrix(seq(1, cols * ceiling(numPlots/cols)), ncol = cols, nrow = ceiling(numPlots/cols)) } if (numPlots==1) { print(plots[[1]]) } else { # Set up the page grid.newpage() pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout)))) # Make each plot, in the correct location for (i in 1:numPlots) { # Get the i,j matrix positions of the regions that contain this subplot matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE)) print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row, layout.pos.col = matchidx$col)) } } } ################################################################################# ################################################################################# #Example of creating a new theme - Easiest way is to replace parts of an old theme # Idea came from http://stackoverflow.com/questions/6736378/how-do-i-change-the-background-color-of-a-plot-made-with-ggplot2 new.theme <- function (base_size = 12, base_family = "") { theme_bw(base_size = base_size, base_family = base_family) %+replace% theme( panel.grid.major = element_line(color = "gray", linetype = "dotted"), panel.grid.minor = element_blank() ) } theme_set(new = new.theme()) #theme_update() can be useful too. #