######################################################################
# Cereal data - summaries                                            #
#                                                                    #
# Reads cereal.csv, rescales the nutrition columns by serving size,  #
# prints per-shelf summary statistics, and draws dot, box, and       #
# parallel-coordinate plots of the adjusted values.                  #
######################################################################

######################################################################
# Read in data

cereal <- read.csv(file = "cereal.csv")
# Note: the first variable name was changed to "Cereal.ID" because Excel
# produces warning messages when one opens the file in that program (Excel
# sees "ID" and thinks the file is of a special type)
head(cereal) # Shows first 6 observations
tail(cereal) # Shows last 6 observations

# Adjust data to take into account the different serving sizes
cereal$sugar <- cereal$sugar_g / cereal$size_g
cereal$fat <- cereal$fat_g / cereal$size_g
cereal$sodium <- cereal$sodium_mg / cereal$size_g
head(cereal) # Shows first 6 observations

# Shelf numbers and the colour used for each one in the parallel
# coordinates plot and its legend
shelf.levels <- c(1, 2, 3, 4)
shelf.colors <- c("black", "red", "blue", "green")

######################################################################
# Summary statistics

cereal$sugar

# The formula is passed positionally: the first argument of the formula
# method of aggregate() was renamed from 'formula' to 'x' in R 4.2.0, so
# naming it either way breaks on some R versions.
aggregate(sugar ~ Shelf, data = cereal, FUN = mean)
aggregate(sugar ~ Shelf, data = cereal, FUN = sd)
aggregate(sugar ~ Shelf, data = cereal, FUN = summary)

# Other ways
aggregate(sugar ~ Shelf, data = cereal, FUN = fivenum)
aggregate(sugar ~ Shelf, data = cereal, FUN = quantile,
          probs = c(0.25, 0.5, 0.75))

######################################################################
# Plots

stripchart(x = cereal$sugar ~ cereal$Shelf, method = "jitter",
           vertical = TRUE, pch = 1, main = "Dot plot", ylab = "Sugar",
           xlab = "Shelf")

# Similar plot
# One jitter value per observation; nrow() is used rather than a fixed
# count so the script keeps working if rows are added or removed.
set.seed(7127)
x.value <- cereal$Shelf + runif(n = nrow(cereal), min = 0, max = 0.1)
plot(x = x.value, y = cereal$sugar, pch = 1, main = "Dot plot",
     ylab = "Sugar", xlab = "Shelf", xaxt = "n")
axis(side = 1, at = shelf.levels)

# Similar plot with observation numbers
set.seed(7127)
# runif() helps with the jittering by adding a small random value to the
# shelf number
x.value <- cereal$Shelf + runif(n = nrow(cereal), min = 0, max = 0.2)
# type = "n" sets up the axes without drawing points; text() then places
# the cereal ID at each location
plot(x = x.value, y = cereal$sugar, pch = 1, main = "Dot plot",
     ylab = "Sugar", xlab = "Shelf", type = "n", xaxt = "n")
text(x = x.value, y = cereal$sugar, labels = cereal$Cereal.ID)
axis(side = 1, at = shelf.levels)

# Side by side plots
par(mfrow = c(1, 2)) # One row and two columns of plots
stripchart(x = cereal$sugar ~ cereal$Shelf, method = "jitter",
           vertical = TRUE, pch = 1, main = "Dot plot", ylab = "Sugar",
           xlab = "Shelf")
boxplot(formula = sugar ~ Shelf, data = cereal, col = "lightblue",
        main = "Box plot", ylab = "Sugar", xlab = "Shelf")

# Overlaying plots
par(mfrow = c(1, 1))
# outpch = NA suppresses the box plot's own outlier symbols because every
# observation is drawn by the overlaid stripchart()
boxplot(formula = sugar ~ Shelf, data = cereal,
        main = "Box and dot plot", ylab = "Sugar", xlab = "Shelf",
        col = NA, pars = list(outpch = NA))
stripchart(x = cereal$sugar ~ cereal$Shelf, lwd = 2, col = "red",
           method = "jitter", vertical = TRUE, pch = 1, add = TRUE)

# Parallel coordinates plot
library(package = MASS)
cereal2 <- data.frame(cereal$Cereal.ID, cereal$sugar, cereal$fat,
                      cereal$sodium)
# Colour each line by the shelf recorded for that row, rather than assuming
# the rows are sorted by shelf with exactly 10 cereals per shelf
color.by.shelf <- shelf.colors[match(cereal$Shelf, shelf.levels)]
parcoord(x = cereal2, col = color.by.shelf,
         main = "Parallel coordinate plot for cereal data")
# Click on the plot to place the legend; fall back to the top-right corner
# when the device is not interactive or no point is selected
legend.position <- if (dev.interactive()) locator(n = 1) else NULL
if (is.null(legend.position)) {
  legend.position <- "topright"
}
legend(legend.position, legend = as.character(shelf.levels), lty = "solid",
       col = shelf.colors, bty = "n")

# Interactive plots - Need JAVA on computer
library(package = iplots)
ipcp(vars = cereal2)
ihist(var = cereal$sugar)

#