#ggplot2 presentation code library(ggplot2) head(mtcars) ################################################################################ # Example 1 #Note that ggplot2 tries to guess the correct plot. #Bad part of histogram is the default way it creates the bins. hist() is much better win.graph(width = 10) par(mfrow = c(1,2)) #Does not work - see solution later qplot(x = mpg, data = mtcars) qplot(x = mpg, data = mtcars, binwidth = 3, xlab = "Miles Per Gallon", ylab = "Frequency") ################################################################################ # Example 2 qplot(x = mpg, y = cyl, data = mtcars) qplot(x = mpg, data = mtcars, binwidth = 3, xlab = "Miles Per Gallon", ylab = "Frequency", fill = factor(cyl)) qplot(x = mpg, data = mtcars, binwidth = 3, xlab = "Miles Per Gallon", ylab = "Frequency", facets = .~cyl) qplot(x = mpg, data = mtcars, binwidth = 3, xlab = "Miles Per Gallon", ylab = "Frequency", facets = .~factor(cyl)) #Does not work ################################################################################ # Example 3 #The +stat_boxplot() part does not work unless it is on the same line as qplot() qplot(x = hp, y = mpg, data = mtcars, xlab = "Horsepower", ylab = "Miles Per Gallon", shape = factor(cyl), color = factor(cyl)) +stat_boxplot() p<-qplot(x = hp, y = mpg, data = mtcars, xlab = "Horsepower", ylab = "Miles Per Gallon", shape = factor(cyl), color = factor(cyl)) p + stat_boxplot() names(p) qplot(x = hp, y = mpg, data = mtcars, xlab = "Horsepower", ylab = "Miles Per Gallon", facets = cyl ~.) qplot(x = hp, y = mpg, data = mtcars, xlab = "Horsepower", ylab = "Miles Per Gallon", facets = . ~cyl) p<-qplot(x = cyl, y = mpg, data = mtcars, xlab = "Number of Cylinders", ylab = "Miles Per Gallon", shape = factor(cyl), fill = factor(cyl)) p+stat_summary(fun.y = median, geom = "bar") #Poor plot because bars hide points - see video for fix p+stat_summary(fun.y = median, geom = "bar", alpha = I(0.5)) #Uses opacity to allow you to see points p+stat_summary(fun.y = median, geom = "bar") + geom_point(mapping = aes(x = cyl, y = mpg), data = mtcars) #In above example (partially suggested by Aimee), not sure why need to use aes() # and not simply x = , y = #Good examples at http://127.0.0.1:10550/library/ggplot2/html/geom_boxplot.html ################################################################################ # Example 4 qplot(x = hp, y = mpg, data = mtcars, xlab = "Horsepower", ylab = "Miles Per Gallon", facets = cyl ~., geom = c("point", "smooth")) #Maybe good to see the equivalent of using geom_smooth and the geom argument in qplot() - done p<-qplot(x = hp, y = mpg, data = mtcars, xlab = "Horsepower", ylab = "Miles Per Gallon", facets = cyl ~., geom = c("point")) p+geom_smooth() qplot(x = hp, y = mpg, data = mtcars, xlab = "Horsepower", ylab = "Miles Per Gallon", facets = cyl ~., geom = c("point", "smooth"), method = "lm") #Just regression model #A layer then would be +geom_ or +stat_ ################################################################################ # Example 5 set.seed(0220) diamonds<-diamonds[sample(nrow(diamonds), 5000),] qplot(x = carat, y = price, data = diamonds) qplot(x = carat, y = price, data = diamonds, xlab = "Number of Carats", ylab = "Price in $", color = cut, shape = cut) p<-qplot(x = carat, y = price, data = diamonds, xlab = "Number of Carats", ylab = "Price in $", color = cut, shape = cut) p + geom_smooth() #Interesting - it does the smoother for all cuts p + geom_smooth(mapping = aes(x = carat, y = price), data = diamonds) #No change #I do not see an option for just a linear model qplot(x = carat, y = price, data = diamonds, xlab = "Number of Carats", ylab = "Price in $", color = cut, shape = cut, alpha = 0.1) # Final ggplot2 graph : qplot(x = carat, y = price, data = diamonds, xlab = "Number of Carats", ylab = "Price in S", facets = .~ cut, color = clarity, size = depth, alpha = I(0.5)) #Two plots are the same p<-qplot(x = carat, y = price, data = diamonds, xlab = "Number of Carats", ylab = "Price in S", facets = .~ cut, color = clarity, size = depth, alpha = I(0)) p + geom_point(pch = 1) p<-qplot(x = carat, y = price, data = diamonds, xlab = "Number of Carats", ylab = "Price in S", facets = .~ cut, color = clarity, size = depth, alpha = I(1), shape = I(1)) p #shape only allows 6 categories # Lattice graph : library(lattice) xyplot(x = price ~ carat | cut, data = diamonds, layout = c(5,1), groups = color, main = "Carat vs. Price", xlab = " Carat", ylab = "Price", auto.key = list (points = TRUE, space = "right")) #Note: R says this is old function ggpcp(data = diamonds, vars = c("price", "carat")) + geom_line() #See also p. 181 of Wickham book ################################################################################ # Section 3 #Read in an Excel version of the file library(RODBC) z<-odbcConnectExcel("C:\\chris\\unl\\Dropbox\\NEW\\STAT950\\projects\\fall2012\\sim_results.xls") set1<-sqlFetch(z, "sim_results") close(z) set1 head(set1) qplot(y = CI, x = ExpLength, data = set1, color = factor(SampleSize), facets = ~ Distribution) p<-qplot(y = CI, x = ExpLength, data = set1, color = factor(SampleSize), facets = Distribution ~., xlim = c(0, 20), ylab = "Confidence Limits", xlab = "Expected Length") p + scale_color_hue("Sample Size", h = c(0, 40)) p + geom_point(mapping = aes(shape = factor(set1$SampleSize)), legend = "none") p<-qplot(y = CI, x = ExpLength, data = set1, color = factor(SampleSize), facets = Distribution ~., xlim = c(0, 20), ylab = "Confidence Limits", xlab = "Expected Length", shape = factor(SampleSize)) p + scale_color_hue("Sample Size", h = c(0, 40)) #Two different legend example p<-qplot(y = CI, x = ExpLength, data = set1, color = factor(SampleSize), facets = Distribution ~., xlim = c(0, 20), ylab = "Confidence Limits", xlab = "Expected Length", shape = factor(SampleSize)) p #o.k. #Could change variable type before doing plot so that we get rid of "factor" in legend #R color brewer package p<-qplot(x = hp, y = mpg, data = mtcars, xlab = "Horsepower", ylab = "Miles Per Gallon", shape = factor(cyl), color = factor(cyl), main = "theme_bw()") p + theme_bw() #This is the theme that I like - p + theme_minimal() # p + theme(panel.background = element_rect(fill = "darkblue")) #Can define own theme #There are other themes that people have created. Go to ggplot2 website ################################################################################ # Other notes #Notice how geom = "auto" is default for qqplot() #Example from Chapter 6 of book p <- qplot(cty, hwy, data = mpg, colour = displ) p p + scale_x_continuous("City mpg") p + xlab("City mpg") p + ylab("Highway mpg") p + labs(x = "City mpg", y = "Highway", colour = "Displacement") p + xlab(expression(frac(miles, gallon))) #Section 8.4 of book (a <- qplot(date, unemploy, data = economics, geom = "line")) (b <- qplot(uempmed, unemploy, data = economics) + geom_smooth(se = F)) (c <- qplot(uempmed, unemploy, data = economics, geom="path")) grid.newpage() pushViewport(viewport(layout = grid.layout(2, 2))) vplayout <- function(x, y) viewport(layout.pos.row = x, layout.pos.col = y) print(a, vp = vplayout(1, 1:2)) print(b, vp = vplayout(2, 1)) print(c, vp = vplayout(2, 2)) #Page 197 of book gives shapes and line types ################################################################################ # Similar to par(mfrow = c(1,2)) #Code from http://stackoverflow.com/questions/1249548/side-by-side-plots-with-ggplot2-in-r # Alternative to par(mfrow = c(1,2)) plot1<-qplot(x = mpg, data = mtcars) plot2<-qplot(x = mpg, data = mtcars) plot3<-qplot(x = mpg, data = mtcars) plot4<-qplot(x = mpg, data = mtcars) library(gridExtra) grid.arrange(plot1, plot2, ncol = 3) grid.arrange(plot1, plot2, plot3, ncol = 3) grid.arrange(plot1, plot2, plot3, plot4, ncol = 2) #Last comment at http://stackoverflow.com/questions/1249548/side-by-side-plots-with-ggplot2-in-r # http://www.cookbook-r.com/Graphs/Multiple_graphs_on_one_page_(ggplot2)/ # There is also an "R Graphics Cookbook" # See http://www.cookbook-r.com/Graphs/ multiplot <- function(..., plotlist=NULL, cols) { require(grid) # Make a list from the ... arguments and plotlist plots <- c(list(...), plotlist) numPlots = length(plots) # Make the panel plotCols = cols # Number of columns of plots plotRows = ceiling(numPlots/plotCols) # Number of rows needed, calculated from # of cols # Set up the page grid.newpage() pushViewport(viewport(layout = grid.layout(plotRows, plotCols))) vplayout <- function(x, y) viewport(layout.pos.row = x, layout.pos.col = y) # Make each plot, in the correct location for (i in 1:numPlots) { curRow = ceiling(i/plotCols) curCol = (i-1) %% plotCols + 1 print(plots[[i]], vp = vplayout(curRow, curCol )) } } multiplot(plot1, plot2, plot3, plot4, cols=2) ################################################################################ # library(ggplot2) #Pavel code dat.b<-msleep[!is.na(msleep$sleep_rem) &! is.na(msleep$vore),] plot<-qplot(x = sleep_total, y = sleep_rem, data = dat.b, xlab = "Hours of sleep", ylab = "Hours of REM sleep", color = vore, size = 2, ) + scale_colour_brewer("Eating habits", palette = "Set1") plot #Aimee code qplot(x=sleep_total, y=sleep_rem, color=vore, xlab="Total Hours of Sleep", ylab="Hours of REM Sleep", data=msleep)+scale_colour_brewer(palette="Spectral", name="Eating Habits") msleep.na.removed2<-msleep.na.removed[!is.na(msleep.na.removed$sleep_rem),] qplot(x=sleep_total, y=sleep_rem, data=msleep.na.removed2, xlab="Sleep Total", ylab="Hours of REM Sleep")+geom_point(aes(color=factor(vore)))+labs(colour = "Vore") #Brad code objects(msleep) # question 1 part a msleep.na.removed<-msleep[!is.na(msleep$vore),] #Stacked Histogram by "vore" qplot(sleep_total,binwidth=.5, data=msleep.na.removed, xlab="Sleep Total", ylab="Frequency", fill=factor(vore)) qplot(sleep_total,binwidth=2, data=msleep.na.removed, xlab="Sleep Total", ylab="Frequency", facets=vore~., fill = factor(vore)) msleep.na.removed2<-msleep.na.removed[!is.na(msleep.na.removed$sleep_rem),] qplot(x=sleep_total, y=sleep_rem, data=msleep.na.removed2, xlab="Sleep Total", ylab="Hours of REM Sleep")+geom_point(aes(color=factor(vore))) +labs(colour = "Vore") #Brad #2 #setwd(dir = "C:\\chris\\unl\\Dropbox\\NEW\\STAT875_new\\875_new_website\\colour_red\\Chapter1") #source(file = "ConfLevel4Intervals.R") #Initial settings and calculations alpha<-0.05 n<-40 w<-0:n pi.hat<-w/n p.tilde<-(w + qnorm(p = 1-alpha/2)^2 /2) / (n+qnorm(1-alpha/2)^2) #Wald var.wald<-pi.hat*(1-pi.hat)/n lower.wald<-pi.hat - qnorm(p = 1-alpha/2) * sqrt(var.wald) upper.wald<-pi.hat + qnorm(p = 1-alpha/2) * sqrt(var.wald) #Agresti-Coull lower.AC<-p.tilde - qnorm(p = 1-alpha/2) * sqrt(p.tilde*(1-p.tilde) / (n+qnorm(1-alpha/2)^2)) upper.AC<-p.tilde + qnorm(p = 1-alpha/2) * sqrt(p.tilde*(1-p.tilde) / (n+qnorm(1-alpha/2)^2)) #Wilson lower.wilson<-p.tilde - qnorm(p = 1-alpha/2) * sqrt(n) / (n+qnorm(1-alpha/2)^2) * sqrt(pi.hat*(1-pi.hat) + qnorm(1-alpha/2)^2/(4*n)) upper.wilson<-p.tilde + qnorm(p = 1-alpha/2) * sqrt(n) / (n+qnorm(1-alpha/2)^2) * sqrt(pi.hat*(1-pi.hat) + qnorm(1-alpha/2)^2/(4*n)) #Clopper-Pearson - This is a little more complicated due to the y = 0 and n cases lower.CP<-numeric(n+1) #This initializes a vector to save the lower bounds into upper.CP<-numeric(n+1) #This initializes a vector to save the upper bounds into #y = 0 w0<-0 #Set here for emphasis lower.CP[1]<-0 upper.CP[1]<-qbeta(p = 1-alpha/2, shape1 = w0+1, shape2 = n-w0) #y = n wn<-n #Set here for emphasis lower.CP[n+1]<-qbeta(p = alpha/2, shape1 = wn, shape2 = n-wn+1) upper.CP[n+1]<-1 #y = 1, ..., n-1 w.new<-1:(n-1) lower.CP[2:n]<-qbeta(p = alpha/2, shape1 = w.new, shape2 = n-w.new+1) upper.CP[2:n]<-qbeta(p = 1-alpha/2, shape1 = w.new+1, shape2 = n-w.new) #All pi's pi.seq<-seq(from = 0.001, to = 0.999, by = 0.0005) #pi.seq<-0.16 #Testing #pi.seq<-seq(from = 0.1, to = 0.9, by = 0.1) #Testing #Save true confidence levels in a matrix save.true.conf<-matrix(data = NA, nrow = length(pi.seq), ncol = 5) #Create counter for the loop counter<-1 #Loop over each pi that the true confidence level is calculated on for(pi in pi.seq) { pmf<-dbinom(x = w, size = n, prob = pi) #Wald save.wald<-ifelse(test = pi>lower.wald, yes = ifelse(test = pilower.AC, yes = ifelse(test = pilower.wilson, yes = ifelse(test = pilower.CP, yes = ifelse(test = pi