######################################################################
# Lincoln wind speed - summaries #
# #
######################################################################
######################################################################
# Load the data
#   read.csv() looks in the current working directory. If R was not started from
#   the folder that holds the CSV file, point R at that folder first, e.g.:
# setwd(dir = "C:\\Chris\\data")   # Replace with your own folder
wind <- read.csv("Lincoln_Feb_wind.csv")
# Typing `wind` alone would list every observation; look at the two ends instead
head(wind, n = 6)   # First 6 observations
tail(wind, n = 6)   # Last 6 observations
######################################################################
# Summary statistics
# Center: mean (built-in, then computed by hand) and median
with(wind, mean(y))
with(wind, sum(y) / length(y))
with(wind, median(y))
# Spread: standard deviation (built-in), the sum of squared deviations from the
#   mean, and the standard deviation computed by hand with an n - 1 divisor
with(wind, sd(y))
with(wind, sum((y - mean(y))^2))
with(wind, sqrt(sum((y - mean(y))^2) / (length(y) - 1)))
# Mean +/- 2 standard deviations; kept in save.interval because it is reused below
save.interval <- with(wind, data.frame(lower = mean(y) - 2 * sd(y),
                                       upper = mean(y) + 2 * sd(y)))
save.interval
# Mean +/- 3 standard deviations (displayed only)
with(wind, data.frame(lower = mean(y) - 3 * sd(y), upper = mean(y) + 3 * sd(y)))
# How many observations fall outside the 2 standard deviation interval?
save.interval$upper
with(wind, sum(y > save.interval$upper))
with(wind, sum(y < save.interval$lower))
# Proportion of observations above the upper limit
with(wind, sum(y > save.interval$upper) / length(y))
# Percentiles (type = 5 selects one of quantile()'s interpolation rules)
quantile(wind$y, probs = (1:3) / 4, type = 5)                  # Quartiles
quantile(wind$y, probs = seq(0.1, 0.9, by = 0.1), type = 5)    # Deciles
quantile(wind$y, probs = 0.2, type = 5)                        # 20th percentile
quantile(wind$y, probs = 0.8, type = 5)                        # 80th percentile
######################################################################
# Frequency distribution and plots
# Default version of the histogram (col = NA requests unfilled bars)
hist(x = wind$y, col = NA)
# Nicer version of the plot, drawn in a new 7 x 7 inch graphics window.
#   dev.new() opens whatever device is the default for the current platform, so
#   the script runs on every operating system. win.graph() exists only in
#   Windows builds of R and would stop the script elsewhere.
dev.new(width = 7, height = 7, pointsize = 10)
# Device-specific alternatives:
# win.graph(width = 7, height = 7, pointsize = 10) # Windows only
# x11(width = 7, height = 7, pointsize = 10)       # Works for some operating systems
# quartz(width = 7, height = 7, pointsize = 10)    # Works for some operating systems
hist(x = wind$y, main = "Daily wind speed for Lincoln in February", xlab = "Wind speed",
  xlim = c(0, 25))
# axis(side = 1, at = seq(from = 0, to = 25, by = 5)) # Can be used to draw x-axis too
# Mark the lower and upper limits stored in save.interval (mean -/+ 2 standard
#   deviations) on the plot above.
# segments() draws a line segment between (x0, y0) and (x1, y1); here each one is
#   a short vertical red tick rising from just below the x-axis.
segments(x0 = save.interval$lower, y0 = -0.1, x1 = save.interval$lower, y1 = 2, lwd = 2, col = "red")
segments(x0 = save.interval$upper, y0 = -0.1, x1 = save.interval$upper, y1 = 2, lwd = 2, col = "red")
# Keeping what hist() returns gives access to the pieces needed for a frequency
#   distribution (the plot is still drawn)
save.hist <- hist(wind$y, xlim = c(0, 25), xlab = "Wind speed",
  main = "Daily wind speed for Lincoln in February")
names(save.hist)
# Class boundaries and the frequency within each class
save.hist$breaks   # There is one more "break" than there are counts
save.hist$counts
# A few counts found by hand for comparison with save.hist$counts
with(wind, sum(y > 2 & y <= 4))
with(wind, sum(y > 4 & y <= 6))
with(wind, sum(y > 22 & y <= 24))
with(wind, sum(y > 24))
# Frequency distribution: each class is labeled by its upper boundary
Rel.Frequency <- with(save.hist, round(counts / sum(counts), digits = 2))
with(save.hist, data.frame(class = breaks[-1], Frequency = counts,
  Rel.Frequency = Rel.Frequency))
#Examples of how to use breaks and nclass
#  A single number given to breaks (or nclass) is only a suggestion: hist() picks
#  "pretty" class boundaries near that number of classes, so the number of classes
#  actually drawn can differ from the number requested.
#Example - two classes
hist(x = wind$y, main = "Daily wind speed for Lincoln in February", xlab = "Wind speed",
  breaks = 1)
hist(x = wind$y, main = "Daily wind speed for Lincoln in February", xlab = "Wind speed",
  nclass = 1)
#Example - 30 classes (requested; the value now matches this description rather than 70)
hist(x = wind$y, main = "Daily wind speed for Lincoln in February", xlab = "Wind speed",
  breaks = 30)
#Specify the specific locations of the class boundaries (two equivalent ways)
hist(x = wind$y, main = "Daily wind speed for Lincoln in February", xlab = "Wind speed",
  breaks = c(0, 10, 20, 30, 40, 50))
hist(x = wind$y, main = "Daily wind speed for Lincoln in February", xlab = "Wind speed",
  breaks = seq(from = 0, to = 50, by = 10))
#My own function to find the frequency distribution.
freq.dist <- function(data, numb.breaks = "Sturges") {
  # Build a frequency distribution table from the classes hist() chooses.
  #   data:        numeric vector of observations
  #   numb.breaks: passed unchanged to the breaks argument of hist()
  # Returns a data.frame with one row per class:
  #   class         - upper boundary of the class
  #   Frequency     - number of observations in the class
  #   Rel.Frequency - Frequency / total, rounded to 2 decimal places
  binned <- hist(x = data, breaks = numb.breaks, plot = FALSE)  # Compute only, no plot
  class.counts <- binned$counts
  upper.limits <- binned$breaks[-1]  # Drop the first break so lengths match the counts
  data.frame(class = upper.limits, Frequency = class.counts,
    Rel.Frequency = round(class.counts / sum(class.counts), digits = 2))
}
# Try the function: default rule, a single suggested number, and explicit boundaries
freq.dist(wind$y)
freq.dist(wind$y, numb.breaks = 1)
freq.dist(wind$y, numb.breaks = seq(from = 0, to = 50, by = 10))
# The plots can also be produced through R Commander
library(Rcmdr)
# Stem-and-leaf plot
stem(wind$y)
######################################################################
# By year investigation
par(mfrow = c(1,1))  # One plot per graphics window
# Box plots by year with the individual observations overlaid as a jittered dot plot
boxplot(y ~ Year, data = wind, main = "Box and dot plot", ylab = "Wind speed",
  xlab = "Year")
stripchart(x = wind$y ~ wind$Year, lwd = 2, col = "red",
  method = "jitter", vertical = TRUE, pch = 1, main =
  "Dot plot", add = TRUE)
#Without the outliers plotted by boxplot() (outpch = NA suppresses its outlier
#  symbols; the overlaid dot plot still shows every observation)
boxplot(y ~ Year, data = wind, main = "Box and dot plot", ylab = "Wind speed",
  xlab = "Year", pars = list(outpch=NA))
stripchart(x = wind$y ~ wind$Year, lwd = 2, col = "red",
  method = "jitter", vertical = TRUE, pch = 1, main =
  "Dot plot", add = TRUE)
# Summary statistics by year.
#   The formula is passed by position rather than as formula = ...: in R >= 4.2.0
#   the first argument of aggregate()'s formula method is named x, so naming it
#   "formula" stops with an error there. An unnamed formula works in all versions.
aggregate(y ~ Year, data = wind, FUN = mean)
aggregate(y ~ Year, data = wind, FUN = sd)
aggregate(y ~ Year, data = wind, FUN = quantile, probs = c(0.25, 0.5, 0.75))
# Histograms of wind speed conditioned on year, arranged in 2 columns by 3 rows
library(package = lattice)
histogram(~ y | Year, data = wind, layout = c(2,3), nint = 8)
#