###################################################################### # Transaction time # # # ###################################################################### # PDF # x and y-axes start/end at the specified x and y limits in curve() # Change back to the default with par(xaxs = "r", yaxs = "r") par(xaxs = "i", yaxs = "i") curve(expr = 1/30*exp(-x/30), xlim = c(0,140), col = "red", ylab = "f(x)", xlab = "x") # CDF curve(expr = 1 - exp(-x/30), xlim = c(0,140), col = "red", ylab = "f(x)", xlab = "x", panel.first = grid(), ylim = c(0, 1.05), lwd = 2) # abline(h = c(0,1)) # Show P(X > 30) # Method #1 - Area underneath curve to the right of line at x = 30 par(xaxs = "i", yaxs = "i") curve(expr = 1/30*exp(-x/30), xlim = c(0,140), col = "red", ylab = "f(x)", xlab = "x") segments(x0 = 30, y0 = 0, x1 = 30, y1 = 1/30*exp(-30/30), col = "black") # Method #2 - Red area underneath curve curve(expr = 1/30*exp(-x/30), xlim = c(0,140), col = "red", ylab = "f(x)", xlab = "x") pdf <- function(x) { 1/30*exp(-x/30) } x <- c(30, seq(from = 30, to = 140, by = 0.01), 140) y <- c(0, pdf(x = seq(from = 30, to = 140, by = 0.01)), 0) polygon(x = x, y = y, col = "red", border = "red") ###################################################################### # Sample # Make sure the correct folder is specified on your computer for where the # data file is located setwd(dir = "C:\\chris\\unl") # This would be used if the actual folder on my Windows-based computer was here set1 <- read.csv(file = "ExampleSample.csv") head(set1) tail(set1) # Estimate P(X > 30) using sample n <- length(set1$x) n head(set1$x > 30) tail(set1$x > 30) sum(set1$x > 30) sum(set1$x > 30)/n mean(set1$x > 30) # Estimate P(25 < X < 100) using sample mean(set1$x > 25 & set1$x < 100) mean(set1$x < 100) - mean(set1$x < 25) hist(x = set1$x, xlab = "x", main = "") save.hist <- hist(x = set1$x, xlab = "x") names(save.hist) # Information on the classes and the frequencies per class save.hist$breaks #Notice there is one more "break" than there are counts save.hist$counts sum(set1$x > 0 & set1$x <= 20) sum(set1$x > 20 & set1$x <= 40) # Frequency distribution Rel.Frequency <- round(save.hist$counts/sum(save.hist$counts), digits = 2) Cumul.Rel.Freq = round(cumsum(save.hist$counts/sum(save.hist$counts)), digits = 2) data.frame(class = save.hist$breaks[-1], Frequency = save.hist$counts, Rel.Frequency = Rel.Frequency, Cumul.Rel.Freq = Cumul.Rel.Freq) # Histogram with PDF overlay # Changed y-axis scale to get PDF in plot better hist(x = set1$x, xlab = "x", main = "", freq = FALSE, ylim = c(0, 0.03)) curve(expr = 1/30*exp(-x/30), col = "red", add = TRUE, n = 1000) # Sample estimated time between transactions such that 95% are less than this value quantile(x = set1$x, probs = c(0.05, 0.95), type = 5) # ECDF with CDF overlay F.hat <- ecdf(x = set1$x) # Creates a function to find ECDF F.hat(seq(from = 20, to = 220, by = 20)) # Compare to previous calculations sum(set1$x <= 20)/n sum(set1$x <= 40)/n F.hat(0) F.hat(10) F.hat(300) plot.ecdf(x = set1$x, lwd = 2, panel.first = grid(), ylab = "Probability", xlab = "x", col = "blue", main = "", verticals = TRUE, do.p = FALSE) curve(expr = 1 - exp(-x/30), col = "red", add = TRUE, n = 1000, xlim = c(0,250)) legend(x = 100, y = 0.6, legend = c("ECDF", "CDF"), lty = 1, col = c("blue", "red"), lwd = 2, bty = "n") save.val <- plot.ecdf(x = set1$x, lwd = 2, panel.first = grid(), ylab = "Probability", xlab = "x", col = "blue", main = "") ###################################################################### # Take sample in R using rexp() set.seed(8291) x <- rexp(n = 100000, rate = 1/30) head(x) mean(x > 30) mean(x > 25 & x < 100) ##################################################################### # Numerical integration pdf <- function(x) { 1/30*exp(-x/30) } #Examples pdf(x = 0) 1/30*exp(-0/30) pdf(x = 10) 1/30*exp(-10/30) # P(X > 30) integrate(f = pdf, lower = 30, upper = Inf) # P(25 < X < 100) integrate(f = pdf, lower = 25, upper = 100) # Natural log function in R is log() 30*log(20) #################################################### # Calculations to help understand code in find.root() save.res <- integrate(f = pdf, lower = 0, upper = 89.87) names(save.res) save.res$value # Extract only the value from integration integrate(f = pdf, lower = 0, upper = 89.87)$value # Shows value is 0 0.95 - integrate(f = pdf, lower = 0, upper = 89.87)$value # Function want to find root for (the "c" where expression is 0) find.root <- function(c.val) { 0.95 - integrate(f = pdf, lower = 0, upper = c.val)$value } find.root(c.val = 89.87) # Find c value uniroot(f = find.root, interval = c(0,300)) #