######################################################################
# Transaction time #
# #
######################################################################
# PDF
# x and y-axes start/end at the specified x and y limits in curve()
# Change back to the default with par(xaxs = "r", yaxs = "r")
par(xaxs = "i", yaxs = "i")
curve(expr = 1/30*exp(-x/30), xlim = c(0,140), col = "red",
ylab = "f(x)", xlab = "x")
# CDF
curve(expr = 1 - exp(-x/30), xlim = c(0,140), col = "red",
ylab = "f(x)", xlab = "x", panel.first = grid(), ylim = c(0, 1.05), lwd = 2)
# abline(h = c(0,1))
# Show P(X > 30)
# Method #1 - Area underneath curve to the right of line at x = 30
par(xaxs = "i", yaxs = "i")
curve(expr = 1/30*exp(-x/30), xlim = c(0,140), col = "red",
ylab = "f(x)", xlab = "x")
segments(x0 = 30, y0 = 0, x1 = 30, y1 = 1/30*exp(-30/30), col = "black")
# Method #2 - Red area underneath curve
curve(expr = 1/30*exp(-x/30), xlim = c(0,140), col = "red",
ylab = "f(x)", xlab = "x")
pdf <- function(x) {
1/30*exp(-x/30)
}
x <- c(30, seq(from = 30, to = 140, by = 0.01), 140)
y <- c(0, pdf(x = seq(from = 30, to = 140, by = 0.01)), 0)
polygon(x = x, y = y, col = "red", border = "red")
######################################################################
# Sample
# Make sure the correct folder is specified on your computer for where the
# data file is located
setwd(dir = "C:\\chris\\unl") # This would be used if the actual folder on my Windows-based computer was here
set1 <- read.csv(file = "ExampleSample.csv")
head(set1)
tail(set1)
# Estimate P(X > 30) using sample
n <- length(set1$x)
n
head(set1$x > 30)
tail(set1$x > 30)
sum(set1$x > 30)
sum(set1$x > 30)/n
mean(set1$x > 30)
# Estimate P(25 < X < 100) using sample
mean(set1$x > 25 & set1$x < 100)
mean(set1$x < 100) - mean(set1$x < 25)
hist(x = set1$x, xlab = "x", main = "")
save.hist <- hist(x = set1$x, xlab = "x")
names(save.hist)
# Information on the classes and the frequencies per class
save.hist$breaks #Notice there is one more "break" than there are counts
save.hist$counts
sum(set1$x > 0 & set1$x <= 20)
sum(set1$x > 20 & set1$x <= 40)
# Frequency distribution
Rel.Frequency <- round(save.hist$counts/sum(save.hist$counts), digits = 2)
Cumul.Rel.Freq = round(cumsum(save.hist$counts/sum(save.hist$counts)), digits = 2)
data.frame(class = save.hist$breaks[-1], Frequency = save.hist$counts,
Rel.Frequency = Rel.Frequency, Cumul.Rel.Freq = Cumul.Rel.Freq)
# Histogram with PDF overlay
# Changed y-axis scale to get PDF in plot better
hist(x = set1$x, xlab = "x", main = "", freq = FALSE, ylim = c(0, 0.03))
curve(expr = 1/30*exp(-x/30), col = "red", add = TRUE, n = 1000)
# Sample estimated time between transactions such that 95% are less than this value
quantile(x = set1$x, probs = c(0.05, 0.95), type = 5)
# ECDF with CDF overlay
F.hat <- ecdf(x = set1$x) # Creates a function to find ECDF
F.hat(seq(from = 20, to = 220, by = 20)) # Compare to previous calculations
sum(set1$x <= 20)/n
sum(set1$x <= 40)/n
F.hat(0)
F.hat(10)
F.hat(300)
plot.ecdf(x = set1$x, lwd = 2, panel.first = grid(), ylab = "Probability",
xlab = "x", col = "blue", main = "", verticals = TRUE, do.p = FALSE)
curve(expr = 1 - exp(-x/30), col = "red", add = TRUE, n = 1000, xlim = c(0,250))
legend(x = 100, y = 0.6, legend = c("ECDF", "CDF"), lty = 1, col = c("blue", "red"),
lwd = 2, bty = "n")
save.val <- plot.ecdf(x = set1$x, lwd = 2, panel.first = grid(), ylab = "Probability",
xlab = "x", col = "blue", main = "")
######################################################################
# Take sample in R using rexp()
set.seed(8291)
x <- rexp(n = 100000, rate = 1/30)
head(x)
mean(x > 30)
mean(x > 25 & x < 100)
#####################################################################
# Numerical integration
pdf <- function(x) {
1/30*exp(-x/30)
}
#Examples
pdf(x = 0)
1/30*exp(-0/30)
pdf(x = 10)
1/30*exp(-10/30)
# P(X > 30)
integrate(f = pdf, lower = 30, upper = Inf)
# P(25 < X < 100)
integrate(f = pdf, lower = 25, upper = 100)
# Natural log function in R is log()
30*log(20)
####################################################
# Calculations to help understand code in find.root()
save.res <- integrate(f = pdf, lower = 0, upper = 89.87)
names(save.res)
save.res$value
# Extract only the value from integration
integrate(f = pdf, lower = 0, upper = 89.87)$value
# Shows value is 0
0.95 - integrate(f = pdf, lower = 0, upper = 89.87)$value
# Function want to find root for (the "c" where expression is 0)
find.root <- function(c.val) {
0.95 - integrate(f = pdf, lower = 0, upper = c.val)$value
}
find.root(c.val = 89.87)
# Find c value
uniroot(f = find.root, interval = c(0,300))
#