######################################################################
# Transaction time                                                   #
#                                                                    #
######################################################################


# PDF
# x and y-axes start/end at the specified x and y limits in curve()
#   Change back to the default with par(xaxs = "r", yaxs = "r")
par(xaxs = "i", yaxs = "i")
curve(expr = 1/30*exp(-x/30), xlim = c(0,140), col = "red",
  ylab = "f(x)", xlab = "x")


# CDF
curve(expr = 1 - exp(-x/30), xlim = c(0,140), col = "red",
  ylab = "f(x)", xlab = "x", panel.first = grid(), ylim = c(0, 1.05), lwd = 2)
# abline(h = c(0,1))


# Show P(X > 30)
#   Method #1  - Area underneath curve to the right of line at x = 30
par(xaxs = "i", yaxs = "i")
curve(expr = 1/30*exp(-x/30), xlim = c(0,140), col = "red",
  ylab = "f(x)", xlab = "x")
segments(x0 = 30, y0 = 0, x1 = 30, y1 = 1/30*exp(-30/30), col = "black")

#   Method #2 - Red area underneath curve
curve(expr = 1/30*exp(-x/30), xlim = c(0,140), col = "red",
  ylab = "f(x)", xlab = "x")
pdf <- function(x) {
  1/30*exp(-x/30)
}
x <- c(30, seq(from = 30, to = 140, by = 0.01), 140)
y <- c(0, pdf(x = seq(from = 30, to = 140, by = 0.01)), 0)
polygon(x = x, y = y,  col = "red", border = "red")


######################################################################
# Sample

  # Make sure the correct folder is specified on your computer for where the
  #   data file is located
  setwd(dir = "C:\\chris\\unl") # This would be used if the actual folder on my Windows-based computer was here
  set1 <- read.csv(file = "ExampleSample.csv")
  head(set1)
  tail(set1)

  # Estimate P(X > 30) using sample
  n <- length(set1$x)
  n
  head(set1$x > 30)
  tail(set1$x > 30)
  sum(set1$x > 30)
  sum(set1$x > 30)/n
  mean(set1$x > 30)


  # Estimate P(25 < X < 100) using sample
  mean(set1$x > 25 & set1$x < 100)
  mean(set1$x < 100) - mean(set1$x < 25)


  hist(x = set1$x, xlab = "x", main = "")

  save.hist <- hist(x = set1$x, xlab = "x")
  names(save.hist)

  # Information on the classes and the frequencies per class
  save.hist$breaks #Notice there is one more "break" than there are counts
  save.hist$counts
  sum(set1$x > 0 & set1$x <= 20)
  sum(set1$x > 20 & set1$x <= 40)

  # Frequency distribution
  Rel.Frequency <- round(save.hist$counts/sum(save.hist$counts), digits = 2)
  Cumul.Rel.Freq = round(cumsum(save.hist$counts/sum(save.hist$counts)), digits = 2)
  data.frame(class = save.hist$breaks[-1], Frequency = save.hist$counts,
    Rel.Frequency = Rel.Frequency, Cumul.Rel.Freq = Cumul.Rel.Freq)

  # Histogram with PDF overlay
  # Changed y-axis scale to get PDF in plot better
  hist(x = set1$x, xlab = "x", main = "", freq = FALSE, ylim = c(0, 0.03))
  curve(expr = 1/30*exp(-x/30), col = "red", add = TRUE, n = 1000)


  # Sample estimated time between transactions such that 95% are less than this value
  quantile(x = set1$x, probs = c(0.05, 0.95), type = 5)


  # ECDF with CDF overlay
  F.hat <- ecdf(x = set1$x)  # Creates a function to find ECDF
  F.hat(seq(from = 20, to = 220, by = 20))  # Compare to previous calculations
  sum(set1$x <= 20)/n
  sum(set1$x <= 40)/n
  F.hat(0)
  F.hat(10)
  F.hat(300)

  plot.ecdf(x = set1$x, lwd = 2, panel.first = grid(), ylab = "Probability",
    xlab = "x", col = "blue", main = "", verticals = TRUE, do.p = FALSE)
  curve(expr = 1 - exp(-x/30), col = "red", add = TRUE, n = 1000, xlim = c(0,250))
  legend(x = 100, y = 0.6, legend = c("ECDF", "CDF"), lty = 1, col = c("blue", "red"),
    lwd = 2, bty = "n")

  save.val <- plot.ecdf(x = set1$x, lwd = 2, panel.first = grid(), ylab = "Probability",
    xlab = "x", col = "blue", main = "")


######################################################################
# Take sample in R using rexp()

  set.seed(8291)
  x <- rexp(n = 100000, rate = 1/30)
  head(x)

  mean(x > 30)
  mean(x > 25 & x < 100)


#####################################################################
# Numerical integration

  pdf <- function(x) {
    1/30*exp(-x/30)
  }

  #Examples
  pdf(x = 0)
  1/30*exp(-0/30)
  pdf(x = 10)
  1/30*exp(-10/30)

  # P(X > 30)
  integrate(f = pdf, lower = 30, upper = Inf)
  # P(25 < X < 100)
  integrate(f = pdf, lower = 25, upper = 100)


  # Natural log function in R is log()
  30*log(20)

  ####################################################
  # Calculations to help understand code in find.root()
  save.res <- integrate(f = pdf, lower = 0, upper = 89.87)
  names(save.res)
  save.res$value
  # Extract only the value from integration
  integrate(f = pdf, lower = 0, upper = 89.87)$value
  # Shows value is 0
  0.95 - integrate(f = pdf, lower = 0, upper = 89.87)$value

  # Function want to find root for (the "c" where expression is 0)
  find.root <- function(c.val) {
    0.95 - integrate(f = pdf, lower = 0, upper = c.val)$value
  }

  find.root(c.val = 89.87)

  # Find c value
  uniroot(f = find.root, interval = c(0,300))



















#