# This program shows how to use functions within CalculateFunctionsWeb.R to
# estimate the operating characteristics (e.g., E(T)) for an array
# Author: Chris Bilder
# Read in program that contain functions to complete computations
source(file = "CalculateFunctionsWeb.R")
# The rdirichlet() function from the rBeta2009 package is used to simulate
# a set of individual joint probabilities of disease from a Dirichlet distribution
library(rBeta2009)
##############################################################################
# Example #1 - Two diseases (K = 2) and 3x3 array
# In addition to estimating the operating characteristics, this example
# shows how the Dirichlet distributions of Section 4 are summarized
# and how the arrays are simulated for the algorithm given in Section 2
# Number of rows and columns
I <- 3
J <- 3
# Ordering of P_ij = (P_ij,00, P_ij,01, P_ij,10, P_ij,11)
ordering <- matrix(data = c(0, 0,
0, 1,
1, 0,
1, 1), nrow = 4, ncol = 2, byrow = TRUE)
ordering
# Number of diseases
K <- ncol(ordering)
# Aptima Combo 2 Assay accuracy for males, where chlamydia is disease #1 and
# gonorrhea is disease #2
Se <- matrix(data = c(0.979, 0.979, 0.985, 0.985), nrow = 2, ncol = 2, dimnames =
list(Disease = 1:2, Stage = 1:2), byrow = TRUE)
Se
Sp <- matrix(data = c(0.985, 0.985, 0.996, 0.996), nrow = 2, ncol = 2,
dimnames = list(Disease = 1:2, Stage = 1:2), byrow = TRUE)
Sp
# E(P_ij,1+), E(P_ij,+1), Var(P_ij,1+), Var(P_ij,+1), and
# odds ratios - E(P_ij,11) * E(P_ij,00) / ( E(P_ij,01) * E(P_ij,01) )
# The alpha used here corresponds to the medium amount of variability case
dirichlet.summary(alpha = c(18.25,0.75,0.75,0.25), ordering = ordering)
# One set of simulated results for an array
# The method = "sd" corresponds to the spiral arrangment. One could also use
# method = "gd" (gradient), "none" (1:(I*J) by row), and "set.perm" (a
# specific arrangement - see function)
# Each row in $alg.result and $true.status represents the testing results and
# true statuses (respectively) of a person. Each column corresponds to a disease.
# Use print.arrangement = TRUE to see the arrangement within the array
# For the case here,
# 1) Individual 8 is truly positive for disease 2
# 2) Individual 8 also tested positive for disease 2
# 3) The number of tests for this example is 7 (3 row tests, 3 column tests, and 1 retest)
# 4) Individual #7 has the largest probability of being positive for at least one disease
# so this individual is placed in the (1,1) cell of the array
set.seed(8765)
arrange.info <- get.p.ij(alpha = c(18.25,0.75,0.75,0.25), I = I, J = J, K = K,
method = "sd", ordering = ordering, print.arrangement = TRUE)
save.array.sim <- array.test.gen(joint.p = arrange.info$joint.p, testing.config = arrange.info$testing.config,
I = I, J = J, K = K, ordering = ordering, Se = Se, Sp = Sp)
save.array.sim
# Obtain a set of joint probabilities and estimate the operating characteristics corresponding to them
# Each column in joint.p is one P_ij. These can be reordered within an array depending on
# the arrangement specified. Thus, the subscript ij here is not meaningful until
# the arragement is chosen.
set.seed(8765)
alpha <- c(18.25,0.75,0.75,0.25)
joint.p <- joint.p.create(alpha = alpha, total = I*J) #Note: This is used by get.p.ij()
dimnames(joint.p) <- list(joint.p.row.labels(ordering = ordering), as.character(1:(I*J)))
joint.p
operating.characteristics(p.vec = joint.p, I = I, J = J, K = K,
ordering = ordering, Se = Se, Sp = Sp, B = 10000, method = "sd")
# In application, a user of operating.charactertics() will likely have their own set of
# joint probabilities of disease. Therefore, joint.p.create() would not be needed.
################################################################################
# Example #2 - Two diseases (K = 2), 3x3 array, and equal p_ij for each individual
# The operating characteristics here match those that are calculated by the
# Hou et al. (2018) Shiny app
# Number of rows and columns
I <- 3
J <- 3
# Ordering of P_ij = (P_ij,00, P_ij,01, P_ij,10, P_ij,11)
ordering <- matrix(data = c(0, 0,
0, 1,
1, 0,
1, 1), nrow = 4, ncol = 2, byrow = TRUE)
# Number of diseases
K <- ncol(ordering)
# Sensitivities and specificities
Se <- matrix(data = 0.95, nrow = 2, ncol = 2, dimnames =
list(Disease = 1:2, Stage = 1:2), byrow = TRUE)
Se
Sp <- matrix(data = 0.99, nrow = 2, ncol = 2,
dimnames = list(Disease = 1:2, Stage = 1:2), byrow = TRUE)
Sp
# This represents E(P_ij) from Dirichlet with alpha = c(18.25,0.75,0.75,0.25)
Exp.Pij <- c(18.25,0.75,0.75,0.25)/sum(c(18.25,0.75,0.75,0.25))
# Assume every individual has an equal P_ij vector
joint.p <- matrix(data = Exp.Pij, nrow = 4, ncol = I*J, byrow = FALSE)
dimnames(joint.p) <- list(joint.p.row.labels(ordering = ordering), as.character(1:(I*J)))
joint.p
# Find operating characteristics
# Note that sim.print = 10000 prints simulation number by 10000 (and simulation b = 1)
set.seed(5475)
operating.characteristics(p.vec = joint.p, I = I, J = J, K = K,
ordering = ordering, Se = Se, Sp = Sp, B = 50000, method = "none", sim.print = 10000)
##############################################################################
# Example #3 - Three diseases (K = 3), 3x3 array, and E(P_ij) is available for each
# individual
# This is a case examined for Section 4 (Figure 4)
# Number of rows and columns
I <- 3
J <- 3
# Ordering of P_ij = (P_ij,000, P_ij,001, P_ij,010, P_ij,011, P_ij,100, P_ij,101, P_ij,110, P_ij,111)
ordering <- matrix(data = c(0, 0, 0,
0, 0, 1,
0, 1, 0,
0, 1, 1,
1, 0, 0,
1, 0, 1,
1, 1, 0,
1, 1, 1), nrow = 8, ncol = 3, byrow = TRUE)
# Number of diseases
K <- ncol(ordering)
# Sensitivities and specificities
Se <- matrix(data = c(0.95, 0.95, 0.95, 0.95, 0.95, 0.95), nrow = 3, ncol = 2,
dimnames = list(Infection = 1:K, Stage = 1:2))
Sp <- matrix(data = c(0.99, 0.99, 0.99, 0.99, 0.99, 0.99), nrow = 3, ncol = 2,
dimnames = list(Infection = 1:K, Stage = 1:2))
# E(P_ij) - This was obtained by adapting the correlated binary data simulation algorithm of
# Gange (American Statistician, 1995)
probK3 <- c(0.881085379, 0.031413602, 0.031413602, 0.006087417, 0.031413602, 0.006087417,
0.006087417, 0.006411564)
# Find alpha vector
alpha0 <- 20 # Sum of alphas, same as for the K = 2 case
# For a Dirichlet distribution in general, E(X_i) = alpha_i / sum(alpha_i), so solve for alpha_i
# to obtain the alpha vector
alpha <- alpha0 * probK3 # probK3 here plays role of E(X_i)
alpha
# The resulting alpha corresponds to the medium amount of variability case
# E(P_ij,1++), E(P_ij,+1+), E(P_ij,++1), Var(P_ij,1++), Var(P_ij,+1+), Var(P_ij,++1), and
# odds ratios
dirichlet.summary(alpha = alpha, ordering = ordering)
# Obtain a set of joint probabilities and estimate the operating characteristics corresponding to them
set.seed(3724)
joint.p <- joint.p.create(alpha = alpha, total = I*J)
dimnames(joint.p) <- list(joint.p.row.labels(ordering = ordering), as.character(1:(I*J)))
joint.p
operating.characteristics(p.vec = joint.p, I = I, J = J, K = K,
ordering = ordering, Se = Se, Sp = Sp, B = 10000, method = "sd")
#