# This program shows how to use functions within CalculateFunctionsWeb.R to # estimate the operating characteristics (e.g., E(T)) for an array # Author: Chris Bilder # Read in program that contain functions to complete computations source(file = "CalculateFunctionsWeb.R") # The rdirichlet() function from the rBeta2009 package is used to simulate # a set of individual joint probabilities of disease from a Dirichlet distribution library(rBeta2009) ############################################################################## # Example #1 - Two diseases (K = 2) and 3x3 array # In addition to estimating the operating characteristics, this example # shows how the Dirichlet distributions of Section 4 are summarized # and how the arrays are simulated for the algorithm given in Section 2 # Number of rows and columns I <- 3 J <- 3 # Ordering of P_ij = (P_ij,00, P_ij,01, P_ij,10, P_ij,11) ordering <- matrix(data = c(0, 0, 0, 1, 1, 0, 1, 1), nrow = 4, ncol = 2, byrow = TRUE) ordering # Number of diseases K <- ncol(ordering) # Aptima Combo 2 Assay accuracy for males, where chlamydia is disease #1 and # gonorrhea is disease #2 Se <- matrix(data = c(0.979, 0.979, 0.985, 0.985), nrow = 2, ncol = 2, dimnames = list(Disease = 1:2, Stage = 1:2), byrow = TRUE) Se Sp <- matrix(data = c(0.985, 0.985, 0.996, 0.996), nrow = 2, ncol = 2, dimnames = list(Disease = 1:2, Stage = 1:2), byrow = TRUE) Sp # E(P_ij,1+), E(P_ij,+1), Var(P_ij,1+), Var(P_ij,+1), and # odds ratios - E(P_ij,11) * E(P_ij,00) / ( E(P_ij,01) * E(P_ij,01) ) # The alpha used here corresponds to the medium amount of variability case dirichlet.summary(alpha = c(18.25,0.75,0.75,0.25), ordering = ordering) # One set of simulated results for an array # The method = "sd" corresponds to the spiral arrangment. One could also use # method = "gd" (gradient), "none" (1:(I*J) by row), and "set.perm" (a # specific arrangement - see function) # Each row in $alg.result and $true.status represents the testing results and # true statuses (respectively) of a person. Each column corresponds to a disease. # Use print.arrangement = TRUE to see the arrangement within the array # For the case here, # 1) Individual 8 is truly positive for disease 2 # 2) Individual 8 also tested positive for disease 2 # 3) The number of tests for this example is 7 (3 row tests, 3 column tests, and 1 retest) # 4) Individual #7 has the largest probability of being positive for at least one disease # so this individual is placed in the (1,1) cell of the array set.seed(8765) arrange.info <- get.p.ij(alpha = c(18.25,0.75,0.75,0.25), I = I, J = J, K = K, method = "sd", ordering = ordering, print.arrangement = TRUE) save.array.sim <- array.test.gen(joint.p = arrange.info$joint.p, testing.config = arrange.info$testing.config, I = I, J = J, K = K, ordering = ordering, Se = Se, Sp = Sp) save.array.sim # Obtain a set of joint probabilities and estimate the operating characteristics corresponding to them # Each column in joint.p is one P_ij. These can be reordered within an array depending on # the arrangement specified. Thus, the subscript ij here is not meaningful until # the arragement is chosen. set.seed(8765) alpha <- c(18.25,0.75,0.75,0.25) joint.p <- joint.p.create(alpha = alpha, total = I*J) #Note: This is used by get.p.ij() dimnames(joint.p) <- list(joint.p.row.labels(ordering = ordering), as.character(1:(I*J))) joint.p operating.characteristics(p.vec = joint.p, I = I, J = J, K = K, ordering = ordering, Se = Se, Sp = Sp, B = 10000, method = "sd") # In application, a user of operating.charactertics() will likely have their own set of # joint probabilities of disease. Therefore, joint.p.create() would not be needed. ################################################################################ # Example #2 - Two diseases (K = 2), 3x3 array, and equal p_ij for each individual # The operating characteristics here match those that are calculated by the # Hou et al. (2018) Shiny app # Number of rows and columns I <- 3 J <- 3 # Ordering of P_ij = (P_ij,00, P_ij,01, P_ij,10, P_ij,11) ordering <- matrix(data = c(0, 0, 0, 1, 1, 0, 1, 1), nrow = 4, ncol = 2, byrow = TRUE) # Number of diseases K <- ncol(ordering) # Sensitivities and specificities Se <- matrix(data = 0.95, nrow = 2, ncol = 2, dimnames = list(Disease = 1:2, Stage = 1:2), byrow = TRUE) Se Sp <- matrix(data = 0.99, nrow = 2, ncol = 2, dimnames = list(Disease = 1:2, Stage = 1:2), byrow = TRUE) Sp # This represents E(P_ij) from Dirichlet with alpha = c(18.25,0.75,0.75,0.25) Exp.Pij <- c(18.25,0.75,0.75,0.25)/sum(c(18.25,0.75,0.75,0.25)) # Assume every individual has an equal P_ij vector joint.p <- matrix(data = Exp.Pij, nrow = 4, ncol = I*J, byrow = FALSE) dimnames(joint.p) <- list(joint.p.row.labels(ordering = ordering), as.character(1:(I*J))) joint.p # Find operating characteristics # Note that sim.print = 10000 prints simulation number by 10000 (and simulation b = 1) set.seed(5475) operating.characteristics(p.vec = joint.p, I = I, J = J, K = K, ordering = ordering, Se = Se, Sp = Sp, B = 50000, method = "none", sim.print = 10000) ############################################################################## # Example #3 - Three diseases (K = 3), 3x3 array, and E(P_ij) is available for each # individual # This is a case examined for Section 4 (Figure 4) # Number of rows and columns I <- 3 J <- 3 # Ordering of P_ij = (P_ij,000, P_ij,001, P_ij,010, P_ij,011, P_ij,100, P_ij,101, P_ij,110, P_ij,111) ordering <- matrix(data = c(0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1), nrow = 8, ncol = 3, byrow = TRUE) # Number of diseases K <- ncol(ordering) # Sensitivities and specificities Se <- matrix(data = c(0.95, 0.95, 0.95, 0.95, 0.95, 0.95), nrow = 3, ncol = 2, dimnames = list(Infection = 1:K, Stage = 1:2)) Sp <- matrix(data = c(0.99, 0.99, 0.99, 0.99, 0.99, 0.99), nrow = 3, ncol = 2, dimnames = list(Infection = 1:K, Stage = 1:2)) # E(P_ij) - This was obtained by adapting the correlated binary data simulation algorithm of # Gange (American Statistician, 1995) probK3 <- c(0.881085379, 0.031413602, 0.031413602, 0.006087417, 0.031413602, 0.006087417, 0.006087417, 0.006411564) # Find alpha vector alpha0 <- 20 # Sum of alphas, same as for the K = 2 case # For a Dirichlet distribution in general, E(X_i) = alpha_i / sum(alpha_i), so solve for alpha_i # to obtain the alpha vector alpha <- alpha0 * probK3 # probK3 here plays role of E(X_i) alpha # The resulting alpha corresponds to the medium amount of variability case # E(P_ij,1++), E(P_ij,+1+), E(P_ij,++1), Var(P_ij,1++), Var(P_ij,+1+), Var(P_ij,++1), and # odds ratios dirichlet.summary(alpha = alpha, ordering = ordering) # Obtain a set of joint probabilities and estimate the operating characteristics corresponding to them set.seed(3724) joint.p <- joint.p.create(alpha = alpha, total = I*J) dimnames(joint.p) <- list(joint.p.row.labels(ordering = ordering), as.character(1:(I*J))) joint.p operating.characteristics(p.vec = joint.p, I = I, J = J, K = K, ordering = ordering, Se = Se, Sp = Sp, B = 10000, method = "sd") #