# Implement two-stage informative group testing for multiplex assays # Author: Chris Bilder ################################################################################ # Functions for testing implement.testing <- function(data.set, group.mem = group.mem, Se = Se, Sp = Sp) { I <- ncol(group.mem) K <- nrow(Se) # Number of individuals will not be evenly divisible by I; the "remainder" individuals # are simply individually tested at the end of the function max.groups <- floor(nrow(data.set)/I) # Use to store results save.numb.tests <- numeric(max.groups) save.C <- matrix(data = NA, nrow = max.groups, ncol = I) save.G <- matrix(data = NA, nrow = max.groups, ncol = I) # Iterate over each initial set of I individuals for (i in 1:max.groups) { group <- data.set[(1 + I*(i-1)):(I*i),] # Group of interest ytilde <- t(group[,1:2]) # Treating as true values p00 <- group[,3] # Estimated probability all negative ######################################### # Order individuals and implement testing p00.order <- order(1 - p00) # Find ordering of these probabilities ytilde.order <- ytilde[,p00.order] # Order individuals max.groups.block <- max(group.mem[1,]) diagnoses.save.block <- matrix(data = NA, nrow = K, ncol = I) save.numb.tests.block <- numeric(max.groups.block) # Because stage 1 for two-stage testing has group numbers not all equal to 1 # in the group membership matrix, I need to implement testing separately for each for(group.numb in 1:max.groups.block) { one.group <- as.matrix(group.mem[, group.mem[1,] == group.numb]) # as.matrix() used in case group size is 1 one.ytilde <- as.matrix(ytilde.order[, group.mem[1,] == group.numb]) # Need to change the actual group numbers to get test.results() to work properly # Thus, everyone will be a "group #1" one.group[1,] <- rep(x = 1, times = ncol(one.group)) one.group[2,] <- 1:ncol(one.group) numb.in.group <- sum(group.mem[1,] == group.numb) # test.results() could give 2 tests for a group of size 1 if (numb.in.group > 1) { # Implement testing save.test.results <- test.results(ytilde = one.ytilde, group.mem = one.group, Se = Se, Sp = Sp) # Number of tests used save.numb.tests.block[group.numb] <- save.test.results$numb.tests # Save diagnoses diagnoses.save.block[1:2, which(group.mem[1,] == group.numb)] <- save.test.results$diagnoses } else { save.numb.tests.block[group.numb] <- 1 # Can call testing function directly since only need to do one test diagnoses.save.block[1:2, which(group.mem[1,] == group.numb)] <- as.matrix(do.test(ytilde = one.ytilde, Se = Se, Sp = Sp, stage = 1, K = K)) } } save.numb.tests[i] <- sum(save.numb.tests.block) p00.order.back <- order(p00.order) save.C[i,] <- diagnoses.save.block[1, p00.order.back] # Save the diagnoses save.G[i,] <- diagnoses.save.block[2, p00.order.back] # Save the diagnoses ######################################### } #Put diagnoses into a vector form - perhaps could change parts of code above then # to save time if desired save.C2 <- matrix(data = t(save.C), nrow = I*max.groups, ncol = 1) save.G2 <- matrix(data = t(save.G), nrow = I*max.groups, ncol = 1) #Individual testing for remainder individuals who did not fit into groups 1:max.groups save.remainder <- matrix(data = NA, nrow = nrow(data.set) - max.groups*I, ncol = 2) if (max.groups*I < nrow(data.set)) { for (i in (max.groups*I + 1):nrow(data.set)) { save.remainder[i-max.groups*I,] <- do.test(ytilde = t(data.set[i,1:2]), Se = Se, Sp = Sp, stage = 1, K = K) } } save.numb.tests.w.remain <- c(save.numb.tests, rep(x = 1, times = nrow(data.set) - max.groups*I)) list(Tests = save.numb.tests.w.remain, C.diag = c(save.C2, save.remainder[,1]), G.diag = c(save.G2, save.remainder[,2])) } #Accuracy measures accuracy <- function(data, results) { c.acc <- c(pl_se(g = results$C.diag, Y = data[,1]), pl_sp(g = results$C.diag, Y = data[,1]), ppv(g = results$C.diag, Y = data[,1]), npv(g = results$C.diag, Y = data[,1])) g.acc <- c(pl_se(g = results$G.diag, Y = data[,2]), pl_sp(g = results$G.diag, Y = data[,2]), ppv(g = results$G.diag, Y = data[,2]), npv(g = results$G.diag, Y = data[,2])) list(C = c.acc, G = g.acc) } ################################################################################ # Main function IGT.sim <- function(data.set, group.mem = group.mem, Se = Se, Sp = Sp, B = B, iter.print = 100, ...) { save.T.acc <- matrix(data = NA, nrow = B, ncol = 9, dimnames = list(sim = 1:B, measure = c("Tests", "CPSe", "CPSp", "CPPPV", "CPNPV", "GPSe", "GPSp", "GPPPV", "GPNPV"))) for (b in 1:B) { save.res <- implement.testing(data.set = data.set, group.mem = group.mem, Se = Se, Sp = Sp) save.T.acc[b,1] <- sum(save.res$Tests) save.T.acc[b,2:9] <- c(accuracy(data = data.set, results = save.res)$C, accuracy(data = data.set, results = save.res)$G) if(floor(b/iter.print) == ceiling(b/iter.print)) {print(b)} } final <- c(sd(save.T.acc[,1]), colMeans(save.T.acc)) names(final)[1:2] <- c("sd.tests", "Mean.tests") final } ################################################################################ # # # MAIN # # # ################################################################################ # Folder location for additional functions and the data set setwd(dir = "C:\\chris") ############################################################################## # Read in external functions source(file = "DecodeFunctions.R") source(file = "Utility.R") ############################################################################## # Two-stage implementation for female urine in Idaho # Data set with disease information and estimated probability of not having either disease # C = Chlamydia diagnosis # G = Gonorrhea diagnosis # pi00.hat = Estimated probability of not having either disease FU.ID2011phat <- read.csv(file = "FU.ID2011phat.csv") head(FU.ID2011phat) # Aptima Combo 2 Assay accuracy Se <- matrix(data = c(0.947, 0.947, 0.913, 0.913), nrow = 2, ncol = 2, dimnames = list(Infection = 1:2, Stage = 1:2), byrow = TRUE) Se Sp <- matrix(data = c(0.989, 0.989, 0.993, 0.993), nrow = 2, ncol = 2, dimnames = list(Infection = 1:2, Stage = 1:2), byrow = TRUE) Sp # Approximate OTC - Find outside of this program from 2010 data group.mem.noninfo <- matrix(data = c(1, 1, 1, 1, 1, 2, 2, 2, 2 ,2), nrow = 2, ncol = 5, byrow = TRUE, dimnames = list(Stage = 1:2, Individual = 1:5)) group.mem.noninfo group.mem.info <- matrix(data = c(1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 1:30), nrow = 2, ncol = 30, byrow = TRUE, dimnames = list(Stage = 1:2, Individual = 1:30)) group.mem.info # Informative group testing # This takes approximately 3 minutes to run on a Surface Pro 2 computer start.time <- proc.time() set.seed(9458) FU.ID2011.2stage.IGT <- IGT.sim(data.set = FU.ID2011phat, group.mem = group.mem.info, Se = Se, Sp = Sp, B = 500, iter.print = 10) # Mean (SD) number of tests = 2105.86 (24.91); also printed are the mean accuracy measures for # C (chlamydia) and G (gonorrhea); because of the variability in the accuracy, these should not be # compared directly to the accuracy from non-informative group testing FU.ID2011.2stage.IGT end.time <- proc.time() save.time <- end.time-start.time cat("\n Number of minutes running:", save.time[3]/60, "\n \n") # Non-informative group testing set.seed(4783) # In order for the same code to be used as with informative group testing, I simply # set all probabilities of positivity for at least one disease to be equal across all individuals. FU2011.noninfo <- data.frame(FU.ID2011phat[,1:2], pi00.hat = 0.5) FU.ID2011.2stage.NGT <- IGT.sim(data.set = FU2011.noninfo, group.mem = group.mem.noninfo, Se = Se, Sp = Sp, B = 500, iter.print = 10) # Mean (SD) number of tests = 2210.96 (23.56) FU.ID2011.2stage.NGT #