######################################################################### # R functions to calculate the operating characteristics for # # hierarchical H(n1:n2:...:nS), when biomarker distributions are # # normal, corresponding to the manuscript titled, # # "Group testing case identification with biomarker information" # # # Date: 08/06/2017. # ######################################################################### rm(list=ls(all=TRUE)) #***********************************************************************# # Supporting function # Function to exactly compute the three thresholds for pool of size Gseq # Arguments: # n.sep: = a sequence of pool sizes, for example, one could take # n.seq=c(1,2,10). This function will produce thresholds # for pools of size 1,2,10, respectively. # Pos.para = c(mu+, sigma+), where mu+ and sigma+ are the mean and # standard deviation of the positive biomarker distribution # respectively. For example, if it is N(6,1), we take # Pos.para=c(6,1). # Neg.para = c(mu-, sigma-), where mu- and sigma- are the mean and # standard deviation of the negative biomarker distribution # respectively. For example, if it is N(3,0.25), we take # Neg.para=c(3,0.5). # Mes.para = c(mu_epsilon, sigma_epsilon), where mu_epsilon and # sigma-epsilon are the mean and # standard deviation of the measrument error distribution # respectively. For example, if it is N(0,0.0025), we take # Mes.para=c(0,0.05). Exact.Rules.thres=function(n.sep,Pos.para,Neg.para,Mes.para) { # Define some variables up=Pos.para[1] sp=Pos.para[2] un=Neg.para[1] sn=Neg.para[2] ue=Mes.para[1] se=Mes.para[2] # Calculate the optimal threshold for individual testing Youden=function(t0) { res=pnorm(t0,un+ue,sqrt(sn^2+se^2))+1-pnorm(t0,up+ue,sqrt(sp^2+se^2)) return(-res) } tau.star=optimize(Youden,interval=c(un,up))$minimum # Based on the optimal threshold, compute the sensitivity and specificity of individual testing Sp=pnorm(tau.star,un+ue,sqrt(sn^2+se^2)) Se=1-pnorm(tau.star,up+ue,sqrt(sp^2+se^2)) # The first type of threshold for pools: fix it to be the individual optimal threshold ruleA=rep(tau.star,length(n.sep)) # The second type of threshold for pools: divide the individual optimal threshold by the pool size ruleB=tau.star/n.sep # The new type of threshold we proposed in the article ruleC=c() for(n in n.sep) { if(n==1){ ruleC=c(ruleC,tau.star) }else{ pooled.Youden=function(t0) { D=rep(1,n)/n Tv=rep(0,n) SP=pnorm(t0, sum( D*(up*Tv+un*(1-Tv)) )+ue, sqrt(sum(sp^2*Tv+sn^2*(1-Tv))/(n^2)+se^2) )[1] Tv[1]=1 SE=1-pnorm(t0, sum( D*(up*Tv+un*(1-Tv)) )+ue, sqrt(sum(sp^2*Tv+sn^2*(1-Tv))/(n^2)+se^2) )[1] return(-(SE+SP)) } ruleC=c(ruleC,optimize(pooled.Youden,interval=c(un,((n-1)*un+up)/n))$minimum)} } # Return all results in a list. # tau.star: optimal threshold for individual testing by maximing the Youden index # Se: sensitivity of individual testing # Sp: specificity of individual testing # ruleA: thresholds for pools of size n.seq by fixing them to be tau.star # ruleB: thresholds for pools of size n.seq by dividing tau.star by n.seq # ruleC: our newly proposed threshold for pooled by maximizing a pooled version of Youden index return(list(tau.star=tau.star,Se=Se,Sp=Sp,ruleA=ruleA,ruleB=ruleB,ruleC=ruleC)) } #***********************************************************************# # Supporting function # Function to exactly compute the operating characteristics of # an S-stage hierarchial algorithm H(n1:n2:...:nS) # when disease prevalence is p, # but under traditional assumptions that were assumed in Kim et al. (2007). # This funcion is coded based on Kim et al. (2007) # # Arguments: # S.size: = c(n1, n2, ..., nS). The configuration of the algorithm # p = disease prevalence # Se = sensitivity of individual testing # Sp = specificity of individual testing Exact.Tradition.Hierarchical.homo=function(S.size, p, Se, Sp) { S=length(S.size) q=1-p eff=1 for(s in 1:(S-1)) { temp=q^(S.size[1])*(1-Sp)^s+(1-q^(S.size[s]))*Se^s if(s>1) { for(j in 1:(s-1)) { temp=temp+(q^(S.size[j+1])-q^(S.size[j]))*Se^j*(1-Sp)^(s-j) } } eff=eff+S.size[1]/S.size[s+1]*temp } eff=eff/S.size[1] psp=q^(S.size[1]-1)*(1-Sp)^S for(s in 2:S) { psp=psp+(q^(S.size[s]-1)-q^(S.size[s-1]-1))*Se^(s-1)*(1-Sp)^(S-s+1) } psp=1-psp pse=Se^S ppv=pse*p/(pse*p+(1-psp)*(1-p)) npv=psp*(1-p)/(psp*(1-p)+(1-pse)*p) # return everything in a list # EFF: testing efficiency # PSE: pooling sensitivity # PSP: pooling specificity # PPV: pooling positive predictive value # NPV: pooling negative predictive value return(list(EFF=eff,PSE=pse,PSP=psp,PPV=ppv,NPV=npv)) } #***********************************************************************# # Main function for normal biomarkers # Function to exactly compute the operating characteristics of # an S-stage hierarchial algorithm H(n1:n2:...:nS) # when disease prevalence is p and # biomarker distributions are normal. # # Arguments: # S.size: = c(n1, n2, ..., nS). The configuration of the algorithm # p = disease prevalence # THRED = a sequence of thresholds used for pools of sizes n1, n2, # n3, ..., nS, respectively. Note that, one should make # sure that the order of thresholds in THRED is the same # as the one of pool sizes in S.size. # Pos.para = c(mu+, sigma+), where mu+ and sigma+ are the mean and # standard deviation of the positive biomarker distribution # respectively. For example, if it is N(6,1), we take # Pos.para=c(6,1). # Neg.para = c(mu-, sigma-), where mu- and sigma- are the mean and # standard deviation of the negative biomarker distribution # respectively. For example, if it is N(3,0.25), we take # Neg.para=c(3,0.5). # Mes.para = c(mu0, sigma0), where mu0 and sigma0 are the mean and # standard deviation of the measrument error distribution # respectively. For example, if it is N(0,0.0025), we take # Mes.para=c(0,0.05). Exact.Hierarchical=function(S.size, p, THRED, Pos.para, Neg.para, Mes.para) { require(mvtnorm) # define some variables up=Pos.para[1] sp=Pos.para[2] un=Neg.para[1] sn=Neg.para[2] ue=Mes.para[1] se=Mes.para[2] Stage=length(S.size) ######################################## # Supporting function General.space=function(temp.size) { if(length(temp.size)==1) { res=matrix(0:temp.size) } if(length(temp.size)>1) { temp.s=temp.size[-length(temp.size)]-temp.size[-1] temp.s=temp.s[length(temp.s):1] temp.s=c(temp.size[length(temp.size)],temp.s) res=matrix(0,prod(temp.s+1),length(temp.s)) temp.ss=c(0,cumsum(temp.s)) for(ss in 1:(length(temp.s)-1)) { res[,ss]=rep(0:(temp.s[ss]), rep(prod(temp.s[-(1:ss)]+1),temp.s[ss]+1) ) } res[,length(temp.s)]=0:(temp.s[length(temp.s)]) } return(res) } ######################################### # Supporting function joint.sesp=function(tempT,ss,temp.S,THRED) { D=matrix(1,1,temp.S[1])/temp.S[1] if(ss>1) { for(tem.s in 2:ss) { D=rbind(D, (rep(c(1,0),c(temp.S[tem.s],temp.S[1]-temp.S[tem.s])))/temp.S[tem.s]) } } lb=THRED[1:ss] ub=rep(Inf,ss) res=as.vector(pmvnorm(lower=lb,upper=ub,mean=as.vector(D%*%matrix(tempT*up+(1-tempT)*un+ue)), sigma=D%*%diag( tempT*sp^2+(1-tempT)*sn^2 )%*%t(D)+diag(se^2,ss))[1]) return(res) } ##################################### # Testing efficiency eff=1/S.size[1] for(ss in 1:(Stage-1)) { temp.S=S.size[1:ss] temp.space=General.space(temp.S) temp.size=apply(temp.space,2,max) temp=0 for(tem.m in 1:nrow(temp.space)) { temp.T=rep(rep(c(0,1),ss),as.vector(matrix(c(temp.size-temp.space[tem.m,],temp.space[tem.m,]),2,,byrow=TRUE))) temp=temp+joint.sesp(temp.T,ss,temp.S,THRED)*prod(dbinom(temp.space[tem.m,],temp.size,p)) } eff=eff+1/S.size[ss+1]*temp } ##################################### # Pooling specificity # Pooling sensitivity psp=0 pse=0 temp.space=General.space(S.size) temp.size=apply(temp.space,2,max) temp.space=as.matrix(temp.space[-(1:(nrow(temp.space)/2)),-1]) for(tem.m in 1:nrow(temp.space)) { temp.TT=rep(rep(c(0,1),Stage),as.vector(matrix(c(temp.size-c(1,temp.space[tem.m,]),c(1,temp.space[tem.m,])),2,,byrow=TRUE))) pse=pse+joint.sesp(temp.TT,Stage,S.size,THRED)*prod(dbinom(temp.space[tem.m,],temp.size[-1],p)) temp.TT[1]=0 psp=psp+joint.sesp(temp.TT,Stage,S.size,THRED)*prod(dbinom(temp.space[tem.m,],temp.size[-1],p)) } psp=1-psp ppv=pse*p/(pse*p+(1-psp)*(1-p)) npv=psp*(1-p)/(psp*(1-p)+(1-pse)*p) # return everything in a list # EFF: testing efficiency # PSE: pooling sensitivity # PSP: pooling specificity # PPV: pooling positive predictive value # NPV: pooling negative predictive value return(list(EFF=eff,PSE=pse,PSP=psp,PPV=ppv,NPV=npv)) } #***********************************************************************# # Final function for normal biomarkers # Function to exactly compute the operating characteristics of # an S-stage hierarchial algorithm H(n1:n2:...:nS) # when disease prevalence is p and # biomarker distributions are normal. # # Arguments: # S.size: = c(n1, n2, ..., nS). The configuration of the algorithm # p = disease prevalence # THRED = a sequence of thresholds used for pools of sizes n1, n2, # n3, ..., nS, respectively. Note that, one should make # sure that the order of thresholds in THRED is the same # as the one of pool sizes in S.size. # Pos.para = c(mu+, sigma+), where mu+ and sigma+ are the mean and # standard deviation of the positive biomarker distribution # respectively. For example, if it is N(6,1), we take # Pos.para=c(6,1). # Neg.para = c(mu-, sigma-), where mu- and sigma- are the mean and # standard deviation of the negative biomarker distribution # respectively. For example, if it is N(3,0.25), we take # Neg.para=c(3,0.5). # Mes.para = c(mu0, sigma0), where mu0 and sigma0 are the mean and # standard deviation of the measrument error distribution # respectively. For example, if it is N(0,0.0025), we take # Mes.para=c(0,0.05). # #Ouputs: # it provides FOUR sets operating charatersitics of H(n1:n2:...:nS) # 1st set: consider biomarker pooling and fix threshold to be the threshold for individual testing # 2nd set: consider biomarker pooling and use threshold to be the threshold for individual testing divided by pool size # 3rd set: consider biomarker pooling and use our newly proposed threshold to maximize the pooled version of Youden index # 4th set: under traditional assumption using methods provided by Kim et al. (2007) Exact.Hierarchical.all=function(S.size, p, Pos.para, Neg.para, Mes.para) { Rules=Exact.Rules.thres(S.size,Pos.para,Neg.para,Mes.para) print("The threshold for individual testing is") print(Rules$tau.star) Se=Rules$Se Sp=Rules$Sp print("The sensitivity for individual testing is") print(Se) print("The specificity for individual testing is") print(Sp) print("OP.tradi: under traditional assumption using methods provided by Kim et al. (2007)") OP.tradi=Exact.Tradition.Hierarchical.homo(S.size, p, Rules$Se, Rules$Sp) print("OP.fixt0: consider biomarker pooling and fix threshold to be the threshold for individual testing") OP.fixt0=Exact.Hierarchical(S.size, p, Rules$ruleA, Pos.para,Neg.para,Mes.para) print("OP.t0byn: consider biomarker pooling and use threshold to be the threshold for individual testing divided by pool size") OP.t0byn=Exact.Hierarchical(S.size, p, Rules$ruleB, Pos.para,Neg.para,Mes.para) print("OP.newth: consider biomarker pooling and use our newly proposed threshold to maximize the pooled version of Youden index") OP.newth=Exact.Hierarchical(S.size, p, Rules$ruleC, Pos.para,Neg.para,Mes.para) return(list(cbind(OP.fixt0,OP.t0byn,OP.newth,OP.tradi))) } ######################################################################### # For illustration # Set distributions, prevalence, and configuration of the hierarchial algorithm: # Positive biomarker distribution N(6,1) Pos.para=c(6,1) # Negative biomarker distribution N(3,0.25) Neg.para=c(3,0.5) # Measurement error N(0, 0.0025) Mes.para=c(0,0.05) # Prevalence is p=0.05 p=0.05 # Algorithm is H(8:4:1) S.size=c(8,4,1) set.seed(100) Exact.Hierarchical.all(S.size, p, Pos.para, Neg.para, Mes.para) #Outputs: #[1] "The threshold for individual testing is" #[1] 4.114972 #[1] "The sensitivity for individual testing is" #[1] 0.9701279 #[1] "The specificity for individual testing is" #[1] 0.9867524 #[1] "OP.tradi: under traditional assumption using methods provided by Kim et al. (2007)" #[1] "OP.fixt0: consider biomarker pooling and fix threshold to be the threshold for individual testing" #[1] "OP.t0byn: consider biomarker pooling and use threshold to be the threshold for individual testing divided by pool size" #[1] "OP.newth: consider biomarker pooling and use our newly proposed threshold to maximize the pooled version of Youden index" #[[1]] # OP.fixt0 OP.t0byn OP.newth OP.tradi #EFF 0.1304224 1.375 0.4156434 0.3854637 #PSE 0.03188749 0.9701279 0.8138943 0.913034 #PSP 0.9999088 0.9867524 0.9945616 0.9981931 #PPV 0.9484743 0.7939948 0.8873452 0.9637608 #NPV 0.9515129 0.9984092 0.9902475 0.9954355