################################################################################ #Data from Cognard et al. (1996) for DAVFs c.table <- array(data = c(0, 1, 0, 0, 0, 0, 83, 0, 8, 0, 1, 1, 0, 17, 2, 1, 0, 0, 0, 0, 7, 1, 2, 6, 2, 1, 0, 6, 10, 0, 8, 1, 0, 0, 6, 19, 4, 2, 3, 0, 0, 1, 5, 0, 1, 0, 0, 6, 0), dim=c(7,7), dimnames = list(Symptoms = c("Hemorrhage", "Intracranial hypertension", "Focal neurologic deficit", "Seizures", "Cardiac deficiency", "Myelopathy", "Nonaggresive symptoms"), Classification = c("I", "IIa", "IIb", "IIa and b", "III", "IV", "V"))) c.table ind.test <- chisq.test(c.table, correct = FALSE) ind.test ################################################################################ # MC simulation approach n <- sum(c.table) #Sample size pi.hat <- ind.test$expected/n #Estimated expected pi^_ij under Ho pi.hat B <- 10000 #Number of simulated data sets set.seed(7812) table.count <- rmultinom(n = B, size = n, prob = pi.hat) #Each column contains the counts for one contingency table table.count[,1:2] #Function calculates X^2 for a data set and checks the contingency table size calc.stat <- function(data) { c.table <- array(data = data, dim=c(7,7), dimnames = list(symptoms = 1:7, class = 1:7)) ind.test <- chisq.test(c.table, correct = FALSE) ck.0.row <- sum(rowSums(c.table) == 0) #Number of rows with all 0 entries ck.0.col <- sum(colSums(c.table) == 0) #Number of columns with all 0 entries c(ind.test$statistic, ck.0.row, ck.0.col) } #Example application of the function (could do this with the observed data too) calc.stat(data = table.count[,1]) #Calculate X^2* for each simulated data set save.star <- apply(X = table.count, MARGIN = 2, FUN = calc.stat) #The warnings are for "Chi-squared approximation may be incorrect" #We do not want to include contingency tables that have an all 0 row or all 0 column # because this changes the contingency table size (and degrees of freedom for the chi^2 distribution) sum(save.star[2,]) sum(save.star[3,]) X.sq.star <- save.star[1, save.star[2,] == 0 & save.star[3,] == 0] # Quantiles df <- (7-1)*(7-1) p <- c(0.01, 0.05, 0.10) quantile(x = X.sq.star, probs = 1 - p, type = 1) qchisq(p = 1 - p, df = df) dev.new(width = 10, height = 6, pointsize = 12) par(mfrow = c(1,2)) # Histogram df <- (7-1)*(7-1) hist(x = X.sq.star, main = "Histogram", freq = FALSE, xlab = expression(X^"2*"), ylim = c(0,0.05), col = NA) curve(expr = dchisq(x = x, df = df), col = "red", add = TRUE) # segments(x0 = ind.test$statistic, y0 = -10, x1 = ind.test$statistic, y1 = 10) # Compare CDFs plot.ecdf(x = X.sq.star, verticals = TRUE, do.p = FALSE, main = "CDFs", lwd = 2, col = "black", xlab = expression(X^"2*"), panel.first = grid(col="gray", lty="dotted"), ylab = "CDF") curve(expr = pchisq(q = x, df = df), col = "red", add = TRUE, lwd = 1) legend(x = 60, y = 0.4, legend = c(expression(X^"2*"), expression(chi[36]^2)), lwd = c(2,1), col = c("black", "red"), bty = "n") mean(X.sq.star > ind.test$statistic) # bootstrap p-value #