--- title: "Simulation Study: Comparing Item Selection Criteria and Stopping Rules" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Simulation Study: Comparing Item Selection Criteria and Stopping Rules} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include=FALSE} knitr::opts_chunk$set( echo = TRUE, message = FALSE, warning = FALSE ) ``` ## Overview This vignette presents a simulation study that evaluates how different **item selection criteria** and **stopping rules** affect classification accuracy and test efficiency in CD-CAT. We use the `k3_GDINA_HDLV` dataset from `cdcat_sim`: a 3-attribute, 70-item GDINA bank with high-discrimination / low-variability item parameters, and 160 examinees covering all possible attribute profiles (20 per profile). ### Experimental factors | Factor | Levels | |---|---| | Item selection criterion | KL, PWKL, MPWKL, SHE | | Stopping rule | Single threshold ($\tau = 0.80$), Dual threshold ($\tau_1 = 0.70,\ \tau_2 = 0.10$) | All conditions share the same settings: - **Estimation**: MAP with uniform prior - **Minimum items**: 1 (no minimum constraint) - **Maximum items**: none (test runs until the stopping criterion triggers) - **Start item**: one randomly drawn item, fixed across all conditions and examinees for comparability ### Evaluation metrics - **PCCR** (Pattern Correct Classification Rate): proportion of examinees whose entire attribute profile was correctly recovered. - **ACCR$_k$** (Attribute CCR): marginal correct classification rate for attribute $k$. - **ATL** (Average Test Length): mean number of items administered before the stopping rule triggered. --- ## Setup ```{r setup-data} library(cdCAT) # Dataset: K = 3, GDINA model, high-discrimination / low-variability d <- cdcat_sim[["k3_GDINA_HDLV"]] Q <- d$Q # 70 x 3 alpha <- d$alpha # 160 x 3 (true profiles) responses <- d$responses # 160 x 70 K <- ncol(Q) N <- nrow(alpha) J <- nrow(Q) # Build the item bank items <- cdcat_items(Q, "GDINA", gdina_params = d$parameters) items ``` ```{r fixed-start} # Draw one start item, fixed for all conditions and examinees set.seed(42) start_item <- sample(J, 1L) cat("Fixed start item:", start_item, "\n") ``` --- ## Simulation ```{r conditions} conditions <- expand.grid( criterion = c("KL", "PWKL", "MPWKL", "SHE"), stop_rule = c("single", "dual"), stringsAsFactors = FALSE ) ``` ```{r simulate} run_condition <- function(criterion, stop_rule) { threshold <- if (stop_rule == "single") 0.80 else c(0.70, 0.10) results <- vector("list", N) for (i in seq_len(N)) { session <- CdcatSession$new( items, criterion = criterion, method = "MAP", prior = NULL, # uniform prior threshold = threshold, min_items = 1L, max_items = J, # effectively no maximum: bounded by bank size start_item = start_item ) repeat { item <- session$next_item() if (item == 0L) break session$update(item, responses[i, item]) } res <- session$result() results[[i]] <- list( alpha_hat = res$alpha_hat, n_items = res$n_items ) } results } set.seed(2025) sim_results <- lapply(seq_len(nrow(conditions)), function(ci) { run_condition(conditions$criterion[ci], conditions$stop_rule[ci]) }) names(sim_results) <- paste(conditions$criterion, conditions$stop_rule, sep = "_") ``` --- ## Results ```{r compute-metrics} compute_metrics <- function(results, alpha) { N <- length(results) K <- ncol(alpha) alpha_hat_mat <- do.call(rbind, lapply(results, `[[`, "alpha_hat")) n_items_vec <- sapply(results, `[[`, "n_items") # PCCR: all K attributes correct pccr <- mean(apply(alpha_hat_mat == alpha, 1, all)) # ACCR per attribute accr <- colMeans(alpha_hat_mat == alpha) names(accr) <- paste0("ACCR_A", seq_len(K)) # ATL atl <- mean(n_items_vec) c(PCCR = round(pccr, 4), round(accr, 4), ATL = round(atl, 2)) } metrics_list <- lapply(sim_results, compute_metrics, alpha = alpha) metrics_df <- as.data.frame(do.call(rbind, metrics_list)) metrics_df <- cbind(conditions, metrics_df) rownames(metrics_df) <- NULL ``` ### Full results table ```{r table-full} knitr::kable( metrics_df, col.names = c("Criterion", "Stop rule", "PCCR", "ACCR A1", "ACCR A2", "ACCR A3", "ATL"), caption = "Classification accuracy and average test length by condition.", digits = 4, align = "llccccc" ) ``` ### PCCR and ATL by criterion ```{r table-criterion} agg_crit <- aggregate( cbind(PCCR, ATL) ~ criterion, data = metrics_df, FUN = mean ) knitr::kable( agg_crit, col.names = c("Criterion", "Mean PCCR", "Mean ATL"), caption = "Average PCCR and ATL across stopping rules, by criterion.", digits = 4 ) ``` ### PCCR and ATL by stopping rule ```{r table-stoprule} agg_stop <- aggregate( cbind(PCCR, ATL) ~ stop_rule, data = metrics_df, FUN = mean ) knitr::kable( agg_stop, col.names = c("Stop rule", "Mean PCCR", "Mean ATL"), caption = "Average PCCR and ATL across criteria, by stopping rule.", digits = 4 ) ``` ### PCCR heatmap ```{r heatmap, fig.width=5, fig.height=3.5, fig.cap="PCCR by criterion and stopping rule."} pccr_mat <- matrix( metrics_df$PCCR, nrow = 4, dimnames = list( Criterion = c("KL", "PWKL", "MPWKL", "SHE"), Stop = c("single", "dual") ) ) image( t(pccr_mat), col = hcl.colors(20, "Blues", rev = TRUE), xaxt = "n", yaxt = "n", zlim = c(min(pccr_mat) - 0.02, 1), main = "PCCR", xlab = "Stopping rule", ylab = "Criterion" ) axis(1, at = c(0, 1), labels = c("single", "dual")) axis(2, at = seq(0, 1, length.out = 4), labels = c("KL", "PWKL", "MPWKL", "SHE"), las = 2) for (i in 1:2) for (j in 1:4) { text((i - 1), (j - 1) / 3, labels = sprintf("%.3f", pccr_mat[j, i]), cex = 0.85) } ``` --- ## Summary The table below recaps the best-performing condition per metric. ```{r summary} best_pccr <- metrics_df[which.max(metrics_df$PCCR), c("criterion","stop_rule","PCCR","ATL")] best_atl <- metrics_df[which.min(metrics_df$ATL), c("criterion","stop_rule","PCCR","ATL")] knitr::kable( rbind( cbind(Optimises = "PCCR (max)", best_pccr), cbind(Optimises = "ATL (min)", best_atl) ), row.names = FALSE, caption = "Best condition per optimisation target." ) ``` > **Note.** All results are based on a single replication. For > publication-quality conclusions, increase `N` or run multiple > replications with different seeds.