## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 7, fig.height = 4 ) ## ----data--------------------------------------------------------------------- library(mixedsubjectsirt) set.seed(242424) n_human <- 120 n_generated <- 350 n_items <- 5 human_pars <- data.frame( item = paste0("Item", seq_len(n_items)), a = c(0.8, 1.0, 1.2, 1.35, 1.55), d = c(-0.8, -0.3, 0.1, 0.45, 0.9) ) human_pars$b <- -human_pars$d / human_pars$a llm_pars <- human_pars llm_pars$a <- pmax(0.35, 0.9 * human_pars$a) llm_pars$d <- human_pars$d + 0.25 llm_pars$b <- -llm_pars$d / llm_pars$a theta_human <- rnorm(n_human) observed <- simulate_2pl(theta_human, human_pars) predicted <- simulate_2pl(theta_human, llm_pars) generated <- simulate_2pl(rnorm(n_generated), llm_pars) lambda_grid <- c(0, 0.25, 0.5, 0.75, 1) ## ----ability-tuning-mml------------------------------------------------------- ability_tuned_mml <- tune_lambda_ability_risk( lambda_grid = lambda_grid, observed = observed, predicted = predicted, generated = generated, target_resp = observed, initial_pars = human_pars, method = "grid", # show the risk at each candidate value control = list(maxit = 100) ) ability_tuned_mml$summary[, c("lambda", "mean_param_var", "mean_total_risk", "convergence", "selection_risk")] ability_tuned_mml$best_lambda tune_lambda_ability_risk( observed = observed, predicted = predicted, generated = generated, target_resp = observed, initial_pars = human_pars, control = list(maxit = 100) )$best_lambda ## ----risk-components---------------------------------------------------------- risk <- ability_risk( resp = observed, fit_or_pars = ability_tuned_mml$best_fit, vcov = vcov(ability_tuned_mml$best_fit) ) risk$summary ## ----risk-with-truth---------------------------------------------------------- ability_tuned_truth <- tune_lambda_ability_risk( lambda_grid = lambda_grid, observed = observed, predicted = predicted, generated = generated, target_resp = observed, theta_true = theta_human, initial_pars = human_pars, fit_fn = fit_mixed_subjects_mml, method = "grid", control = list(maxit = 100) ) ability_tuned_truth$summary[, c("lambda", "mean_param_var", "mean_squared_error", "mean_total_risk")] ## ----crossfit----------------------------------------------------------------- split_id <- rep(1:2, length.out = nrow(observed)) crossfit_tuned <- tune_lambda_ability_risk_crossfit( lambda_grid = c(0, 0.5, 1), observed = observed, predicted = predicted, generated = generated, split_id = split_id, initial_pars = human_pars, n_quad = 7, control = list(maxit = 100) ) crossfit_tuned$lambda_by_split # per-fold tuned lambda crossfit_tuned$lambda_final # fold-size-weighted scalar used for the final fit ## ----frozen-ec---------------------------------------------------------------- ability_tuned_ec <- tune_lambda_ability_risk( lambda_grid = lambda_grid, observed = observed, predicted = predicted, generated = generated, target_resp = observed, initial_pars = human_pars, fit_fn = fit_mixed_subjects, # opt in to the frozen EC estimator n_quad = 7, slope_upper = 4, # required to prevent divergence control = list(maxit = 100) ) ability_tuned_ec$best_lambda ## ----ppi-score---------------------------------------------------------------- ppi_score <- tune_lambda_ppi_score( observed = observed, predicted = predicted, item_pars = human_pars, n_generated = nrow(generated) ) cat("PPI++ score lambda:", round(ppi_score$lambda, 3), " r =", round(ppi_score$r, 3), " N/(n+N) =", round(1 / (1 + ppi_score$r), 3), "\n") ## ----ppi-score-2-------------------------------------------------------------- ppi_score_match <- tune_lambda_ppi_score( observed = observed, predicted = observed, item_pars = human_pars, n_generated = nrow(generated) ) cat("PPI++ score lambda:", round(ppi_score_match$lambda, 3), " r =", round(ppi_score$r, 3), " N/(n+N) =", round(1 / (1 + ppi_score_match$r), 3), "\n")