## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) data.table::setDTthreads(1) ## ----------------------------------------------------------------------------- library(socialmixr) library(ggplot2) data(polymod) ## ----------------------------------------------------------------------------- polymod[country == "United Kingdom"] |> assign_age_groups(age_limits = c(0, 1, 5, 15)) |> compute_matrix() ## ----------------------------------------------------------------------------- uk_grouped <- polymod[country == "United Kingdom"] |> assign_age_groups(age_limits = c(0, 1, 5, 15)) head(uk_grouped$participants[, c("part_id", "part_age", "age.group")]) head(uk_grouped$contacts[, c("part_id", "cnt_age", "contact.age.group")]) ## ----------------------------------------------------------------------------- unique(polymod$participants$country) ## ----------------------------------------------------------------------------- polymod[country %in% c("United Kingdom", "Germany")] ## ----------------------------------------------------------------------------- bootstrap <- function(survey) { sampled_ids <- sample( unique(survey$participants$part_id), replace = TRUE ) survey$participants <- survey$participants[ list(sampled_ids), on = "part_id" ] survey$contacts <- survey$contacts[ list(sampled_ids), on = "part_id", nomatch = NULL, allow.cartesian = TRUE ] survey } uk <- polymod[country == "United Kingdom"] |> assign_age_groups(age_limits = c(0, 1, 5, 15)) m <- suppressWarnings( replicate(n = 5, uk |> bootstrap() |> compute_matrix()) ) mr <- Reduce("+", lapply(m["matrix", ], function(x) x / ncol(m))) mr ## ----eval=requireNamespace("wpp2024", quietly = TRUE), message=FALSE, warning=FALSE---- # data("popAge1dt", package = "wpp2024") # uk_pop <- popAge1dt[name == "United Kingdom" & year == 2020, # .(lower.age.limit = age, population = pop * 1000) # ] # head(uk_pop) ## ----------------------------------------------------------------------------- custom_pop <- data.frame( lower.age.limit = c(0, 18, 60), population = c(12000000, 35000000, 20000000) ) ## ----------------------------------------------------------------------------- survey_country_population(polymod, countries = "United Kingdom") ## ----message=FALSE, warning=FALSE--------------------------------------------- uk_pop <- survey_country_population(polymod, countries = "United Kingdom") polymod[country == "United Kingdom"] |> assign_age_groups(age_limits = c(0, 1, 5, 15)) |> compute_matrix() |> symmetrise(survey_pop = uk_pop) ## ----message=FALSE, warning=FALSE--------------------------------------------- de_pop <- survey_country_population(polymod, countries = "Germany") polymod[country == "Germany"] |> assign_age_groups(age_limits = c(0, 60)) |> compute_matrix() |> symmetrise(survey_pop = de_pop) |> per_capita(survey_pop = de_pop) ## ----------------------------------------------------------------------------- polymod[country == "United Kingdom"] |> assign_age_groups(age_limits = c(0, 1, 5, 15)) |> compute_matrix() |> split_matrix(survey_pop = uk_pop) ## ----warning=FALSE, message=FALSE--------------------------------------------- # contact matrix for school-related contacts polymod[cnt_school == 1] |> assign_age_groups(age_limits = c(0, 20, 60)) |> compute_matrix() # contact matrix for work-related contacts involving physical contact polymod[cnt_work == 1][phys_contact == 1] |> assign_age_groups(age_limits = c(0, 20, 60)) |> compute_matrix() # contact matrix for daily contacts at home with males polymod[cnt_home == 1][cnt_gender == "M"][duration_multi == 5] |> assign_age_groups(age_limits = c(0, 20, 60)) |> compute_matrix() ## ----message=FALSE, warning=FALSE--------------------------------------------- polymod[country == "United Kingdom"] |> assign_age_groups(age_limits = c(0, 18, 60)) |> weigh("dayofweek", target = c(5, 2), groups = list(1:5, c(0, 6))) |> weigh("part_age", target = uk_pop) |> compute_matrix() ## ----message=FALSE, warning=FALSE--------------------------------------------- polymod |> assign_age_groups(age_limits = c(0, 18, 60)) |> weigh("hh_size") |> compute_matrix() ## ----message=FALSE, warning=FALSE--------------------------------------------- polymod[country == "United Kingdom"] |> assign_age_groups(age_limits = c(0, 18, 60)) |> weigh("dayofweek", target = c(5, 2), groups = list(1:5, c(0, 6))) |> weigh("part_age", target = uk_pop) |> compute_matrix(weight_threshold = 3) ## ----echo=FALSE--------------------------------------------------------------- survey_data <- data.frame( age = c(1, 1, 2, 2, 2, 3), day.of.week = as.factor(c( "weekend", "weekend", "weekend", "week", "week", "week" )), age.group = NA, m_i = c(3, 2, 9, 10, 8, 15) ) # age groups 1-2 and 3 survey_data$age.group <- 1 - (survey_data$age < 3) + 1 survey_data$age.group <- as.factor(c("A", "B"))[survey_data$age.group] knitr::kable(survey_data) ## ----------------------------------------------------------------------------- N <- 6 N_age <- c(2, 3, 1) N_age.group <- c(5, 1) N_day.of.week <- c(3, 3) P <- 3000 P_age <- c(1000, 1000, 1000) P_age.group <- c(2000, 1000) P_day.of.week <- c(5 / 7, 2 / 7) * 3000 ## ----echo=FALSE--------------------------------------------------------------- cat(paste( "unweighted average number of contacts:", round(mean(survey_data$m_i), digits = 2) )) ## ----echo=FALSE--------------------------------------------------------------- knitr::kable(aggregate( m_i ~ age + age.group, data = survey_data, mean )) ## ----echo=FALSE--------------------------------------------------------------- # including population constants survey_data$w <- NA for (i in seq_len(nrow(survey_data))) { day_i <- survey_data$day.of.week[i] survey_data$w[i] <- (P_day.of.week[day_i] / P) / (N_day.of.week[day_i] / N) } survey_data$w_tilde <- survey_data$w / sum(survey_data$w) * N # without population constants survey_data$w_dot <- NA for (i in seq_len(nrow(survey_data))) { day_i <- survey_data$day.of.week[i] survey_data$w_dot[i] <- (P_day.of.week[day_i]) / (N_day.of.week[day_i]) } survey_data$w_dot_tilde <- survey_data$w_dot / sum(survey_data$w_dot) * N # round survey_data[, -(1:4)] <- round(survey_data[, -(1:4)], digits = 2) # print knitr::kable(survey_data) # remove the 'dot' weights survey_data$w_dot <- NULL survey_data$w_dot_tilde <- NULL ## ----echo=FALSE--------------------------------------------------------------- # add weighted number of contacts survey_data["m_i * w_tilde"] <- survey_data$m_i * survey_data$w_tilde # show table knitr::kable(survey_data) # remove the weighted number of contacts survey_data$`m_i * w_tilde` <- NULL cat(paste( "weighted average number of contacts:", round( mean(survey_data$m_i * survey_data$w_tilde), digits = 2 ) )) ## ----echo=FALSE--------------------------------------------------------------- knitr::kable(list( aggregate(m_i * w_tilde ~ age, data = survey_data, mean), aggregate(m_i * w_tilde ~ age.group, data = survey_data, mean) )) ## ----echo=FALSE--------------------------------------------------------------- survey_data$w_PS <- NA for (i in seq_len(nrow(survey_data))) { k_i <- survey_data$age.group[i] flag_k <- survey_data$age.group == k_i survey_data$w_PS[i] <- survey_data$w[i] / sum(survey_data$w[flag_k]) * N_age.group[k_i] } # round survey_data[, -(1:4)] <- round( survey_data[, -(1:4)], digits = 2 ) knitr::kable(survey_data) ## ----echo=FALSE--------------------------------------------------------------- knitr::kable(aggregate( m_i * w_PS ~ age.group, data = survey_data, mean )) ## ----echo=FALSE--------------------------------------------------------------- survey_data$w <- NA survey_data$w_tilde <- NA survey_data$w_PS <- NULL for (i in seq_len(nrow(survey_data))) { age_i <- survey_data$age[i] survey_data$w[i] <- (P_age[age_i] / P) / (N_age[age_i] / N) } survey_data$w_tilde <- survey_data$w / sum(survey_data$w) * N survey_data$w_PS <- NA for (i in seq_len(nrow(survey_data))) { k_i <- survey_data$age.group[i] flag_k <- survey_data$age.group == k_i survey_data$w_PS[i] <- survey_data$w[i] / sum(survey_data$w[flag_k]) * N_age.group[k_i] } # round survey_data[, -(1:4)] <- round( survey_data[, -(1:4)], digits = 2 ) # print knitr::kable(survey_data) cat(paste( "weighted average number of contacts:", round( mean(survey_data$m_i * survey_data$w_tilde), digits = 2 ) )) ## ----echo=FALSE--------------------------------------------------------------- knitr::kable(list( aggregate(m_i * w_tilde ~ age, data = survey_data, mean), aggregate(m_i * w_tilde ~ age.group, data = survey_data, mean) )) ## ----echo=FALSE--------------------------------------------------------------- knitr::kable(aggregate(m_i * w_PS ~ age.group, data = survey_data, mean)) ## ----echo=FALSE--------------------------------------------------------------- survey_data <- survey_data[c(rep(1:2, 5), 3:6), ] survey_data$m_i[survey_data$age == 3] <- 30 rownames(survey_data) <- NULL survey_data <- survey_data[order(survey_data$age), ] survey_data$w <- NULL survey_data$w_tilde <- NULL survey_data$w_PS <- NULL ## ----echo=FALSE--------------------------------------------------------------- N <- nrow(survey_data) N_age <- table(survey_data$age) N_age.group <- table(survey_data$age.group) N_day.of.week <- table(survey_data$day.of.week) ## ----echo=FALSE--------------------------------------------------------------- survey_data$w <- NA survey_data$w_tilde <- NA survey_data$w_PS <- NULL for (i in seq_len(nrow(survey_data))) { age_i <- survey_data$age[i] survey_data$w[i] <- (P_age[age_i] / P) / (N_age[age_i] / N) } survey_data$w_tilde <- survey_data$w / sum(survey_data$w) * N # round survey_data[, -(1:4)] <- round( survey_data[, -(1:4)], digits = 2 ) knitr::kable(survey_data) cat(paste( "unweighted average number of contacts:", round(mean(survey_data$m_i), digits = 2) )) cat(paste( "weighted average number of contacts:", round( mean(survey_data$m_i * survey_data$w_tilde), digits = 2 ) )) ## ----echo=FALSE--------------------------------------------------------------- survey_data$w_tilde[survey_data$w_tilde > 3] <- 3 ## ----echo=FALSE--------------------------------------------------------------- survey_data$w_tilde[survey_data$w_tilde > 3] <- 3 cat(paste( "weighted average number of contacts after truncation:", round( mean(survey_data$m_i * survey_data$w_tilde), digits = 2 ) )) ## ----fig.width=4, fig.height=4------------------------------------------------ df <- reshape2::melt( mr, varnames = c("age.group", "age.group.contact"), value.name = "contacts" ) ggplot(df, aes(x = age.group, y = age.group.contact, fill = contacts)) + theme(legend.position = "bottom") + geom_tile() ## ----fig.width=4, fig.height=4------------------------------------------------ matrix_plot(mr) matrix_plot(mr, color.palette = gray.colors)