## ----setup, include=FALSE-----------------------------------------------------
library(surveycore)
# Prefix every line of printed chunk output with "#>".
knitr::opts_chunk$set(comment = "#>")

## ----as_survey----------------------------------------------------------------
# Build a survey design from gss_2024 using the vpsu (ids), vstrat (strata)
# and wtssps (weights) columns, then print it.
gss_svy <- gss_2024 |>
  as_survey(ids = vpsu, strata = vstrat, weights = wtssps)

gss_svy


## ----replicate----------------------------------------------------------------
# Replicate-weight design: extweight is the main weight and
# extweight1..extweight100 are the replicate weights; type is "JK1".
pew_jewish_svy <- pew_jewish_2020 |>
  as_survey_replicate(
    weights = extweight,
    repweights = extweight1:extweight100,
    type = "JK1"
  )

pew_jewish_svy

## ----srs----------------------------------------------------------------------
# Simulate a sample of schools. The seed fixes the random draws, and the
# columns are generated in a fixed order so the result is reproducible.
set.seed(101)
N <- 400 # schools in the district (population size)
n <- 80 # schools drawn into the sample

school_survey <- data.frame(
  # n distinct ids drawn without replacement from 1..N
  school_id = sample.int(N, n),
  avg_score = round(rnorm(n, mean = 72, sd = 11), digits = 1),
  # share of students on free/reduced price lunch, as a proportion
  pct_frpl = round(runif(n, min = 0.10, max = 0.85), digits = 2),
  enrollment = round(runif(n, min = 180, max = 850)),
  # constant sampling weight: 400 / 80 = 5
  sw = N / n,
  # population size, used for the finite population correction
  fpc = N
)

# Declare the design for the simulated school sample.
school_svy <- school_survey |>
  as_survey(
    # each sampled school represents 5 schools in the population
    weights = sw,
    # finite population correction reduces SEs: 20% of the population sampled
    fpc = fpc
  )

school_svy

## ----calibrated---------------------------------------------------------------
# Non-probability design for ns_wave1, weighted by its `weight` column.
ns_wave1_svy <- ns_wave1 |>
  as_survey_nonprob(weights = weight)

ns_wave1_svy

## ----nwtco, eval=requireNamespace("survival", quietly=TRUE)-------------------
# Work on a local copy of the dataset shipped with the survival package.
nwtco <- survival::nwtco

# as_survey_twophase() needs a logical subset column, so coerce the
# subcohort indicator (0/1) to TRUE/FALSE.
nwtco[["in.subcohort"]] <- as.logical(nwtco[["in.subcohort"]])

# Phase 1: all 4,028 enrolled patients, each patient being their own unit.
phase1 <- nwtco |>
  as_survey(ids = seqno)

# Phase 2: the subcohort, with Phase 2 sampling stratified by relapse status.
nwtco_svy <- phase1 |>
  as_survey_twophase(
    # Phase 2 strata: cases (rel = 1) vs. non-cases (rel = 0)
    strata2 = rel,
    # logical column: TRUE = selected into Phase 2
    subset = in.subcohort,
    method = "full"
  )

nwtco_svy

## ----freqs-basic--------------------------------------------------------------

# Frequencies of a single variable, consider_trump.
ns_wave1_svy |>
  get_freqs(consider_trump)


## ----freqs-multi--------------------------------------------------------------
# Frequencies for every column from news_sources_facebook through
# news_sources_other.
ns_wave1_svy |>
  get_freqs(c(news_sources_facebook:news_sources_other))


## -----------------------------------------------------------------------------
# Same multi-variable frequencies, passing names_to / values_to to name the
# output columns "news_source" and "choice".
get_freqs(
  ns_wave1_svy,
  c(news_sources_facebook:news_sources_other),
  names_to = "news_source",
  values_to = "choice"
)


## ----means-basic--------------------------------------------------------------
# Estimate the mean of discrimination_blacks (discrimination against blacks).
ns_wave1_svy |>
  get_means(discrimination_blacks)

## -----------------------------------------------------------------------------

# Totals for the non-probability design, with no variable specified.
ns_wave1_svy |>
  get_totals()


## -----------------------------------------------------------------------------

# Totals for the replicate-weight design, with no variable specified.
pew_jewish_svy |>
  get_totals()


## -----------------------------------------------------------------------------
# Totals for the replicate-weight design, grouped by age4cat.
pew_jewish_svy |>
  get_totals(group = age4cat)

## ----corr-basic---------------------------------------------------------------
# Keep only rows where both favorability items are present and not 999.
# NOTE(review): 999 is presumably a "not sure"/missing code; confirm against
# the codebook.
ns_wave1_clean <- dplyr::filter(
  ns_wave1,
  !is.na(cand_favorability_trump),
  !is.na(cand_favorability_biden),
  cand_favorability_trump != 999,
  cand_favorability_biden != 999
)

# Rebuild the design on the filtered rows.
ns_wave1_clean_svy <- ns_wave1_clean |>
  as_survey_nonprob(weights = weight)

# Correlation between the two favorability items.
ns_wave1_clean_svy |>
  get_corr(c(cand_favorability_trump, cand_favorability_biden))

## ----corr-multi---------------------------------------------------------------
# Names of the seven candidate favorability columns.
fav_vars <- paste0(
  "cand_favorability_",
  c("trump", "biden", "harris", "sanders", "warren", "buttigieg", "pence")
)

# Keep rows where every favorability item is present and not 999.
ns_wave1_multi <- dplyr::filter(
  ns_wave1,
  dplyr::if_all(dplyr::all_of(fav_vars), \(x) !is.na(x) & x != 999)
)

ns_wave1_multi_svy <- ns_wave1_multi |>
  as_survey_nonprob(weights = weight)

# NOTE(review): the trump:pence range selects by column position; this assumes
# it covers exactly the columns listed in fav_vars. Confirm against the
# column order of ns_wave1.
ns_wave1_multi_svy |>
  get_corr(c(cand_favorability_trump:cand_favorability_pence))

## ----corr-wide----------------------------------------------------------------
# Same correlations as above, requested with format = "wide".
ns_wave1_multi_svy |>
  get_corr(
    c(cand_favorability_trump:cand_favorability_pence),
    format = "wide"
  )

## ----ratios-basic-------------------------------------------------------------
# Ratio with Trump favorability as numerator and Biden favorability as
# denominator, on the filtered design.
ns_wave1_clean_svy |>
  get_ratios(
    numerator = cand_favorability_trump,
    denominator = cand_favorability_biden
  )

## ----quantiles-basic----------------------------------------------------------
# Quartiles and median of age, relying on the default
# probs = c(0.25, 0.5, 0.75).
ns_wave1_svy |>
  get_quantiles(age)

## ----quantiles-median---------------------------------------------------------
# Median of age only (probs = 0.5).
ns_wave1_svy |>
  get_quantiles(age, probs = 0.5)

## ----quantiles-deciles--------------------------------------------------------
# Deciles of age: probs 0.1, 0.2, ..., 0.9.
ns_wave1_svy |>
  get_quantiles(age, probs = seq(0.1, 0.9, 0.1))

## ----group-means--------------------------------------------------------------
# Frequencies of consider_trump, grouped by pid3.
ns_wave1_svy |>
  get_freqs(consider_trump, group = pid3)

## ----variance-options---------------------------------------------------------
# Mean of age, requesting the "se", "ci" and "moe" variance outputs with
# conf_level = 0.9.
ns_wave1_svy |>
  get_means(
    age,
    variance = c("se", "ci", "moe"),
    conf_level = 0.9
  )


## ----n-weighted---------------------------------------------------------------
# Frequencies of age4cat with n_weighted = TRUE.
pew_jewish_svy |>
  get_freqs(age4cat, n_weighted = TRUE)