From raw data to model with tidyILD

tidyILD authors

2026-04-17

This vignette walks through the full tidyILD pipeline: prepare data, inspect structure, apply within-between decomposition and lags, fit a mixed-effects model, and run diagnostics and plots.

For which temporal assumptions (lags in the mean, residual AR, time-varying effects, state-space) fit your question, see vignette("temporal-dynamics-model-choice", package = "tidyILD").

Simulate and prepare

library(tidyILD)
# Simulate simple ILD
d <- ild_simulate(n_id = 10, n_obs_per = 12, irregular = TRUE, seed = 42)
# Prepare: encode time structure and add .ild_* columns
x <- ild_prepare(d, id = "id", time = "time", gap_threshold = 7200)

Inspect

ild_summary(x)
#> $summary
#> # A tibble: 1 × 7
#>    n_id n_obs time_min time_max prop_gap median_dt_sec iqr_dt_sec
#>   <int> <int>    <dbl>    <dbl>    <dbl>         <dbl>      <dbl>
#> 1    10   120        0   40136.        0         3612.       640.
#> 
#> $n_units
#> [1] 10
#> 
#> $n_obs
#> [1] 120
#> 
#> $time_range
#> [1]     0 40136
#> 
#> $spacing
#> $spacing$median_dt
#> [1] 3612.244
#> 
#> $spacing$iqr_dt
#> [1] 640.2958
#> 
#> $spacing$n_intervals
#> [1] 110
#> 
#> $spacing$pct_gap
#> [1] 0
#> 
#> $spacing$by_id
#> # A tibble: 10 × 5
#>       id median_dt iqr_dt n_intervals pct_gap
#>    <int>     <dbl>  <dbl>       <int>   <dbl>
#>  1     1     3627.   549.          11       0
#>  2     2     3702.   854.          11       0
#>  3     3     3617.   740.          11       0
#>  4     4     3321.   702.          11       0
#>  5     5     3616.   588.          11       0
#>  6     6     3438.   481.          11       0
#>  7     7     3767.   816.          11       0
#>  8     8     3591.   499.          11       0
#>  9     9     3609.   194.          11       0
#> 10    10     3700.   521.          11       0
#> 
#> 
#> $n_gaps
#> [1] 0
#> 
#> $pct_gap
#> [1] 0
ild_spacing_class(x)
#> [1] "regular-ish"

Within-person centering and lags

x <- ild_center(x, y)
x <- ild_lag(x, y, mode = "gap_aware", max_gap = 7200)

Fit a model

Without residual autocorrelation (lmer):

fit0 <- ild_lme(y ~ 1 + (1 | id), data = x, ar1 = FALSE, warn_no_ar1 = FALSE)

With AR1 residual correlation (nlme):

fit1 <- ild_lme(y ~ 1, data = x, ar1 = TRUE, correlation_class = "CAR1")

Diagnostics and plots

diag <- ild_diagnostics(fit1, data = x)
names(diag)  # meta, data, stats
#> [1] "meta"  "data"  "stats"
names(plot_ild_diagnostics(diag))  # plot names for requested types
#> [1] "residual_acf"        "residuals_vs_fitted" "residuals_vs_time"  
#> [4] "qq"
# Pooled residual ACF (tibble)
head(diag$stats$acf$pooled)
#> # A tibble: 6 × 2
#>     lag      acf
#>   <dbl>    <dbl>
#> 1     0  1      
#> 2     1  0.121  
#> 3     2 -0.00832
#> 4     3 -0.113  
#> 5     4 -0.0886 
#> 6     5 -0.147
# By-id ACF when by_id = TRUE: one tibble per person
names(diag$stats$acf$by_id)
#>  [1] "1"  "2"  "3"  "4"  "5"  "6"  "7"  "8"  "9"  "10"
head(diag$stats$acf$by_id[[1]])
#> # A tibble: 6 × 2
#>     lag     acf
#>   <dbl>   <dbl>
#> 1     0  1     
#> 2     1  0.361 
#> 3     2 -0.0851
#> 4     3  0.149 
#> 5     4  0.196 
#> 6     5  0.0752
ild_plot(x, type = "trajectory", var = "y", max_ids = 5)

Trajectory plot

ild_plot(fit1, type = "fitted")

Fitted vs observed

MSM-style weights (IPTW and IPCW) — optional

For a marginal structural model–style sensitivity analysis, you can build inverse probability of treatment weights: either pooled (ild_iptw_weights()) or sequential MSM for time-varying A_t (ild_iptw_msm_weights() after building history with ild_lag()), then inverse probability of censoring weights for monotone dropout (ild_ipcw_weights()), multiply into a joint analysis weight with ild_joint_msm_weights(), and refit with ild_ipw_refit(). This path targets treatment assignment and loss-to-follow-up; it is distinct from outcome missingness IPW (ild_missing_model() + ild_ipw_weights()). Assumptions (positivity, correct models) are still yours; for uncertainty with estimated weights, use ild_msm_bootstrap() (see ?ild_msm_inference) instead of trusting default lmer SEs alone—weight_policy = "reestimate_weights" with a weights_fn that rebuilds IPW on each bootstrap resample is often the appropriate choice when you want first-stage uncertainty reflected; fixed_weights resamples clusters but keeps the weights attached to resampled rows (faster, approximate).

# Example skeleton (not run in the vignette build)
x2 <- ild_simulate(n_id = 12, n_obs_per = 10, seed = 1)
x2$stress <- rnorm(nrow(x2))
x2$trt <- rbinom(nrow(x2), 1L, 0.45)
x2 <- ild_prepare(x2, id = "id", time = "time")
x2 <- ild_center(x2, y)
x2 <- ild_iptw_weights(x2, treatment = "trt", predictors = "stress")
# Sequential A_t: x2 <- ild_lag(x2, stress); x2 <- ild_lag(x2, trt); ...
# x2 <- ild_iptw_msm_weights(x2, treatment = "trt", history = ~ stress_lag1 + trt_lag1)
x2 <- ild_ipcw_weights(x2, predictors = "stress")
x2 <- ild_joint_msm_weights(x2)
fit_msm <- ild_lme(y ~ y_bp + y_wp + stress + (1 | id), data = x2,
  ar1 = FALSE, warn_no_ar1 = FALSE, warn_uncentered = FALSE)
fit_msm_w <- ild_ipw_refit(fit_msm, data = x2)

Balance and overlap: After weights are attached, use ild_msm_balance() (weighted SMDs), ild_ipw_ess() (Kish effective sample size), and ild_msm_overlap_plot() (propensity densities by treatment—pooled IPTW or sequential MSM via attr(x, "ild_iptw_msm_fits")). Optional: ild_diagnose(..., balance = TRUE, balance_treatment = "trt", balance_covariates = c("stress")) fills causal$balance and may trigger guardrails GR_MSM_BALANCE_SMD_HIGH / GR_MSM_ESS_LOW. ild_autoplot(bundle, section = "causal", type = "overlap", treatment = "trt") draws overlap when ggplot2 is available.

For a full assumptions-oriented walkthrough (exchangeability, positivity/overlap, consistency, and weight-model correctness) and simulation-based recovery checks, see vignette("msm-identification-and-recovery", package = "tidyILD"). When using ild_msm_fit(), check fit_obj$inference$status/reason for degraded paths, or set strict_inference = TRUE to fail fast.

A minimal bootstrap illustration (small n_boot for speed; increase in real analyses):

set.seed(3)
xb <- ild_simulate(n_id = 10, n_obs_per = 5, seed = 3)
xb$stress <- rnorm(nrow(xb))
xb <- ild_prepare(xb, id = "id", time = "time")
xb <- ild_center(xb, y)
xb$.ipw <- runif(nrow(xb), 0.85, 1.15)
fb <- ild_lme(y ~ y_bp + y_wp + stress + (1 | id), data = xb,
  ar1 = FALSE, warn_no_ar1 = FALSE, warn_uncentered = FALSE)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
#> Model failed to converge with max|grad| = 5.42413 (tol = 0.002, component 1)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
fwb <- ild_ipw_refit(fb, data = xb, weights = ".ipw")
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model failed to converge with max|grad| = 5.61967 (tol = 0.002, component 1)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
bs_fixed <- ild_msm_bootstrap(fwb, n_boot = 20L, weight_policy = "fixed_weights", seed = 3)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model failed to converge with max|grad| = 0.683848 (tol = 0.002, component 1)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
#> Model failed to converge with max|grad| = 2.19955 (tol = 0.002, component 1)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
#> Model failed to converge with max|grad| = 0.475695 (tol = 0.002, component 1)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> boundary (singular) fit: see help('isSingular')
#> boundary (singular) fit: see help('isSingular')
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model failed to converge with max|grad| = 2.88868 (tol = 0.002, component 1)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
#> Model failed to converge with max|grad| = 4.51989 (tol = 0.002, component 1)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
#> Model failed to converge with max|grad| = 0.402879 (tol = 0.002, component 1)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
#> Model failed to converge with max|grad| = 2.04425 (tol = 0.002, component 1)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
#> Model failed to converge with max|grad| = 0.50228 (tol = 0.002, component 1)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> boundary (singular) fit: see help('isSingular')
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
#> unable to evaluate scaled gradient
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
#> Model failed to converge: degenerate Hessian with 1 negative eigenvalues
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
#> Model failed to converge with max|grad| = 1.7552 (tol = 0.002, component 1)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> boundary (singular) fit: see help('isSingular')
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model failed to converge with max|grad| = 0.0308156 (tol = 0.002, component 1)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
#> Model failed to converge with max|grad| = 1.59443 (tol = 0.002, component 1)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
#> Model failed to converge with max|grad| = 4.50989 (tol = 0.002, component 1)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
#> Model failed to converge with max|grad| = 0.175108 (tol = 0.002, component 1)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
#> Model failed to converge with max|grad| = 0.14053 (tol = 0.002, component 1)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
#> Model failed to converge with max|grad| = 4.03176 (tol = 0.002, component 1)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
tidy_ild_msm_bootstrap(bs_fixed)
#> # A tibble: 4 × 18
#>   term        component effect_level  estimate std_error   conf_low conf_high
#>   <chr>       <chr>     <chr>            <dbl>     <dbl>      <dbl>     <dbl>
#> 1 (Intercept) fixed     population   -1.20e-17  1.69e-16 -2.29 e-16  3.53e-16
#> 2 y_bp        fixed     between       1   e+ 0  3.72e-16  1.000e+ 0  1   e+ 0
#> 3 y_wp        fixed     within        1   e+ 0  1.85e-16  1    e+ 0  1   e+ 0
#> 4 stress      fixed     unknown      -5.93e-17  1.60e-16 -3.55 e-16  2.13e-16
#> # ℹ 11 more variables: statistic <dbl>, p_value <dbl>, interval_type <chr>,
#> #   engine <chr>, model_class <chr>, rhat <dbl>, ess_bulk <dbl>,
#> #   ess_tail <dbl>, pd <dbl>, rope_low <dbl>, rope_high <dbl>
# reestimate_weights: weights_fn must return ILD with the weight column, e.g. re-run IPTW pipeline:
bs_re <- ild_msm_bootstrap(fwb, n_boot = 12L, weight_policy = "reestimate_weights",
  seed = 4, weights_fn = function(d) { d$.ipw <- runif(nrow(d), 0.85, 1.15); d })
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model failed to converge with max|grad| = 2.13143 (tol = 0.002, component 1)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
#> Model failed to converge with max|grad| = 2.70039 (tol = 0.002, component 1)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
#> Model failed to converge with max|grad| = 0.765417 (tol = 0.002, component 1)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
#> Model failed to converge with max|grad| = 0.87247 (tol = 0.002, component 1)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
#> unable to evaluate scaled gradient
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
#> Model failed to converge: degenerate Hessian with 1 negative eigenvalues
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
#> Model failed to converge with max|grad| = 1.43315 (tol = 0.002, component 1)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
#> Model failed to converge with max|grad| = 1.33261 (tol = 0.002, component 1)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
#> Model failed to converge with max|grad| = 0.804983 (tol = 0.002, component 1)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
#> Model failed to converge with max|grad| = 0.110021 (tol = 0.002, component 1)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
#> Model failed to converge with max|grad| = 2.08795 (tol = 0.002, component 1)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
#> Model failed to converge with max|grad| = 0.1423 (tol = 0.002, component 1)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
#> Model failed to converge with max|grad| = 0.506945 (tol = 0.002, component 1)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?

Reproducibility

Use a fixed seed when simulating or fitting models so results can be recreated. The pipeline is deterministic for a given seed and data. When saving results (e.g. after [ild_lme()] or [ild_diagnostics()]), you can attach a reproducibility manifest and save a single bundle with [ild_manifest()] and [ild_bundle()]:

# Optional: build a manifest with scenario and seed, then bundle the fit for saving
manifest <- ild_manifest(seed = 42, scenario = ild_summary(x), include_session = FALSE)
bundle <- ild_bundle(fit1, manifest = manifest, label = "model_ar1")
# saveRDS(bundle, "run.rds")  # one file with result + manifest + label
names(bundle)
#> [1] "result"   "manifest" "label"

For simulation-based recovery and power under the bundled ild_simulate() DGP, see vignette("benchmark-simulation-recovery", package = "tidyILD").