nhscancerwaits provides tools for importing, cleaning,
analysing, benchmarking, modelling and visualising NHS Cancer Waiting
Times data.
The analytical workflow implemented in this package was developed from research investigating NHS Cancer Waiting Times performance, provider variation and cancer pathway outcomes. The package supports reproducible analysis of provider-level and pathway-level performance using modern statistical modelling, benchmarking and visualisation techniques.
This vignette uses simulated data with the same structure expected from NHS Cancer Waiting Times datasets. Simulated data are used so that the examples can run on any system without requiring external NHS files. The same workflow can be applied directly to real NHS England Cancer Waiting Times Excel or CSV datasets.
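The package supports:
- data import and cleaning
- summary statistics for KPIs, providers and pathways
- provider filtering
- mixed-effects modelling and ICC estimation
- adjusted provider benchmarking
- pathway prediction
- provider clustering
- sensitivity analysis
- visualisation and export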
The following simulated dataset mimics the structure commonly encountered in NHS Cancer Waiting Times analyses. Variables include provider identifiers, cancer pathways, reporting periods, activity volumes and performance percentages.
# Fix the RNG state so the simulated values below are reproducible.
set.seed(123)

# One row for every provider x cancer pathway x month combination.
example_data <- expand.grid(
  provider_code = paste0("P", 1:12),
  cancer_type = c("Breast", "Lung", "Skin", "Lower GI"),
  month_index = 1:12,
  KEEP.OUT.ATTRS = FALSE
)

# Human-readable provider label derived from the provider code.
example_data[["provider_name"]] <- paste(
  "Provider",
  example_data[["provider_code"]]
)

# Every simulated row belongs to the same waiting-time standard.
example_data[["standard"]] <- "62-day"

# Translate the month index into the first day of that month.
example_data[["reporting_date"]] <- seq(
  as.Date("2026-01-01"),
  by = "month",
  length.out = 12
)[example_data[["month_index"]]]

# Random activity volumes; drawn before the percentages so the random
# number stream is consumed in the same order as the printed output.
example_data[["total_treated"]] <- sample(
  30:120,
  size = nrow(example_data),
  replace = TRUE
)

# Random performance percentages between 60 and 92, to one decimal place.
example_data[["performance_percent"]] <- round(
  runif(nrow(example_data), min = 60, max = 92),
  digits = 1
)

head(example_data)
#> provider_code cancer_type month_index provider_name standard reporting_date
#> 1 P1 Breast 1 Provider P1 62-day 2026-01-01
#> 2 P2 Breast 1 Provider P2 62-day 2026-01-01
#> 3 P3 Breast 1 Provider P3 62-day 2026-01-01
#> 4 P4 Breast 1 Provider P4 62-day 2026-01-01
#> 5 P5 Breast 1 Provider P5 62-day 2026-01-01
#> 6 P6 Breast 1 Provider P6 62-day 2026-01-01
#> total_treated performance_percent
#> 1 60 73.0
#> 2 108 63.3
#> 3 80 69.0
#> 4 43 71.6
#> 5 96 68.3
#> 6 71 75.1
kpi_summary <- summarise_kpis(
example_data,
group_var = "standard",
performance_var = "performance_percent"
)
kpi_summary
#> # A tibble: 1 × 7
#> standard observations mean_percent median_percent minimum_percent
#> <chr> <int> <dbl> <dbl> <dbl>
#> 1 62-day 576 75.5 75.7 60
#> # ℹ 2 more variables: maximum_percent <dbl>, sd_percent <dbl>
provider_summary <- summarise_providers(
filtered_data,
provider_var = "provider_code",
performance_var = "performance_percent",
activity_var = "total_treated"
)
head(provider_summary)
#> # A tibble: 6 × 9
#> provider_code observations mean_performance median_performance min_performance
#> <fct> <int> <dbl> <dbl> <dbl>
#> 1 P2 48 77.6 79.8 60.1
#> 2 P10 48 76.9 77.4 60.4
#> 3 P11 48 76.2 77.7 61.3
#> 4 P6 48 75.9 75.2 60.4
#> 5 P5 48 75.9 74.2 60.2
#> 6 P3 48 75.8 76.8 60
#> # ℹ 4 more variables: max_performance <dbl>, sd_performance <dbl>,
#> # mean_activity <dbl>, total_activity <int>
pathway_summary <- summarise_pathways(
filtered_data,
pathway_var = "cancer_type",
performance_var = "performance_percent"
)
pathway_summary
#> # A tibble: 4 × 7
#> cancer_type observations mean_performance median_performance min_performance
#> <fct> <int> <dbl> <dbl> <dbl>
#> 1 Lung 144 76.5 76.9 60.1
#> 2 Lower GI 144 76.1 77.0 60.2
#> 3 Breast 144 74.7 74.2 60.1
#> 4 Skin 144 74.6 75.4 60
#> # ℹ 2 more variables: max_performance <dbl>, sd_performance <dbl>
model <- fit_cwt_mixed_model(
filtered_data,
performance_var = "performance_percent",
month_var = "month_index",
pathway_var = "cancer_type",
provider_var = "provider_code"
)
model
#> Linear mixed model fit by maximum likelihood ['lmerMod']
#> Formula: performance_percent ~ month_index + cancer_type + (1 | provider_code)
#> Data: data
#> AIC BIC logLik -2*log(L) df.resid
#> 4172.512 4203.004 -2079.256 4158.512 569
#> Random effects:
#> Groups Name Std.Dev.
#> provider_code (Intercept) 0.000
#> Residual 8.943
#> Number of obs: 576, groups: provider_code, 12
#> Fixed Effects:
#> (Intercept) month_index cancer_typeLung
#> 75.22882 -0.08189 1.78125
#> cancer_typeSkin cancer_typeLower GI
#> -0.08958 1.37500
#> optimizer (nloptwrap) convergence code: 0 (OK) ; 0 optimizer warnings; 1 lme4 warnings
model_effects <- extract_model_effects(model)
model_effects
#> # A tibble: 5 × 7
#> effect term estimate std.error statistic conf.low conf.high
#> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 fixed (Intercept) 75.2 1.02 73.5 73.2 77.2
#> 2 fixed month_index -0.0819 0.108 -0.759 -0.293 0.130
#> 3 fixed cancer_typeLung 1.78 1.05 1.69 -0.284 3.85
#> 4 fixed cancer_typeSkin -0.0896 1.05 -0.0850 -2.16 1.98
#> 5 fixed cancer_typeLower GI 1.37 1.05 1.30 -0.691 3.44
pathway_predictions <- predict_pathway_performance(
model,
filtered_data,
pathway_var = "cancer_type",
month_var = "month_index",
provider_var = "provider_code"
)
pathway_predictions
#> cancer_type month_index provider_code predicted_performance
#> 1 Lung 6.5 P1 76.47778
#> 2 Lower GI 6.5 P1 76.07153
#> 3 Breast 6.5 P1 74.69653
#> 4 Skin 6.5 P1 74.60694
provider_clusters <- cluster_providers(
filtered_data,
provider_var = "provider_code",
performance_var = "performance_percent",
activity_var = "total_treated",
k = 3
)
head(provider_clusters)
#> # A tibble: 6 × 8
#> provider_code mean_performance median_performance sd_performance mean_activity
#> <fct> <dbl> <dbl> <dbl> <dbl>
#> 1 P1 75.5 76.9 8.23 84.1
#> 2 P2 77.6 79.8 10.1 73.1
#> 3 P3 75.8 76.8 8.56 82.1
#> 4 P4 74.2 74.6 8.97 73.1
#> 5 P5 75.9 74.2 9.56 74.2
#> 6 P6 75.9 75.2 9.04 71.4
#> # ℹ 3 more variables: total_activity <int>, cluster <fct>, cluster_label <chr>
sensitivity_results <- run_sensitivity_analysis(
filtered_data,
provider_var = "provider_code",
activity_var = "total_treated",
performance_var = "performance_percent",
month_var = "month_index",
pathway_var = "cancer_type"
)
sensitivity_results
#> cohort rows providers provider_variance residual_variance
#> 1 Main cohort 576 12 0 79.97877
#> 2 No outlier removal 576 12 0 79.97877
#> 3 Stricter cohort 576 12 0 79.97877
#> total_variance icc month_effect month_p_value status
#> 1 79.97877 0 -0.08189103 NA ok
#> 2 79.97877 0 -0.08189103 NA ok
#> 3 79.97877 0 -0.08189103 NA ok
wide_table <- pivot_provider_months(
filtered_data,
provider_var = "provider_code",
month_var = "reporting_date",
performance_var = "performance_percent"
)
head(wide_table)
#> # A tibble: 6 × 13
#> provider `2026-01-01` `2026-02-01` `2026-03-01` `2026-04-01` `2026-05-01`
#> <fct> <list> <list> <list> <list> <list>
#> 1 P1 <dbl [4]> <dbl [4]> <dbl [4]> <dbl [4]> <dbl [4]>
#> 2 P2 <dbl [4]> <dbl [4]> <dbl [4]> <dbl [4]> <dbl [4]>
#> 3 P3 <dbl [4]> <dbl [4]> <dbl [4]> <dbl [4]> <dbl [4]>
#> 4 P4 <dbl [4]> <dbl [4]> <dbl [4]> <dbl [4]> <dbl [4]>
#> 5 P5 <dbl [4]> <dbl [4]> <dbl [4]> <dbl [4]> <dbl [4]>
#> 6 P6 <dbl [4]> <dbl [4]> <dbl [4]> <dbl [4]> <dbl [4]>
#> # ℹ 7 more variables: `2026-06-01` <list>, `2026-07-01` <list>,
#> # `2026-08-01` <list>, `2026-09-01` <list>, `2026-10-01` <list>,
#> # `2026-11-01` <list>, `2026-12-01` <list>
plot_national_trends(
filtered_data,
month_var = "reporting_date",
performance_var = "performance_percent",
group_var = "standard"
)
plot_provider_effects(
provider_effects,
provider_var = "provider_code",
effect_var = "adjusted_effect"
)
plot_pathway_predictions(
pathway_predictions,
pathway_var = "cancer_type",
prediction_var = "predicted_performance"
)
The package can export tables to Excel. This chunk is not evaluated during package checking because CRAN policies discourage writing files during vignette execution.
export_excel_tables(
tables = list(
kpi_summary = kpi_summary,
provider_summary = provider_summary,
pathway_summary = pathway_summary,
icc_results = icc_results,
model_effects = model_effects,
provider_effects = provider_effects,
pathway_predictions = pathway_predictions,
provider_clusters = provider_clusters,
sensitivity_results = sensitivity_results
),
path = "nhscancerwaits_results.xlsx"
)
For real NHS Cancer Waiting Times data, a typical workflow is:
# Attach the package.
library(nhscancerwaits)

# Import the workbook, then clean it.
data <- load_cwt_excel("your_nhs_cancer_waiting_times_file.xlsx")
data <- clean_cwt_data(data)

# KPI summary and provider filtering, both from the cleaned data.
kpis <- summarise_kpis(data)
filtered <- filter_providers(data)

# Fit the mixed model on the filtered data and derive results from it.
model <- fit_cwt_mixed_model(filtered)
icc <- calculate_icc(model)
provider_effects <- extract_provider_effects(model)
pathway_predictions <- predict_pathway_performance(model, filtered)

# Cluster providers using the filtered data.
provider_clusters <- cluster_providers(filtered)
sensitivity <- run_sensitivity_analysis(filtered)
nhscancerwaits provides a complete workflow for NHS
Cancer Waiting Times analysis, including data import, cleaning, summary
statistics, provider filtering, mixed-effects modelling, ICC estimation,
adjusted provider benchmarking, pathway prediction, clustering,
sensitivity analysis, visualisation and export.
Although this vignette uses simulated data, the functions were designed to support analysis of real NHS Cancer Waiting Times datasets and can be applied directly to appropriately formatted NHS England data sources.