## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 7,
  fig.height = 5
)

## ----eval = FALSE-------------------------------------------------------------
# # Install from GitHub
# devtools::install_github("silviascarpa/inequantiles")

## -----------------------------------------------------------------------------
library(inequantiles)

## -----------------------------------------------------------------------------
data(synthouse)
head(synthouse)

## -----------------------------------------------------------------------------
### Log-Normal distribution
plot_inequality_curve(qfunction = qlnorm, qfun_args = list(meanlog = 9, sdlog = 0.3),
                      col   = "tomato", lty = 2, label = "LogN(sigma=0.3)")
plot_inequality_curve(qfunction = qlnorm, qfun_args = list(meanlog = 9, sdlog = 1.9),
                      col   = "blue", lty = 2, add = TRUE, label = "LogN(sigma=1.9)")
plot_inequality_curve(qfunction = qlnorm, qfun_args = list(meanlog = 9, sdlog = 3.9),
                      col   = "green", lty = 2, add = TRUE, label = "LogN(sigma=3.9)")



## -----------------------------------------------------------------------------
# Log-normal distribution
superpop_qri(qfunction = qlnorm, meanlog = 9, sdlog = 0.2)
superpop_qri(qfunction = qlnorm, meanlog = 9, sdlog = 1.7)
superpop_qri(qfunction = qlnorm, meanlog = 9, sdlog = 3.2)

# Weibull distribution
superpop_qri(qfunction = qweibull, shape = 2, scale = 30000)
superpop_qri(qfunction = qweibull, shape = 1.5, scale = 30000)

## -----------------------------------------------------------------------------
qri(y = synthouse$eq_income, weights = synthouse$weight, M = 100)

## ----echo=FALSE, message=FALSE, warning=FALSE---------------------------------
library(knitr)
library(kableExtra)

# Creiamo i dati. 
# Nota: usiamo il doppio backslash \\ per i simboli LaTeX in R
df <- data.frame(
  Estimator = c("$\\widehat{Q}_4(p)$", "$\\widehat{Q}_5(p)$", "$\\widehat{Q}_6(p)$", 
                "$\\widehat{Q}_7(p)$", "$\\widehat{Q}_8(p)$", "$\\widehat{Q}_9(p)$"),
  rk = c("$\\frac{W_k}{W_n}$", 
         "$\\frac{W_k-\\frac{1}{2}w_k}{W_n}$", 
         "$\\frac{W_k}{W_n+w_n}$", 
         "$\\frac{W_{k-1}}{W_{n-1}}$", 
         "$\\frac{W_k-\\frac{1}{3}w_k}{W_n+\frac{w_n}{3}}$", 
         "$\\frac{W_k-\\frac{3}{8}w_k}{W_n+\frac{1}{4}w_n}$"),
  mk = c("0", 
         "$\\frac{w_k}{2}$", 
         "$w_np$", 
         "$w_k - w_np$", 
         "$\\frac{w_k}{3} + \\frac{w_n}{3}p$", 
         "$\\frac{3}{8}w_k + \\frac{w_n}{4}p$"),
k = c("$W_{k-1} \\le W_n p \\lt W_k$", 
        "$W_{k-1} - \\frac{w_{k-1}}{2} \\le W_n p \\lt W_{k} - \\frac{w_{k}}{2}$",
        "$W_{k-1} \\le (W_n + w_n)p \\lt W_{k}$",
        "$W_{k-2} \\le W_{n-1}p \\lt W_{k-1}$",
        "$W_{k-1} - \\frac{w_{k-1}}{3} \\le (W_{n} - \\frac{w_n}{3})p \\lt W_{k} - \\frac{w_k}{3}$",
        "$W_{k-1} - \\frac{3w_{k-1}}{8} \\le (W_{n} + \\frac{w_{n}}{4})p \\lt W_{k} - \\frac{3w_{k}}{8}$")
)

# Generiamo la tabella per HTML
kbl(df, 
        caption = "<b>Table 1:</b> Quantile estimators incorporating sampling weights.",
        escape = FALSE, # Fondamentale per far leggere il LaTeX a MathJax
    col.names = c("Estimator", "$\\widehat{r}_k$", "$\\widehat{m}_k$", "$k$"),
    align = "clll") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), 
                full_width = F, 
                position = "center") %>%
  column_spec(1, bold = T) %>%
  row_spec(0, bold = T) # Intestazione in grassetto


## -----------------------------------------------------------------------------
# Compute weighted quartiles
csquantile(y = synthouse$eq_income,
           weights = synthouse$weight,
           probs = c(0.25, 0.5, 0.75),
           type = 4)
csquantile(y = synthouse$eq_income,
           weights = synthouse$weight,
           probs = c(0.25, 0.5, 0.75),
           type = 7)

# Compare without considering the sampling weights
csquantile(synthouse$eq_income, probs = c(0.25, 0.5, 0.75), type = 4)

## -----------------------------------------------------------------------------
# Harrell-Davis weighted median
csquantile(y = synthouse$eq_income,
           weights = synthouse$weight,
           probs = 0.5,
           type = "HD")

## -----------------------------------------------------------------------------
# Compare different quantile types by NUTS3
types <- c(4, 5, 6, 7, 8, 9, "HD")
areas <- unique(synthouse$NUTS3)

# Function to compute QRI for all types in one area
compare_quantiles <- function(region_code, data = synthouse) {
  idx <- which(data$NUTS3 == region_code)
  
  results <- sapply(types, function(t) {
    csquantile(y = data$eq_income[idx], 
        weights = data$weight[idx], 
        type = t,
        probs = 0.95)
  })
  
  return(results)
}

# Compute for all areas
results_quantiles <- sapply(areas, compare_quantiles)
rownames(results_quantiles) <- types
colnames(results_quantiles) <- areas


## === Quantile estimators for Each NUTS3 Region ==="
print(head(t(results_quantiles), n = 10))

## -----------------------------------------------------------------------------
# Compute weighted QRI
qri(y = synthouse$eq_income, 
    weights = synthouse$weight, 
    type = 6)  # Type 6 quantile estimator (default)

## ----eval = FALSE-------------------------------------------------------------
# # Pseudo-code for rescaled bootstrap for the estimation of the sampling variance of the QRI estimator
# var_qri <- rescaled_bootstrap(
#   data = synthouse,
#   y = "eq_income",
#   strata = "NUTS2",
#   psu = "municipality",
#   weights = "weight",
#   estimator = qri,
#   by_strata = TRUE,
#   B = 100,
#   seed = 456)

## -----------------------------------------------------------------------------
# Compute QSR
share_ratio(y = synthouse$eq_income, 
    weights = synthouse$weight, type = 4)

## -----------------------------------------------------------------------------
# Compute Palma ratio
share_ratio(y = synthouse$eq_income, 
            weights = synthouse$weight, 
            type = 7,
            prob_numerator = 0.90,
            prob_denominator = 0.40)

## -----------------------------------------------------------------------------
# P90/P10 ratio
ratio_quantiles(y = synthouse$eq_income,
                weights = synthouse$weight,
                type = 7)

# P75/P25 ratio
ratio_quantiles(y = synthouse$eq_income,
                weights = synthouse$weight,
                prob_numerator = 0.75,
                prob_denominator = 0.25, type = 6)

## -----------------------------------------------------------------------------
# Compare all indicators
inequantiles(y          = synthouse$eq_income,
             weights    = synthouse$weight,
             indicators = "all")

## ----se-example, eval=FALSE---------------------------------------------------
# # QRI and QSR with their standard errors for one region via rescaled bootstrap
# nord <- synthouse[synthouse$NUTS1 == "N", ]
# 
# qri_qsr <- inequantiles(
#   y       = nord$eq_income,
#   weights = nord$weight,
#   indicators = c("qri", "qsr"),
#   se      = TRUE,
#   data    = nord,
#   strata  = "NUTS2",
#   psu     = "municipality",
#   B       = 200,
#   seed    = 42
# )

## -----------------------------------------------------------------------------
# Select a subset for clearer visualization
n_obs <- 200
set.seed(123)
idx <- sample(nrow(synthouse), n_obs)

# Extract data
y_subset <- synthouse$eq_income[idx]
w_subset <- synthouse$weight[idx]


# Compute the IF for some indicators
if_gini_vals <- if_gini(y_subset, w_subset)
if_qri_vals <- if_qri(y_subset, w_subset, type = 6)
if_qsr_vals <- if_share_ratio(y_subset, w_subset, type = 4)
if_palma_vals <- if_share_ratio(y_subset, w_subset, type = 4, prob_numerator = 0.90, prob_denominator = 0.40)
if_q50_vals <- if_quantile(y_subset, w_subset, probs = 0.5, type = 6)
if_ratioquantiles_vals <- if_ratio_quantiles(y_subset, w_subset, prob_numerator = 0.90,
                                             prob_denominator = 0.10, type = 6)


## -----------------------------------------------------------------------------
# Create the plot
library(ggplot2)
library(scales)

# 
plot_df <- data.frame(
  Income = rep(y_subset, 6),
  IF_Value = c(
    if_qri_vals,   
    if_qsr_vals,   
    if_gini_vals,  
    if_palma_vals,
    if_q50_vals,
    if_ratioquantiles_vals
  ),
  Indicator = rep(c("QRI", "QSR", "Gini", "Palma", "Median", "P90/P10"), each = n_obs)
)

ggplot(plot_df, aes(x = Income, y = IF_Value, color = Indicator)) +
  geom_line(linewidth = 0.8, alpha = 0.7) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "gray40") +
  facet_wrap(~ Indicator, scales = "free_y", ncol = 2) +
  scale_x_continuous(labels = scales::comma) +
  labs(
    title = "Influence Functions: How Each Observation Affects the Estimate",
    subtitle = paste("Based on", n_obs, "randomly sampled households"),
    x = "Equivalized Income",
    y = "Influence Function Value",
  ) +
  theme_minimal(base_size = 11) +
  theme(
    legend.position = "none",
    strip.text = element_text(face = "bold", size = 11),
    plot.title = element_text(face = "bold", size = 13),
    plot.subtitle = element_text(color = "gray40"),
    panel.grid.minor = element_blank()
  )

## -----------------------------------------------------------------------------
# Example: Income distribution in frequency table format
income_freq <- c(120, 180, 150, 80, 40, 20, 10)
income_tot <- c(18800, 16300, 44700, 33900, 21500, 22100, 98300)
income_lower <- c(0, 15000, 30000, 45000, 60000, 80000, 100000)
income_upper <- c(15000, 30000, 45000, 60000, 80000, 100000, 150000)

## -----------------------------------------------------------------------------
# Estimate quantiles from grouped data
quantile_grouped(freq = income_freq,
                 lower_bounds = income_lower,
                 upper_bounds = income_upper,
                 probs = c(0.25, 0.5, 0.75))

## -----------------------------------------------------------------------------
# Compute QRI from grouped data
qri_grouped(freq = income_freq, 
            lower_bounds = income_lower, 
            upper_bounds = income_upper,
            M = 100)

## -----------------------------------------------------------------------------
# Estimate quantiles from grouped data
gini_grouped(Y = income_tot, freq = income_freq)

## -----------------------------------------------------------------------------
sessionInfo()

