## ----setup, include = FALSE---------------------------------------------------
# Document-wide knitr chunk defaults: figures are drawn at 6 x 4, each chunk's
# source and output are collapsed into a single block, and every output line
# is prefixed with "#>".
knitr::opts_chunk$set(
  fig.width = 6,
  fig.height = 4,
  collapse = TRUE,
  comment = "#>"
)
library(joinspy)

# Snapshot of the current (settable) graphics parameters.
# NOTE(review): this was originally labelled "transparent backgrounds for
# pkgdown light/dark mode", but no visible line changes par() or restores it
# from old_par -- confirm whether the matching calls were lost.
old_par <- par(no.readonly = TRUE)

## ----eval = FALSE-------------------------------------------------------------
# # Install development version from GitHub
# # install.packages("pak")
# pak::pak("gcol33/joinspy")

## -----------------------------------------------------------------------------
# Sample data with deliberately planted key problems:
#   * "B" appears twice in orders$customer_id (duplicate key),
#   * "D " carries a trailing space, so it is not equal to "D" in customers,
#   * "E" exists in customers only.
orders <- data.frame(
  customer_id = c("A", "B", "B", "C", "D "),
  amount = c(100, 200, 150, 300, 50),
  stringsAsFactors = FALSE
)

# Lookup table: one row per customer, keys free of whitespace.
customers <- data.frame(
  customer_id = c("A", "B", "C", "D", "E"),
  name = c("Alice", "Bob", "Carol", "David", "Eve"),
  stringsAsFactors = FALSE
)

# Get diagnostic report
# join_spy() is called on the two sample tables keyed by "customer_id"; the
# returned object is kept in `report` so the following chunks can inspect it.
report <- join_spy(orders, customers, by = "customer_id")

## -----------------------------------------------------------------------------
# Top-level call, so the result of summary() on the report is auto-printed.
summary(report)

## -----------------------------------------------------------------------------
# Key check across both tables on "customer_id".
# NOTE(review): the exact checks key_check() performs are not visible in this
# script -- see the joinspy documentation.
key_check(orders, customers, by = "customer_id")

## -----------------------------------------------------------------------------
# Duplicate-key inspection for `orders` only ("B" occurs twice in its
# customer_id column); presumably reports those repeated keys -- confirm
# against the joinspy documentation.
key_duplicates(orders, by = "customer_id")

## -----------------------------------------------------------------------------
# A tidy variant of the orders table: unique keys, no stray whitespace.
orders_clean <- data.frame(
  customer_id = c("A", "B", "C"),
  amount = c(100, 200, 300),
  stringsAsFactors = FALSE
)

# Silent mode (.quiet = TRUE) is meant for pipelines; the joined table is
# stored and its first rows are shown.
result <- left_join_spy(
  orders_clean,
  customers,
  by = "customer_id",
  .quiet = TRUE
)
head(result)

# Diagnostics remain accessible afterwards through last_report().
last_report()$match_analysis$match_rate

## -----------------------------------------------------------------------------
products <- data.frame(
  id = 1:3,
  name = c("Widget", "Gadget", "Gizmo")
)
prices <- data.frame(
  id = 1:3,
  price = c(10, 20, 30)
)

# Succeeds: every id occurs exactly once on each side, i.e. a 1:1 relationship.
join_strict(products, prices, by = "id", expect = "1:1")

## ----error = TRUE-------------------------------------------------------------
# Wrapped in try() so the script keeps running after the expected failure.
try({
  # Fails: id 1 occurs twice in prices_dup, which violates the 1:1 expectation.
  prices_dup <- data.frame(
    id = c(1, 1, 2, 3),
    price = c(10, 15, 20, 30)
  )
  join_strict(products, prices_dup, by = "id", expect = "1:1")
})

## -----------------------------------------------------------------------------
# Key column with whitespace problems: a leading space (" A"), a trailing
# space ("B "), and padding on both sides ("  C  ").
messy <- data.frame(
  id = c(" A", "B ", "  C  "),
  value = 1:3,
  stringsAsFactors = FALSE
)

# Preview repairs
# dry_run = TRUE: the result is not assigned, so `messy` itself is untouched.
join_repair(messy, by = "id", dry_run = TRUE)

# Apply repairs
# The returned object is stored in `fixed`; its `id` column is then shown.
fixed <- join_repair(messy, by = "id")
fixed$id

## -----------------------------------------------------------------------------
# One row per order id.
orders_dup <- data.frame(
  id = 1:3,
  product = c("A", "B", "C")
)
# id 1 is listed at two locations, so it matches twice in the merge below.
inventory <- data.frame(
  id = c(1, 1, 2, 3),
  location = c("NY", "LA", "NY", "LA")
)

# Base merge() with its defaults keeps only matching ids (inner join); the
# three order rows become four because of the repeated id 1 in `inventory`.
result <- merge(orders_dup, inventory, by = "id")
join_explain(
  result,
  orders_dup,
  inventory,
  by = "id",
  type = "inner"
)

## -----------------------------------------------------------------------------
# State before the join: ids 1 to 3.
before <- data.frame(id = 1:3, val = c("a", "b", "c"))

# Full outer merge (all = TRUE) against a table holding ids 2 to 4, so the
# result covers ids 1 to 4 and gains a `name` column.
after <- merge(
  before,
  data.frame(id = 2:4, name = c("B", "C", "D")),
  by = "id",
  all = TRUE
)

# Compare the table before and after the join, keyed by "id".
join_diff(before, after, by = "id")

## ----fig.width = 5, fig.height = 4--------------------------------------------
# Note: `orders`, `customers` and `report` from the earlier chunks are
# reassigned here. The two new tables overlap on ids 3, 4 and 5.
orders <- data.frame(
  id = 1:5,
  val = 1:5
)
customers <- data.frame(
  id = 3:7,
  name = letters[3:7]
)

report <- join_spy(orders, customers, by = "id")
# Venn diagram
plot(report)

## -----------------------------------------------------------------------------
# Record the R version and loaded packages used to run this script.
sessionInfo()

