USGDPpresidents
This document describes the process for updating Ecdat::USGDPpresidents
.
First decide the directory in which we want to work and copy this
vignette (*.Rmd
file) into that directory.
(RStudio
does not allow setwd
inside code
chunks to work as one might naively expect. Therefore, it’s best NOT to
try to change the working directory but instead to copy this vignette
into the desired working directory.)
Start by checking the span of years in
USGDPpresidents
:
library(Ecdat)
##
## Attaching package: 'Ecdat'
## The following object is masked from 'package:datasets':
##
## Orange
# First and last Year currently present in the packaged data set.
rngYrs <- range(USGDPpresidents$Year)
rngYrs
## [1] 1610 2024
Next download “GDP - US” and “CPI - US” from Measuring Worth. On 2022-02-16 this produced two csv files, which I downloaded and copied into a directory in which we wish to work.
# Show the working directory in which the downloaded files are sought.
getwd()
## [1] "/private/var/folders/2n/zqk768wj3818l8x2wttbc5kw0000gn/T/RtmpHRNbbn/Rbuild11cd57e99bead/Ecfun/vignettes"
# Names of all csv files in the working directory.
(csv2 <- dir(pattern='\\.csv$'))
## character(0)
# CPI candidates: csv names that start with 'USCPI'.
(CPIcsvs <- grep('^USCPI', csv2, value=TRUE))
## character(0)
# Keep only the last candidate (length 0 when none was found).
(CPIcsv <- tail(CPIcsvs, 1))
## character(0)
# GDP candidates: csv names that start with 'USGDP'.
(GDPcsvs <- grep('^USGDP', csv2, value=TRUE))
## character(0)
# Keep only the last candidate (length 0 when none was found).
(GDPcsv <- tail(GDPcsvs, 1))
## character(0)
# Attempt the update only when exactly one CPI file and exactly one GDP
# file were found. `&&` is the scalar, short-circuiting operator intended
# for a single TRUE/FALSE decision; elementwise `&` is for vector masks.
Update0 <- (length(CPIcsv) == 1) && (length(GDPcsv) == 1)
We must verify by visual inspection that CPIcsv
and
GDPcsv
are both of length 1 and are the files we want.
Read them:
# Update stays FALSE unless the downloaded files extend past the last year
# already in USGDPpresidents.
Update <- FALSE
if (Update0) {
  # Skip the leading non-data lines of each file (2 for CPI, 1 for GDP).
  USCPI <- read.csv(CPIcsv, skip = 2)
  str(USCPI)
  USGDP. <- read.csv(GDPcsv, skip = 1)
  str(USGDP.)
  library(Ecfun)
  # Convert the GDP table as read to numeric columns.
  USGDP <- asNumericDF(USGDP.)
  rngCPIyrs <- range(USCPI$Year)
  print(rngCPIyrs)
  rngGDPyrs <- range(USGDP$Year)
  print(rngGDPyrs)
  # Latest year available in either source.
  endYr <- max(rngCPIyrs, rngGDPyrs)
  if (endYr > rngYrs[2]) {
    Update <- TRUE
    print(Update)
  }
}
If Update, create a local copy of USGDPpresidents
with
the additional rows required to hold the new data:
if (Update) {
  # One extra row per year between the old last year and endYr.
  rowsNeeded <- endYr - rngYrs[2]
  Nold <- nrow(USGDPpresidents)
  # Index every existing row once, then repeat the last row as a
  # placeholder for each new year.
  iRep <- c(seq_len(Nold), rep(Nold, times = rowsNeeded))
  USGDPp2 <- USGDPpresidents[iRep, ]
}
Fix the Year and insert NAs for all other columns for the new rows:
if (Update) {
  # Row positions of the placeholder rows appended for the new years.
  iNew <- Nold + seq_len(rowsNeeded)
  USGDPp2$Year[iNew] <- (rngYrs[2] + 1):endYr
  # Row names track Year.
  rownames(USGDPp2) <- USGDPp2$Year
  # Blank everything except the first column in the new rows
  # (assumes column 1 is Year -- TODO confirm).
  USGDPp2[iNew, -1] <- NA
}
Now replace CPI by the new numbers:
if (Update) {
  # Rows of USGDPp2 whose Year also appears in the new CPI download.
  selCPI <- (USGDPp2$Year %in% USCPI$Year)
  # Stop if an existing CPI value has no counterpart in the new file.
  # The check names the CPI column explicitly, matching the column that
  # is written below, instead of relying on it being column 2.
  if (any(!is.na(USGDPp2$CPI[!selCPI]))) {
    stop('ERROR: There are CPI numbers ',
         'in the current USGDPpresidents ',
         'that are not in the new. ',
         'Manual review required.')
  }
  # Align by Year rather than by position, so the assignment stays correct
  # even if USCPI contains years absent from USGDPp2 or is ordered
  # differently. Column 2 of USCPI is taken to hold the index values, as in
  # the original code.
  iCPI <- match(USGDPp2$Year[selCPI], USCPI$Year)
  USGDPp2$CPI[selCPI] <- USCPI[iCPI, 2]
}
Does USGDPpresidents.Rd
need to be updated to reflect
the proper reference years for the CPI?
if (Update) {
  # Echo the first four lines of the CPI file so its header text
  # (including the stated index base) can be read.
  readLines(con = CPIcsv, n = 4L)
}
If this says “Average 1982-84 = 100”, it should be good. Otherwise that (and this) should be updated.
Now let’s update GDPdeflator
:
if (Update) {
  # Rows of USGDPp2 whose Year is present in the GDP download.
  selGDP <- is.element(USGDPp2$Year, USGDP$Year)
  # Every deflator outside the new file's years must already be missing.
  if (!all(is.na(USGDPp2[!selGDP, 'GDPdeflator']))) {
    stop('ERROR: There are GDPdeflator numbers ',
         'in the current USGDPpresidents ',
         'that are not in the new. ',
         'Manual review required.')
  }
  # Locate the deflator column by name; assignment is positional, so
  # USGDP rows are assumed to line up with the selected rows -- TODO confirm.
  selDefl <- grep('Deflator', names(USGDP))
  USGDPp2$GDPdeflator[selGDP] <- USGDP[, selDefl]
  # Print the column name so its index year can be compared with the help file.
  print(names(USGDP)[selDefl])
}
Compare the index year of “GDP.Deflator” with that in
USGDPpresidents.Rd
: If they are different, fix
USGDPpresidents.Rd
.
Now update population:
if (Update) {
  selPop <- grep('Population', names(USGDP))
  # Rescale by 1000 to match the population.K column.
  sPop <- USGDP[, selPop] / 1000
  # Relative difference between stored and new population figures.
  ePop <- (USGDPp2$population.K[selGDP] / sPop) - 1
  quantile(ePop, na.rm = TRUE)
}
Check. Replace.
if (Update) {
  # Overwrite the stored population with the rescaled new figures.
  USGDPp2[['population.K']][selGDP] <- sPop
  print(names(USGDP)[selPop])
}
Now realGDPperCapita
. This also has a reference year, so
we need to make sure we get them all:
if (Update) {
  # Stop if an existing realGDPperCapita value has no counterpart in the
  # new file. The column name must be spelled 'realGDPperCapita'; a
  # misspelled name makes the data-frame subset fail with
  # "undefined columns selected" before the check can run.
  if (any(!is.na(USGDPp2[!selGDP, 'realGDPperCapita']))) {
    stop('ERROR: There are realGDPperCapita numbers ',
         'in the current USGDPpresidents ',
         'that are not in the new. ',
         'Manual review required.')
  }
  # Locate the source column by name and copy it into the selected rows.
  selGDPperC <- grep('Real.GDP.per.c', names(USGDP))
  USGDPp2$realGDPperCapita[selGDP] <- USGDP[, selGDPperC]
  # Print the column name so its reference year can be checked.
  print(names(USGDP)[selGDPperC])
}
Compare the index year of Real.GDP.per.capita
with that
in USGDPpresidents.Rd
: If they are different, fix
USGDPpresidents.Rd
.
Next: executive:
NOTE: THIS MAY NEED TO BE CHANGED MANUALLY HERE BEFORE EXECUTING,
BECAUSE IT IS NOT IN USGDP
… CHECK BOTH: (1) WHO WAS PRESIDENT
SINCE THE PREVIOUS VERSION? (2) WAS THAT PERSON NOT IN THE PREVIOUS
VERSION?
if (Update) {
  exec <- as.character(USGDPp2$executive)
  # Name to assign to rows with no executive yet; edit by hand as needed.
  newExec <- 'Biden'
  # NOTE(review): this fills every missing executive, not only the newly
  # added rows -- confirm there are no older NA rows.
  exec[is.na(exec)] <- newExec
  lvlexec <- levels(USGDPp2$executive)
  # Append the name as a new last level when it is not already one.
  # (The variable is `newExec`; R is case-sensitive, so `newexec` would
  # fail with "object not found" whenever this branch runs.)
  if (!(newExec %in% lvlexec)) {
    lvlexec <- c(lvlexec, newExec)
  }
  USGDPp2$executive <- ordered(exec, lvlexec)
}
Similarly: war
NOTE: IF THERE HAS BEEN A MAJOR WAR SINCE THE LAST VERSION, THEN THIS TEXT NEEDS TO BE CHANGED, BECAUSE IT ASSUMES THERE HAS NOT BEEN A MAJOR WAR.
if (Update) {
  # Keep the existing level set; rows with no war recorded get ''.
  lvlwar <- levels(USGDPp2$war)
  war <- as.character(USGDPp2$war)
  war[is.na(war)] <- ''
  USGDPp2$war <- factor(war, levels = lvlwar, ordered = TRUE)
}
Next: battleDeaths
and battleDeathsPMP
:
NOTE: battleDeaths
ARE ONLY BATTLE DEATHS IN MAJOR WARS
as defined in help(USGDPpresidents)
.
Otherwise, they are 0.
if (Update) {
  # New years carry zero battle deaths.
  USGDPp2[['battleDeaths']][iNew] <- 0
  # Recompute the scaled rate for every row from deaths and population.K.
  USGDPp2$battleDeathsPMP <-
    1000 * USGDPp2[['battleDeaths']] / USGDPp2[['population.K']]
}
Keynes (per help(USGDPpresidents)
):
if (Update) {
  # New years get Keynes = 0.
  USGDPp2[['Keynes']][iNew] <- 0
}
Unemployment figures came from different sources for different years.
Since 1940 the source has been the Bureau of Labor Statistics (BLS),
series LNS14000000
from the Current Population Survey.
These data are available as a monthly series from the Current Population Survey of the Bureau
of Labor Statistics.
Download the most recent years as an Excel file, compute row averages,
and transfer the numbers for the most recent years here.
NOTE: When I visited the Current
Population Survey of the Bureau of Labor Statistics on 2025-08-22, I
found a huge number of options. I clicked, “CPS Data”, then “Data retrieval tools”, then
“Labor Force Statistics (Current Population Survey - CPS)” “Top Picks”. Then I
found Unemployment Rate - LNS14000000
and checked that.
Then near the bottom of that page, I clicked, “Retrieve data”. That
produced a table for years 2015:2025 with columns Jan, Feb, …, December,
with the data for 2025 incomplete, as it should be. Just above that
table it said, “Download: xlsx”. I clicked that. I opened that
spreadsheet and added column N = average of columns B - M.
Then I compared those numbers with the numbers in
USGDPp2[c('Year', 'unemployment')]
. The numbers for 2020
were slightly different: 8.091667 in the previous version and 8.1 for
the new number.
Let’s read the new numbers and compare the numbers to confirm that we have read them correctly, then replace the old numbers (including NAs) with the new numbers.
if (Update) {
  # Values inside a braced block are not auto-printed; only the value of
  # the block's last expression is. Print the file listing explicitly so
  # it appears in the rendered output as intended.
  print(xls <- dir(pattern = '\\.xlsx$'))
  # Downloaded BLS workbook(s): names starting with 'Series'.
  (BLSxls <- grep('^Series', xls, value = TRUE))
}
library(readxl)
if (Update) {
  # Skip the 11 leading lines of the sheet before the table.
  str(BLS <- read_excel(BLSxls, skip = 11))
}
Compute the average unemployment here, so I don’t have to do this separately.
if (Update) {
  # Columns 2:13 are taken to be the twelve monthly values -- TODO confirm.
  UNEMP <- as.matrix(BLS[2:13])
  # Row-wise mean; a row with any missing month yields NA.
  unemp <- apply(UNEMP, MARGIN = 1, FUN = mean)
  str(unemp)
}
Store these unemp
numbers after checking first.
if(Update){
# Rows of USGDPp2 with a Year present in BLS, and rows of BLS with a Year
# present in USGDPp2.
selU4GDP <- (USGDPp2$Year %in% BLS$Year)
selBLS <- (BLS$Year %in% USGDPp2$Year)
# Stored unemployment minus the newly computed annual average.
dunemp <- (USGDPp2[selU4GDP, 'unemployment'] -
unemp[selBLS])
# Side-by-side table for visual comparison; as the last expression in the
# block it is auto-printed.
cbind(USGDPp2[selU4GDP, c('Year', 'unemployment')],
unemp[selBLS], dunemp)
}
As expected. Replace.
if (Update) {
  # Replace stored unemployment with the new averages for matching years.
  USGDPp2[selU4GDP, 'unemployment'] <- unemp[selBLS]
  # New rows inherit the source label of the row just before them.
  USGDPp2[['unempSource']][iNew] <- USGDPp2[['unempSource']][iNew[1] - 1]
  tail(USGDPp2)
}
fedReceipts
, fedOutlays
We get fedReceipts
and fedOutlays
from two
different sources. Let’s start with the historical data first.
fedReceipts
and
fedOutlays
We manually copied the historical data from series Y 335 and 336 in
United
States Census Bureau (1975) Bicentennial Edition: Historical Statistics
of the United States, Colonial Times to 1970, Part 2. Chapter Y.
Government into a LibreOffice *.ods
file. We need to
read that once and add it to USGDPp2
:
if (Update) {
  # Anchor the pattern with `$` so only names ending in '.ods' match.
  # print() is needed because values inside a braced block are not
  # auto-printed unless they are the block's last expression.
  print(odsFile <- dir(pattern = '\\.ods$'))
  # The historical-statistics file: names starting with 'hstat'.
  (odsF <- grep('^hstat', odsFile, value = TRUE))
}
if (Update) {
  library(readODS)
  # Read the 'Receipts' sheet, skipping its first two lines.
  hstat <- read_ods(odsF, sheet = 'Receipts', skip = 2)
  str(hstat)
}
if (Update) {
  # Keep rows that have a Year, and only the first three columns.
  Hstat <- hstat[!is.na(hstat$Year), 1:3]
  # Sort into ascending Year order.
  oOld <- order(Hstat$Year)
  Hst <- Hstat[oOld, ]
  head(Hst)
}
Add as new variables to USGDPp2
:
if(Update){
# Set both columns to NA in every row before filling from Hst.
# NOTE(review): if USGDPp2 already carries fedReceipts / fedOutlays, these
# two assignments discard all existing values -- confirm this chunk is
# meant to run only once, when the columns are first created.
USGDPp2$fedReceipts <- NA
USGDPp2$fedOutlays <- NA
# Rows of USGDPp2 whose Year appears in the historical table.
selGDP4Hst <- (USGDPp2$Year %in% Hst$Year)
# Columns 2:3 of Hst divided by 1000; assigned by position, so every Hst
# year is assumed to be present in USGDPp2 in the same order -- TODO confirm.
USGDPp2[selGDP4Hst, c("fedReceipts", "fedOutlays")] <-
(Hst[2:3] / 1000)
# Auto-printed as the last expression, for visual inspection.
USGDPp2[c('Year', 'fedReceipts', 'fedOutlays')]
}
fedReceipts
and fedOutlays
For the latest data on fedReceipts
,
fedOutlays
, and fedSurplus
, I went to the
website for The
White House President’s Budget Historical Tables. On 2025-08-22, I
saw “Historical Tables” in 3 places on that page. I clicked on the
bottom one and got
BUDGET-2026-HIST.xlsx. In the file I got
doing this on 2025-01-22, “Table 1.1-Summary of Receipts,
Outlays, and Surpluses or Deficits (-): 1789-2026” included budget
forecasts. The version of this table I got 2025-08-22 included data
through 2024 but no forecasts.
# Names of all xlsx files in the working directory.
(xls2 <- dir(pattern='\\.xlsx$'))
## character(0)
if (Update) {
  # Values inside a braced block are not auto-printed unless they are the
  # last expression, so the intermediate listings are printed explicitly.
  print(BudgetFiles <- grep('^BUDGET', xls2, value = TRUE))
  print(BudgetF2_1 <- grep('2-1', BudgetFiles, value = TRUE))
  # Prefer the last name containing '2-1'; otherwise take the last
  # BUDGET file.
  (BudgetFile <- (if (length(BudgetF2_1) > 0)
    tail(BudgetF2_1, 1) else tail(BudgetFiles, 1)))
}
Confirm that BudgetFile
is what we want.
From opening this file in spreadsheet software, it appears that we
want tab hist01z1
.
if (Update) {
  # Read sheet 'hist01z1', skipping its first three lines.
  Budget <- read_excel(BudgetFile,
                       sheet = 'hist01z1', skip = 3)
  # head() is not the last expression of the block, so it must be printed
  # explicitly; otherwise its result is silently discarded.
  print(head(Budget))
  tail(Budget)
}
Let’s use only the most recent 40 years, because there are anomalies
in these data like “-*” for a number that is “$500 thousand or less” and
TQ
for “transitional quarter” when the US had its fiscal
year change from starting July 1 to October 1. We also drop the last two
rows, because they are comments. And keep only columns 1:4:
if (Update) {
  library(Ecfun)
  nBudg0 <- nrow(Budget)
  # The 40 consecutive row numbers ending two rows before the last row.
  iBudg <- seq(to = nBudg0 - 2, length.out = 40)
  # Keep columns 1:4 of those rows and convert them to numeric.
  Budg <- asNumericDF(Budget[iBudg, 1:4])
  str(Budg)
  tail(Budg)
}
if (Update) {
  # Row masks pairing USGDPp2 and Budg on the year held in Budg column 1.
  selGDP4budg <- (USGDPp2$Year %in% Budg[, 1])
  selBudg <- (Budg[, 1] %in% USGDPp2$Year)
  # Stored value minus new value for receipts, outlays and surplus
  # (Budg columns 2, 3 and 4 respectively).
  dfedR <- (USGDPp2[selGDP4budg, 'fedReceipts']
            - Budg[selBudg, 2])
  dfedO <- (USGDPp2[selGDP4budg, 'fedOutlays']
            - Budg[selBudg, 3])
  dfedS <- (USGDPp2[selGDP4budg, 'fedSurplus']
            - Budg[selBudg, 4])
  # The comparison table is not the last expression of the block, so it
  # must be printed explicitly; otherwise it is silently discarded.
  print(tail(cbind(USGDPp2[selGDP4budg, c('Year',
    'fedReceipts', 'fedOutlays', 'fedSurplus')],
    Budg[selBudg, 2:4], dfedR, dfedO, dfedS), 10))
  # Plot the three difference series together.
  matplot(cbind(dfedR, dfedO, dfedS), type = 'l')
}
There are tiny changes in the years since 2017. There may also be a few in earlier years, but we will ignore the earlier years.
Let’s replace the numbers for fedReceipts
,
fedOutlays
, and fedSurplus
for 2017:2024.
if (Update) {
  # Count the rows selected on each side. These calls are not the last
  # expression of the block, so they need print() to appear in the output;
  # the two TRUE counts should agree before the replacement below.
  print(table(sel2017_2024 <- (USGDPp2$Year %in% 2017:2024)))
  print(table(s2017_2024 <- (Budg[, 1] %in% 2017:2024)))
  # Overwrite the three budget columns for 2017:2024 with Budg columns 2:4.
  USGDPp2[sel2017_2024, c('fedReceipts', 'fedOutlays',
                          'fedSurplus')] <- Budg[s2017_2024, 2:4]
  tail(USGDPp2)
}
Let’s plot these budget numbers before proceeding.
if (Update) {
  # Common x-axis range for the budget plots: 1790 through the last Year.
  Xlim <- c(1790, max(USGDPp2$Year, na.rm = TRUE))
  # Receipts and outlays on a log scale; surplus on a linear scale.
  plot(fedReceipts ~ Year, data = USGDPp2, log = 'y', type = 'l',
       xlim = Xlim, las = 2)
  plot(fedOutlays ~ Year, data = USGDPp2, log = 'y', type = 'l',
       xlim = Xlim, las = 2)
  plot(fedSurplus ~ Year, data = USGDPp2, type = 'l',
       xlim = Xlim, las = 2)
}
fedDebt
is not the negative of a simple
cumulative sum fedSurplus
. The sources of the discrepancies
are not clear. However, some outlays are “Off-budget” including a “black
budget” that is not revealed to many (and perhaps all) members of
the US Congress. It’s not obvious, at least to this researcher, if
interest on the national debt is included in the official budget.
fedDebt
are available as “Historical
Debt Outstanding” from the US Treasury. On 2025-08-23 we requested
“Date Range (Record Date): All”, then “CSV” and “Download CSV File”. The
result was HstDebt_17900101_20240930.csv
.
# Names of all csv files in the working directory.
(csv3 <- dir(pattern='\\.csv$'))
## character(0)
if (Update) {
  # Values inside a braced block are not auto-printed unless they are the
  # last expression, so the intermediate diagnostics are printed explicitly.
  print(debtFiles <- grep('^HstDebt', csv3, value = TRUE))
  # read.csv() needs a single file name; take the last match if there are
  # several, as is done for the CPI and GDP csv files.
  HstDebt <- read.csv(tail(debtFiles, 1))
  print(tail(HstDebt))
  # First six rows of the download, kept for the comparison that follows.
  print(HstDebt6 <- head(HstDebt))
  tail(USGDPp2[c('Year', 'fedDebt')])
}
Visual inspection suggests that the numbers match for 2019:2021. Let’s compute the difference to confirm.
if (Update) {
  nobs <- nrow(USGDPp2)
  # Last six row numbers of USGDPp2, newest first. print() is required
  # because this is not the last expression of the block; `length.out` is
  # spelled out instead of relying on partial argument matching.
  print(endRows <- seq(nobs, by = -1, length.out = 6))
  # Stored debt minus column 2 of HstDebt6.
  # NOTE(review): rows are paired by position, not by year -- confirm that
  # HstDebt6 is ordered newest first and starts at the same year.
  (dHstDebt6 <- (USGDPp2$fedDebt[endRows] - HstDebt6[, 2]))
}
Roundoff error. Let’s replace those numbers.
if (Update) {
  # Store the downloaded values. print() is required for both diagnostics
  # because neither is the last expression of the block.
  print(USGDPp2$fedDebt[endRows] <- HstDebt6[, 2])
  print(tail(USGDPp2))
  # Debt over time on a log scale, using the shared x-axis range Xlim.
  plot(fedDebt ~ Year, USGDPp2, type = 'l', log = 'y',
       xlim = Xlim, las = 2)
}
Finally: fedOutlays
, … fedDebt
as a percent
of GDP
.
For *_pGDP
, I’m getting discrepancies that seem a little
more than roundoff error. Let’s look at the numbers since 1843, which
was the year the US first adopted a fiscal year different from the
calendar year.
if(Update){
# Rows with Year strictly greater than 1843.
# NOTE(review): the accompanying text says "since 1843"; this comparison
# excludes 1843 itself -- confirm which is intended.
selEnd <- (USGDPp2$Year>1843)
# Product of population (population.K * 1000), real GDP per capita and
# the deflator rescaled by 1/100; presumably GDP in current dollars.
currentGDP <- with(USGDPp2[selEnd, ],
1000 * population.K * realGDPperCapita
* GDPdeflator / 100)
# Visual check on a log scale.
plot(USGDPp2$Year[selEnd], currentGDP,
log='y', type='l', las=2)
# Auto-printed as the last expression: the most recent values.
tail(currentGDP)
}
GDP for 2024 is just over 29 trillion. Confirmed. And the plot also looks plausible. Continue.
if(Update){
# Visual check of fedReceipts (log scale) over the rows selected by selEnd.
plot(fedReceipts~Year, USGDPp2[selEnd, ], log='y',
type='l', las=2)
}
Plausible.
if(Update){
# Receipts as a fraction of currentGDP; the 1e6 factor presumably converts
# fedReceipts from millions of dollars -- TODO confirm units.
fedR_p <- (1e6*USGDPp2$fedReceipts[selEnd] /
currentGDP)
# The recomputed series alone (log scale).
plot(USGDPp2$Year[selEnd], fedR_p, type='l',
las=2, log='y')
# Stored series and recomputed series overlaid for comparison.
matplot(USGDPp2$Year[selEnd],
cbind(USGDPp2$fedReceipts_pGDP[selEnd], fedR_p),
type='l', las=2, log='y')
}
Good. Ratio?
if(Update){
# Ratio of stored to recomputed receipts share; values near 1 mean agreement.
plot(USGDPp2$Year[selEnd],
USGDPp2$fedReceipts_pGDP[selEnd] / fedR_p,
type='l', las=2, log='y')
}
The new numbers differ by less than 3 percent from the previous numbers. I don’t think I care.
Use the new numbers.
if (Update) {
  # Store the recomputed receipts share for the rows selected by selEnd.
  USGDPp2[['fedReceipts_pGDP']][selEnd] <- fedR_p
  tail(USGDPp2)
}
Next fedOutlays_pGDP
.
if(Update){
# Outlays as a fraction of currentGDP; the 1e6 factor presumably converts
# fedOutlays from millions of dollars -- TODO confirm units.
fedO_p <- (1e6*USGDPp2$fedOutlays[selEnd] /
currentGDP)
# Stored series and recomputed series overlaid for comparison (log scale).
matplot(USGDPp2$Year[selEnd],
cbind(USGDPp2$fedOutlays_pGDP[selEnd], fedO_p),
type='l', las=2, log='y')
}
Good, similar to Receipts. Ratio?
if(Update){
# Ratio of stored to recomputed outlays share; values near 1 mean agreement.
plot(USGDPp2$Year[selEnd],
USGDPp2$fedOutlays_pGDP[selEnd] / fedO_p,
type='l', las=2, log='y')
}
Like Receipts. Store.
if (Update) {
  # Store the recomputed outlays share for the rows selected by selEnd.
  USGDPp2[['fedOutlays_pGDP']][selEnd] <- fedO_p
  tail(USGDPp2)
}
Good. Surplus?
if(Update){
# Surplus as a fraction of currentGDP; the 1e6 factor presumably converts
# fedSurplus from millions of dollars -- TODO confirm units.
fedS_p <- (1e6*USGDPp2$fedSurplus[selEnd] /
currentGDP)
# Stored and recomputed series overlaid; no log axis is requested here.
matplot(USGDPp2$Year[selEnd],
cbind(USGDPp2$fedSurplus_pGDP[selEnd], fedS_p),
type='l', las=2)
}
Good, similar to Receipts and Outlays. Ratio?
if (Update) {
  # Ratio of stored to recomputed surplus share.
  rSup <- (USGDPp2$fedSurplus_pGDP[selEnd] / fedS_p)
  plot(USGDPp2$Year[selEnd],
       USGDPp2$fedSurplus_pGDP[selEnd] / fedS_p,
       type = 'l', las = 2)
  # Five-number summary of the ratio, ignoring missing values.
  quantile(rSup, na.rm = TRUE)
}
Good, similar to Receipts and Outlays. Store.
if (Update) {
  # Store the recomputed surplus share for the rows selected by selEnd.
  USGDPp2[['fedSurplus_pGDP']][selEnd] <- fedS_p
  tail(USGDPp2)
}
fedDebt
?
if(Update){
# Debt as a fraction of currentGDP. Unlike the receipts, outlays and surplus
# ratios, no 1e6 factor is applied here.
# NOTE(review): presumably fedDebt is already in dollars -- confirm units.
fedD_p <- (USGDPp2$fedDebt[selEnd] /
currentGDP)
# Stored series and recomputed series overlaid for comparison (log scale).
matplot(USGDPp2$Year[selEnd],
cbind(USGDPp2$fedDebt_pGDP[selEnd], fedD_p),
type='l', las=2, log='y')
}
Good, similar to Receipts, Outlays and Surplus. Ratio?
if(Update){
# Ratio of stored to recomputed debt share; values near 1 mean agreement.
plot(USGDPp2$Year[selEnd],
USGDPp2$fedDebt_pGDP[selEnd] / fedD_p,
type='l', las=2)
}
As before. Store.
if (Update) {
  # Store the recomputed debt share for the rows selected by selEnd.
  USGDPp2[['fedDebt_pGDP']][selEnd] <- fedD_p
  tail(USGDPp2)
}
if (Update) {
  # From here on the working copy replaces the packaged object.
  USGDPpresidents <- USGDPp2
  sel <- !is.na(USGDPpresidents$fedOutlays_pGDP)
  # Years flagged with a non-empty war label.
  War <- (USGDPpresidents$war != '')
  plot(100 * fedOutlays_pGDP ~ Year,
       data = USGDPpresidents[sel, ], type = 'l', log = 'y',
       xlab = '', ylab = 'US federal outlays, % of GDP')
  # Reference lines at 2 and 3 percent.
  abline(h = c(2, 3))
  # Dotted verticals: gray for war years, red for 1929 and 1933.
  abline(v = USGDPpresidents$Year[War],
         lty = 'dotted', col = 'light gray')
  abline(v = c(1929, 1933), col = 'red', lty = 'dotted')
  text(1931, 22, 'Hoover', srt = 90, col = 'red')
}
How about the same plot of Deficit =
-fedSurplus_pGDP
?
if (Update) {
  # Rows with a surplus share available.
  selD <- !is.na(USGDPpresidents$fedSurplus_pGDP)
  # Deficit = minus the surplus, as a percent of GDP. Subset with `selD`,
  # the mask built for this variable, rather than the `sel` mask that was
  # built from fedOutlays_pGDP for the outlays plot.
  plot(-100 * fedSurplus_pGDP ~ Year,
       USGDPpresidents[selD, ], type = 'l',
       xlab = '', ylab = 'US federal deficit, % of GDP')
  # Reference lines at 2 and 3 percent.
  abline(h = 2:3)
  # Dotted verticals: gray for war years, red for 1929 and 1933.
  abline(v = USGDPpresidents$Year[War],
         lty = 'dotted', col = 'light gray')
  abline(v = c(1929, 1933), col = 'red', lty = 'dotted')
  text(1931, 22, 'Hoover', srt = 90, col = 'red')
}
What about inflation = diff(log(CPI))
?
if (Update) {
  # Rows after 1789; the quantiles of the year gaps show whether the
  # series is annual without breaks.
  selI <- (USGDPpresidents$Year > 1789)
  quantile(diff(USGDPpresidents$Year[selI]))
}
if (Update) {
  # Each log difference is dated by the later of its two years.
  yr2 <- USGDPpresidents$Year[selI][-1]
  # CPI inflation: 100 times the change in log(CPI).
  infl <- 100 * diff(log(USGDPpresidents$CPI[selI]))
  plot(yr2, infl, type = 'l', las = 2)
  abline(h = c(-2, 0, 2, 10))
  abline(v = USGDPpresidents$Year[War],
         lty = 'dotted', col = 'light gray')
  abline(v = c(1929, 1933), col = 'red', lty = 'dotted')
  text(1931, 22, 'Hoover', srt = 90, col = 'red')
}
if (Update) {
  # Same calculation using the GDP deflator.
  infl2 <- 100 * diff(log(USGDPpresidents$GDPdeflator[selI]))
  plot(yr2, infl2, type = 'l', las = 2)
  abline(h = c(-2, 0, 2, 10))
  abline(v = USGDPpresidents$Year[War],
         lty = 'dotted', col = 'light gray')
  abline(v = c(1929, 1933), col = 'red', lty = 'dotted')
  text(1931, 22, 'Hoover', srt = 90, col = 'red')
}
if (Update) {
  # battleDeathsPMP on a linear scale with a reference line at 100.
  plot(battleDeathsPMP ~ Year, data = USGDPpresidents,
       type = 'l', las = 2, xlim = Xlim)
  abline(h = 100)
  # Same series shifted by 1 so that zeros can be drawn on a log scale.
  plot(1 + battleDeathsPMP ~ Year, data = USGDPpresidents,
       type = 'l', las = 2, xlim = Xlim, log = 'y')
  abline(h = 100)
  abline(v = USGDPpresidents$Year[War],
         lty = 'dotted', col = 'light gray')
  abline(v = c(1929, 1933), col = 'red', lty = 'dotted')
  text(1931, 22, 'Hoover', srt = 90, col = 'red')
}
if (Update) {
  # Write the updated object to the working directory, then show where
  # that is.
  save(list = 'USGDPpresidents', file = 'USGDPpresidents.rda')
  getwd()
}
Now copy this file from the current working directory to
~Ecdat\data
, overwriting the previous version.