# load libraries
library(dygraphs)
library(xts)
library(dplyr)
library(ggplot2)
# load data
# CPIAUCSL - Consumer price index (https://fred.stlouisfed.org/series/CPIAUCSL)
# GDPC1 - Gross Domestic Product (https://fred.stlouisfed.org/series/GDPC1)
# PAYEMS - Total Nonfarm Payroll (https://fred.stlouisfed.org/series/PAYEMS)
# UNRATE - Civilian Unemployment Rate (https://fred.stlouisfed.org/series/UNRATE)
#
# Everything except GDPC1 is monthly data (GDPC1 is quarterly)
cpi_df = read.csv("../data/CPIAUCSL.csv", stringsAsFactors = FALSE)
gdp_df = read.csv("../data/GDPC1.csv", stringsAsFactors = FALSE)
payems_df = read.csv("../data/PAYEMS.csv", stringsAsFactors = FALSE)
unrate_df = read.csv("../data/UNRATE.csv", stringsAsFactors = FALSE)
# convert data to xts format
cpi = xts(cpi_df[,2], order.by = as.Date(cpi_df[,1]))
gdp = xts(gdp_df[,2], order.by = as.Date(gdp_df[,1]))
payems = xts(payems_df[,2], order.by = as.Date(payems_df[,1]))
unrate = xts(unrate_df[,2], order.by = as.Date(unrate_df[,1]))
# plot the graphs for each of the series
dygraph(cpi)
dygraph(gdp)
dygraph(payems)
dygraph(unrate)
# Explore pairwise plots of the series
# merge different series into one dataframe (note: GDP is at quarterly level)
datelist = unique(c(cpi_df$DATE, gdp_df$DATE, payems_df$DATE, unrate_df$DATE))
alldf = data.frame(DATE = datelist, stringsAsFactors = FALSE)
alldf = left_join(alldf, cpi_df, by = "DATE")
alldf = left_join(alldf, payems_df, by = "DATE")
alldf = left_join(alldf, unrate_df, by = "DATE")
alldf = left_join(alldf, gdp_df, by = "DATE")
# Check pairs plot
pairs(alldf[,c(2,3,4,5)])
Here a very simplistic model is applied to the timeseries and used to forecast the next time point. The model just fits a linear model with dependent variable as the current time point with independent variable as previous time point
# Simplistic model based on previous time point
# create a lag variable
cpi_df$CPIAUCSL_lag = lag(cpi_df$CPIAUCSL)
# time series of CPI
dygraph(cpi)
# linear model of current time point with previous time point
ggplot(data = cpi_df, aes(x = CPIAUCSL_lag, y = CPIAUCSL)) + geom_point() + theme_bw()
## Warning: Removed 1 rows containing missing values (geom_point).
#
# Check prediction error by running a model that covers a sliding 12 month period
# covering 24 instances starting with "2017-04-01" (second last time point) and going back
#
#
thor = 12 # pick time horizon - 12 months
tperiods = 24 # number of time periods
# index of end dates of models
idxstseq = seq(nrow(cpi_df) - tperiods, nrow(cpi_df) - 1)
# initialize the predicted residual of forecasted time point
predres = rep(NA, tperiods)
# loop over different end dates and fit models with 12 month
# period ending with each end date
#
for(i in seq_along(idxstseq)){
print(i)
idxend = idxstseq[i]
# filter the timeseries for the 12 month period ending for a given end date
dffilt = cpi_df[(idxend - thor + 1):idxend,]
# fit linear model
cpi_lm = lm(CPIAUCSL ~ CPIAUCSL_lag, dffilt)
# print model summary
#summary(cpi_lm)
# store predicted residual
predres[i] = cpi_df[idxend+1,"CPIAUCSL"] - predict(cpi_lm, newdata = cpi_df[idxend+1,])
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10
## [1] 11
## [1] 12
## [1] 13
## [1] 14
## [1] 15
## [1] 16
## [1] 17
## [1] 18
## [1] 19
## [1] 20
## [1] 21
## [1] 22
## [1] 23
## [1] 24
# summary of predicted residuals
summary(predres)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.42100 -0.19150 0.20000 0.09029 0.49970 0.84690
# plot of predicted residuals
predres_xts = xts(predres, order.by = as.Date(cpi_df$DATE[idxstseq]))
dygraph(predres_xts)
# forecast for next time point
fcst_cpi = predict(cpi_lm, data.frame(CPIAUCSL_lag = cpi_df$CPIAUCSL[nrow(cpi_df)]))
fcst_cpi
## 1
## 244.1495
# % change in CPI from one year ago
cpi_1yrago = cpi_df$CPIAUCSL[cpi_df$DATE == "2016-06-01"]
(fcst_cpi - cpi_1yrago)*100/cpi_1yrago
## 1
## 1.79596
The methodology for other series follows the same as for the CPI series
# Simplistic model based on previous time point
payems_df$PAYEMS_lag = lag(payems_df$PAYEMS)
dygraph(payems)
ggplot(data = payems_df, aes(x = PAYEMS_lag, y = PAYEMS)) + geom_point() + theme_bw()
## Warning: Removed 1 rows containing missing values (geom_point).
# Check prediction error by running a 12 month horizon for 24 ending dates
# starting with "2017-04-01" (second last time point) and going back
#
thor = 12 # pick time horizon - 12 months
tperiods = 24 # number of time periods
# idxst sequence
idxstseq = seq(nrow(payems_df) - tperiods, nrow(payems_df) - 1)
predres = rep(NA, tperiods)
for(i in seq_along(idxstseq)){
#print(i)
idxend = idxstseq[i]
dffilt = payems_df[(idxend - thor + 1):idxend,]
payems_lm = lm(PAYEMS ~ PAYEMS_lag, dffilt)
summary(payems_lm)
predres[i] = payems_df[idxend+1,"PAYEMS"] - predict(payems_lm, newdata = payems_df[idxend+1,])
}
summary(predres)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -144.6000 -43.1100 0.2719 -1.4270 43.5300 150.1000
predres_xts = xts(predres, order.by = as.Date(payems_df$DATE[idxstseq]))
dygraph(predres_xts)
fcst_payems = predict(payems_lm, data.frame(PAYEMS_lag = payems_df$PAYEMS[nrow(payems_df)]))
fcst_payems
## 1
## 146280.4
# change in employment
(fcst_payems - payems_df$PAYEMS[nrow(payems_df)])*1000
## 1
## 145416.2
# Simplistic model based on previous time point
gdp_df$GDPC1_lag = lag(gdp_df$GDPC1)
dygraph(gdp)
ggplot(data = gdp_df, aes(x = GDPC1_lag, y = GDPC1)) + geom_point() + theme_bw()
## Warning: Removed 1 rows containing missing values (geom_point).
# Check prediction error by running a 12 qtr horizon for 24 ending dates
# starting with "2016-10-01" (second last time point) and going back
#
thor = 12 # pick time horizon - 12 qtrs
tperiods = 24 # number of time periods
# idxst sequence
idxstseq = seq(nrow(gdp_df) - tperiods, nrow(gdp_df) - 1)
predres = rep(NA, tperiods)
for(i in seq_along(idxstseq)){
#print(i)
idxend = idxstseq[i]
dffilt = gdp_df[(idxend - thor + 1):idxend,]
gdp_lm = lm(GDPC1 ~ GDPC1_lag, dffilt)
summary(gdp_lm)
predres[i] = gdp_df[idxend+1,"GDPC1"] - predict(gdp_lm, newdata = gdp_df[idxend+1,])
}
summary(predres)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -147.400 -41.100 -5.363 22.100 87.280 201.400
predres_xts = xts(predres, order.by = as.Date(gdp_df$DATE[idxstseq]))
dygraph(predres_xts)
fcst_gdp = predict(gdp_lm, data.frame(GDPC1_lag = gdp_df$GDPC1[nrow(gdp_df)]))
fcst_gdp
## 1
## 16934.48
# real GDP growth rate
(fcst_gdp - gdp_df$GDPC1[nrow(gdp_df)])*100/gdp_df$GDPC1[nrow(gdp_df)]
## 1
## 0.4322309
# Simplistic model based on previous time point
unrate_df$UNRATE_lag = lag(unrate_df$UNRATE)
dygraph(unrate)
ggplot(data = unrate_df, aes(x = UNRATE_lag, y = UNRATE)) + geom_point() + theme_bw()
## Warning: Removed 1 rows containing missing values (geom_point).
# Check prediction error by running a 12 month horizon for 24 ending dates
# starting with "2017-04-01" (second last time point) and going back
#
thor = 12 # pick time horizon - 12 qtrs
tperiods = 24 # number of time periods
# idxst sequence
idxstseq = seq(nrow(unrate_df) - tperiods, nrow(unrate_df) - 1)
predres = rep(NA, tperiods)
for(i in seq_along(idxstseq)){
#print(i)
idxend = idxstseq[i]
dffilt = unrate_df[(idxend - thor + 1):idxend,]
unrate_lm = lm(UNRATE ~ UNRATE_lag, dffilt)
summary(unrate_lm)
predres[i] = unrate_df[idxend+1,"UNRATE"] - predict(unrate_lm, newdata = unrate_df[idxend+1,])
}
summary(predres)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.29700 -0.14270 -0.07128 -0.07633 0.01221 0.14620
predres_xts = xts(predres, order.by = as.Date(unrate_df$DATE[idxstseq]))
dygraph(predres_xts)
fcst_unrate = predict(unrate_lm, data.frame(UNRATE_lag = unrate_df$UNRATE[nrow(unrate_df)]))
fcst_unrate
## 1
## 4.41338
sessionInfo()
## R version 3.3.1 (2016-06-21)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: OS X 10.11.1 (El Capitan)
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ggplot2_2.2.1 dplyr_0.5.0 xts_0.9-7 zoo_1.7-13
## [5] dygraphs_1.1.1.4
##
## loaded via a namespace (and not attached):
## [1] Rcpp_0.12.10 knitr_1.15.1 magrittr_1.5 munsell_0.4.3
## [5] colorspace_1.2-6 lattice_0.20-33 R6_2.1.3 plyr_1.8.4
## [9] stringr_1.2.0 tools_3.3.1 grid_3.3.1 gtable_0.2.0
## [13] DBI_0.5-1 htmltools_0.3.6 lazyeval_0.2.0 yaml_2.1.14
## [17] rprojroot_1.2 digest_0.6.12 assertthat_0.1 tibble_1.2
## [21] htmlwidgets_0.8 evaluate_0.10 rmarkdown_1.5 labeling_0.3
## [25] stringi_1.1.5 scales_0.4.1 backports_1.0.5 jsonlite_1.4