# load libraries
library(dygraphs)
library(xts)
library(dplyr)
library(ggplot2)

# load data
# CPIAUCSL - Consumer price index (https://fred.stlouisfed.org/series/CPIAUCSL)
# GDPC1 - Gross Domestic Product (https://fred.stlouisfed.org/series/GDPC1)
# PAYEMS - Total Nonfarm Payroll (https://fred.stlouisfed.org/series/PAYEMS)
# UNRATE - Civilian Unemployment Rate (https://fred.stlouisfed.org/series/UNRATE)
#
# Everything except GDPC1 is monthly data (GDPC1 is quarterly)

cpi_df = read.csv("../data/CPIAUCSL.csv", stringsAsFactors = FALSE)
gdp_df = read.csv("../data/GDPC1.csv", stringsAsFactors = FALSE)
payems_df = read.csv("../data/PAYEMS.csv", stringsAsFactors = FALSE)
unrate_df = read.csv("../data/UNRATE.csv", stringsAsFactors = FALSE)

# convert data to xts format
cpi = xts(cpi_df[,2], order.by = as.Date(cpi_df[,1]))
gdp = xts(gdp_df[,2], order.by = as.Date(gdp_df[,1]))
payems = xts(payems_df[,2], order.by = as.Date(payems_df[,1]))
unrate = xts(unrate_df[,2], order.by = as.Date(unrate_df[,1]))

# plot the graphs for each of the series
dygraph(cpi)
dygraph(gdp)
dygraph(payems)
dygraph(unrate)
# Explore pairwise plots of the series
# merge different series into one dataframe (note: GDP is at quarterly level)
datelist = unique(c(cpi_df$DATE, gdp_df$DATE, payems_df$DATE, unrate_df$DATE))
alldf = data.frame(DATE = datelist, stringsAsFactors = FALSE)
alldf = left_join(alldf, cpi_df, by = "DATE")
alldf = left_join(alldf, payems_df, by = "DATE")
alldf = left_join(alldf, unrate_df, by = "DATE")
alldf = left_join(alldf, gdp_df, by = "DATE")

# Check pairs plot
pairs(alldf[,c(2,3,4,5)])

Here a very simplistic model is applied to the timeseries and used to forecast the next time point. The model just fits a linear model with dependent variable as the current time point with independent variable as previous time point

# Simplistic model based on previous time point

# create a lag variable
cpi_df$CPIAUCSL_lag = lag(cpi_df$CPIAUCSL)

# time series of CPI
dygraph(cpi)
# linear model of current time point with previous time point
ggplot(data = cpi_df, aes(x = CPIAUCSL_lag, y = CPIAUCSL)) + geom_point() + theme_bw()
## Warning: Removed 1 rows containing missing values (geom_point).

#
# Check prediction error by running a model that covers a sliding 12 month period
# covering 24 instances starting with "2017-04-01" (second last time point) and going back
# 
#

thor = 12 # pick time horizon - 12 months
tperiods = 24 # number of time periods 
# index of end dates of models
idxstseq = seq(nrow(cpi_df) - tperiods, nrow(cpi_df) - 1)

# initialize the predicted residual of forecasted time point
predres = rep(NA, tperiods)

# loop over different end dates and fit models with 12 month 
# period ending with each end date
#
for(i in seq_along(idxstseq)){
  
  print(i)
  idxend = idxstseq[i]

  # filter the timeseries for the 12 month period ending for a given end date
  dffilt = cpi_df[(idxend - thor + 1):idxend,]
  
  # fit linear model
  cpi_lm = lm(CPIAUCSL ~ CPIAUCSL_lag, dffilt)
  
  # print model summary
  #summary(cpi_lm)
  
  # store predicted residual
  predres[i] = cpi_df[idxend+1,"CPIAUCSL"] - predict(cpi_lm, newdata = cpi_df[idxend+1,])
  
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10
## [1] 11
## [1] 12
## [1] 13
## [1] 14
## [1] 15
## [1] 16
## [1] 17
## [1] 18
## [1] 19
## [1] 20
## [1] 21
## [1] 22
## [1] 23
## [1] 24
# summary of predicted residuals
summary(predres)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -1.42100 -0.19150  0.20000  0.09029  0.49970  0.84690
# plot of predicted residuals
predres_xts = xts(predres, order.by = as.Date(cpi_df$DATE[idxstseq]))
dygraph(predres_xts)
# forecast for next time point
fcst_cpi = predict(cpi_lm, data.frame(CPIAUCSL_lag = cpi_df$CPIAUCSL[nrow(cpi_df)]))
fcst_cpi
##        1 
## 244.1495
# % change in CPI from one year ago
cpi_1yrago = cpi_df$CPIAUCSL[cpi_df$DATE == "2016-06-01"]
(fcst_cpi - cpi_1yrago)*100/cpi_1yrago
##       1 
## 1.79596

The methodology for other series follows the same as for the CPI series

# Simplistic model based on previous time point

payems_df$PAYEMS_lag = lag(payems_df$PAYEMS)

dygraph(payems)
ggplot(data = payems_df, aes(x = PAYEMS_lag, y = PAYEMS)) + geom_point() + theme_bw()
## Warning: Removed 1 rows containing missing values (geom_point).

# Check prediction error by running a 12 month horizon for 24 ending dates 
# starting with "2017-04-01" (second last time point) and going back
#

thor = 12 # pick time horizon - 12 months
tperiods = 24 # number of time periods 
# idxst sequence
idxstseq = seq(nrow(payems_df) - tperiods, nrow(payems_df) - 1)

predres = rep(NA, tperiods)

for(i in seq_along(idxstseq)){
  
  #print(i)
  idxend = idxstseq[i]
  
  dffilt = payems_df[(idxend - thor + 1):idxend,]
  
  payems_lm = lm(PAYEMS ~ PAYEMS_lag, dffilt)
  summary(payems_lm)
  
  predres[i] = payems_df[idxend+1,"PAYEMS"] - predict(payems_lm, newdata = payems_df[idxend+1,])
  
}

summary(predres)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -144.6000  -43.1100    0.2719   -1.4270   43.5300  150.1000
predres_xts = xts(predres, order.by = as.Date(payems_df$DATE[idxstseq]))
dygraph(predres_xts)
fcst_payems = predict(payems_lm, data.frame(PAYEMS_lag = payems_df$PAYEMS[nrow(payems_df)]))
fcst_payems
##        1 
## 146280.4
# change in employment
(fcst_payems - payems_df$PAYEMS[nrow(payems_df)])*1000
##        1 
## 145416.2
# Simplistic model based on previous time point


gdp_df$GDPC1_lag = lag(gdp_df$GDPC1)

dygraph(gdp)
ggplot(data = gdp_df, aes(x = GDPC1_lag, y = GDPC1)) + geom_point() + theme_bw()
## Warning: Removed 1 rows containing missing values (geom_point).

# Check prediction error by running a 12 qtr horizon for 24 ending dates 
# starting with "2016-10-01" (second last time point) and going back
#

thor = 12 # pick time horizon - 12 qtrs
tperiods = 24 # number of time periods 
# idxst sequence
idxstseq = seq(nrow(gdp_df) - tperiods, nrow(gdp_df) - 1)

predres = rep(NA, tperiods)

for(i in seq_along(idxstseq)){
  
  #print(i)
  idxend = idxstseq[i]
  
  dffilt = gdp_df[(idxend - thor + 1):idxend,]
  
  gdp_lm = lm(GDPC1 ~ GDPC1_lag, dffilt)
  summary(gdp_lm)
  
  predres[i] = gdp_df[idxend+1,"GDPC1"] - predict(gdp_lm, newdata = gdp_df[idxend+1,])
  
}

summary(predres)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -147.400  -41.100   -5.363   22.100   87.280  201.400
predres_xts = xts(predres, order.by = as.Date(gdp_df$DATE[idxstseq]))
dygraph(predres_xts)
fcst_gdp = predict(gdp_lm, data.frame(GDPC1_lag = gdp_df$GDPC1[nrow(gdp_df)]))
fcst_gdp
##        1 
## 16934.48
# real GDP growth rate
(fcst_gdp - gdp_df$GDPC1[nrow(gdp_df)])*100/gdp_df$GDPC1[nrow(gdp_df)]
##         1 
## 0.4322309
# Simplistic model based on previous time point


unrate_df$UNRATE_lag = lag(unrate_df$UNRATE)

dygraph(unrate)
ggplot(data = unrate_df, aes(x = UNRATE_lag, y = UNRATE)) + geom_point() + theme_bw()
## Warning: Removed 1 rows containing missing values (geom_point).

# Check prediction error by running a 12 month horizon for 24 ending dates 
# starting with "2017-04-01" (second last time point) and going back
#

thor = 12 # pick time horizon - 12 qtrs
tperiods = 24 # number of time periods 
# idxst sequence
idxstseq = seq(nrow(unrate_df) - tperiods, nrow(unrate_df) - 1)

predres = rep(NA, tperiods)

for(i in seq_along(idxstseq)){
  
  #print(i)
  idxend = idxstseq[i]
  
  dffilt = unrate_df[(idxend - thor + 1):idxend,]
  
  unrate_lm = lm(UNRATE ~ UNRATE_lag, dffilt)
  summary(unrate_lm)
  
  predres[i] = unrate_df[idxend+1,"UNRATE"] - predict(unrate_lm, newdata = unrate_df[idxend+1,])
  
}

summary(predres)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -0.29700 -0.14270 -0.07128 -0.07633  0.01221  0.14620
predres_xts = xts(predres, order.by = as.Date(unrate_df$DATE[idxstseq]))
dygraph(predres_xts)
fcst_unrate = predict(unrate_lm, data.frame(UNRATE_lag = unrate_df$UNRATE[nrow(unrate_df)]))
fcst_unrate
##       1 
## 4.41338

Session Info

sessionInfo()
## R version 3.3.1 (2016-06-21)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: OS X 10.11.1 (El Capitan)
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] ggplot2_2.2.1    dplyr_0.5.0      xts_0.9-7        zoo_1.7-13      
## [5] dygraphs_1.1.1.4
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_0.12.10     knitr_1.15.1     magrittr_1.5     munsell_0.4.3   
##  [5] colorspace_1.2-6 lattice_0.20-33  R6_2.1.3         plyr_1.8.4      
##  [9] stringr_1.2.0    tools_3.3.1      grid_3.3.1       gtable_0.2.0    
## [13] DBI_0.5-1        htmltools_0.3.6  lazyeval_0.2.0   yaml_2.1.14     
## [17] rprojroot_1.2    digest_0.6.12    assertthat_0.1   tibble_1.2      
## [21] htmlwidgets_0.8  evaluate_0.10    rmarkdown_1.5    labeling_0.3    
## [25] stringi_1.1.5    scales_0.4.1     backports_1.0.5  jsonlite_1.4