# set api key
# gather data
# Observation window shared by every FRED request below.
start_date <- "1978-01-01"
end_date <- "2024-03-01"

#' Download one FRED series over the study window.
#'
#' Thin wrapper around `fredr()` so each indicator below only states what
#' differs between requests (series id, units, frequency).
#'
#' @param series_id FRED series identifier, e.g. "UNRATE".
#' @param units FRED units code passed to `fredr()`; this script uses "lin"
#'   and "pc1".
#' @param frequency FRED frequency code; "m" (monthly) unless stated.
#' @param from,to Window bounds; default to the globals `start_date` and
#'   `end_date` as they are when the function is called.
#' @return Whatever `fredr()` returns for the request.
fetch_fred_series <- function(series_id, units, frequency = "m",
                              from = start_date, to = end_date) {
  fredr(
    series_id = series_id,
    observation_start = as.Date(from),
    observation_end = as.Date(to),
    frequency = frequency,
    units = units
  )
}

# UMICH Consumer Sentiment Index:
consumer_sentiment <- fetch_fred_series("UMCSENT", units = "lin")
# Unemployment Rate:
unemployment <- fetch_fred_series("UNRATE", units = "lin")
# YOY % Change in Unemployment Rate:
unemployment_change <- fetch_fred_series("UNRATE", units = "pc1")
# YOY % Change in Real Income:
income <- fetch_fred_series("DSPIC96", units = "pc1")
# YOY % Change in CPI:
cpi <- fetch_fred_series("CPIAUCSL", units = "pc1")
# Avg 30-year Mortgage Rate:
mortgage <- fetch_fred_series("MORTGAGE30US", units = "lin")
# YOY % Change in Mortgage Rate:
mortgage_change <- fetch_fred_series("MORTGAGE30US", units = "pc1")
# YOY % Change in Median House Price (requested quarterly, unlike the rest):
house <- fetch_fred_series("MSPUS", units = "pc1", frequency = "q")
# YOY % Change in Personal Consumption:
consumption <- fetch_fred_series("PCE", units = "pc1")
# NBER Recession Indicator:
recession_indicator <- fetch_fred_series("USREC", units = "lin")
# S&P 500 Index (source for the YOY % change computed later).
# The download starts in 1977, one year ahead of the FRED window above.
getSymbols(
  "^GSPC",
  src = "yahoo",
  from = "1977-01-01",
  to = end_date,
  auto.assign = TRUE,
  warnings = FALSE
)
## [1] "GSPC"
# US Dollar Index (source for the YOY % change computed later).
getSymbols(
  "DX-Y.NYB",
  src = "yahoo",
  from = "1977-01-01",
  to = end_date,
  auto.assign = TRUE,
  warnings = FALSE
)
## [1] "DX-Y.NYB"
# process fred data
#' Reduce a FRED result to its date and a descriptively named value column.
#'
#' @param data Data frame with at least `date` and `value` columns.
#' @param name Single string: the column name under which `value` is kept.
#' @return `data` restricted to the columns `date` and `name`, in that order.
process <- function(data, name) {
  stopifnot(is.character(name), length(name) == 1L)
  # Copy the observations under the requested name, then keep only the date
  # and that copy.
  data[[name]] <- data$value
  data[c("date", name)]
}
# Give every FRED download a two-column (date, <indicator>) shape so the
# tables can later be joined on `date`.
consumer_sentiment_modified <- consumer_sentiment %>%
  process("consumer_sentiment")
unemployment_modified <- unemployment %>%
  process("unemployment_rate")
unemployment_change_modified <- unemployment_change %>%
  process("unemployment_rate_change")
income_modified <- income %>%
  process("income")
cpi_modified <- cpi %>%
  process("cpi")
mortgage_modified <- mortgage %>%
  process("mortgage")
mortgage_change_modified <- mortgage_change %>%
  process("mortgage_change")
house_modified <- house %>%
  process("house")
consumption_modified <- consumption %>%
  process("consumption")
recession_indicator_modified <- recession_indicator %>%
  process("recession_indicator")
# breakout quarters to months for house data
house_modified <- house_modified %>%
  # Add a row for every month from the first quarter start through two
  # months past the last quarter start.
  complete(
    date = seq.Date(min(date), max(date) %m+% months(2), by = "month")
  ) %>%
  # complete() leaves the added months as NA; carry each quarter's value
  # forward so every month has an observation.
  fill(house, .direction = "down")
# process sp500 data
# Daily closes -> monthly average close -> YOY % change of that average.
sp500_modified <- as.data.frame(GSPC) %>%
  # the downloaded object keeps its dates as row names; move them to a column
  rownames_to_column("date_day") %>%
  select(date_day, GSPC.Close) %>%
  # map every trading day to the first day of its month ("YYYY-MM" + "-01")
  mutate(
    date = as.Date(paste0(substr(date_day, start = 1, stop = 7), "-01"))
  ) %>%
  group_by(date) %>%
  summarize(avg_close = mean(GSPC.Close)) %>%
  # dplyr::lag is named explicitly so a masking `lag` from another attached
  # package cannot be picked up; lagging 12 rows assumes one row per month
  mutate(
    sp500 = ((avg_close - dplyr::lag(avg_close, 12)) /
      dplyr::lag(avg_close, 12)) * 100
  ) %>%
  select(-avg_close) # %>%
# filter(date >= start_date)
# process dollar_index data
# Daily closes -> monthly average close -> YOY % change of that average.
dollar_index_modified <- as.data.frame(`DX-Y.NYB`) %>%
  # the downloaded object keeps its dates as row names; move them to a column
  rownames_to_column("date_day") %>%
  select(date_day, `DX-Y.NYB.Close`) %>%
  # drop days without a close so they cannot turn a monthly mean into NA
  filter(!is.na(`DX-Y.NYB.Close`)) %>%
  # map every trading day to the first day of its month ("YYYY-MM" + "-01")
  mutate(
    date = as.Date(paste0(substr(date_day, start = 1, stop = 7), "-01"))
  ) %>%
  group_by(date) %>%
  summarize(avg_close = mean(`DX-Y.NYB.Close`)) %>%
  # dplyr::lag is named explicitly so a masking `lag` from another attached
  # package cannot be picked up; lagging 12 rows assumes one row per month
  mutate(
    dollar_index = ((avg_close - dplyr::lag(avg_close, 12)) /
      dplyr::lag(avg_close, 12)) * 100
  ) %>%
  select(-avg_close) # %>%
# filter(date >= start_date)
# join data
# Start from consumer sentiment and left-join each indicator on `date`, in
# the order listed, so the sentiment table decides which months are kept.
join <- Reduce(
  function(joined, indicator) left_join(joined, indicator, by = "date"),
  list(
    unemployment_modified,
    unemployment_change_modified,
    income_modified,
    cpi_modified,
    mortgage_modified,
    mortgage_change_modified,
    house_modified,
    consumption_modified,
    sp500_modified,
    dollar_index_modified,
    recession_indicator_modified
  ),
  consumer_sentiment_modified
)
## # A tibble: 6 × 13
## date consumer_sentiment unemployment_rate unemployment_rate_cha…¹ income
## <date> <dbl> <dbl> <dbl> <dbl>
## 1 1978-01-01 83.7 6.4 -14.7 4.28
## 2 1978-02-01 84.3 6.3 -17.1 5.84
## 3 1978-03-01 78.8 6.3 -14.9 5.24
## 4 1978-04-01 81.6 6.1 -15.3 5.45
## 5 1978-05-01 82.9 6 -14.3 5.36
## 6 1978-06-01 80 5.9 -18.1 4.93
## # ℹ abbreviated name: ¹​unemployment_rate_change
## # ℹ 8 more variables: cpi <dbl>, mortgage <dbl>, mortgage_change <dbl>,
## # house <dbl>, consumption <dbl>, sp500 <dbl>, dollar_index <dbl>,
## # recession_indicator <dbl>
# processing for modeling
# Keep only the months in which every indicator is observed (no NA anywhere
# in the row).
final <- join[rowSums(is.na(join)) == 0, ]
# visualize
# Rename every indicator to its display label, then reshape to one row per
# (date, variable) for faceted plotting.
final_pivoted <- final %>%
  select(
    date,
    `UMICH Consumer Sentiment` = consumer_sentiment,
    `Unemployment Rate` = unemployment_rate,
    `YOY % Change in Unemployment Rate` = unemployment_rate_change,
    `YOY % Change in Median Real Disposable Income` = income,
    `YOY % Change in Personal Consumption` = consumption,
    `YOY % Change in CPI` = cpi,
    `YOY % Change in S&P 500` = sp500,
    `YOY % Change in Dollar Index` = dollar_index,
    `Avg 30-year Mortgage Rate` = mortgage,
    `YOY % Change in Mortgage Rate` = mortgage_change,
    `YOY % Change in Median House Price` = house,
    `NBER Recession Indicator` = recession_indicator
  ) %>%
  pivot_longer(cols = -date, names_to = "variable", values_to = "value")

# Sentiment on its own, for the headline chart.
sentiment <- final_pivoted %>%
  filter(variable == "UMICH Consumer Sentiment")

# First facet grid: sentiment plus labour, income, consumption and prices.
visual1 <- final_pivoted %>%
  filter(
    variable %in% c(
      "UMICH Consumer Sentiment",
      "Unemployment Rate",
      "YOY % Change in Unemployment Rate",
      "YOY % Change in Median Real Disposable Income",
      "YOY % Change in Personal Consumption",
      "YOY % Change in CPI"
    )
  )

# Second facet grid: markets, mortgage, housing and the recession flag.
visual2 <- final_pivoted %>%
  filter(
    variable %in% c(
      "YOY % Change in S&P 500",
      "YOY % Change in Dollar Index",
      "Avg 30-year Mortgage Rate",
      "YOY % Change in Mortgage Rate",
      "YOY % Change in Median House Price",
      "NBER Recession Indicator"
    )
  )
# create visuals
# Headline chart: consumer sentiment on its own.
ggplot(sentiment, aes(x = date, y = value, color = variable)) +
  geom_line() +
  scale_x_date(date_breaks = "5 year", date_labels = "%Y") +
  labs(
    title = "UMICH Consumer Sentiment Over Time",
    x = "",
    y = "Consumer Sentiment Index",
    color = "Variable",
    caption = "Source: Federal Reserve Bank of St Louis (FRED) | Visual by Michael Dunphy, @mtdunphy"
  ) +
  theme_bw() +
  theme(
    legend.position = "none",
    plot.caption = element_text(size = 7, hjust = 0)
  )

# Facet grid 1: one panel per indicator, each with its own y scale.
ggplot(visual1, aes(x = date, y = value, color = variable)) +
  geom_line() +
  facet_wrap(~variable, scales = "free_y", ncol = 2) +
  labs(
    title = "Economic Indicators Over Time",
    x = "",
    y = "Value",
    color = "Variable",
    caption = "Source: Federal Reserve Bank of St Louis (FRED), Yahoo Finance | Visual by Michael Dunphy, @mtdunphy"
  ) +
  theme_minimal() +
  theme(
    legend.position = "none",
    plot.caption = element_text(size = 7, hjust = 0)
  )

# Facet grid 2: same layout for the remaining indicators.
ggplot(visual2, aes(x = date, y = value, color = variable)) +
  geom_line() +
  facet_wrap(~variable, scales = "free_y", ncol = 2) +
  labs(
    title = "Economic Indicators Over Time",
    x = "",
    y = "Value",
    color = "Variable",
    caption = "Source: Federal Reserve Bank of St Louis (FRED), Yahoo Finance | Visual by Michael Dunphy, @mtdunphy"
  ) +
  theme_minimal() +
  theme(
    legend.position = "none",
    plot.caption = element_text(size = 7, hjust = 0)
  )
# eda
# correlation matrix
# Pairwise correlations between all indicators (date excluded).
cor_matrix <- final %>%
  select(-date) %>%
  cor()
# multivariate forecast:
# Training window: every month before January 2017.
train <- final %>%
  as_tsibble() %>%
  filter(date < "2017-01-01")
## Using `date` as index variable.
# The "test" window deliberately covers the whole sample (1978 onward), so
# fitted and out-of-sample values are drawn as one series.
test <- final %>%
  as_tsibble() %>%
  filter(date >= "1978-01-01")
## Using `date` as index variable.
# Time-series regression of sentiment on all indicators plus seasonal and
# trend terms.
fit <- model(
  train,
  TSLM(consumer_sentiment ~ . - date - consumer_sentiment + season() + trend())
)
# Ordinary lm on the same training rows, without the season/trend terms.
check <- lm(consumer_sentiment ~ . - date, data = train)
## Call:
## lm(formula = consumer_sentiment ~ . - date, data = train)
## Residuals:
## Min 1Q Median 3Q Max
## -20.2296 -3.5643 0.2821 3.6820 15.8167
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 105.609873 1.333508 79.197 < 2e-16 ***
## unemployment_rate -4.662906 0.189088 -24.660 < 2e-16 ***
## unemployment_rate_change 0.010160 0.024373 0.417 0.676989
## income 0.607496 0.191557 3.171 0.001620 **
## cpi -3.501803 0.190017 -18.429 < 2e-16 ***
## mortgage 1.674235 0.142533 11.746 < 2e-16 ***
## mortgage_change 0.007035 0.024968 0.282 0.778261
## house 0.231323 0.062763 3.686 0.000256 ***
## consumption 0.803399 0.234420 3.427 0.000665 ***
## sp500 0.137487 0.019165 7.174 2.97e-12 ***
## dollar_index 0.082948 0.026574 3.121 0.001914 **
## recession_indicator -5.001298 1.092625 -4.577 6.08e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Residual standard error: 5.268 on 456 degrees of freedom
## Multiple R-squared: 0.8327, Adjusted R-squared: 0.8287
## F-statistic: 206.3 on 11 and 456 DF, p-value: < 2.2e-16
## Breusch-Godfrey test for serial correlation of order up to 15
## data: Residuals
## LM test = 226.52, df = 15, p-value < 2.2e-16
# Forecast sentiment for every row of `test` from the fitted regression.
forecast_consumption <- forecast(fit, new_data = test)
# Draw the forecast on top of the full observed series.
forecast_consumption %>%
  autoplot(as_tsibble(final)) +
  labs(
    level = "Level",
    x = "",
    y = "UMICH Consumer Sentiment",
    title = "Expected vs Actual Consumer Sentiment from 1978 to 2024",
    subtitle = "Based on Economic Indicators from 1978-2016",
    # Caption fixes: "unemployement" typo, and the training window ends in
    # Dec 2016 because the split keeps dates strictly before 2017-01-01.
    caption = paste0(
      "Note: Economic indicators used in estimating consumer sentiment include unemployment rate, YOY % change in unemployment rate,\n",
      "YOY % change in median real disposable income, YOY % change in personal consumption, YOY % change in consumer price index,\n",
      "YOY % change in S&P 500 index, YOY % change in dollar index, Avg 30-year mortgage rate, YOY % change in mortgage rate,\n",
      "YOY % change in median house price, and NBER recession indicator. Training data is monthly from Jan 1978 to Dec 2016.\n",
      "Source: Federal Reserve Bank of St Louis (FRED), Yahoo Finance | Visual by Michael Dunphy, @mtdunphy"
    )
  ) +
  scale_x_date(date_breaks = "5 year", date_labels = "%Y") +
  theme_bw() +
  theme(
    plot.caption = element_text(size = 7, hjust = 0),
    plot.subtitle = element_text(size = 10, hjust = 0)
  )
## Using `date` as index variable.
# narrow date range
# NOTE: this reassigns the global `start_date`, which earlier held the start
# of the FRED download window; from here on it marks the forecast start.
start_date <- "2017-01-01"
# Observed series shown behind the forecast.
plot_range <- as_tsibble(final) %>%
  filter(date >= start_date)
## Using `date` as index variable.
# Rows to forecast: same window as the plotted range.
test <- as_tsibble(final) %>%
  filter(date >= start_date)
## Using `date` as index variable.
# Same specification and training data as the full-range model.
fit <- train %>%
  model(
    TSLM(consumer_sentiment ~ . - date - consumer_sentiment + season() + trend())
  )
forecast_consumption <- forecast(fit, new_data = test)
forecast_consumption %>%
  autoplot(plot_range) +
  labs(
    level = "Level",
    x = "",
    y = "UMICH Consumer Sentiment",
    title = "Expected vs Actual Consumer Sentiment from 2017 to 2024",
    subtitle = "Based on Economic Indicators from 1978-2016",
    # Caption fixes: "unemployement" typo, and the training window ends in
    # Dec 2016 because the split keeps dates strictly before 2017-01-01.
    caption = paste0(
      "Note: Economic indicators used in estimating consumer sentiment include unemployment rate, YOY % change in unemployment rate,\n",
      "YOY % change in median real disposable income, YOY % change in personal consumption, YOY % change in consumer price index,\n",
      "YOY % change in S&P 500 index, YOY % change in dollar index, Avg 30-year mortgage rate, YOY % change in mortgage rate,\n",
      "YOY % change in median house price, and NBER recession indicator. Training data is monthly from Jan 1978 to Dec 2016.\n",
      "Source: Federal Reserve Bank of St Louis (FRED), Yahoo Finance | Visual by Michael Dunphy, @mtdunphy"
    )
  ) +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
  theme_bw() +
  theme(
    plot.caption = element_text(size = 7, hjust = 0),
    plot.subtitle = element_text(size = 10, hjust = 0)
  )
# adding google trends
# news category is represented with 16, all categories is 0
# search1 <- gtrends(c("inflation"), category = 0, time= "all", geo = "US")$interest_over_time %>%
# select(date, keyword, hits)
# search2 <- gtrends(c("recession"), category = 0, time= "all", geo = "US")$interest_over_time %>%
# select(date, keyword, hits)
# search3 <- gtrends(c("prices"), category = 0, time= "all", geo = "US")$interest_over_time %>%
# select(date, keyword, hits)
# search4 <- gtrends(c("interest rates"), category = 0, time= "all", geo = "US")$interest_over_time %>%
# select(date, keyword, hits)
# search5 <- gtrends(c("layoff"), category = 0, time= "all", geo = "US")$interest_over_time %>%
# select(date, keyword, hits)
# search6 <- gtrends(c("unemployment"), category = 0, time= "all", geo = "US")$interest_over_time %>%
# select(date, keyword, hits)
# search <- union(search1, union(search2, union(search3, union(search4, union(search5, search6)))))
# write.csv(search, "GTrends Exported - All.csv")
# search1_news <- gtrends(c("inflation"), category = 16, time= "all", geo = "US")$interest_over_time %>%
# select(date, keyword, hits)
# search2_news <- gtrends(c("recession"), category = 16, time= "all", geo = "US")$interest_over_time %>%
# select(date, keyword, hits)
# search3_news <- gtrends(c("prices"), category = 16, time= "all", geo = "US")$interest_over_time %>%
# select(date, keyword, hits)
# search4_news <- gtrends(c("interest rates"), category = 16, time= "all", geo = "US")$interest_over_time %>%
# select(date, keyword, hits)
# search5_news <- gtrends(c("layoff"), category = 16, time= "all", geo = "US")$interest_over_time %>%
# select(date, keyword, hits)
# search6_news <- gtrends(c("unemployment"), category = 16, time= "all", geo = "US")$interest_over_time %>%
# select(date, keyword, hits)
# search_news <- union(search1_news, union(search2_news, union(search3_news, union(search4_news, union(search5_news, search6_news)))))
# write.csv(search_news, "GTrends Exported - News.csv")
# Load the exported Google Trends series (all categories).
search <- read.csv("GTrends Exported - All.csv") %>%
  # Keep only the fields used downstream. NOTE(review): if the file was
  # written by the commented-out write.csv() call above, it also carries a
  # row-name column that would otherwise leak into the joins and models.
  select(date, keyword, hits) %>%
  mutate(date = as.Date(date)) %>%
  filter(date <= end_date)

# One row per date, one column per keyword. `id_cols` makes the row identity
# explicit so no other column can keep the rows from collapsing.
search_pivoted <- search %>%
  pivot_wider(id_cols = date, names_from = keyword, values_from = hits) %>%
  select(date, inflation, recession, prices, layoff, `interest rates`, unemployment)

# Interest over time, one facet per keyword.
p <- ggplot() +
  geom_line(data = search, aes(x = date, y = hits, col = keyword)) +
  labs(
    title = "Google Search Trend for Keywords from 2004 to 2024",
    subtitle = "Among All Categories in US, Normalized 0 to 100, Monthly",
    x = "",
    y = "Interest over time",
    color = "Keyword",
    # the former `legend = 'Keyword'` entry was dropped: `legend` is not an
    # aesthetic, so the label was never used
    caption = "Source: Google | Visual by Michael Dunphy, @mtdunphy"
  ) +
  scale_x_date(date_breaks = "3 year", date_labels = "%Y") +
  theme_minimal() +
  theme(
    legend.position = "none",
    plot.caption = element_text(size = 7, hjust = 0),
    plot.subtitle = element_text(size = 10, hjust = 0)
  )
p + facet_wrap(~keyword, scales = "free_y", ncol = 2)
# combine with economic indicators
# Wide form: one row per month from 2004 on, one column per keyword.
combined_wide <- left_join(
  filter(final, date >= "2004-01-01"),
  search_pivoted,
  by = "date"
)
# Long form: the indicator columns repeated for each matching search row.
combined_long <- left_join(
  filter(final, date >= "2004-01-01"),
  search,
  by = "date"
)
# correlation matrix
#cor_matrix <- cor(combined_wide %>% select(-date))
# build function to visualize model predictions for each keyword
#' Fit the sentiment regression for one search keyword and plot its forecast.
#'
#' Rows for `name` are split at `split_date`: earlier rows train a `TSLM` of
#' `consumer_sentiment` on every remaining column plus season and trend
#' terms, later rows are forecast and plotted against the observed values.
#'
#' @param data Long table with `date`, `keyword`, `consumer_sentiment` and
#'   the predictor columns (e.g. `combined_long`).
#' @param name Keyword whose rows are modelled.
#' @param split_date First date of the forecast window; rows strictly before
#'   it are used for training.
#' @return The ggplot object comparing forecast and observed sentiment.
visualize_model <- function(data, name, split_date = "2017-01-01") {
  keyword_rows <- data %>%
    filter(keyword == name) %>%
    select(-keyword)
  # Fail with a clear message instead of an obscure modelling error.
  if (nrow(keyword_rows) == 0) {
    stop("No rows found for keyword '", name, "'.", call. = FALSE)
  }

  # model() and forecast() need tsibbles; the index is named explicitly.
  train <- keyword_rows %>%
    filter(date < split_date) %>%
    as_tsibble(index = date)
  test <- keyword_rows %>%
    filter(date >= split_date) %>%
    as_tsibble(index = date)

  fit <- train %>%
    model(
      TSLM(consumer_sentiment ~ . - date - consumer_sentiment + season() + trend())
    )
  forecast_consumption <- forecast(fit, new_data = test)

  # Fixed y limits keep the per-keyword panels comparable side by side.
  forecast_consumption %>%
    autoplot(test) +
    scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
    ylim(35, 145) +
    labs(
      y = "UMICH Consumer Sentiment",
      subtitle = paste0("Keyword: ", str_to_title(name))
    ) +
    theme_bw() +
    theme(
      legend.position = "none",
      axis.title.x = element_blank(),
      axis.title.y = element_text(size = 8),
      plot.subtitle = element_text(size = 10, hjust = 0)
    )
}
# One forecast panel per keyword (all-categories search data).
p1 <- combined_long %>% visualize_model("recession")
p2 <- combined_long %>% visualize_model("prices")
p3 <- combined_long %>% visualize_model("inflation")
p4 <- combined_long %>% visualize_model("interest rates")
p5 <- combined_long %>% visualize_model("layoff")
p6 <- combined_long %>% visualize_model("unemployment")
# create grids of two
# Side-by-side pairs: recession/interest rates, prices/inflation,
# layoff/unemployment.
grid1 <- plot_grid(p1, p4, nrow = 1, ncol = 2)
grid2 <- plot_grid(p2, p3, nrow = 1, ncol = 2)
grid3 <- plot_grid(p5, p6, nrow = 1, ncol = 2)
# function to format and label grids
#' Wrap a two-panel grid with the shared title, subtitle and source note.
#'
#' @param grid A plot grid, e.g. the result of `plot_grid()`.
#' @param category Search category label inserted into the subtitle and the
#'   note (the script passes "All" or "News").
#' @return The annotated drawing, returned visibly so a top-level call
#'   renders it.
visualize_grids <- function(grid, category) {
  # Fixes in the note: "unemployement" typo, and the training window ends in
  # Dec 2016 because the models train on dates strictly before 2017-01-01.
  note <- paste0(
    "Note: Google search trend is among ", category, " categories in the US. Economic indicators used in estimating consumer sentiment include\n",
    "unemployment rate, YOY % change in unemployment rate, YOY % change in median real disposable income, YOY % change in\n",
    "personal consumption, YOY % change in consumer price index, YOY % change in S&P 500 index, YOY % change in dollar index,\n",
    "Avg 30-year mortgage rate, YOY % change in mortgage rate, YOY % change in median house price, and NBER recession indicator.\n",
    "Training data is monthly from Jan 2004 to Dec 2016.\n",
    "Source: Federal Reserve Bank of St Louis (FRED), Yahoo Finance, Google | Visual by Michael Dunphy, @mtdunphy"
  )

  visual <- ggdraw() +
    # the grid fills the middle band; title goes above it, the note below
    draw_plot(grid, x = 0, y = .2, width = 1, height = .7) +
    theme(plot.margin = margin(t = 20, r = 5, b = 30, l = 5)) +
    draw_label(
      "Expected vs Actual Consumer Sentiment from 2017 to 2024",
      x = .05, y = 1, size = 14, hjust = 0
    ) +
    draw_label(
      paste0(
        "Based on Economic Indicators + Google Search Trend among ",
        category, " Categories"
      ),
      x = .05, y = .94, size = 10, hjust = 0
    ) +
    # the stray empty argument (", ,") of the original call is removed
    draw_label(note, x = .05, y = 0.05, size = 8, hjust = 0)

  visual
}
# Render the three annotated grids for the all-categories search data.
visualize_grids(grid = grid1, category = "All")
visualize_grids(grid = grid2, category = "All")
visualize_grids(grid = grid3, category = "All")
# keywords in the context of news
# Load the exported Google Trends series (news category); this replaces the
# all-categories `search`, `search_pivoted` and `p` objects.
search <- read.csv("GTrends Exported - News.csv") %>%
  # Keep only the fields used downstream. NOTE(review): if the file was
  # written by the commented-out write.csv() call above, it also carries a
  # row-name column that would otherwise leak into the joins and models.
  select(date, keyword, hits) %>%
  mutate(date = as.Date(date)) %>%
  filter(date <= end_date)

# One row per date, one column per keyword. `id_cols` makes the row identity
# explicit so no other column can keep the rows from collapsing.
search_pivoted <- search %>%
  pivot_wider(id_cols = date, names_from = keyword, values_from = hits) %>%
  select(date, `inflation`, recession, prices)

# Interest over time, one facet per keyword.
p <- ggplot() +
  geom_line(data = search, aes(x = date, y = hits, col = keyword)) +
  labs(
    title = "Google Search Trend for Keywords from 2004 to 2024",
    subtitle = "Among News Categories in US, Normalized 0 to 100, Monthly",
    x = "",
    y = "Interest over time",
    color = "Keyword",
    # the former `legend = 'Keyword'` entry was dropped: `legend` is not an
    # aesthetic, so the label was never used
    caption = "Source: Google | Visual by Michael Dunphy, @mtdunphy"
  ) +
  scale_x_date(date_breaks = "3 year", date_labels = "%Y") +
  theme_minimal() +
  theme(
    legend.position = "none",
    plot.caption = element_text(size = 7, hjust = 0),
    plot.subtitle = element_text(size = 10, hjust = 0)
  )
p + facet_wrap(~keyword, scales = "free_y", ncol = 2)
# combine with economic indicators
# Wide form: one row per month from 2004 on, one column per keyword.
combined_wide <- left_join(
  filter(final, date >= "2004-01-01"),
  search_pivoted,
  by = "date"
)
# Long form: the indicator columns repeated for each matching search row.
combined_long <- left_join(
  filter(final, date >= "2004-01-01"),
  search,
  by = "date"
)
# correlation matrix
#cor_matrix <- cor(combined_wide %>% select(-date))
# One forecast panel per keyword (news-category search data).
p1 <- combined_long %>% visualize_model("recession")
p2 <- combined_long %>% visualize_model("prices")
p3 <- combined_long %>% visualize_model("inflation")
p4 <- combined_long %>% visualize_model("interest rates")
p5 <- combined_long %>% visualize_model("layoff")
p6 <- combined_long %>% visualize_model("unemployment")
# Side-by-side pairs: recession/interest rates, prices/inflation,
# layoff/unemployment.
grid1 <- plot_grid(p1, p4, nrow = 1, ncol = 2)
grid2 <- plot_grid(p2, p3, nrow = 1, ncol = 2)
grid3 <- plot_grid(p5, p6, nrow = 1, ncol = 2)
# Render the three annotated grids for the news-category search data.
visualize_grids(grid = grid1, category = "News")
visualize_grids(grid = grid2, category = "News")
visualize_grids(grid = grid3, category = "News")