|
import streamlit as st |
|
import pandas as pd |
|
import numpy as np |
|
from PIL import Image |
|
import seaborn as sns |
|
import codecs |
|
import streamlit.components.v1 as components |
|
import dagshub |
|
import matplotlib.pyplot as plt |
|
|
|
from sklearn.model_selection import train_test_split |
|
from sklearn.linear_model import LinearRegression |
|
from sklearn.ensemble import RandomForestRegressor |
|
from sklearn import metrics |
|
|
|
from shapash.explainer.smart_explainer import SmartExplainer |
|
from prophet import Prophet |
|
import mlflow |
|
import mlflow.sklearn |
|
from mlflow import log_metric, log_param, log_artifact |
|
from sklearn.model_selection import train_test_split, GridSearchCV |
|
import joblib |
|
from datetime import datetime |
|
from prophet.plot import plot_components_plotly |
|
|
|
|
|
|
|
# Global look-and-feel overrides injected as raw HTML/CSS: light-blue page and
# widget backgrounds, red buttons, blue titles, green paragraph/subheader text.
# NOTE(review): the `.css-*` selectors look like auto-generated class names;
# confirm they still match the Streamlit version this app is deployed with.
CUSTOM_CSS = """
<style>
.css-18e3th9 {
background-color: #f0f8ff; /* Light white/blue background */
}
.css-1d391kg {
background-color: #f0f8ff; /* For widgets */
}
.stButton button {
background-color: #ff4b4b; /* Red buttons */
color: white;
}
.css-1offfwp h1 {
color: #1e90ff; /* Blue titles */
}
.stMarkdown p, .stMarkdown h2, .stMarkdown h3 {
color: #2e8b57; /* U.S.-themed green text for subheaders */
}
</style>
"""

# unsafe_allow_html is required so the <style> tag is emitted instead of escaped.
st.markdown(CUSTOM_CSS, unsafe_allow_html=True)
|
|
|
|
|
# ---- Page header: title plus banner image --------------------------------
st.title("U.S. Fed's Next Move Prediction")

# Despite its name, `image_path` holds the opened PIL image, not a path.
image_path = Image.open("fed.png")
st.image(image_path, width=400)

# ---- Sidebar navigation ---------------------------------------------------
# The selected label is compared against these exact strings further down to
# decide which page section is rendered.
PAGE_NAMES = [
    'Business Case',
    'Data Exploration & Visualization',
    'Prediction & Feature Importance',
    'Hyperparameter Tuning',
    'Conclusions and Data Insights',
]
app_page = st.sidebar.selectbox("Select Page", PAGE_NAMES)

# ---- Raw dataset ----------------------------------------------------------
# Loaded unconditionally, before the page dispatch, so `df` exists for
# whichever page is selected.
df = pd.read_csv('econdataset.csv')
|
|
|
if app_page == 'Business Case':
    # Static page: objective, guiding questions, the two modelling approaches,
    # and who benefits. Content is kept as data and rendered in order.
    st.title("1. Business Case")

    st.subheader("Objective:")
    st.write("The purpose of this dashboard is to analyze the Federal Reserve’s interest rate decisions from 1955 to 2024 and explore potential relationships between various economic indicators and the Federal Funds Rate. The dashboard applies time series analysis to forecast future Federal Funds Rate changes and utilizes linear regression to identify and quantify relationships between key economic variables and monetary policy decisions. These tools provide a comprehensive view of historical trends and predictive insights, enabling users to better understand the dynamics of Federal Reserve interest rate adjustments.")

    st.subheader("Key Questions:")
    key_questions = (
        "1. How have key economic indicators (such as unemployment rate, inflation rate, CPI, and GDP) evolved over the past decades, and how do these trends correlate with the Federal Reserve’s interest rate decisions?",
        "2. What patterns emerge in the Federal Funds Rate over time, and how do other variables such as stock market indices (S&P 500, DOW Jones), bond yields, and GDP relate to these changes?",
        "3. Can we establish reliable relationships between economic indicators (e.g., unemployment rate, inflation, CPI, S&P 500, Bond Yield, Real GDP) to predict future Federal Reserve interest rate adjustments?",
    )
    for question in key_questions:
        st.write(question)

    st.subheader("Use of Analytical Models")
    st.write("This dashboard showcases two key analytical approaches to provide insights:")
    # (subheader, description) pairs, one per modelling approach.
    analytical_models = (
        (
            "1.Time Series Analysis:",
            "Time series models are employed to forecast future Federal Funds Rate changes by analyzing historical trends in the data. These forecasts help identify patterns that may indicate upcoming rate hikes or cuts.",
        ),
        (
            "2.Linear Regression:",
            "Linear regression models are used to uncover relationships between variables such as unemployment rate, inflation, bond yields, and the Federal Funds Rate. This analysis highlights the economic factors most closely associated with interest rate changes, providing a framework for understanding how these decisions are influenced by broader economic conditions.",
        ),
    )
    for model_heading, model_description in analytical_models:
        st.subheader(model_heading)
        st.write(model_description)

    st.subheader("Relevance and Value")
    st.write("By integrating predictive analytics, the dashboard provides valuable insights for businesses and decision-makers:")
    # (audience label, benefit) pairs; both are rendered as plain text.
    audience_benefits = (
        (
            "For Businesses:",
            "Understanding and predicting interest rates is crucial for managing borrowing costs, planning capital investments, and making strategic financial decisions.",
        ),
        (
            "For Financial Institutions:",
            "Insights into how economic conditions drive rate changes can improve decision-making around interest-sensitive products, such as loans and mortgages.",
        ),
        (
            "For Investors:",
            "Predicting rate changes provides an advantage in portfolio management, particularly in optimizing the allocation of equities, bonds, and other interest rate-sensitive assets. Ultimately, understanding the dynamics of Federal Reserve decisions can inform strategies across multiple sectors.",
        ),
    )
    for audience_label, audience_benefit in audience_benefits:
        st.write(audience_label)
        st.write(audience_benefit)
|
|
|
if app_page == 'Data Exploration & Visualization':
    # Page 2: preview the raw data, clean it in place, summarise it, optionally
    # embed a pre-generated HTML report, and chart two user-selected columns.
    st.title("2. Data Exploration & Visualization")

    st.write("Sample dataset loaded:")
    st.dataframe(df.head(5))

    st.write("Cleaning the data:")

    # Strip "$" and "," so the price/GDP columns can be cast to float.
    # The pattern is a raw string: "\$" in a plain literal is an invalid escape
    # sequence and triggers a warning on recent Python versions.
    for column in ['S&P 500 Price', 'Nominal GDP Index (in billion USD)', 'Real GDP Index', 'DOW Jones Price']:
        df[column] = df[column].replace(r'[\$,]', '', regex=True).astype(float)

    # For every year up to and including 1991, copy that year's January GDP
    # figures onto the remaining months of the same year.
    for year in df[df['Year'] <= 1991]['Year'].unique():
        in_year = df['Year'] == year
        january_rows = df.loc[in_year & (df['Month'] == 'January')]
        if january_rows.empty:
            # No January reference row for this year: nothing to copy from.
            continue
        other_months = in_year & (df['Month'] != 'January')
        for gdp_column in ('Nominal GDP Index (in billion USD)', 'Real GDP Index'):
            df.loc[other_months, gdp_column] = january_rows[gdp_column].values[0]

    # Any row still containing a missing value is discarded.
    df.dropna(inplace=True)

    cleaning_summary = {
        "dtypes_after_conversion": df.dtypes,
        "missing_values_after_conversion": df.isnull().sum()
    }
    st.write(cleaning_summary)

    st.subheader("01 Description of the dataset")
    st.dataframe(df.describe())
    st.write("This dataset contains key statistics from various observations.")

    st.subheader("02 Missing values")
    # Percentage of missing cells per column (computed after the dropna above).
    dfnull = df.isnull().sum() / len(df) * 100
    st.write(dfnull)
    if dfnull.sum() == 0:
        st.success("No missing values found!")

    st.write("Generate an automated report:")
    if st.button("Generate Report"):
        st.balloons()

        def read_html_report(file_path):
            """Return the UTF-8 text content of the HTML file at *file_path*."""
            with open(file_path, 'r', encoding="utf-8") as f:
                return f.read()

        try:
            html_report = read_html_report("report.html")
        except FileNotFoundError:
            # Show a readable message instead of a stack trace when the
            # pre-generated report is not shipped with the app.
            st.error("The report file 'report.html' could not be found.")
        else:
            st.components.v1.html(html_report, height=1000, scrolling=True)

    st.write("Visualize key relationships and importance of variables.")

    list_columns = df.columns
    values = st.multiselect("Select two variables to compare:", list_columns, ["Fed Effective Funds Rate", "Bond Yield (US 10Y TN)"])

    # The widget lets the user clear the selection, so guard the indexing.
    # Only the first two selected columns are plotted (x, then y).
    if len(values) < 2:
        st.warning("Please select two variables to compare.")
    else:
        st.line_chart(df, x=values[0], y=values[1])
        st.bar_chart(df, x=values[0], y=values[1])
|
|
|
if app_page == 'Prediction & Feature Importance':
    # Page 3/4: fit a linear regression and a random forest on user-selected
    # features, run two Prophet forecasts on the funds-rate series, then
    # explain the random forest with Shapash.
    st.title("3. Prediction")

    # Strip "$" and "," so the price/GDP columns can be cast to float.
    # Raw string: "\$" in a plain literal is an invalid escape sequence.
    for column in ['S&P 500 Price', 'Nominal GDP Index (in billion USD)', 'Real GDP Index', 'DOW Jones Price']:
        df[column] = df[column].replace(r'[\$,]', '', regex=True).astype(float)

    # Snapshot for the time-series section, taken BEFORE the GDP fill and the
    # month encoding below: incomplete rows are dropped and 'Month' still
    # holds month names. `.copy()` makes it an independent frame so the
    # columns added to it later do not trigger chained-assignment warnings.
    df2 = df.dropna().copy()

    # For every year up to and including 1991, copy that year's January GDP
    # figures onto the remaining months of the same year.
    for year in df[df['Year'] <= 1991]['Year'].unique():
        in_year = df['Year'] == year
        january_rows = df.loc[in_year & (df['Month'] == 'January')]
        if january_rows.empty:
            # No January reference row for this year: nothing to copy from.
            continue
        other_months = in_year & (df['Month'] != 'January')
        for gdp_column in ('Nominal GDP Index (in billion USD)', 'Real GDP Index'):
            df.loc[other_months, gdp_column] = january_rows[gdp_column].values[0]
    df.dropna(inplace=True)

    # Encode month names as 1..12 (categorical codes are 0-based, hence +1).
    df['Month'] = pd.Categorical(df['Month'], categories=[
        'January', 'February', 'March', 'April', 'May', 'June',
        'July', 'August', 'September', 'October', 'November', 'December'
    ], ordered=True)
    df['Month'] = df['Month'].cat.codes + 1

    st.write("Now, we have all our numerical features ready to be used in our model")
    st.dataframe(df.head())

    # Every column except the target is offered as a candidate feature.
    list_columns = df.columns.drop("Fed Effective Funds Rate")
    input_lr = st.multiselect("Select variables:", list_columns, ["Bond Yield (US 10Y TN)", "Unemployment Rate"])

    # ---- Regression models ------------------------------------------------
    # The widget lets the user clear the selection; fitting on zero features
    # would raise, so the regression (and its explanation below) is skipped.
    if input_lr:
        X = df[input_lr]
        y = df["Fed Effective Funds Rate"]

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        lr_model = LinearRegression()
        lr_model.fit(X_train, y_train)

        rf_model = RandomForestRegressor(random_state=42)
        rf_model.fit(X_train, y_train)

        lr_pred = lr_model.predict(X_test)
        rf_pred = rf_model.predict(X_test)

        # Align both prediction vectors with the true values on the test index.
        predictions_df_lr = pd.Series(lr_pred, index=y_test.index, name="LR Predictions")
        predictions_df_rf = pd.Series(rf_pred, index=y_test.index, name="RF Predictions")
        result = pd.concat([predictions_df_lr, y_test, predictions_df_rf], axis=1)

        st.dataframe(result)

        st.subheader("Model Performance on Test Data")
        st.write(f"Linear Regression RMSE: **{metrics.root_mean_squared_error(y_test, lr_pred):.2f}**")
        st.write(f"Random Forest RMSE: **{metrics.root_mean_squared_error(y_test, rf_pred):.2f}**")

        st.bar_chart(result)
    else:
        st.warning("Select at least one variable to train the regression models.")

    # ---- Prophet: forecast beyond the end of the data ----------------------
    st.subheader("Time Series Modeling using FB Prophet")
    st.write("Dropping all values before January 1992")
    # Build a date from Year / Month name / fixed day 15, then keep 1992 onward.
    df2['Day'] = 15
    df2['ds'] = pd.to_datetime(df2[['Year', 'Month', 'Day']].astype(str).agg('-'.join, axis=1))
    df2 = df2[(df2['ds'] >= "1992-01-15 00:00:00")]

    # Prophet expects exactly the columns 'ds' (date) and 'y' (value).
    df_fp = df2[['ds', 'Fed Effective Funds Rate']].rename(columns={'Fed Effective Funds Rate': 'y'})
    st.dataframe(df_fp)

    st.write("Future predictions after September 2024")
    # NOTE(review): weekly seasonality is enabled although the series appears
    # to have one observation per month - confirm this is intended.
    fp_model = Prophet(weekly_seasonality=True)
    fp_model.fit(df_fp)
    # NOTE(review): newer pandas releases deprecate the 'M' alias in favour of
    # 'ME'; left unchanged to stay compatible with the installed version.
    future = fp_model.make_future_dataframe(periods=48, freq='M')

    fp_pred = fp_model.predict(future)
    # Only the trend component is displayed (not the full 'yhat' forecast).
    fp_pred = fp_pred[["ds", "trend"]]
    fp_pred = fp_pred[fp_pred['ds'] > "2024-09-15 00:00:00"]
    st.dataframe(fp_pred)

    st.line_chart(fp_pred, x='ds', y='trend')

    # ---- Prophet: train up to Jan 2020, forecast the following 57 months ----
    st.write("Time Series Predictions between 2020 and 2024")
    df2 = df2[(df2['ds'] >= "1992-01-15 00:00:00") & (df2['ds'] <= "2020-01-15 00:00:00")]

    df_fp = df2[['ds', 'Fed Effective Funds Rate']].rename(columns={'Fed Effective Funds Rate': 'y'})
    st.dataframe(df_fp)

    fp_model = Prophet()
    fp_model.fit(df_fp)
    future = fp_model.make_future_dataframe(periods=57, freq='M')

    fp_pred = fp_model.predict(future)
    chart_fp_pred = fp_pred  # full forecast frame, needed for the components plot
    fp_pred = fp_pred[["ds", "trend"]]
    fp_pred = fp_pred[fp_pred['ds'] > "2020-01-15 00:00:00"]
    st.dataframe(fp_pred)

    fig = fp_model.plot_components(chart_fp_pred)
    st.pyplot(fig)

    # ---- Shapash explanation of the random forest ---------------------------
    st.title("4. Feature Importance")

    if input_lr:
        xpl = SmartExplainer(rf_model)
        y_pred = pd.Series(rf_pred)

        # Reset the index so it lines up with the freshly built y_pred series.
        X_test = X_test.reset_index(drop=True)

        xpl.compile(x=X_test, y_pred=y_pred)

        st.subheader("Overall Feature Importance")
        fig = xpl.plot.features_importance()
        st.plotly_chart(fig)

        st.subheader("Feature Importance for a Subset")
        # Cap the sample size: sampling more rows than exist would raise.
        subset = X_test.sample(n=min(50, len(X_test)), random_state=42).index
        fig_subset = xpl.plot.features_importance(selection=subset)
        st.plotly_chart(fig_subset)

        st.subheader("Contribution Plot: Bond Yield (US 10Y TN)")
        # The plot is tied to one specific feature; it can only be drawn when
        # that feature is part of the trained model.
        if 'Bond Yield (US 10Y TN)' in input_lr:
            fig_contribution = xpl.plot.contribution_plot('Bond Yield (US 10Y TN)')
            st.plotly_chart(fig_contribution)
        else:
            st.info("Select 'Bond Yield (US 10Y TN)' above to see its contribution plot.")
    else:
        st.info("Feature importance is available once at least one variable is selected.")
|
|
|
|
|
if app_page == 'Hyperparameter Tuning':
    # Page 5: grid-search a random forest on all features and record the best
    # parameters, model and test RMSE in MLflow (tracking set up via DagsHub).
    st.title("5. Hyperparameter Tuning")
    dagshub.init(repo_owner='shreykharbanda31', repo_name='fed-interest-rate-prediction', mlflow=True)

    def process_data(X, y, test_size=0.3, random_state=42):
        """Split features *X* and target *y* into train and test parts.

        Returns the tuple ``(X_train, X_test, y_train, y_test)``.
        """
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)
        return X_train, X_test, y_train, y_test

    def train_and_optimize_model(X_train, y_train, model, param_grid, cv=5):
        """Run a cross-validated grid search and return the fitted search object."""
        grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=cv)
        grid_search.fit(X_train, y_train)
        return grid_search

    def evaluate_model(model, X_test, y_test):
        """Log the test RMSE of *model* to the active MLflow run and return it."""
        y_pred = model.predict(X_test)
        rmse = metrics.root_mean_squared_error(y_test, y_pred)

        mlflow.log_metric("RMSE", rmse)
        return rmse

    with mlflow.start_run():
        # Strip "$" and "," so the price/GDP columns can be cast to float.
        # Raw string: "\$" in a plain literal is an invalid escape sequence.
        for column in ['S&P 500 Price', 'Nominal GDP Index (in billion USD)', 'Real GDP Index', 'DOW Jones Price']:
            df[column] = df[column].replace(r'[\$,]', '', regex=True).astype(float)

        # For every year up to and including 1991, copy that year's January
        # GDP figures onto the remaining months of the same year.
        for year in df[df['Year'] <= 1991]['Year'].unique():
            in_year = df['Year'] == year
            january_rows = df.loc[in_year & (df['Month'] == 'January')]
            if january_rows.empty:
                # No January reference row for this year: nothing to copy from.
                continue
            other_months = in_year & (df['Month'] != 'January')
            for gdp_column in ('Nominal GDP Index (in billion USD)', 'Real GDP Index'):
                df.loc[other_months, gdp_column] = january_rows[gdp_column].values[0]
        df.dropna(inplace=True)

        # Encode month names as 1..12 (categorical codes are 0-based, hence +1).
        df['Month'] = pd.Categorical(df['Month'], categories=[
            'January', 'February', 'March', 'April', 'May', 'June',
            'July', 'August', 'September', 'October', 'November', 'December'
        ], ordered=True)
        df['Month'] = df['Month'].cat.codes + 1

        # Every column except the target is used as a feature.
        list_columns = df.columns.drop("Fed Effective Funds Rate")

        X, y = df[list_columns], df["Fed Effective Funds Rate"]
        X_train, X_test, y_train, y_test = process_data(X, y)

        rfg_param_grid = {
            'n_estimators': [50, 100, 200],
            'max_depth': [10, 20, None],
            'min_samples_split': [2, 5, 10]
        }

        rf_model = RandomForestRegressor(random_state=42)
        rf_grid_search = train_and_optimize_model(X_train, y_train, rf_model, rfg_param_grid)
        best_rf = rf_grid_search.best_estimator_

        mlflow.log_params(rf_grid_search.best_params_)

        # One timestamp shared by both artifacts, in a format free of spaces,
        # colons and dots so it is valid as a directory name on every platform
        # and as a model name.
        run_stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        mlflow.sklearn.log_model(best_rf, f"{run_stamp}_best_rf")
        mlflow.sklearn.save_model(best_rf, f"{run_stamp}_best_rf_model")

        rmse = evaluate_model(best_rf, X_test, y_test)

        # Surface the outcome on the page; previously it was only logged.
        st.write("Best parameters:", rf_grid_search.best_params_)
        st.write(f"Random Forest RMSE: **{rmse:.2f}**")
|
|
|
|
|
if app_page == 'Conclusions and Data Insights':
    # Static closing page: one subheader per model family, each followed by
    # its paragraphs, rendered in the order listed here.
    st.title("6. Conclusions and Data Insights")

    conclusion_sections = (
        (
            "Linear Regression & Random Forest Regressor",
            (
                "While these two go about creating the models in different ways their use cases are similarly two fold. The first use case for these models would be as a sort of extension of a correlation matrix. And the second use case would be as a hypothetical predictive model.",
                "The models and the correlation matrix are similar in that they are both very effective at giving the correlation between variables, so you could decipher from either which economic factor creates the largest impact on the interest rate. The value in this instance specifically for the models is that they can give correlation for a set of variables, while a correlation matrix can only compare one value to another at a given time.",
                "For the second use case this would come into play if you wanted to construct a hypothetical scenario, and see what the resulting change in the interest rate would be. For example if you wanted to predict the interest rate after the economic changes of covid had reverted back to the “base” form that we expect from the US economy, you could input those values into the model and spit out a prediction for what the interest rate would be.",
            ),
        ),
        (
            "Prophet Time Series Model",
            (
                "The time series model is primarily useful compared to the other models for its ability to predict the future. But this comes at the cost of accuracy.",
                "Based on the limited number of variables used it is particularly blind to socio-political factors that influence economic outcomes. For example the model’s prediction is entirely decimated covid, and resulting policy changes. The model is going to be behind humans in the ability to predict the depressionary effect that a pandemic like covid would have on an economy before government intervention, and because it cannot look at other economic factors like inflation rate is unable to see the precise point at which interest rate would shift.",
                "A time series is meant to look at slightly longer term trends, because typically small changes aren’t too telling, but the interest rate is a number set intentionally by the Fed, meaning that small changes are quite telling.",
            ),
        ),
    )

    for section_heading, section_paragraphs in conclusion_sections:
        st.subheader(section_heading)
        for paragraph in section_paragraphs:
            st.write(paragraph)
|
|
|
|