# NOTE: "Spaces: Running" was hosting-page header text captured together with this file; it is not program code.
import warnings
from datetime import date

import numpy as np
import persim
import plotly.graph_objs as go
import streamlit as st
import yfinance as yf
from ripser import Rips
# Function to fetch stock data
def fetch_data(ticker_name, start_date, end_date):
    """Download daily prices for one ticker and derive its log returns.

    Args:
        ticker_name: Symbol forwarded to ``yf.download`` (e.g. ``'^GSPC'``).
        start_date: Start of the download range, forwarded as ``start``.
        end_date: End of the download range, forwarded as ``end``.

    Returns:
        A ``(prices, log_returns)`` tuple. ``prices`` is the NaN-free price
        series with the index returned by yfinance; ``log_returns`` is a NumPy
        array holding ``log(p[t] / p[t-1])`` for consecutive prices, so it is
        one element shorter than ``prices`` (and empty when fewer than two
        prices are available).
    """
    raw_data = yf.download(ticker_name, start=start_date, end=end_date)

    # When yfinance auto-adjusts prices it returns no separate 'Adj Close'
    # column (the adjustment is presumably already applied to 'Close' --
    # confirm against the installed yfinance version). Fall back instead of
    # failing with a KeyError.
    column = 'Adj Close' if 'Adj Close' in raw_data.columns else 'Close'
    adjusted_close = raw_data[column]

    # With multi-level columns (price field, ticker) the selection above is a
    # one-column DataFrame; callers expect a 1-D series of prices.
    if getattr(adjusted_close, 'ndim', 1) == 2:
        adjusted_close = adjusted_close.iloc[:, 0]
    adjusted_close = adjusted_close.dropna()

    prices = adjusted_close.values
    log_returns = np.log(prices[1:] / prices[:-1])
    return adjusted_close, log_returns
# Function to compute Wasserstein distances
def compute_wasserstein_distances(log_returns, window_size, rips):
    """Compare adjacent sliding windows of returns via persistence diagrams.

    For each offset ``i`` the windows ``log_returns[i:i + window_size]`` and
    ``log_returns[i + window_size:i + 2 * window_size]`` are each reshaped to a
    column of points and passed to ``rips.fit_transform``; the diagram at
    index 0 of each result is then compared with ``persim.wasserstein``.

    Args:
        log_returns: Sequence of returns (converted with ``np.asarray``).
        window_size: Number of returns in each of the two adjacent windows.
        rips: Object exposing ``fit_transform(points)`` whose result is
            indexable; ``main`` passes a ``ripser.Rips`` instance.

    Returns:
        Array of shape ``(n, 1)`` where
        ``n = max(len(log_returns) - 2 * window_size + 1, 0)``. The array is
        empty when the input is too short to hold two full windows.
    """
    log_returns = np.asarray(log_returns)

    # Clamp at zero: a series shorter than two windows would otherwise give a
    # negative count and make np.full raise "negative dimensions".
    n = max(len(log_returns) - (2 * window_size) + 1, 0)
    distances = np.full((n, 1), np.nan)

    # The second window at offset i is the first window at offset
    # i + window_size, so remember each diagram instead of recomputing it.
    diagram_cache = {}

    def diagram_at(start):
        if start not in diagram_cache:
            window = log_returns[start:start + window_size].reshape(-1, 1)
            diagram_cache[start] = rips.fit_transform(window)[0]
        return diagram_cache[start]

    # By construction of n every window below is full length, so no
    # per-iteration size check is needed.
    for i in range(n):
        distances[i] = persim.wasserstein(
            diagram_at(i), diagram_at(i + window_size), matching=False
        )
    return distances
# Streamlit app
def _render_interpretation(threshold):
    """Write the static explanation of the plots and the alert threshold."""
    st.subheader("Interpretation of Results")
    st.write("""
**Wasserstein Distance Analysis:**
The Wasserstein distance measures the difference between two distributions. In this context, it quantifies changes in the log returns of stock prices over time.
A high Wasserstein distance indicates a significant change in the price dynamics, which might suggest a market event or shift in investor sentiment.
""")
    st.latex(r'''
W(P, Q) = \inf_{\gamma \in \Pi(P, Q)} \mathbb{E}_{(x,y) \sim \gamma} [d(x, y)]
''')
    # Raw string with $...$ delimiters: Streamlit markdown renders inline
    # LaTeX only inside dollar signs, and a raw string keeps the backslash in
    # \gamma from being treated as a Python escape sequence.
    st.write(r"""
- Where $W(P, Q)$ is the Wasserstein distance between distributions $P$ and $Q$.
- $d(x, y)$ is the distance between points $x$ and $y$.
- $\gamma$ is a joint distribution with marginals $P$ and $Q$.

**Alert Threshold:**
The alert threshold is set to identify significant changes in the Wasserstein distances. Alerts are triggered when the distance exceeds the threshold.
""")
    st.write(f"Threshold: {threshold}")
    st.write("""
**Plot Interpretation:**
- The first plot shows the stock price over time with alerts marked in red.
- The second plot displays the Wasserstein distances over time, with the threshold indicated by a dashed red line. Peaks above this line represent significant changes in price dynamics.
""")


def main():
    """Render the Streamlit page: sidebar inputs, the analysis run, and plots.

    Ticker, date range, window size and alert threshold are read from the
    sidebar. Once 'Run Analysis' is pressed, prices are downloaded with
    ``fetch_data``, sliding-window distances are computed with
    ``compute_wasserstein_distances``, and two charts are drawn: prices with
    alert markers, and distances with the threshold line. Invalid dates or a
    period with too little data produce an on-page message instead of an
    exception.
    """
    st.set_page_config(layout="wide")
    st.title('Stock Analysis Using Topological Data Analysis')
    st.write("""
This application analyzes stock data using Wasserstein distances to detect changes in price dynamics over time.
Wasserstein distances, derived from persistence diagrams in Topological Data Analysis (TDA), help identify significant shifts in stock price behaviors.
""")
    st.sidebar.title('Parameters')

    # Input fields for user
    ticker_name = st.sidebar.text_input('Enter Ticker Symbol', '^GSPC')
    start_date_string = st.sidebar.text_input('Start Date (YYYY-MM-DD)', '2020-01-01')
    end_date_string = st.sidebar.text_input('End Date (YYYY-MM-DD)', '2025-01-01')
    window_size = st.sidebar.slider('Window Size', min_value=5, max_value=50, value=20)
    threshold = st.sidebar.slider('Alert Threshold', min_value=0.05, max_value=0.2, value=0.075, step=0.005)
    st.sidebar.write("""
**How to use:**
1. Enter the stock ticker symbol (e.g., `^GSPC` for S&P 500).
2. Specify the start and end dates for the analysis period.
3. Adjust the window size for the sliding window analysis.
4. Set the alert threshold for detecting significant changes.
5. Click 'Run Analysis' to start.
""")

    if not st.sidebar.button('Run Analysis'):
        return

    # The dates arrive as free text, so validate them before any download.
    try:
        start_date = date.fromisoformat(start_date_string.strip())
        end_date = date.fromisoformat(end_date_string.strip())
    except ValueError:
        st.error('Start and end dates must be valid dates in YYYY-MM-DD format.')
        return
    if start_date >= end_date:
        st.error('Start date must be earlier than end date.')
        return

    st.write(f"Analyzing {ticker_name} from {start_date_string} to {end_date_string} with window size {window_size} and threshold {threshold}")

    # Fetch data
    prices, log_returns = fetch_data(ticker_name, start_date.isoformat(), end_date.isoformat())

    # A single distance already needs two full adjacent windows of returns.
    if len(log_returns) < 2 * window_size:
        st.warning(
            f"Not enough price data for {ticker_name} in the selected period: "
            f"window size {window_size} needs at least {2 * window_size + 1} prices, "
            f"but only {len(prices)} were returned."
        )
        return

    # compute_wasserstein_distances only reads the diagram at index 0, so
    # higher homology dimensions do not need to be computed.
    rips = Rips(maxdim=0)
    wasserstein_dists = compute_wasserstein_distances(log_returns, window_size, rips)
    distances = wasserstein_dists.flatten()

    # Distance i is shown at price position i + window_size. Prices, dates and
    # distances are filtered with one shared mask so they stay aligned even if
    # some distances are NaN.
    window_prices = prices.iloc[window_size:-window_size]
    valid = ~np.isnan(distances)
    valid_dates = window_prices.index[valid]
    valid_prices = window_prices[valid]
    valid_distances = distances[valid]

    is_alert = valid_distances > threshold
    alert_dates = valid_dates[is_alert]
    alert_values = valid_prices[is_alert]

    # Plot price and alerts
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=valid_dates, y=valid_prices, mode='lines', name='Price'))
    fig.add_trace(go.Scatter(x=alert_dates, y=alert_values, mode='markers', name='Alert', marker=dict(color='red', size=8)))
    fig.update_layout(title=f'{ticker_name} Prices Over Time', xaxis_title='Date', yaxis_title='Price')
    st.plotly_chart(fig, use_container_width=True)

    # Plot Wasserstein distances
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=valid_dates, y=valid_distances, mode='lines', name='Wasserstein Distance', line=dict(color='blue', width=2)))
    fig.add_hline(y=threshold, line_dash='dash', line_color='red', annotation_text=f'Threshold: {threshold}', annotation_position='bottom right')
    fig.update_layout(title='Wasserstein Distances Over Time', xaxis_title='Date', yaxis_title='Wasserstein Distance')
    st.plotly_chart(fig, use_container_width=True)

    # Interpretation of results
    _render_interpretation(threshold)
# Main function call
if __name__ == "__main__":
    # Ignore all warnings for the whole run so library warnings do not
    # clutter the app's output.
    warnings.filterwarnings('ignore')
    main()

    # Inject CSS that hides Streamlit's default menu and footer. Kept under
    # the __main__ guard so importing this module has no UI side effects.
    hide_streamlit_style = """
<style>
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
</style>
"""
    st.markdown(hide_streamlit_style, unsafe_allow_html=True)