"""Streamlit app: flag shifts in asset price dynamics with topological data analysis.

Daily log returns are cut into consecutive sliding windows, each window is
turned into an H0 persistence diagram (Vietoris-Rips on the 1-D sample), and
the Wasserstein distance between the diagrams of adjacent windows is plotted
against a user-chosen alert threshold.
"""

import warnings

import numpy as np
import pandas as pd
import persim
import plotly.graph_objs as go
import streamlit as st
import yfinance as yf
from ripser import Rips


# Function to fetch stock or crypto data
def fetch_data(ticker_name, start_date, end_date):
    """Download prices for one ticker and derive its log returns.

    Args:
        ticker_name: Ticker symbol understood by Yahoo Finance.
        start_date: First date of the requested range.
        end_date: End of the requested range, passed through to ``yf.download``.

    Returns:
        A ``(prices, log_returns)`` tuple: ``prices`` is a date-indexed
        ``pandas.Series`` with missing values dropped, ``log_returns`` is a
        1-D NumPy array with one element fewer than ``prices``. Both are
        empty when the download returned no rows.
    """
    raw_data = yf.download(ticker_name, start=start_date, end=end_date)
    if raw_data is None or raw_data.empty:
        return pd.Series(dtype=float), np.array([])

    # Depending on the yfinance version/settings the frame may not carry an
    # 'Adj Close' column (prices already adjusted); fall back to 'Close'.
    price_column = 'Adj Close' if 'Adj Close' in raw_data.columns else 'Close'
    adjusted_close = raw_data[price_column]

    # With multi-level columns the selection is a one-column DataFrame;
    # downstream code expects a Series.
    if isinstance(adjusted_close, pd.DataFrame):
        adjusted_close = adjusted_close.iloc[:, 0]

    adjusted_close = adjusted_close.dropna()
    prices = adjusted_close.values
    log_returns = np.log(prices[1:] / prices[:-1])
    return adjusted_close, log_returns


def _finite_h0_diagram(rips, segment):
    """Return the H0 persistence pairs of *segment* that have a finite death."""
    diagram = rips.fit_transform(segment.reshape(-1, 1))[0]
    # Drop infinite-death pairs up front so none are handed to
    # persim.wasserstein.
    return diagram[np.isfinite(diagram[:, 1])]


# Function to compute Wasserstein distances
def compute_wasserstein_distances(log_returns, window_size, rips):
    """Compare each window of returns with the window that follows it.

    For every start index ``i`` the H0 persistence diagram of
    ``log_returns[i:i + window_size]`` is compared with the diagram of
    ``log_returns[i + window_size:i + 2 * window_size]``.

    Args:
        log_returns: Array of log returns.
        window_size: Number of returns per window; must be at least 1.
        rips: A ``ripser.Rips`` instance used to build the diagrams.

    Returns:
        Array of shape ``(n, 1)`` with
        ``n = len(log_returns) - 2 * window_size + 1``; shape ``(0, 1)`` when
        the input is too short for a single pair of windows.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be at least 1")

    log_returns = np.asarray(log_returns, dtype=float)
    n = len(log_returns) - (2 * window_size) + 1
    if n <= 0:
        return np.empty((0, 1))

    # The "second" window at position i is the "first" window at position
    # i + window_size, so build every window's diagram exactly once.
    diagrams = [
        _finite_h0_diagram(rips, log_returns[start:start + window_size])
        for start in range(n + window_size)
    ]

    distances = np.full((n, 1), np.nan)
    for i in range(n):
        distances[i] = persim.wasserstein(
            diagrams[i], diagrams[i + window_size], matching=False
        )
    return distances


def _run_analysis(ticker_name, start_date, end_date, window_size, threshold):
    """Fetch data, compute the distances and render both charts."""
    st.write(f"Analyzing {ticker_name} from {start_date} to {end_date} with window size {window_size} and threshold {threshold}")

    ticker_name = ticker_name.strip()
    if not ticker_name:
        st.error("Please enter a ticker symbol.")
        return
    if start_date >= end_date:
        st.error("The start date must be earlier than the end date.")
        return

    # Fetch data
    prices, log_returns = fetch_data(ticker_name, start_date, end_date)
    if len(log_returns) < 2 * window_size:
        st.error(
            f"Not enough data: {len(log_returns)} daily returns were found, "
            f"but a window size of {window_size} needs at least "
            f"{2 * window_size}. Widen the date range or reduce the window size."
        )
        return

    # Only the H0 diagram is consumed downstream, so higher-dimensional
    # homology is not computed.
    rips = Rips(maxdim=0)
    wasserstein_dists = compute_wasserstein_distances(log_returns, window_size, rips)

    # Dates, prices and distances share one mask so every plotted series
    # stays aligned even if some distances are NaN.
    window_dates = prices.index[window_size:-window_size]
    window_prices = prices.iloc[window_size:-window_size]
    flat_distances = wasserstein_dists.ravel()
    valid_mask = ~np.isnan(flat_distances)
    valid_dates = window_dates[valid_mask]
    valid_prices = window_prices[valid_mask]
    valid_distances = flat_distances[valid_mask]

    alert_mask = valid_distances > threshold
    alert_dates = valid_dates[alert_mask]
    alert_values = valid_prices[alert_mask]

    # Plot price and alerts
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=valid_dates, y=valid_prices, mode='lines',
                             name='Price'))
    fig.add_trace(go.Scatter(x=alert_dates, y=alert_values, mode='markers',
                             name='Alert', marker=dict(color='red', size=8)))
    fig.update_layout(title=f'{ticker_name} Prices Over Time',
                      xaxis_title='Date', yaxis_title='Price')
    st.plotly_chart(fig, use_container_width=True)

    # Plot Wasserstein distances
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=valid_dates, y=valid_distances, mode='lines',
                             name='Wasserstein Distance',
                             line=dict(color='blue', width=2)))
    fig.add_hline(y=threshold, line_dash='dash', line_color='red',
                  annotation_text=f'Threshold: {threshold}',
                  annotation_position='bottom right')
    fig.update_layout(title='Wasserstein Distances Over Time',
                      xaxis_title='Date', yaxis_title='Wasserstein Distance')
    st.plotly_chart(fig, use_container_width=True)

    st.write("""
    **Plot Interpretation:**

    - The first plot shows the asset price over time with alerts marked in red.
    - The second plot displays the Wasserstein distances over time, with the threshold indicated by a dashed red line. Peaks above this line represent significant changes in price dynamics.
    """)


# Streamlit app configuration
st.set_page_config(layout="wide")
st.title("Market Crash Analysis with Topology")

st.write("""
This application analyzes asset price data using Wasserstein distances to detect changes in price dynamics over time.
Wasserstein distances, derived from persistence diagrams in Topological Data Analysis (TDA), help identify significant
shifts in asset price behaviors for both stocks and cryptocurrencies.
""")

# NOTE(review): the original nesting was lost; the interpretation section is
# assumed to live inside this expander together with the methodology - confirm.
with st.expander("Wasserstein Distance Methodology", expanded=False):
    # Explanation of the Wasserstein Distance method
    st.subheader("Wasserstein Distance Methodology")
    st.write("""
    The Wasserstein distance is a measure from optimal transport theory, used here to compare distributions of
    log returns in different time windows. A high Wasserstein distance indicates a significant change in the
    price dynamics, which might suggest a market event or shift in investor sentiment.
    """)
    st.latex(r'''
    W(P, Q) = \inf_{\gamma \in \Pi(P, Q)} \mathbb{E}_{(x,y) \sim \gamma} [d(x, y)]
    ''')
    # Raw string with $...$ delimiters: Streamlit markdown renders inline
    # math only in that form, and a raw literal avoids invalid escapes.
    st.write(r"""
    - Where $W(P, Q)$ is the Wasserstein distance between distributions $P$ and $Q$.
    - $d(x, y)$ is the distance between points $x$ and $y$.
    - $\gamma$ is a joint distribution with marginals $P$ and $Q$.
    """)

    # Interpretation of results
    st.subheader("Interpretation of Results")
    st.write("""
    **Wasserstein Distance Analysis:** The Wasserstein distance quantifies changes in the log returns of asset
    prices over time. A high distance indicates a significant shift in price dynamics, potentially due to a
    market event or a change in investor behavior.
    """)

st.sidebar.title(""" Input Parameters """)

# Sidebar for "How to Use" instructions inside an expander, closed by default
with st.sidebar.expander("How to Use", expanded=False):
    st.write("""
    **How to use this app:**

    1. Enter the stock or crypto ticker symbol (e.g., `^GSPC` for S&P 500 or `BTC-USD` for Bitcoin).
    2. Specify the start and end dates for the analysis period.
    3. Adjust the window size for the sliding window analysis.
    4. Set the alert threshold for detecting significant changes in price dynamics.
    5. Click 'Run Analysis' to start.
    """)

# Input parameters inside an expander, open by default
with st.sidebar.expander("Input Parameters", expanded=True):
    ticker_name = st.text_input(
        'Enter Stock or Crypto Symbol (e.g., AAPL or BTC-USD)', '^GSPC',
        help="Enter the ticker symbol for the stock or cryptocurrency you want to analyze.")
    start_date = st.date_input(
        'Start Date', pd.to_datetime('2020-01-01'),
        help="Select the start date for the data range.")
    # Default to tomorrow so that today's session is included in the range.
    end_date = st.date_input(
        'End Date', pd.to_datetime(pd.Timestamp.now().date() + pd.Timedelta(days=1)),
        help="Select the end date for the data range.")

# Parameters for the selected method inside an expander, open by default
with st.sidebar.expander("Parameters", expanded=True):
    window_size = st.slider(
        'Window Size', min_value=5, max_value=50, value=20,
        help="Set the window size for the sliding window analysis.")
    threshold = st.slider(
        'Alert Threshold', min_value=0.02, max_value=0.2, value=0.075, step=0.005,
        help="Set the threshold for detecting significant changes in price dynamics.")

# Run Analysis button in the sidebar
if st.sidebar.button('Run Analysis'):
    _run_analysis(ticker_name, start_date, end_date, window_size, threshold)

# NOTE(review): both style payloads below are blank in this file, so these
# calls currently inject nothing - restore the intended HTML/CSS if the
# default menu and footer should really be hidden.
st.markdown(
    """ """,
    unsafe_allow_html=True
)

# Hide the default Streamlit menu and footer
hide_streamlit_style = """ """
st.markdown(hide_streamlit_style, unsafe_allow_html=True)