| import os |
| import gradio as gr |
| import pandas as pd |
| import matplotlib.pyplot as plt |
|
|
# The project root is the parent of the directory that contains this file;
# both parquet inputs live in its 'data' sub-directory.
_module_dir = os.path.dirname(os.path.abspath(__file__))
base_dir = os.path.dirname(_module_dir)
_data_dir = os.path.join(base_dir, 'data')
orig_data_path = os.path.join(_data_dir, 'orig_processed.parquet')
combined_data_path = os.path.join(_data_dir, 'final_data.parquet')


def _load_sorted(parquet_path):
    """Read a parquet file, parse 'Date' if present, and order rows by ticker then date."""
    frame = pd.read_parquet(parquet_path)
    if 'Date' in frame.columns:
        frame['Date'] = pd.to_datetime(frame['Date'])
    ordered = frame.sort_values(['Ticker', 'Date'])
    return ordered.reset_index(drop=True)


orig_df = _load_sorted(orig_data_path)
combined_df = _load_sorted(combined_data_path)

# Only the price/volume columns that actually exist in the original dataset
# are offered as plottable features.
_CANDIDATE_FEATURES = ('Open', 'High', 'Low', 'Close', 'Volume')
FEATURE_COLS = [name for name in _CANDIDATE_FEATURES if name in orig_df.columns]
|
|
def plot_ticker_data(ticker, feature):
    """
    Plot the last 5 years of one feature for a ticker, original vs. synthetic.

    The 5-year window ends at the earlier of the two datasets' latest dates
    for the ticker. Rows on or after the cutoff are kept from both datasets,
    and rows with a missing 'Date' or feature value are dropped before plotting.

    Args:
        ticker: Value matched against the 'Ticker' column of both datasets.
        feature: Column to plot; must be one of FEATURE_COLS and also be
            present in the combined dataset.

    Returns:
        A matplotlib Figure with two stacked axes ("Original Data" on top,
        "Synthetic Data" below) that use the same x-axis limits.

    Raises:
        gr.Error: If the feature is unknown, or if either dataset has no
            rows for the ticker.
    """
    # Imported locally: the figure is built without pyplot so that it is not
    # registered in pyplot's global figure list, which would otherwise grow
    # by one figure per request for as long as the server runs.
    from matplotlib.figure import Figure

    # A plain message string cannot be rendered by a gr.Plot output, so every
    # failure is raised as gr.Error, which Gradio shows to the user.
    # FEATURE_COLS is derived from the original dataset only, so the combined
    # dataset is checked as well.
    if feature not in FEATURE_COLS or feature not in combined_df.columns:
        raise gr.Error(f"Feature '{feature}' not found in dataset.")

    orig_data = orig_df[orig_df['Ticker'] == ticker].sort_values('Date').reset_index(drop=True)
    synth_data = combined_df[combined_df['Ticker'] == ticker].sort_values('Date').reset_index(drop=True)

    if orig_data.empty and synth_data.empty:
        raise gr.Error(f"No data found for ticker: {ticker}")
    if orig_data.empty:
        raise gr.Error(f"No original data found for {ticker}")
    if synth_data.empty:
        raise gr.Error(f"No combined/synthetic data found for {ticker}")

    # Anchor the window on the earlier of the two end dates so both datasets
    # have data at the right-hand edge of the window.
    latest_date = min(orig_data['Date'].max(), synth_data['Date'].max())
    cutoff_date = latest_date - pd.DateOffset(years=5)

    orig_data = orig_data[orig_data['Date'] >= cutoff_date]
    synth_data = synth_data[synth_data['Date'] >= cutoff_date]

    orig_series = orig_data[['Date', feature]].dropna()
    synth_series = synth_data[['Date', feature]].dropna()

    fig = Figure(figsize=(12, 8))
    axes = fig.subplots(2, 1, sharex=False)
    fig.suptitle(f"{ticker} — {feature} (Last 5 Years)", fontsize=14)

    panels = (
        (axes[0], orig_series, "Original Data"),
        (axes[1], synth_series, "Synthetic Data"),
    )
    for ax, series, panel_title in panels:
        ax.plot(series['Date'], series[feature], linewidth=1.0, alpha=0.9)
        ax.set_title(panel_title)
        ax.set_ylabel(feature)
        ax.grid(True)

    # Give both panels the same x-range. If every value of the feature was
    # missing there are no dates to align on, so the default limits are kept.
    all_dates = pd.concat([orig_series['Date'], synth_series['Date']])
    if not all_dates.empty:
        min_date, max_date = all_dates.min(), all_dates.max()
        for ax in axes:
            ax.set_xlim(min_date, max_date)

    # Leave headroom at the top for the suptitle.
    fig.tight_layout(rect=[0, 0, 1, 0.96])
    return fig
|
|
# The ticker choices come from the original dataset, in sorted order.
unique_tickers = sorted(orig_df['Ticker'].unique())

# Components are created first so the Interface call below is plain wiring.
_ticker_dropdown = gr.Dropdown(unique_tickers, label="Select Stock Ticker")
_feature_dropdown = gr.Dropdown(
    FEATURE_COLS,
    label="Select Feature (Open/High/Low/Close/Volume)",
)
_comparison_plot = gr.Plot(label="Time Series Comparison")

demo = gr.Interface(
    fn=plot_ticker_data,
    inputs=[_ticker_dropdown, _feature_dropdown],
    outputs=_comparison_plot,
    title="Real vs Synthetic Time Series Viewer",
    description="Pick a ticker and feature to view the last 5 years of data from original and synthetic datasets.",
)


# Start the web UI only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()
|
|