import os
import logging
import sys
import warnings
from datetime import datetime, timedelta
from typing import Optional, Tuple, Dict, List

import pandas as pd
import numpy as np
import torch
import joblib
import shap
import gradio as gr
import matplotlib.pyplot as plt
from tqdm import tqdm
from wordcloud import WordCloud
from transformers import pipeline
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import accuracy_score, f1_score
from sklearn.base import clone

# Suppress warnings
warnings.filterwarnings("ignore", category=UserWarning)

# --------------------------
# Configuration & Setup
# --------------------------
class Config:
    DATA_PATH = "data/ibm_cleaned.parquet"
    MODEL_PATH = "models/stock_prediction_model.joblib"
    CACHE_DIR = "./cache"
    LOGS_DIR = "./logs"
    PLOTS_DIR = "./plots"
    SENTIMENT_MODEL = "distilbert-base-uncased-finetuned-sst-2-english"
    SENTIMENT_BATCH_SIZE = 8
    SENTIMENT_MAX_LEN = 256
    CONFIDENCE_THRESHOLD = 0.3
    DATA_REFRESH_DAYS = 7
    MAX_HISTORY_ENTRIES = 50

    @classmethod
    def setup(cls):
        os.makedirs(cls.CACHE_DIR, exist_ok=True)
        os.makedirs(cls.LOGS_DIR, exist_ok=True)
        os.makedirs(cls.PLOTS_DIR, exist_ok=True)
        os.makedirs(os.path.dirname(cls.MODEL_PATH), exist_ok=True)

Config.setup()

# Logger config
logging.basicConfig(
    filename=os.path.join(Config.LOGS_DIR, 'app.log'),
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger(__name__)

# --------------------------
# Data loading and optimization
# --------------------------
def load_data(test_mode: bool = False, force_refresh: bool = False) -> pd.DataFrame:
    """Load and preprocess the stock data with caching and validation."""
    try:
        # Note when the on-disk parquet is still within its freshness window
        if not force_refresh and os.path.exists(Config.DATA_PATH):
            file_age = datetime.now() - datetime.fromtimestamp(os.path.getmtime(Config.DATA_PATH))
            if file_age.days < Config.DATA_REFRESH_DAYS:
                logger.info("Using cached data")

        df = pd.read_parquet(Config.DATA_PATH)

        # Validate required columns
        required_cols = {'Open', 'High', 'Low', 'Close', 'Volume', 'Date', 'News'}
        missing_cols = required_cols - set(df.columns)
        if missing_cols:
            raise ValueError(f"Missing columns in data: {missing_cols}")

        # Optimize dtypes
        num_cols = ['Open', 'High', 'Low', 'Close']
        df[num_cols] = df[num_cols].astype('float32')
        if (df['Volume'] < 0).any():
            logger.warning("Negative values found in 'Volume'; downcasting as signed int.")
            df['Volume'] = pd.to_numeric(df['Volume'], downcast='integer')
        else:
            df['Volume'] = pd.to_numeric(df['Volume'], downcast='unsigned')

        if not pd.api.types.is_datetime64_any_dtype(df['Date']):
            df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
        if df['Date'].isna().any():
            raise ValueError("Date column contains invalid dates after parsing.")

        # Data quality checks
        if df.isnull().sum().sum() > 0:
            logger.warning("Data contains null values - filling with forward fill")
            df = df.ffill()

        if test_mode:
            sample_size = min(5000, len(df))
            if len(df) < 5000:
                logger.warning(f"Data has only {len(df)} rows - using full dataset in test mode")
            return df.sample(sample_size, random_state=42).sort_values('Date').reset_index(drop=True)

        return df.sort_values('Date').reset_index(drop=True)
    except Exception as e:
        logger.error(f"Data loading failed: {e}")
        raise
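
# Illustrative usage of load_data (assumes data/ibm_cleaned.parquet exists with
# the schema validated above; one row per trading day):
#
#     df = load_data(test_mode=True)       # ~5,000-row sample for quick runs
#     df = load_data(force_refresh=True)   # skip the file-age freshness check
#
# Columns: Date, Open, High, Low, Close, Volume, News.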

# --------------------------
# Sentiment Analysis
# --------------------------
class SentimentAnalyzer:
    """Handles sentiment analysis with caching and batching."""

    def __init__(self):
        self._initialize_pipeline()
        self.sentiment_cache = {}

    def _initialize_pipeline(self):
        """Initialize the sentiment analysis pipeline."""
        try:
            self.pipeline = pipeline(
                "text-classification",
                model=Config.SENTIMENT_MODEL,
                device=0 if torch.cuda.is_available() else -1,
                batch_size=Config.SENTIMENT_BATCH_SIZE,
                truncation=True,
                max_length=Config.SENTIMENT_MAX_LEN,
            )
        except Exception as e:
            logger.error(f"Sentiment model load failed: {e}")
            self.pipeline = None
            raise

    def analyze_texts(self, texts: List[str]) -> np.ndarray:
        """Analyze sentiment for a batch of texts with caching."""
        if self.pipeline is None or not texts:
            return np.zeros(len(texts), dtype='float32')

        # Check cache first
        uncached_texts = []
        uncached_indices = []
        cached_scores = np.zeros(len(texts), dtype='float32')
        for i, text in enumerate(texts):
            text = str(text)[:Config.SENTIMENT_MAX_LEN] if text else ""
            if text in self.sentiment_cache:
                cached_scores[i] = self.sentiment_cache[text]
            else:
                uncached_texts.append(text)
                uncached_indices.append(i)

        # Process uncached texts
        if uncached_texts:
            try:
                results = self.pipeline(
                    uncached_texts,
                    truncation=True,
                    max_length=Config.SENTIMENT_MAX_LEN,
                    batch_size=Config.SENTIMENT_BATCH_SIZE
                )
                for text, idx, res in zip(uncached_texts, uncached_indices, results):
                    # The pipeline returns one dict per input (or a one-element
                    # list when top_k is set) - normalize before unpacking.
                    if isinstance(res, list):
                        res = res[0]
                    label = res['label']
                    score = res['score']
                    sentiment_score = score if label == "POSITIVE" else -score
                    cached_scores[idx] = sentiment_score
                    # Cache by the text itself, not by its position in `texts`
                    self.sentiment_cache[text] = sentiment_score
            except Exception as e:
                logger.error(f"Sentiment analysis error: {e}")

        return cached_scores

# --------------------------
# Feature Engineering
# --------------------------
def calculate_rsi(series: pd.Series, window: int = 14) -> pd.Series:
    """Calculate Relative Strength Index."""
    try:
        series = pd.to_numeric(series, errors='coerce').ffill()
        delta = series.diff()
        gain = delta.clip(lower=0)
        loss = -delta.clip(upper=0)
        avg_gain = gain.rolling(window, min_periods=1).mean()
        avg_loss = loss.rolling(window, min_periods=1).mean().replace(0, 1e-10)
        rs = avg_gain / avg_loss
        return (100 - (100 / (1 + rs))).astype('float32')
    except Exception as e:
        logger.error(f"RSI calculation failed: {e}")
        return pd.Series(np.nan, index=series.index)

def calculate_macd(
    series: pd.Series, fast: int = 12, slow: int = 26, signal: int = 9
) -> Tuple[pd.Series, pd.Series, pd.Series]:
    """Calculate MACD line, signal line, and histogram."""
    try:
        series = pd.to_numeric(series, errors='coerce').ffill()
        ema_fast = series.ewm(span=fast, adjust=False).mean()
        ema_slow = series.ewm(span=slow, adjust=False).mean()
        macd_line = ema_fast - ema_slow
        signal_line = macd_line.ewm(span=signal, adjust=False).mean()
        return (
            macd_line.astype('float32'),
            signal_line.astype('float32'),
            (macd_line - signal_line).astype('float32')
        )
    except Exception as e:
        logger.error(f"MACD calculation failed: {e}")
        return (
            pd.Series(np.nan, index=series.index),
            pd.Series(np.nan, index=series.index),
            pd.Series(np.nan, index=series.index)
        )

def calculate_bollinger_bands(
    series: pd.Series, window: int = 20, no_of_std: int = 2
) -> Tuple[pd.Series, pd.Series]:
    """Calculate Bollinger Bands."""
    try:
        series = pd.to_numeric(series, errors='coerce').ffill()
        rolling_mean = series.rolling(window, min_periods=1).mean()
        rolling_std = series.rolling(window, min_periods=1).std()
        return (
            (rolling_mean + (no_of_std * rolling_std)).astype('float32'),
            (rolling_mean - (no_of_std * rolling_std)).astype('float32')
        )
    except Exception as e:
        logger.error(f"Bollinger Bands calculation failed: {e}")
        return (
            pd.Series(np.nan, index=series.index),
            pd.Series(np.nan, index=series.index)
        )
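
# Quick sanity checks for the indicators above (illustrative values, not a
# test suite; RSI = 100 - 100 / (1 + RS), where RS = avg gain / avg loss):
#
#     s = pd.Series([1.0, 2.0, 3.0, 2.0, 2.5])
#     assert calculate_rsi(s).dropna().between(0, 100).all()
#     macd, sig, hist = calculate_macd(s)
#     assert np.allclose(hist, macd - sig)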

def calculate_volatility(series: pd.Series, window: int = 10) -> pd.Series:
    """Calculate price volatility."""
    try:
        series = pd.to_numeric(series, errors='coerce').ffill()
        return series.pct_change().rolling(window, min_periods=1).std().astype('float32')
    except Exception as e:
        logger.error(f"Volatility calculation failed: {e}")
        return pd.Series(np.nan, index=series.index)

def create_features(df: pd.DataFrame, analyzer: SentimentAnalyzer) -> pd.DataFrame:
    """Create all features for the prediction model."""
    try:
        df = df.copy()

        # Validate required columns
        required_cols = {'Close', 'News'}
        missing_cols = required_cols - set(df.columns)
        if missing_cols:
            raise ValueError(f"Missing columns for feature creation: {missing_cols}")

        # Price features
        df['Price_Change'] = df['Close'].pct_change().astype('float32')
        df['Log_Return'] = np.log1p(df['Price_Change'].clip(lower=-0.9999)).astype('float32')
        df['MA_5'] = df['Close'].rolling(5, min_periods=1).mean().astype('float32')
        df['MA_20'] = df['Close'].rolling(20, min_periods=1).mean().astype('float32')
        df['RSI'] = calculate_rsi(df['Close'])
        df['Volatility_10'] = calculate_volatility(df['Close'], 10)

        # MACD features
        macd_line, signal_line, hist = calculate_macd(df['Close'])
        df['MACD'] = macd_line
        df['MACD_Signal'] = signal_line
        df['MACD_Hist'] = hist

        # Bollinger Bands
        bb_upper, bb_lower = calculate_bollinger_bands(df['Close'])
        df['BB_Upper'] = bb_upper
        df['BB_Lower'] = bb_lower

        # Sentiment features
        if 'News' not in df or df['News'].isnull().all():
            logger.warning("No news data - setting Sentiment features to zero")
            df['Sentiment'] = 0.0
        else:
            df['Sentiment'] = analyzer.analyze_texts(df['News'].fillna('').tolist())
        df['Sentiment_MA'] = df['Sentiment'].rolling(5, min_periods=1).mean().astype('float32')
        df['Sentiment_Lag1'] = df['Sentiment'].shift(1).fillna(0).astype('float32')

        # Target: does tomorrow's close exceed today's?
        df['Target'] = (df['Close'].shift(-1) > df['Close']).astype('int8')

        return df.dropna().reset_index(drop=True)
    except Exception as e:
        logger.error(f"Feature engineering error: {e}")
        raise

# --------------------------
# Model Training
# --------------------------
def train_model() -> HistGradientBoostingClassifier:
    """Train and evaluate the prediction model."""
    try:
        logger.info("Starting model training...")
        df = load_data(test_mode=True)

        # Train-test split maintaining temporal order
        split_idx = int(0.8 * len(df))
        train_df = df.iloc[:split_idx].copy()
        test_df = df.iloc[split_idx:].copy()

        analyzer = SentimentAnalyzer()
        features = []

        # Process in chunks for memory efficiency. Caveat: rolling features
        # restart at every chunk boundary, so the first few rows of each chunk
        # see a shorter history than they would on the full frame.
        chunk_size = 1000
        for i in tqdm(range(0, len(train_df), chunk_size), desc="Creating features"):
            chunk = train_df.iloc[i:i + chunk_size]
            features.append(create_features(chunk, analyzer))
        train_processed = pd.concat(features)

        # Define expected features
        expected_features = [
            'Sentiment', 'Sentiment_MA', 'Sentiment_Lag1', 'Price_Change',
            'Log_Return', 'MA_5', 'MA_20', 'RSI', 'Volatility_10',
            'MACD', 'MACD_Signal', 'MACD_Hist', 'BB_Upper', 'BB_Lower', 'Target'
        ]

        # Validate features
        missing_cols = [c for c in expected_features if c not in train_processed.columns]
        if missing_cols:
            raise ValueError(f"Missing features in training data: {missing_cols}")

        # Ensure only numeric data is passed to the model:
        # 'Target' is the label, everything else in expected_features a feature
        X_train = train_processed[expected_features].drop(columns=['Target'])
        y_train = train_processed['Target']
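        # Why TimeSeriesSplit below rather than a shuffled KFold: every
        # validation fold lies strictly after its training fold, so the model
        # is never scored on days older than the ones it was fit on.
        # Illustrative fold layout for n_splits=3 on 8 samples:
        #
        #     fold 1: train [0 1]          val [2 3]
        #     fold 2: train [0 1 2 3]      val [4 5]
        #     fold 3: train [0 1 2 3 4 5]  val [6 7]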
        # Initialize model with good defaults
        base_model = HistGradientBoostingClassifier(
            max_iter=100,
            max_depth=5,
            learning_rate=0.05,
            random_state=42,
            verbose=1,
            early_stopping=True,
            validation_fraction=0.1
        )

        # Time-series cross-validation
        tscv = TimeSeriesSplit(n_splits=3)
        f1_scores = []
        for train_idx, val_idx in tscv.split(X_train):
            model = clone(base_model)
            model.fit(X_train.iloc[train_idx], y_train.iloc[train_idx])
            preds = model.predict(X_train.iloc[val_idx])
            f1_scores.append(f1_score(y_train.iloc[val_idx], preds))
            logger.info(f"Fold F1: {f1_scores[-1]:.3f}")
        logger.info(f"Validation F1: {np.mean(f1_scores):.3f} ± {np.std(f1_scores):.3f}")

        # Final training
        model = clone(base_model)
        model.fit(X_train, y_train)

        # Test evaluation
        test_processed = create_features(test_df, analyzer)
        X_test = test_processed[expected_features].drop(columns=['Target'])
        y_test = test_processed['Target']
        y_pred = model.predict(X_test)
        acc = accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred)
        logger.info(f"Test Accuracy: {acc:.3f} | F1: {f1:.3f}")

        # Save model
        joblib.dump(model, Config.MODEL_PATH)
        logger.info(f"Model saved to {Config.MODEL_PATH}")
        return model
    except Exception as e:
        logger.error(f"Model training failed: {e}")
        raise

# --------------------------
# Model Prediction
# --------------------------
def predict(df: pd.DataFrame, threshold: float = Config.CONFIDENCE_THRESHOLD) -> pd.DataFrame:
    """Make predictions with confidence scores."""
    try:
        if model is None:
            raise ValueError("Model is not loaded")

        df_features = create_features(df, analyzer)
        required_features = [
            'Sentiment', 'Sentiment_MA', 'Sentiment_Lag1', 'Price_Change',
            'Log_Return', 'MA_5', 'MA_20', 'RSI', 'Volatility_10',
            'MACD', 'MACD_Signal', 'MACD_Hist', 'BB_Upper', 'BB_Lower'
        ]
        X = df_features[required_features]
        proba = model.predict_proba(X)[:, 1]  # Probability of 'up' class

        # Generate predictions with confidence threshold
        preds = np.where(
            proba > (0.5 + threshold), "Buy",
            np.where(proba < (0.5 - threshold), "Sell", "Hold")
        )
        df_features['Prediction'] = preds
        df_features['Confidence'] = np.abs(proba - 0.5) * 2  # Normalized to 0-1
        return df_features[['Date', 'Close', 'Prediction', 'Confidence', 'Sentiment']]
    except Exception as e:
        logger.error(f"Prediction error: {e}")
        return pd.DataFrame(columns=['Date', 'Close', 'Prediction', 'Confidence', 'Sentiment'])

# --------------------------
# Visualization Functions
# --------------------------
def generate_shap_plot(df: pd.DataFrame) -> Optional[str]:
    """Generate SHAP explanation plot."""
    try:
        df = df.tail(50)  # Use most recent 50 samples
        required_features = [
            'Sentiment', 'Sentiment_MA', 'Sentiment_Lag1', 'Price_Change',
            'Log_Return', 'MA_5', 'MA_20', 'RSI', 'Volatility_10',
            'MACD', 'MACD_Signal', 'MACD_Hist', 'BB_Upper', 'BB_Lower'
        ]
        X = df[required_features]

        explainer = shap.TreeExplainer(model)
        shap_values = explainer.shap_values(X)

        plt.figure(figsize=(10, 6))
        shap.summary_plot(shap_values, X, show=False, plot_size=(10, 6))
        plot_path = os.path.join(Config.PLOTS_DIR, "shap_summary.png")
        plt.savefig(plot_path, bbox_inches='tight')
        plt.close()
        return plot_path
    except Exception as e:
        logger.error(f"SHAP plot generation failed: {e}")
        return None

def plot_sentiment(df: pd.DataFrame) -> Optional[str]:
    """Generate sentiment trend plot."""
    try:
        plt.figure(figsize=(10, 4))
        plt.plot(df['Date'], df['Sentiment'], label="Daily Sentiment")
        plt.plot(df['Date'], df['Sentiment_MA'], label="5-Day MA", alpha=0.7)
        plt.title("News Sentiment Over Time")
        plt.xlabel("Date")
        plt.ylabel("Sentiment Score")
        plt.legend()
        plt.grid(True)
        plot_path = os.path.join(Config.PLOTS_DIR, "sentiment_plot.png")
        plt.savefig(plot_path, bbox_inches='tight')
        plt.close()
        return plot_path
    except Exception as e:
        logger.error(f"Sentiment plot failed: {e}")
        return None
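
# Worked example of the Buy/Hold/Sell mapping in predict() above, using the
# default threshold of 0.3 (probabilities illustrative):
#
#     proba = 0.85  ->  0.85 > 0.5 + 0.3    ->  "Buy",  Confidence = 0.70
#     proba = 0.40  ->  0.2 <= 0.40 <= 0.8  ->  "Hold", Confidence = 0.20
#     proba = 0.15  ->  0.15 < 0.5 - 0.3    ->  "Sell", Confidence = 0.70
#
# Confidence = |proba - 0.5| * 2, i.e. 0 at a coin flip and 1 at certainty.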
"sentiment_plot.png") plt.savefig(plot_path, bbox_inches='tight') plt.close() return plot_path except Exception as e: logger.error(f"Sentiment plot failed: {e}") return None def generate_wordcloud(texts: List[str]) -> Optional[str]: """Generate word cloud from news texts.""" try: if not texts or all(pd.isna(texts)): return None text = " ".join(str(t) for t in texts if t and not pd.isna(t)) wc = WordCloud(width=600, height=400, background_color='white').generate(text) plot_path = os.path.join(Config.PLOTS_DIR, "wordcloud.png") wc.to_file(plot_path) return plot_path except Exception as e: logger.error(f"Word cloud generation failed: {e}") return None # -------------------------- # UI Helper Functions # -------------------------- # -------------------------- # UI Functions Implementation # -------------------------- def predict_single(text_input: str, threshold: float, history_state: list) -> Tuple[pd.DataFrame, list, dict]: """Make a single prediction based on news input.""" try: # Create input DataFrame today = datetime.today().strftime('%Y-%m-%d') data = { 'Date': [today], 'Open': [100.0], 'High': [105.0], 'Low': [95.0], 'Close': [100.0], 'Volume': [1000000], 'News': [text_input] } df = pd.DataFrame(data) # Try to get real close price from historical data try: hist_data = load_data() if not hist_data.empty: df['Close'] = hist_data['Close'].iloc[-1] except Exception as e: logger.warning(f"Couldn't load historical data: {e}") # Get predictions preds = predict(df, threshold) if preds.empty: # Create error response error_row = { 'Date': [today], 'Close': [0], 'Prediction': ["Error"], 'Confidence': [0], 'Sentiment': [0] } error_df = pd.DataFrame(error_row) return error_df, history_state, None # Update history new_history = history_state.copy() if history_state else [] new_history.append(preds.iloc[0].to_dict()) # Keep only last MAX_HISTORY_ENTRIES if len(new_history) > Config.MAX_HISTORY_ENTRIES: new_history.pop(0) return preds, new_history, preds.iloc[0].to_dict() except Exception as e: logger.error(f"Single prediction failed: {e}") today = datetime.today().strftime('%Y-%m-%d') error_df = pd.DataFrame({ 'Date': [today], 'Close': [0], 'Prediction': ["Error"], 'Confidence': [0], 'Sentiment': [0] }) return error_df, history_state, None def batch_predict(csv_file: str, threshold: float) -> pd.DataFrame: """Process batch prediction from uploaded CSV file.""" try: if not csv_file: return pd.DataFrame() # Read and validate CSV df = pd.read_csv(csv_file) required_cols = {'Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'News'} missing_cols = required_cols - set(df.columns) if missing_cols: logger.error(f"Missing columns in CSV: {missing_cols}") return pd.DataFrame(columns=["Date", "Close", "Prediction", "Confidence", "Sentiment"]) # Make predictions preds = predict(df, threshold) return preds except Exception as e: logger.error(f"Batch prediction failed: {e}") return pd.DataFrame(columns=["Date", "Close", "Prediction", "Confidence", "Sentiment"]) def explain_current() -> Optional[str]: """Generate SHAP explanation plot using current data.""" try: df = load_data().tail(100) df = create_features(df, analyzer) return generate_shap_plot(df) except Exception as e: logger.error(f"SHAP explanation failed: {e}") return None def show_sentiment_plot() -> Optional[str]: """Generate sentiment visualization plot.""" try: df = load_data().tail(100) df = create_features(df, analyzer) return plot_sentiment(df) except Exception as e: logger.error(f"Sentiment plot failed: {e}") return None def 

def show_history_table(history_state: list) -> pd.DataFrame:
    """Display prediction history as a DataFrame."""
    try:
        if not history_state:
            return pd.DataFrame(columns=['Date', 'Close', 'Prediction', 'Confidence', 'Sentiment'])
        return pd.DataFrame(history_state)
    except Exception as e:
        logger.error(f"History display failed: {e}")
        return pd.DataFrame(columns=['Date', 'Close', 'Prediction', 'Confidence', 'Sentiment'])

def summarize_session(df: pd.DataFrame) -> str:
    """Generate summary statistics for the session."""
    if df.empty:
        return "No predictions to summarize"
    return f"""
✅ **Total Predictions:** {len(df)}
📊 **Avg Confidence:** {df['Confidence'].mean():.2f}
🎯 **Predictions:** {df['Prediction'].value_counts().to_dict()}
😊 **Avg Sentiment:** {df['Sentiment'].mean():.2f}
📅 **Date Range:** {df['Date'].min()} → {df['Date'].max()}"""

def generate_downloadable(df: pd.DataFrame) -> str:
    """Generate downloadable CSV file."""
    path = os.path.join(Config.CACHE_DIR, "predictions.csv")
    df.to_csv(path, index=False)
    return path

def prediction_distribution(df: pd.DataFrame) -> Optional[str]:
    """Generate prediction distribution plot."""
    try:
        if df.empty:
            return None
        counts = df['Prediction'].value_counts()
        # Color bars in the same order value_counts() lays them out
        colors = ['green' if x == 'Buy' else 'red' if x == 'Sell' else 'gray'
                  for x in counts.index]
        fig, ax = plt.subplots(figsize=(8, 4))
        counts.plot(kind='bar', ax=ax, color=colors)
        ax.set_title("Prediction Distribution")
        ax.set_ylabel("Count")
        plot_path = os.path.join(Config.PLOTS_DIR, "pred_dist.png")
        fig.savefig(plot_path, bbox_inches='tight')
        plt.close(fig)
        return plot_path
    except Exception as e:
        logger.error(f"Prediction distribution plot failed: {e}")
        return None

# --------------------------
# Initialize Components
# --------------------------
model = None
try:
    model = joblib.load(Config.MODEL_PATH)
    logger.info("Model loaded successfully")
except Exception as e:
    logger.warning(f"Model loading failed: {e}")
    try:
        model = train_model()
        logger.info("New model trained successfully")
    except Exception as e:
        logger.error(f"Model training fallback failed: {e}")
        raise

analyzer = SentimentAnalyzer()
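
# Note on shared state: `model` and `analyzer` are created once at import time
# and reused by every callback below, while per-session prediction history is
# kept in gr.State so concurrent browser sessions don't overwrite each other.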

# --------------------------
# Gradio UI
# --------------------------
with gr.Blocks(title="Stock Prediction", theme=gr.themes.Soft()) as demo:
    # State variables
    history_state = gr.State(value=[])
    last_prediction_state = gr.State(value=None)

    # Header
    gr.Markdown("""
    # 📈 Stock Price Prediction with News Sentiment Analysis
    *Predict next-day stock movements using technical indicators and news sentiment*
    """)

    with gr.Tab("🔍 Single Prediction"):
        with gr.Row():
            text_input = gr.Textbox(
                label="Latest News Headline/Text",
                placeholder="Enter news text about the company...",
                lines=3,
                max_lines=5
            )
        with gr.Row():
            threshold_slider = gr.Slider(
                minimum=0,
                maximum=0.5,
                value=Config.CONFIDENCE_THRESHOLD,
                step=0.01,
                label="Confidence Threshold",
                info="Higher values require more confidence for Buy/Sell decisions"
            )
        with gr.Row():
            predict_btn = gr.Button("Predict", variant="primary")
            last_pred_btn = gr.Button("Show Last Prediction")
            refresh_data_btn = gr.Button("Refresh Data")
        with gr.Row():
            with gr.Column():
                prediction_output = gr.Dataframe(
                    headers=["Date", "Close", "Prediction", "Confidence", "Sentiment"],
                    label="Prediction Results",
                    interactive=False
                )
            with gr.Column():
                last_prediction_output = gr.Dataframe(
                    headers=["Date", "Close", "Prediction", "Confidence", "Sentiment"],
                    label="Last Prediction",
                    interactive=False,
                )

    with gr.Tab("📂 Batch Prediction"):
        with gr.Row():
            batch_file = gr.File(
                label="Upload CSV File (CSV should contain: Date, Open, High, Low, Close, Volume, News)",
                file_types=[".csv"],
                type="filepath",
            )
        with gr.Row():
            batch_threshold = gr.Slider(
                label="Confidence Threshold",
                minimum=0,
                maximum=0.5,
                value=Config.CONFIDENCE_THRESHOLD,
                step=0.01
            )
        with gr.Row():
            batch_predict_btn = gr.Button("Run Batch Prediction", variant="primary")
        with gr.Row():
            batch_output = gr.Dataframe(
                headers=["Date", "Close", "Prediction", "Confidence", "Sentiment"],
                label="Batch Predictions",
                interactive=False
            )
        with gr.Row():
            batch_summary = gr.Markdown()
        with gr.Row():
            download_btn = gr.Button("Download Predictions")
            download_file = gr.File(label="Download", visible=False)

    with gr.Tab("📊 Explanations"):
        with gr.Row():
            explain_btn = gr.Button("Generate SHAP Explanation")
            sentiment_plot_btn = gr.Button("Show Sentiment Trend")
        with gr.Row():
            shap_image = gr.Image(label="SHAP Feature Importance", interactive=False)
            sentiment_image = gr.Image(label="Sentiment Over Time", interactive=False)

    with gr.Tab("📜 History"):
        with gr.Row():
            history_btn = gr.Button("Show Prediction History")
            clear_history_btn = gr.Button("Clear History", variant="stop")
        with gr.Row():
            history_output = gr.Dataframe(
                headers=["Date", "Close", "Prediction", "Confidence", "Sentiment"],
                label="Prediction History",
                interactive=False
            )
        with gr.Row():
            history_summary = gr.Markdown()
            dist_image = gr.Image(label="Prediction Distribution", interactive=False)

    with gr.Tab("ℹ️ About"):
        gr.Markdown("""
        ## Stock Prediction App

        This application predicts next-day stock price movements using:
        - **Technical Indicators**: RSI, MACD, Bollinger Bands, Moving Averages
        - **News Sentiment Analysis**: Using transformer models
        - **Machine Learning**: Gradient Boosted Decision Trees

        ### Features:
        - Single prediction with news input
        - Batch prediction from CSV files
        - Model explainability with SHAP values
        - Sentiment analysis visualization
        - Prediction history tracking

        ### Technical Details:
        - Built with Python, scikit-learn, and Gradio
        - Uses DistilBERT for sentiment analysis
        - Implements time-series validation
        """)

    # Event handlers
    predict_btn.click(
        fn=predict_single,
        inputs=[text_input, threshold_slider, history_state],
        outputs=[prediction_output, history_state, last_prediction_state],
        api_name="predict"
    )

    last_pred_btn.click(
        fn=lambda x: pd.DataFrame([x]) if x else pd.DataFrame(),
        inputs=[last_prediction_state],
        outputs=[last_prediction_output]
    )

    refresh_data_btn.click(
        fn=lambda: load_data(force_refresh=True),
        outputs=[]
    )

    batch_predict_btn.click(
        fn=batch_predict,
        inputs=[batch_file, batch_threshold],
        outputs=[batch_output]
    ).then(
        fn=summarize_session,
        inputs=[batch_output],
        outputs=[batch_summary]
    ).then(
        fn=prediction_distribution,
        inputs=[batch_output],
        outputs=[dist_image]
    )

    download_btn.click(
        fn=generate_downloadable,
        inputs=[batch_output],
        outputs=[download_file]
    ).then(
        fn=lambda: gr.File(visible=True),
        outputs=[download_file]
    )

    explain_btn.click(
        fn=explain_current,
        outputs=[shap_image]
    )

    sentiment_plot_btn.click(
        fn=show_sentiment_plot,
        outputs=[sentiment_image]
    )

    history_btn.click(
        fn=show_history_table,
        inputs=[history_state],  # show_history_table needs the state; was missing
        outputs=[history_output]
    ).then(
        fn=summarize_session,
        inputs=[history_output],
        outputs=[history_summary]
    ).then(
        fn=prediction_distribution,
        inputs=[history_output],
        outputs=[dist_image]
    )

    clear_history_btn.click(
        fn=lambda: [],
        outputs=[history_state]
    ).then(
        fn=lambda: pd.DataFrame(columns=["Date", "Close", "Prediction", "Confidence", "Sentiment"]),
        outputs=[history_output]
    ).then(
        fn=lambda: "History cleared",
        outputs=[history_summary]
    )
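
# Optional hardening (not part of the original flow): if long sentiment batches
# block the UI, Gradio's request queue can be enabled before launching, e.g.
#
#     demo.queue(max_size=16)
#
# which serializes expensive callbacks instead of running them all at once.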

# --------------------------
# Main Execution
# --------------------------
if __name__ == "__main__":
    try:
        demo.launch(
            server_name="0.0.0.0",
            server_port=7860,
            show_error=True,
            share=False
        )
    except Exception as e:
        logger.error(f"Application failed: {e}")
        raise