Spaces:

GLAkavya
/

Predict_Rating

Sleeping

App Files Community

GLAkavya committed on Oct 21, 2025

Commit

c6937ca

verified ·

1 Parent(s): 38ddd40

Update app.py

Browse files

Files changed (1) hide show

app.py +150 -633

app.py CHANGED Viewed

@@ -1,644 +1,161 @@
-"""
-🌟 PROFESSIONAL CUSTOMER FEEDBACK RATING PREDICTOR
-Complete Dashboard with CSV, URL, and Text Input
-"""
-import gradio as gr
-from transformers import AutoTokenizer, AutoModelForSequenceClassification
-import torch
 import pandas as pd
 import plotly.graph_objects as go
-import plotly.express as px
-from collections import Counter
-import requests
-from bs4 import BeautifulSoup
-import json
-# ============================================================================
-# MODEL LOADING
-# ============================================================================
-# 🔴 CHANGE THIS TO YOUR MODEL
-MODEL_NAME = "nlptown/bert-base-multilingual-uncased-sentiment"  # Demo model
-# MODEL_NAME = "YOUR_USERNAME/feedback-rating-predictor"  # Your trained model
-try:
-    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-    model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
-    print("✅ Model loaded successfully!")
-except Exception as e:
-    print(f"❌ Error: {e}")
-# ============================================================================
-# PREDICTION FUNCTIONS
-# ============================================================================
-def predict_single(text):
-    """Predict rating for single text"""
-    if not text or len(text.strip()) < 3:
-        return None
-    try:
-        inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
-        with torch.no_grad():
-            outputs = model(**inputs)
-            probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
-            pred_class = torch.argmax(probs).item()
-            confidence = probs[0][pred_class].item()
-        rating = pred_class + 1
-        all_probs = probs[0].cpu().numpy()
-        return {
-            'text': text,
-            'rating': rating,
-            'confidence': confidence,
-            'probabilities': all_probs,
-            'sentiment': 'Negative' if rating <= 2 else ('Neutral' if rating == 3 else 'Positive')
-        }
-    except Exception as e:
-        print(f"Error in prediction: {e}")
-        return None
-def predict_batch(texts):
-    """Predict ratings for multiple texts"""
-    results = []
-    for text in texts:
-        result = predict_single(text)
-        if result:
-            results.append(result)
-    return results
-# ============================================================================
-# DATA PROCESSING FUNCTIONS
-# ============================================================================
-def process_csv(file):
-    """Process uploaded CSV file"""
-    try:
-        df = pd.read_csv(file.name)
-        # Try to find text column
-        text_columns = ['feedback', 'review', 'text', 'comment', 'Review Text', 'Feedback']
-        text_col = None
-        for col in text_columns:
-            if col in df.columns:
-                text_col = col
-                break
-        if text_col is None:
-            text_col = df.columns[0]  # Use first column
-        texts = df[text_col].dropna().astype(str).tolist()[:100]  # Limit to 100 for performance
-        return texts
-    except Exception as e:
-        return [f"Error reading CSV: {str(e)}"]
-def fetch_from_url(url):
-    """Fetch reviews from URL (basic scraping)"""
     try:
-        headers = {'User-Agent': 'Mozilla/5.0'}
-        response = requests.get(url, headers=headers, timeout=10)
-        soup = BeautifulSoup(response.content, 'html.parser')
-        # Try to find review-like content
-        reviews = []
-        # Look for common review patterns
-        for tag in soup.find_all(['p', 'div', 'span'], class_=lambda x: x and any(
-            word in str(x).lower() for word in ['review', 'comment', 'feedback']
-        )):
-            text = tag.get_text().strip()
-            if len(text) > 20 and len(text) < 1000:
-                reviews.append(text)
-        if not reviews:
-            # Fallback: get all paragraph texts
-            reviews = [p.get_text().strip() for p in soup.find_all('p') if len(p.get_text().strip()) > 20]
-        return reviews[:50]  # Limit to 50
     except Exception as e:
-        return [f"Error fetching URL: {str(e)}"]
-# ============================================================================
-# VISUALIZATION FUNCTIONS
-# ============================================================================
-def create_rating_pie_chart(results):
-    """Create pie chart for rating distribution"""
-    ratings = [r['rating'] for r in results]
-    rating_counts = Counter(ratings)
-    fig = go.Figure(data=[go.Pie(
-        labels=[f"{i}⭐" for i in range(1, 6)],
-        values=[rating_counts.get(i, 0) for i in range(1, 6)],
-        hole=0.4,
-        marker=dict(colors=['#e74c3c', '#e67e22', '#f39c12', '#2ecc71', '#27ae60']),
-        textinfo='label+percent+value',
-        textfont=dict(size=14, color='white'),
-        hovertemplate='<b>%{label}</b><br>Count: %{value}<br>Percentage: %{percent}<extra></extra>'
-    )])
-    fig.update_layout(
-        title=dict(
-            text="Rating Distribution",
-            font=dict(size=20, color='#2c3e50', family='Arial Black')
-        ),
-        showlegend=True,
-        height=400,
-        paper_bgcolor='rgba(0,0,0,0)',
-        plot_bgcolor='rgba(0,0,0,0)',
-        font=dict(size=12)
-    )
-    return fig
-def create_sentiment_bar_chart(results):
-    """Create bar chart for sentiment distribution"""
-    sentiments = [r['sentiment'] for r in results]
-    sentiment_counts = Counter(sentiments)
-    colors = {
-        'Positive': '#27ae60',
-        'Neutral': '#f39c12',
-        'Negative': '#e74c3c'
-    }
-    fig = go.Figure(data=[go.Bar(
-        x=list(sentiment_counts.keys()),
-        y=list(sentiment_counts.values()),
-        marker=dict(
-            color=[colors.get(s, '#3498db') for s in sentiment_counts.keys()],
-            line=dict(color='white', width=2)
-        ),
-        text=list(sentiment_counts.values()),
-        textposition='outside',
-        textfont=dict(size=16, color='#2c3e50', family='Arial Black'),
-        hovertemplate='<b>%{x}</b><br>Count: %{y}<extra></extra>'
-    )])
-    fig.update_layout(
-        title=dict(
-            text="Sentiment Analysis",
-            font=dict(size=20, color='#2c3e50', family='Arial Black')
-        ),
-        xaxis=dict(title="Sentiment", titlefont=dict(size=14)),
-        yaxis=dict(title="Count", titlefont=dict(size=14)),
-        height=400,
-        paper_bgcolor='rgba(0,0,0,0)',
-        plot_bgcolor='rgba(240,240,240,0.5)',
-        font=dict(size=12),
-        showlegend=False
-    )
-    return fig
-def create_confidence_histogram(results):
-    """Create histogram for confidence scores"""
-    confidences = [r['confidence'] * 100 for r in results]
-    fig = go.Figure(data=[go.Histogram(
-        x=confidences,
-        nbinsx=20,
-        marker=dict(
-            color='#3498db',
-            line=dict(color='white', width=1)
-        ),
-        hovertemplate='Confidence: %{x:.1f}%<br>Count: %{y}<extra></extra>'
-    )])
-    fig.update_layout(
-        title=dict(
-            text="Confidence Distribution",
-            font=dict(size=20, color='#2c3e50', family='Arial Black')
-        ),
-        xaxis=dict(title="Confidence (%)", titlefont=dict(size=14)),
-        yaxis=dict(title="Frequency", titlefont=dict(size=14)),
-        height=400,
-        paper_bgcolor='rgba(0,0,0,0)',
-        plot_bgcolor='rgba(240,240,240,0.5)',
-        font=dict(size=12)
-    )
-    return fig
-def create_detailed_table(results):
-    """Create detailed results table"""
-    df = pd.DataFrame([{
-        'Feedback': r['text'][:100] + '...' if len(r['text']) > 100 else r['text'],
-        'Rating': '⭐' * r['rating'],
-        'Stars': r['rating'],
-        'Sentiment': r['sentiment'],
-        'Confidence': f"{r['confidence']*100:.1f}%"
-    } for r in results])
-    return df
-def create_summary_stats(results):
-    """Create summary statistics"""
-    if not results:
-        return "No data to analyze"
-    total = len(results)
-    avg_rating = sum(r['rating'] for r in results) / total
-    avg_confidence = sum(r['confidence'] for r in results) / total * 100
-    sentiments = Counter(r['sentiment'] for r in results)
-    ratings = Counter(r['rating'] for r in results)
-    summary = f"""
-    ## 📊 Analysis Summary
-    **Total Reviews Analyzed:** {total}
-    **Average Rating:** {'⭐' * int(avg_rating)} ({avg_rating:.2f}/5.0)
-    **Average Confidence:** {avg_confidence:.1f}%
-    **Sentiment Breakdown:**
-    - 😊 Positive: {sentiments.get('Positive', 0)} ({sentiments.get('Positive', 0)/total*100:.1f}%)
-    - 😐 Neutral: {sentiments.get('Neutral', 0)} ({sentiments.get('Neutral', 0)/total*100:.1f}%)
-    - 😞 Negative: {sentiments.get('Negative', 0)} ({sentiments.get('Negative', 0)/total*100:.1f}%)
-    **Rating Breakdown:**
-    - 5⭐: {ratings.get(5, 0)} reviews
-    - 4⭐: {ratings.get(4, 0)} reviews
-    - 3⭐: {ratings.get(3, 0)} reviews
-    - 2⭐: {ratings.get(2, 0)} reviews
-    - 1⭐: {ratings.get(1, 0)} reviews
-    """
-    return summary
-# ============================================================================
-# MAIN PROCESSING FUNCTION
-# ============================================================================
-def analyze_feedbacks(input_type, text_input, csv_file, url_input):
-    """Main function to analyze feedbacks from different sources"""
-    texts = []
-    # Get texts based on input type
-    if input_type == "✍️ Manual Text":
-        if text_input:
-            texts = [t.strip() for t in text_input.split('\n') if t.strip()]
-    elif input_type == "📄 CSV Upload":
-        if csv_file:
-            texts = process_csv(csv_file)
-    elif input_type == "🌐 URL Fetch":
-        if url_input:
-            texts = fetch_from_url(url_input)
-    if not texts:
-        return (
-            "⚠️ No valid input provided!",
-            None, None, None, None, None
-        )
-    # Predict ratings
-    results = predict_batch(texts)
-    if not results:
-        return (
-            "❌ Error in prediction!",
-            None, None, None, None, None
-        )
-    # Create visualizations
-    summary = create_summary_stats(results)
-    pie_chart = create_rating_pie_chart(results)
-    bar_chart = create_sentiment_bar_chart(results)
-    histogram = create_confidence_histogram(results)
-    table = create_detailed_table(results)
-    return summary, pie_chart, bar_chart, histogram, table
-# ============================================================================
-# SINGLE TEXT PREDICTION (CHAT MODE)
-# ============================================================================
-def predict_single_chat(text):
-    """Predict rating for single text (chat interface)"""
-    result = predict_single(text)
-    if not result:
-        return "⚠️ Please enter valid feedback", None, None
-    # Create star display
-    stars = "⭐" * result['rating'] + "☆" * (5 - result['rating'])
-    # Create emoji
-    emoji = "😞" if result['rating'] <= 2 else ("😐" if result['rating'] == 3 else "😊")
-    # Response text
-    response = f"""
-{emoji} **{result['sentiment']} Feedback**
-**Rating:** {stars} ({result['rating']}/5)
-**Confidence:** {result['confidence']*100:.1f}%
-**Analysis:**
-This feedback has been classified as **{result['sentiment'].lower()}** with high confidence.
-"""
-    # Probability chart
-    prob_dict = {
-        "1⭐": float(result['probabilities'][0]),
-        "2⭐⭐": float(result['probabilities'][1]),
-        "3⭐⭐⭐": float(result['probabilities'][2]),
-        "4⭐⭐⭐⭐": float(result['probabilities'][3]),
-        "5⭐⭐⭐⭐⭐": float(result['probabilities'][4])
-    }
-    # Create small viz
-    fig = go.Figure(data=[go.Bar(
-        x=list(prob_dict.keys()),
-        y=list(prob_dict.values()),
-        marker=dict(
-            color=['#e74c3c', '#e67e22', '#f39c12', '#2ecc71', '#27ae60'],
-            line=dict(color='white', width=2)
-        ),
-        text=[f"{v*100:.1f}%" for v in prob_dict.values()],
-        textposition='outside'
-    )])
-    fig.update_layout(
-        title="Rating Probabilities",
-        height=300,
-        showlegend=False,
-        paper_bgcolor='rgba(0,0,0,0)',
-        plot_bgcolor='rgba(240,240,240,0.5)'
-    )
-    return response, prob_dict, fig
-# ============================================================================
-# GRADIO INTERFACE
-# ============================================================================
-# Custom CSS
-custom_css = """
-<style>
-    .gradio-container {
-        font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif !important;
-    }
-    .main-header {
-        text-align: center;
-        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-        padding: 2rem;
-        border-radius: 10px;
-        color: white;
-        margin-bottom: 2rem;
-    }
-    .stat-box {
-        background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);
-        padding: 1rem;
-        border-radius: 10px;
-        text-align: center;
-        color: white;
-        margin: 0.5rem;
-    }
-</style>
-"""
-# Create interface
-with gr.Blocks(theme=gr.themes.Soft(), css=custom_css) as demo:
-    gr.HTML("""
-        <div class="main-header">
-            <h1 style="font-size: 3em; margin: 0;">🌟 Customer Feedback Rating Predictor</h1>
-            <p style="font-size: 1.2em; margin-top: 1rem;">AI-Powered Sentiment Analysis & Rating Dashboard</p>
-            <p style="font-size: 0.9em; opacity: 0.9;">Analyze feedback from text, CSV, or URLs with beautiful visualizations</p>
-        </div>
-    """)
-    with gr.Tabs() as tabs:
-        # ====================================================================
-        # TAB 1: CHAT MODE (Single Text)
-        # ====================================================================
-        with gr.Tab("💬 Quick Analysis", id=0):
-            gr.Markdown("### Enter any feedback to get instant rating prediction")
-            with gr.Row():
-                with gr.Column(scale=2):
-                    chat_input = gr.Textbox(
-                        label="✍️ Enter Feedback",
-                        placeholder="Type feedback here... e.g., 'What a good food! Loved it!' or 'Ewww, terrible service'",
-                        lines=5
-                    )
-                    chat_btn = gr.Button("🔮 Predict Rating", variant="primary", size="lg")
-                    gr.Examples(
-                        examples=[
-                            ["What a good food! Absolutely delicious! 😋"],
-                            ["Ewww, terrible taste. Never ordering again! 🤮"],
-                            ["It's okay, nothing special but edible"],
-                            ["Amazing service! Best restaurant in town! ⭐⭐⭐⭐⭐"],
-                            ["Disappointed with the quality. Expected better"],
-                            ["Pretty decent meal. Good value for money"],
-                        ],
-                        inputs=chat_input
-                    )
-                with gr.Column(scale=1):
-                    chat_output = gr.Markdown(label="📊 Result")
-                    chat_prob = gr.Label(label="Rating Probabilities", num_top_classes=5)
-            chat_viz = gr.Plot(label="Probability Distribution")
-            chat_btn.click(
-                predict_single_chat,
-                inputs=chat_input,
-                outputs=[chat_output, chat_prob, chat_viz]
-            )
-        # ====================================================================
-        # TAB 2: BATCH ANALYSIS (CSV/URL/Multiple Texts)
-        # ====================================================================
-        with gr.Tab("📊 Batch Analysis Dashboard", id=1):
-            gr.Markdown("### Analyze multiple feedbacks with comprehensive dashboard")
-            with gr.Row():
-                input_type = gr.Radio(
-                    choices=["✍️ Manual Text", "📄 CSV Upload", "🌐 URL Fetch"],
-                    value="✍️ Manual Text",
-                    label="Input Method"
-                )
-            with gr.Row():
-                with gr.Column():
-                    text_input = gr.Textbox(
-                        label="Enter Multiple Feedbacks (one per line)",
-                        placeholder="Enter feedbacks, one per line...\nExample:\nAmazing product!\nTerrible quality\nIt's okay",
-                        lines=10,
-                        visible=True
-                    )
-                    csv_input = gr.File(
-                        label="Upload CSV File (must have 'feedback' or 'review' column)",
-                        file_types=[".csv"],
-                        visible=False
-                    )
-                    url_input = gr.Textbox(
-                        label="Enter URL (e.g., review page URL)",
-                        placeholder="https://example.com/reviews",
-                        visible=False
-                    )
-                    analyze_btn = gr.Button("🚀 Analyze All", variant="primary", size="lg")
-            # Change visibility based on input type
-            def update_visibility(choice):
-                return (
-                    gr.update(visible=choice == "✍️ Manual Text"),
-                    gr.update(visible=choice == "📄 CSV Upload"),
-                    gr.update(visible=choice == "🌐 URL Fetch")
-                )
-            input_type.change(
-                update_visibility,
-                inputs=input_type,
-                outputs=[text_input, csv_input, url_input]
-            )
-            # Results section
-            gr.Markdown("---")
-            gr.Markdown("## 📈 Analysis Results")
-            summary_output = gr.Markdown(label="Summary")
-            with gr.Row():
-                with gr.Column():
-                    pie_output = gr.Plot(label="Rating Distribution")
-                with gr.Column():
-                    bar_output = gr.Plot(label="Sentiment Analysis")
-            hist_output = gr.Plot(label="Confidence Distribution")
-            table_output = gr.Dataframe(
-                label="Detailed Results",
-                headers=["Feedback", "Rating", "Stars", "Sentiment", "Confidence"],
-                interactive=False
-            )
-            # Download button
-            gr.Markdown("### 💾 Download Results")
-            download_btn = gr.Button("📥 Download as CSV")
-            analyze_btn.click(
-                analyze_feedbacks,
-                inputs=[input_type, text_input, csv_input, url_input],
-                outputs=[summary_output, pie_output, bar_output, hist_output, table_output]
-            )
-        # ====================================================================
-        # TAB 3: ABOUT & HELP
-        # ====================================================================
-        with gr.Tab("ℹ️ About & Help", id=2):
-            gr.Markdown("""
-            # 🌟 About This Application
-            ## What is this?
-            This is an AI-powered customer feedback rating predictor that automatically analyzes text feedback
-            and predicts satisfaction ratings from 1 to 5 stars.
-            ## 🎯 Features
-            ### 💬 Quick Analysis
-            - Instant single feedback analysis
-            - Real-time rating prediction
-            - Sentiment classification (Positive/Neutral/Negative)
-            - Confidence scores
-            ### 📊 Batch Analysis Dashboard
-            - Analyze multiple feedbacks at once
-            - Three input methods:
-              - **Manual Text**: Enter feedbacks line by line
-              - **CSV Upload**: Upload a CSV file with feedback data
-              - **URL Fetch**: Extract reviews from a webpage
-            ### 📈 Beautiful Visualizations
-            - **Rating Distribution**: Pie chart showing breakdown of 1-5 star ratings
-            - **Sentiment Analysis**: Bar chart of positive/neutral/negative sentiments
-            - **Confidence Distribution**: Histogram of prediction confidence levels
-            - **Detailed Table**: Comprehensive view of all analyzed feedbacks
-            ## 🔧 How to Use
-            ### Quick Analysis (Chat Mode)
-            1. Go to "Quick Analysis" tab
-            2. Type your feedback
-            3. Click "Predict Rating"
-            4. Get instant results!
-            ### Batch Analysis
-            1. Go to "Batch Analysis Dashboard" tab
-            2. Choose input method:
-               - **Manual**: Type feedbacks (one per line)
-               - **CSV**: Upload file (must have 'feedback' or 'review' column)
-               - **URL**: Paste review page URL
-            3. Click "Analyze All"
-            4. View comprehensive dashboard with graphs and statistics
-            ## 📊 Understanding Results
-            - **Rating**: 1-5 stars (1 = very negative, 5 = very positive)
-            - **Sentiment**: Overall emotion (Positive/Neutral/Negative)
-            - **Confidence**: How sure the model is (0-100%)
-            - **Probabilities**: Likelihood for each rating level
-            ## 💡 Tips for Best Results
-            1. **Clear Feedback**: More detailed feedback = better predictions
-            2. **Language**: Works best with English text
-            3. **Length**: 10-500 characters ideal
-            4. **CSV Format**: Use column names like 'feedback', 'review', or 'text'
-            5. **Batch Size**: For performance, analyze up to 100 feedbacks at once
-            ## 🎨 Use Cases
-            - **E-commerce**: Analyze product reviews
-            - **Restaurants**: Monitor food and service feedback
-            - **Hotels**: Assess guest satisfaction
-            - **Customer Service**: Evaluate support interactions
-            - **Market Research**: Understand customer sentiment
-            ## 🤖 Model Details
-            - **Architecture**: BERT-based transformer model
-            - **Training**: Fine-tuned on customer review datasets
-            - **Accuracy**: 75-85% (depending on feedback quality)
-            - **Speed**: ~100-200ms per prediction
-            ## 📧 Support
-            Found a bug or have suggestions? Open an issue on GitHub or contact support.
-            ---
-            **Made with ❤️ using Transformers & Gradio**
-            """)
-    # Footer
-    gr.HTML("""
-        <div style="text-align: center; padding: 2rem; color: #7f8c8d;">
-            <p style="font-size: 0.9em;">
-                Powered by Hugging Face Transformers 🤗 | Built with Gradio ⚡ | Deployed on HF Spaces 🚀
-            </p>
-        </div>
-    """)
-# ============================================================================
-# LAUNCH
-# ============================================================================
 if __name__ == "__main__":
-    demo.launch(share=False, show_error=True)

+# app.py (Hugging Face Space friendly)
+import os, warnings
+warnings.filterwarnings("ignore")
+import numpy as np
 import pandas as pd
+import yfinance as yf
+from datetime import datetime, timedelta
+import joblib
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import roc_auc_score
 import plotly.graph_objects as go
+import gradio as gr
+# ----- Utilities -----
+def download_data(ticker, period='6y', interval='1d'):
+    """Download price history for ``ticker`` with yfinance.
+
+    Args:
+        ticker: Symbol understood by Yahoo Finance, e.g. ``"RELIANCE.NS"``.
+        period: Look-back window forwarded to ``yf.download``.
+        interval: Bar size forwarded to ``yf.download``.
+
+    Returns:
+        DataFrame with a datetime index, flat column names and every row
+        that contains a NaN removed.
+
+    Raises:
+        ValueError: If nothing was downloaded or no complete row remains.
+    """
+    # The feature code reads 'Adj Close'. Newer yfinance releases default to
+    # auto_adjust=True, which drops that column, so request it explicitly.
+    df = yf.download(ticker, period=period, interval=interval, progress=False,
+                     auto_adjust=False)
+    if df is None or df.empty:
+        raise ValueError(f"No data for {ticker}")
+    # Newer yfinance releases may return (field, ticker) MultiIndex columns
+    # even for one symbol; keep only the field level so df['Adj Close'] is a
+    # Series again.
+    if isinstance(df.columns, pd.MultiIndex):
+        df.columns = df.columns.get_level_values(0)
+    df.index = pd.to_datetime(df.index)
+    df = df.dropna()
+    if df.empty:
+        # Every row had a gap; fail here rather than deep inside the model code.
+        raise ValueError(f"No data for {ticker}")
+    return df
+def add_features(df):
+    """Return a copy of ``df`` extended with the model's technical features.
+
+    Reads the ``'Adj Close'`` column and adds ``AdjClose``, ``ret``,
+    ``logret``, ``ma5``, ``ma20``, ``vol20``, ``rsi14`` and ``mom5``. Rows
+    with any NaN (the rolling warm-up period) are dropped from the result;
+    the input frame is left unmodified.
+    """
+    out = df.copy()
+    price = out['Adj Close']
+    out['AdjClose'] = price
+    # Simple and log one-day returns.
+    out['ret'] = price.pct_change()
+    out['logret'] = np.log(price).diff()
+    # Short and long moving averages of the price.
+    for window in (5, 20):
+        out[f'ma{window}'] = price.rolling(window).mean()
+    # 20-day volatility of log returns.
+    out['vol20'] = out['logret'].rolling(20).std()
+    # 14-day RSI built from simple rolling means of gains and losses.
+    change = price.diff()
+    avg_gain = change.clip(lower=0).rolling(14).mean()
+    avg_loss = (-1 * change.clip(upper=0)).rolling(14).mean()
+    # The small epsilon avoids division by zero when there were no losses.
+    rel_strength = avg_gain / (avg_loss + 1e-9)
+    out['rsi14'] = 100 - (100 / (1 + rel_strength))
+    # Five-day momentum.
+    out['mom5'] = price.pct_change(5)
+    return out.dropna()
+def make_label(df, threshold_pct=-0.10, horizon=30):
+    """Label each row 1 if the price later falls by at least ``threshold_pct``.
+
+    For row ``i`` the lowest ``AdjClose`` among the next ``horizon`` rows is
+    compared with the current ``AdjClose``; the label is 1 when the relative
+    change is ``<= threshold_pct`` (a negative fraction, e.g. ``-0.10``).
+    Rows near the end are judged on whatever future rows exist, and the last
+    row, which has none, is labelled 0.
+
+    Args:
+        df: Frame containing an ``'AdjClose'`` column.
+        threshold_pct: Drawdown threshold as a negative fraction.
+        horizon: Number of future rows to inspect; coerced to ``int``.
+
+    Returns:
+        A copy of ``df`` with an integer ``'label'`` column; the input frame
+        is not modified.
+    """
+    # UI number fields can deliver floats; slice bounds must be integers.
+    horizon = max(int(horizon), 0)
+    closes = df['AdjClose'].to_numpy(dtype=float)
+    label = np.zeros(len(closes), dtype=int)
+    for i, price in enumerate(closes):
+        future = closes[i + 1:i + 1 + horizon]
+        if future.size == 0:
+            continue  # nothing ahead to compare with: keep label 0
+        drop = (future.min() - price) / price
+        if drop <= threshold_pct:
+            label[i] = 1
+    # Work on a copy so the caller's frame is left untouched, as add_features does.
+    out = df.copy()
+    out['label'] = label
+    return out
+# ----- Training (light) -----
+def train_if_missing(ticker, threshold_pct=-0.10, horizon=30):
+    """Train and cache a random-forest drawdown classifier unless one exists.
+
+    The cache file name includes the threshold and horizon, because a model
+    is only meaningful for the label definition it was trained on. Files
+    written under the older ``models/<ticker>_rf.pkl`` naming are ignored.
+
+    Args:
+        ticker: Symbol to download and train on.
+        threshold_pct: Drawdown threshold (negative fraction) for the label.
+        horizon: Look-ahead window in rows for the label; coerced to ``int``.
+
+    Returns:
+        Path of the joblib file holding ``{'model': ..., 'features': ...}``.
+
+    Raises:
+        ValueError: If there is no data to train on.
+    """
+    import re  # local import: only needed to build a safe cache file name
+    horizon = int(horizon)
+    # The ticker is free text from the UI; replace anything that is not a
+    # plain symbol character so it cannot point outside models/.
+    safe_ticker = re.sub(r'[^A-Za-z0-9._^=-]', '_', str(ticker))
+    model_path = f"models/{safe_ticker}_t{threshold_pct * 100:g}_h{horizon}_rf.pkl"
+    os.makedirs("models", exist_ok=True)
+    if os.path.exists(model_path):
+        return model_path
+    df = download_data(ticker, period='6y')
+    df = add_features(df)
+    df = make_label(df, threshold_pct=threshold_pct, horizon=horizon)
+    features = ['ret', 'logret', 'ma5', 'ma20', 'vol20', 'rsi14', 'mom5']
+    df = df.dropna(subset=features + ['label'])
+    X = df[features].values
+    y = df['label'].values
+    # Time-ordered split (no shuffle): fit on the earliest 80% of rows. The
+    # remaining rows are not evaluated here.
+    split = int(len(X) * 0.8)
+    if split == 0:
+        raise ValueError(f"Not enough history to train a model for {ticker}")
+    # LIGHTER model for Spaces: fewer trees
+    clf = RandomForestClassifier(n_estimators=50, random_state=42, n_jobs=-1,
+                                 class_weight='balanced')
+    clf.fit(X[:split], y[:split])
+    joblib.dump({'model': clf, 'features': features}, model_path)
+    return model_path
+# ----- Predict probability -----
+def predict_prob(ticker, threshold_pct_pos, horizon):
+    """Estimate the probability of a drawdown from the latest feature row.
+
+    Args:
+        ticker: Symbol; surrounding whitespace is stripped and it is upper-cased.
+        threshold_pct_pos: Drop size in percent (sign is ignored), e.g. ``10``.
+        horizon: Look-ahead window in rows; coerced to ``int``.
+
+    Returns:
+        ``(prob, df)`` where ``prob`` is the model's probability for class 1
+        on the most recent row and ``df`` is the feature frame used.
+
+    Raises:
+        ValueError: If the ticker is blank or no feature rows are available.
+    """
+    ticker = ticker.strip().upper()
+    if not ticker:
+        raise ValueError("Ticker is required")
+    horizon = int(horizon)
+    threshold = -abs(threshold_pct_pos) / 100.0
+    model_path = train_if_missing(ticker, threshold_pct=threshold, horizon=horizon)
+    # NOTE: joblib files are pickles; only files written by this app are loaded.
+    saved = joblib.load(model_path)
+    clf = saved['model']
+    features = saved['features']
+    df = download_data(ticker, period='6y')
+    df = add_features(df)
+    if df.empty:
+        raise ValueError(f"Not enough history to build features for {ticker}")
+    X_latest = df[features].iloc[-1].values.reshape(1, -1)
+    proba = clf.predict_proba(X_latest)[0]
+    # A model fitted on a single class exposes only one probability column,
+    # so look the positive class up instead of assuming it is column 1.
+    classes = list(clf.classes_)
+    prob = float(proba[classes.index(1)]) if 1 in classes else 0.0
+    return prob, df
+# ----- GBM Monte Carlo (smaller sims default) -----
+def simulate_gbm(S0, mu, sigma, days=252, n_sims=500, seed=0):
+    """Simulate geometric Brownian motion price paths.
+
+    Args:
+        S0: Starting price shared by every path.
+        mu: Annualised drift.
+        sigma: Annualised volatility.
+        days: Number of daily steps (one step is 1/252 of a year).
+        n_sims: Number of independent paths.
+        seed: Seed for the random draws; equal seeds give equal paths.
+
+    Returns:
+        Array of shape ``(days + 1, n_sims)``; row 0 is ``S0``.
+    """
+    # A private generator keeps the call reproducible without reseeding
+    # NumPy's global random state for the rest of the process.
+    rng = np.random.RandomState(seed)
+    dt = 1 / 252
+    # Draw all shocks at once, one row per day.
+    z = rng.normal(size=(days, n_sims))
+    log_steps = (mu - 0.5 * sigma ** 2) * dt + sigma * np.sqrt(dt) * z
+    paths = np.empty((days + 1, n_sims))
+    paths[0] = S0
+    # Cumulative log-returns replace the per-day Python loop.
+    paths[1:] = S0 * np.exp(np.cumsum(log_steps, axis=0))
+    return paths
+def build_candles_from_paths(paths, start_date):
+    """Summarise simulated paths as one OHLC candle per step.
+
+    Open is the previous step's cross-path median and Close the current
+    step's median. High and Low extend the candle body to the current
+    step's 90th and 10th percentiles, so High is never below and Low never
+    above either end of the body.
+
+    Args:
+        paths: Array of shape ``(steps + 1, n_sims)``.
+        start_date: First date of the business-day index.
+
+    Returns:
+        DataFrame with ``Open``, ``High``, ``Low``, ``Close`` columns and a
+        business-day index of ``steps`` rows.
+    """
+    # One pass over the data for all three percentiles.
+    q10, median, q90 = np.percentile(paths, [10, 50, 90], axis=1)
+    o = median[:-1]
+    c = median[1:]
+    # Include Open in the bounds: with a sharp move the previous median can
+    # lie outside the current 10-90% band.
+    h = np.maximum(np.maximum(o, c), q90[1:])
+    l = np.minimum(np.minimum(o, c), q10[1:])
+    dates = pd.bdate_range(start=start_date, periods=len(c))
+    return pd.DataFrame({'Open': o, 'High': h, 'Low': l, 'Close': c}, index=dates)
+def plot_candles(df):
+    """Build a Plotly candlestick figure from an OHLC DataFrame.
+
+    ``df`` must provide ``Open``, ``High``, ``Low`` and ``Close`` columns;
+    its index is used for the x axis. The range slider is hidden and the
+    figure height is set to 600.
+    """
+    candles = go.Candlestick(
+        x=df.index,
+        open=df['Open'],
+        high=df['High'],
+        low=df['Low'],
+        close=df['Close'],
+    )
+    figure = go.Figure(data=[candles])
+    figure.update_layout(xaxis_rangeslider_visible=False, height=600)
+    return figure
+# ----- Main function used by Gradio -----
+def run(ticker="RELIANCE.NS", threshold=10.0, horizon=30, sims=500):
+    """Gradio entry point: predict drawdown risk and simulate one year of candles.
+
+    Args:
+        ticker: Symbol to analyse.
+        threshold: Drop size in percent used for the risk label.
+        horizon: Look-ahead window in days for the risk label.
+        sims: Requested Monte Carlo path count; clamped to 100-2000, and a
+            blank field falls back to 500.
+
+    Returns:
+        ``(figure, summary)`` on success, or ``(None, "Error: ...")`` when any
+        step fails, so the UI shows a message instead of a traceback.
+    """
     try:
+        prob, df = predict_prob(ticker, threshold, horizon)
+        # VaR/CVaR simple (historical daily)
+        returns = df['Adj Close'].pct_change().dropna().values
+        if returns.size == 0:
+            raise ValueError("not enough price history to estimate risk")
+        sorted_ret = np.sort(returns)
+        # Position of the 5% worst return, clamped to the first element.
+        idx = max(0, int(0.05*len(sorted_ret))-1)
+        var = -sorted_ret[idx]
+        cvar = -sorted_ret[:idx+1].mean()
+        # GBM simulate, using drift and volatility annualised from daily log returns
+        logrets = np.log(df['Adj Close']).diff().dropna()
+        mu = float(logrets.mean()*252)
+        sigma = float(logrets.std()*np.sqrt(252))
+        S0 = float(df['Adj Close'].iloc[-1])
+        sims = 500 if sims is None else int(max(100, min(2000, sims)))
+        model_paths = simulate_gbm(S0, mu, sigma, days=252, n_sims=sims, seed=1)
+        start_date = (df.index[-1] + pd.Timedelta(days=1)).normalize()
+        df_candles = build_candles_from_paths(model_paths, start_date)
+        fig = plot_candles(df_candles)
     except Exception as e:
+        # UI boundary: report every failure as text rather than crashing the app.
+        return None, f"Error: {e}"
+    summary = (f"Ticker: {ticker}\nThreshold: {threshold}% drop within {horizon} days\n"
+               f"Predicted prob: {prob*100:.2f}%\nHistorical VaR(5%): {var:.4f}, CVaR: {cvar:.4f}\n"
+               f"Annual mu: {mu:.4f}, sigma: {sigma:.4f}")
+    return fig, summary
+# ----- Gradio UI -----
+title = "Stock Risk Predictor + 1Y Candle Simulator (Hugging Face Space)"
+desc = "Enter ticker (eg RELIANCE.NS). Threshold (percent), horizon days, sims (keep small for hosted Space)."
+# Input widgets, in the positional order of run(ticker, threshold, horizon, sims).
+_input_widgets = [
+    gr.Textbox(label="Ticker", value="RELIANCE.NS"),
+    gr.Number(label="Threshold percent (drop)", value=10.0),
+    gr.Number(label="Horizon days", value=30, precision=0),
+    gr.Number(label="Monte Carlo sims (100-2000)", value=500, precision=0),
+]
+# Output widgets, matching run's (figure, summary) return value.
+_output_widgets = [
+    gr.Plot(label="Simulated 1Y Candles"),
+    gr.Textbox(label="Summary"),
+]
+# Example rows use the same column order as the inputs.
+_example_rows = [["RELIANCE.NS",10,30,500], ["AAPL",15,30,500]]
+iface = gr.Interface(
+    fn=run,
+    inputs=_input_widgets,
+    outputs=_output_widgets,
+    title=title,
+    description=desc,
+    allow_flagging="never",
+    examples=_example_rows,
+)
 if __name__ == "__main__":
+    iface.launch()