Spaces:

maazamjad
/

sent_analysis

Sleeping

File size: 21,391 Bytes

9cbecf4
 
b4b21b0
2bef3bb
b4b21b0
 
 
 
2bef3bb
9cbecf4
 
 
 
2bef3bb
b4b21b0
 
9cbecf4
b4b21b0
 
 
2bef3bb
b4b21b0
 
 
9cbecf4
b4b21b0
 
 
9cbecf4
b4b21b0
 
9cbecf4
b4b21b0
 
 
9cbecf4
b4b21b0
 
9cbecf4
b4b21b0
 
 
9cbecf4
b4b21b0
 
9cbecf4
b4b21b0
 
 
9cbecf4
b4b21b0
 
9cbecf4
b4b21b0
 
 
9cbecf4
b4b21b0
 
2bef3bb
b4b21b0
 
2bef3bb
 
 
b4b21b0
9cbecf4
b4b21b0
 
9cbecf4
 
2bef3bb
9cbecf4
 
 
 
 
2bef3bb
9cbecf4
 
 
 
 
 
 
 
 
b4b21b0
 
2bef3bb
 
 
 
 
 
 
 
9cbecf4
 
b4b21b0
2bef3bb
 
 
 
 
9cbecf4
 
b4b21b0
9cbecf4
 
 
 
 
b4b21b0
 
9cbecf4
2bef3bb
9cbecf4
 
2bef3bb
 
9cbecf4
2bef3bb
 
9cbecf4
b4b21b0
9cbecf4
2bef3bb
 
 
9cbecf4
b4b21b0
2bef3bb
9cbecf4
 
2bef3bb
b4b21b0
2bef3bb
 
b4b21b0
 
2bef3bb
b4b21b0
 
9cbecf4
2bef3bb
 
 
 
 
 
 
9cbecf4
 
 
 
 
2bef3bb
9cbecf4
2bef3bb
9cbecf4
2bef3bb
9cbecf4
2bef3bb
9cbecf4
2bef3bb
 
9cbecf4
2bef3bb
9cbecf4
2bef3bb
9cbecf4
b4b21b0
9cbecf4
 
2bef3bb
9cbecf4
2bef3bb
 
9cbecf4
2bef3bb
 
9cbecf4
2bef3bb
9cbecf4
 
2bef3bb
 
9cbecf4
2bef3bb
 
9cbecf4
2bef3bb
 
9cbecf4
2bef3bb
9cbecf4
2bef3bb
 
 
9cbecf4
2bef3bb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9cbecf4
2bef3bb
 
 
 
9cbecf4
 
 
 
 
 
2bef3bb
9cbecf4
 
2bef3bb
9cbecf4
 
 
 
2bef3bb
9cbecf4
2bef3bb
9cbecf4
2bef3bb
 
9cbecf4
b4b21b0
9cbecf4
2bef3bb
 
9cbecf4
b4b21b0
9cbecf4
 
b4b21b0
2bef3bb
 
 
9cbecf4
2bef3bb
9cbecf4
 
b4b21b0
2bef3bb
 
 
 
9cbecf4
2bef3bb
 
 
9cbecf4
 
2bef3bb
9cbecf4
2bef3bb
9cbecf4
2bef3bb
9cbecf4
2bef3bb
9cbecf4
2bef3bb
 
 
9cbecf4
2bef3bb
 
9cbecf4
2bef3bb
9cbecf4
2bef3bb
9cbecf4
b4b21b0
9cbecf4
 
2bef3bb
9cbecf4
2bef3bb
 
9cbecf4
2bef3bb
b4b21b0
2bef3bb
 
b4b21b0
9cbecf4
b4b21b0
9cbecf4
 
2bef3bb
 
 
b4b21b0
2bef3bb
9cbecf4
2bef3bb
 
 
 
 
b4b21b0
9cbecf4
2bef3bb
9cbecf4
 
2bef3bb
 
9cbecf4
2bef3bb
9cbecf4
2bef3bb
 
 
 
 
9cbecf4
2bef3bb
9cbecf4
2bef3bb
 
9cbecf4
 
 
 
2bef3bb
 
9cbecf4
 
 
 
 
 
 
 
 
 
 
2bef3bb
 
9cbecf4
2bef3bb
9cbecf4
2bef3bb
9cbecf4
b4b21b0
 
2bef3bb
b4b21b0
 
9cbecf4
 
2bef3bb
9cbecf4
2bef3bb
 
 
9cbecf4
 
 
2bef3bb
 
 
9cbecf4
2bef3bb
b4b21b0
9cbecf4
2bef3bb
9cbecf4
2bef3bb
 
9cbecf4
2bef3bb
 
 
9cbecf4
2bef3bb
 
 
 
9cbecf4
 
2bef3bb
 
 
9cbecf4
 
 
2bef3bb
9cbecf4
2bef3bb
9cbecf4
 
 
2bef3bb
9cbecf4
 
 
2bef3bb
 
9cbecf4
 
2bef3bb
 
9cbecf4
2bef3bb
 
b4b21b0
 
2bef3bb
 
9cbecf4
2bef3bb
9cbecf4
2bef3bb
 
9cbecf4
2bef3bb
9cbecf4
2bef3bb
 
 
 
9cbecf4
 
2bef3bb
 
9cbecf4
2bef3bb
 
9cbecf4
2bef3bb
 
9cbecf4
2bef3bb
9cbecf4
2bef3bb
 
9cbecf4
2bef3bb
b4b21b0
9cbecf4
2bef3bb
 
9cbecf4
 
2bef3bb
9cbecf4
2bef3bb
9cbecf4
 
 
2bef3bb
b4b21b0
9cbecf4
2bef3bb
9cbecf4
2bef3bb
 
 
9cbecf4
2bef3bb
9cbecf4
 
2bef3bb
 
9cbecf4
2bef3bb
 
 
 
 
 
9cbecf4
2bef3bb
9cbecf4
2bef3bb
9cbecf4
2bef3bb
 
9cbecf4
2bef3bb
 
 
9cbecf4
2bef3bb
 
9cbecf4
2bef3bb
9cbecf4
 
 
2bef3bb
b4b21b0
9cbecf4
2bef3bb
9cbecf4
2bef3bb
 
 
 
9cbecf4
 
2bef3bb
 
 
9cbecf4
2bef3bb
9cbecf4
 
2bef3bb
b4b21b0
2bef3bb
 
b4b21b0
 
9cbecf4
b4b21b0
 
2bef3bb
9cbecf4
2bef3bb
 
 
9cbecf4
 
b4b21b0
9cbecf4
 
2bef3bb
9cbecf4
 
 
 
2bef3bb

# GRADIO ML CLASSIFICATION APP - SIMPLIFIED VERSION
# =================================================

import gradio as gr
import pandas as pd
import numpy as np
import joblib
import matplotlib.pyplot as plt
import warnings
import tempfile
import os
from typing import Tuple, List, Optional

# Suppress every warning issued through the `warnings` module for the whole
# process. NOTE(review): presumably meant to hide noisy library warnings; it
# also hides deprecation notices, so consider narrowing the filter.
warnings.filterwarnings('ignore')

# ============================================================================
# MODEL LOADING
# ============================================================================

def load_models():
    """Load every model artifact that can be read from ``models/``.

    Each artifact is loaded independently, so a missing or unreadable file
    only marks that artifact as unavailable instead of aborting the whole load.

    Returns:
        A dict holding the loaded objects under ``'pipeline'``,
        ``'vectorizer'``, ``'logistic_regression'`` and ``'naive_bayes'``
        (only for files that loaded) plus one boolean flag per artifact
        (``'pipeline_available'``, ``'vectorizer_available'``,
        ``'lr_available'``, ``'nb_available'``). ``None`` is returned when
        neither the pipeline nor a vectorizer together with at least one
        classifier could be loaded.
    """
    # (key for the loaded object, key for its availability flag, file path)
    artifacts = (
        ('pipeline', 'pipeline_available', 'models/sentiment_analysis_pipeline.pkl'),
        ('vectorizer', 'vectorizer_available', 'models/tfidf_vectorizer.pkl'),
        ('logistic_regression', 'lr_available', 'models/logistic_regression_model.pkl'),
        ('naive_bayes', 'nb_available', 'models/multinomial_nb_model.pkl'),
    )

    models = {}
    for key, flag, path in artifacts:
        try:
            # NOTE: joblib.load unpickles the file, which can execute arbitrary
            # code -- only ship model files that come from a trusted source.
            models[key] = joblib.load(path)
        except Exception as e:
            # Catch Exception instead of using a bare ``except`` so that
            # KeyboardInterrupt/SystemExit still propagate, and report why the
            # artifact is unavailable rather than failing silently.
            print(f"Could not load {path}: {e}")
            models[flag] = False
        else:
            models[flag] = True

    # The app is usable with the full pipeline alone, or with the vectorizer
    # combined with at least one standalone classifier.
    pipeline_ready = models['pipeline_available']
    individual_ready = models['vectorizer_available'] and (
        models['lr_available'] or models['nb_available']
    )

    return models if (pipeline_ready or individual_ready) else None

# Load the models once at import time. MODELS is either the dict returned by
# load_models() or None when no usable model combination could be loaded.
MODELS = load_models()

# ============================================================================
# CORE FUNCTIONS
# ============================================================================

def get_available_models():
    """Return the display names of the models that can serve predictions.

    "Logistic Regression" is listed when the pipeline is available or when
    both the vectorizer and the LR model are; "Multinomial Naive Bayes" needs
    the vectorizer and the NB model. When nothing qualifies (or no models were
    loaded at all) a single placeholder entry is returned instead.
    """
    placeholder = ["No models available"]
    if MODELS is None:
        return placeholder

    has_vectorizer = MODELS.get('vectorizer_available')
    lr_usable = MODELS.get('pipeline_available') or (has_vectorizer and MODELS.get('lr_available'))
    nb_usable = has_vectorizer and MODELS.get('nb_available')

    names = [
        label
        for label, usable in (
            ("Logistic Regression", lr_usable),
            ("Multinomial Naive Bayes", nb_usable),
        )
        if usable
    ]
    return names or placeholder

def make_prediction(text, model_choice):
    """Classify ``text`` with the model named by ``model_choice``.

    Args:
        text: Raw input text. ``None``, empty and whitespace-only values are
            rejected.
        model_choice: ``"Logistic Regression"`` or
            ``"Multinomial Naive Bayes"``.

    Returns:
        A ``(label, probabilities, status)`` tuple. On success ``label`` is
        the predicted class name, ``probabilities`` is the first row returned
        by the model's ``predict_proba`` and ``status`` is ``"Success"``. On
        any failure the first two items are ``None`` and ``status`` describes
        the problem.
    """
    if MODELS is None or not text or not text.strip():
        return None, None, "Please enter text and ensure models are loaded"

    try:
        if model_choice == "Logistic Regression":
            # Prefer the complete pipeline; fall back to vectorizer + model.
            if MODELS.get('pipeline_available'):
                prediction = MODELS['pipeline'].predict([text])[0]
                probabilities = MODELS['pipeline'].predict_proba([text])[0]
            elif MODELS.get('vectorizer_available') and MODELS.get('lr_available'):
                X = MODELS['vectorizer'].transform([text])
                prediction = MODELS['logistic_regression'].predict(X)[0]
                probabilities = MODELS['logistic_regression'].predict_proba(X)[0]
            else:
                return None, None, "Logistic Regression model not available"

        elif model_choice == "Multinomial Naive Bayes":
            if MODELS.get('vectorizer_available') and MODELS.get('nb_available'):
                X = MODELS['vectorizer'].transform([text])
                prediction = MODELS['naive_bayes'].predict(X)[0]
                probabilities = MODELS['naive_bayes'].predict_proba(X)[0]
            else:
                return None, None, "Naive Bayes model not available"

        else:
            # Without this branch an unrecognised choice (e.g. the
            # "No models available" placeholder) fell through and surfaced as
            # an opaque unbound-variable error.
            return None, None, f"Unknown model: {model_choice}"

        # Convert prediction
        class_names = ['Negative', 'Positive']
        # predict() on array-based models usually yields NumPy scalars, and
        # NumPy integer types are not guaranteed to subclass Python ``int``,
        # so np.integer must be accepted too -- otherwise the label comes out
        # as "0"/"1" instead of a class name.
        is_class_index = isinstance(prediction, (int, np.integer))
        if is_class_index and 0 <= prediction < len(class_names):
            prediction_label = class_names[int(prediction)]
        else:
            # Labels that are not a 0/1 index (e.g. string labels) pass through.
            prediction_label = str(prediction)

        return prediction_label, probabilities, "Success"

    except Exception as e:
        return None, None, f"Error: {str(e)}"

def create_plot(probabilities):
    """Build a bar chart of the two class probabilities.

    Args:
        probabilities: Sequence of two values, drawn in order as the
            'Negative' and 'Positive' bars.

    Returns:
        The matplotlib ``Figure`` containing the chart.
    """
    # Local import: the file only imports matplotlib.pyplot at module level.
    from matplotlib.figure import Figure

    # Build the figure without pyplot. Figures created through plt.subplots()
    # stay registered in pyplot's global state until they are closed, so a
    # long-running app would keep every chart it ever rendered in memory.
    fig = Figure(figsize=(8, 5))
    ax = fig.subplots()

    classes = ['Negative', 'Positive']
    colors = ['#ff6b6b', '#51cf66']

    bars = ax.bar(classes, probabilities, color=colors, alpha=0.8)

    # Print each probability as a percentage just above its bar
    for bar, prob in zip(bars, probabilities):
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width() / 2., height + 0.01,
                f'{prob:.1%}', ha='center', va='bottom', fontweight='bold')

    # Leave headroom above 1.0 so the label of a full-height bar stays visible
    ax.set_ylim(0, 1.1)
    ax.set_ylabel('Probability')
    ax.set_title('Sentiment Prediction Probabilities')
    ax.grid(axis='y', alpha=0.3)

    fig.tight_layout()
    return fig

# ============================================================================
# INTERFACE FUNCTIONS
# ============================================================================

def predict_text(text, model_choice):
    """Handle the single-prediction tab.

    Returns a ``(markdown, figure)`` pair: the formatted result text and the
    probability chart on success, or an error message and ``None`` when
    make_prediction did not produce a result.
    """
    label, probs, status = make_prediction(text, model_choice)

    # Bail out early when no usable prediction came back
    if not label or probs is None:
        return f"Error: {status}", None

    top = max(probs)

    # Pick the interpretation sentence from the first threshold that is met
    verdict = "**Low Confidence:** The model is uncertain about this prediction."
    for threshold, sentence in (
        (0.8, "**High Confidence:** The model is very confident about this prediction."),
        (0.6, "**Medium Confidence:** The model is reasonably confident."),
    ):
        if top >= threshold:
            verdict = sentence
            break

    report = "".join([
        f"**Prediction:** {label} Sentiment\n",
        f"**Confidence:** {top:.1%}\n\n",
        "**Detailed Probabilities:**\n",
        f"- Negative: {probs[0]:.1%}\n",
        f"- Positive: {probs[1]:.1%}\n\n",
        verdict,
    ])

    return report, create_plot(probs)

def process_file(file, model_choice, max_texts):
    """Run sentiment prediction over every text in an uploaded file.

    Args:
        file: The uploaded file, either an object exposing its path as
            ``.name`` or a plain path string. ``.txt`` files are read one text
            per non-empty line; for ``.csv`` files the first column is used.
        model_choice: Model name forwarded to make_prediction.
        max_texts: Maximum number of texts to process (converted with
            ``int()``, so a float coming from a slider is accepted).

    Returns:
        A ``(markdown, csv_path)`` pair. ``markdown`` is the summary or an
        error message; ``csv_path`` is the path of a temporary CSV holding the
        per-text results, or ``None`` when nothing was produced.
    """
    if file is None:
        return "Please upload a file!", None

    if MODELS is None:
        return "No models loaded!", None

    try:
        # Accept both file-like upload objects and plain path strings
        path = getattr(file, 'name', file)
        extension = os.path.splitext(path)[1].lower()

        # Read file
        if extension == '.txt':
            with open(path, 'r', encoding='utf-8') as f:
                texts = [line.strip() for line in f if line.strip()]
        elif extension == '.csv':
            df = pd.read_csv(path)
            # Drop empty cells first; astype(str) would otherwise turn them
            # into the literal text "nan" and classify that.
            texts = df.iloc[:, 0].dropna().astype(str).tolist()
        else:
            return "Unsupported file format! Use .txt or .csv", None

        if not texts:
            return "No text found in file!", None

        # Limit texts; a slice bound must be an int, never a float
        texts = texts[:max(int(max_texts), 0)]

        # Process texts
        results = []
        confidences = []
        for index, text in enumerate(texts, start=1):
            if not text.strip():
                continue

            prediction, probabilities, _ = make_prediction(text, model_choice)
            if not prediction or probabilities is None:
                continue

            confidence = float(max(probabilities))
            confidences.append(confidence)
            results.append({
                'Index': index,
                'Text': text[:100] + "..." if len(text) > 100 else text,
                'Prediction': prediction,
                'Confidence': f"{confidence:.1%}",
                'Negative_Prob': f"{probabilities[0]:.1%}",
                'Positive_Prob': f"{probabilities[1]:.1%}"
            })

        if not results:
            return "No valid texts could be processed!", None

        # Create summary
        total = len(results)
        positive_count = sum(1 for r in results if r['Prediction'] == 'Positive')
        negative_count = total - positive_count
        # Average the raw values instead of re-parsing the rounded strings
        avg_confidence = np.mean(confidences)

        summary = "".join([
            "**Processing Complete!**\n\n",
            "**Summary Statistics:**\n",
            f"- Total Processed: {total}\n",
            f"- Positive: {positive_count} ({positive_count/total:.1%})\n",
            f"- Negative: {negative_count} ({negative_count/total:.1%})\n",
            f"- Average Confidence: {avg_confidence:.1%}\n",
        ])

        # Save the results to a temporary CSV for download. The file is kept
        # (delete=False) because the caller hands its path to the UI.
        # newline='' lets the CSV writer control line endings and the explicit
        # encoding keeps non-ASCII text intact on every platform.
        with tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False,
                                         encoding='utf-8', newline='') as f:
            pd.DataFrame(results).to_csv(f, index=False)
            temp_file = f.name

        return summary, temp_file

    except Exception as e:
        return f"Error processing file: {str(e)}", None

def compare_models_func(text):
    """Run ``text`` through every available model and compare the outcomes.

    Args:
        text: Text to classify. ``None``, empty and whitespace-only values are
            rejected.

    Returns:
        A ``(markdown, figure)`` pair: the per-model results with an
        agreement note plus a figure holding one probability chart per model,
        or an error message and ``None`` when the comparison cannot be made.
    """
    # Local import: the file only imports matplotlib.pyplot at module level.
    from matplotlib.figure import Figure

    if MODELS is None:
        return "No models loaded!", None

    if not text or not text.strip():
        return "Please enter text to compare!", None

    available_models = get_available_models()

    if len(available_models) < 2:
        return "Need at least 2 models for comparison.", None

    results = []
    all_probs = []

    for model_name in available_models:
        prediction, probabilities, _ = make_prediction(text, model_name)

        # Models whose prediction failed are left out of the comparison
        if prediction and probabilities is not None:
            results.append({
                'Model': model_name,
                'Prediction': prediction,
                'Confidence': f"{max(probabilities):.1%}",
                'Negative': f"{probabilities[0]:.1%}",
                'Positive': f"{probabilities[1]:.1%}"
            })
            all_probs.append(probabilities)

    if not results:
        return "Failed to get predictions!", None

    # Create comparison text
    parts = ["**Model Comparison Results:**\n\n"]
    for result in results:
        parts.append(f"**{result['Model']}:**\n")
        parts.append(f"- Prediction: {result['Prediction']}\n")
        parts.append(f"- Confidence: {result['Confidence']}\n")
        parts.append(f"- Negative: {result['Negative']}, Positive: {result['Positive']}\n\n")

    # Agreement analysis
    predictions = [r['Prediction'] for r in results]
    if len(set(predictions)) == 1:
        parts.append(f"**Agreement:** All models agree on {predictions[0]} sentiment!")
    else:
        parts.append("**Disagreement:** Models have different predictions.")
    comparison_text = "".join(parts)

    # Create comparison plot without pyplot. Figures created through
    # plt.subplots() stay registered in pyplot's global state until closed,
    # so a long-running app would keep every rendered chart in memory.
    fig = Figure(figsize=(6 * len(results), 5))
    # squeeze=False always yields a 2-D array of axes, so a single surviving
    # result needs no special case.
    axes = fig.subplots(1, len(results), squeeze=False)[0]

    classes = ['Negative', 'Positive']
    colors = ['#ff6b6b', '#51cf66']

    for ax, result, probs in zip(axes, results, all_probs):
        bars = ax.bar(classes, probs, color=colors, alpha=0.8)

        # Print each probability as a percentage just above its bar
        for bar, prob in zip(bars, probs):
            height = bar.get_height()
            ax.text(bar.get_x() + bar.get_width() / 2., height + 0.02,
                    f'{prob:.0%}', ha='center', va='bottom', fontweight='bold')

        ax.set_ylim(0, 1.1)
        ax.set_title(f"{result['Model']}\n{result['Prediction']}")
        ax.grid(axis='y', alpha=0.3)

    fig.tight_layout()

    return comparison_text, fig

def get_model_info():
    """Return a Markdown report about the loaded models.

    When no models were loaded the report lists the files expected in the
    'models/' directory. Otherwise it describes each usable model and shows a
    per-file availability marker.
    """
    if MODELS is None:
        # Same text as an indented triple-quoted block, spelled out line by line
        return (
            "\n"
            "        **No models loaded!**\n"
            "        \n"
            "        Please ensure you have model files in the 'models/' directory:\n"
            "        - sentiment_analysis_pipeline.pkl (complete pipeline), OR\n"
            "        - tfidf_vectorizer.pkl + logistic_regression_model.pkl, OR\n"
            "        - tfidf_vectorizer.pkl + multinomial_nb_model.pkl\n"
            "        "
        )

    has_vectorizer = MODELS.get('vectorizer_available')
    sections = [
        "**Models loaded successfully!**\n\n",
        "**Available Models:**\n\n",
    ]

    # Logistic Regression is served by the pipeline or by vectorizer + model
    if MODELS.get('pipeline_available') or (has_vectorizer and MODELS.get('lr_available')):
        sections.extend([
            "**Logistic Regression**\n",
            "- Type: Linear Classification\n",
            "- Features: TF-IDF vectors\n",
            "- Strengths: Fast, interpretable\n\n",
        ])

    if has_vectorizer and MODELS.get('nb_available'):
        sections.extend([
            "**Multinomial Naive Bayes**\n",
            "- Type: Probabilistic Classification\n",
            "- Features: TF-IDF vectors\n",
            "- Strengths: Works well with small data\n\n",
        ])

    # One status line per expected model file
    sections.append("**File Status:**\n")
    flag_by_file = {
        "sentiment_analysis_pipeline.pkl": 'pipeline_available',
        "tfidf_vectorizer.pkl": 'vectorizer_available',
        "logistic_regression_model.pkl": 'lr_available',
        "multinomial_nb_model.pkl": 'nb_available',
    }
    for filename, flag in flag_by_file.items():
        marker = "✅" if MODELS.get(flag, False) else "❌"
        sections.append(f"- {filename}: {marker}\n")

    return "".join(sections)

# ============================================================================
# GRADIO INTERFACE
# ============================================================================

def create_app():
    """Build the Gradio Blocks interface and return it without launching it.

    The interface consists of a header, four tabs and a footer:

    - Single Prediction: text box + model dropdown wired to predict_text.
    - Batch Processing: file upload wired to process_file.
    - Model Comparison: text box wired to compare_models_func.
    - Model Info: Markdown filled by get_model_info, with a refresh button.

    Returns:
        The ``gr.Blocks`` object created by the ``with gr.Blocks(...)`` context.
    """
    
    with gr.Blocks(title="ML Text Classification") as app:
        
        # Header
        gr.HTML("""
        <div style="text-align: center; margin-bottom: 2rem;">
            <h1 style="color: #1f77b4; font-size: 2.5rem;">🤖 ML Text Classification App</h1>
            <p style="font-size: 1.2rem; color: #666;">Advanced Sentiment Analysis with Multiple ML Models</p>
        </div>
        """)
        
        # Main interface with tabs
        with gr.Tabs():
            
            # Single Prediction Tab
            with gr.Tab("🔮 Single Prediction"):
                gr.Markdown("### Enter text and select a model for sentiment analysis")
                
                with gr.Row():
                    # Left column: inputs
                    with gr.Column(scale=1):
                        # Choices are computed once, when the UI is built;
                        # the first available model is preselected.
                        model_dropdown = gr.Dropdown(
                            choices=get_available_models(),
                            value=get_available_models()[0] if get_available_models() else None,
                            label="Choose Model"
                        )
                        
                        text_input = gr.Textbox(
                            lines=5,
                            placeholder="Enter your text here...",
                            label="Text Input"
                        )
                        
                        # Buttons that fill the text box with a sample sentence
                        with gr.Row():
                            example1_btn = gr.Button("Good Example", size="sm")
                            example2_btn = gr.Button("Bad Example", size="sm")
                            example3_btn = gr.Button("Neutral Example", size="sm")
                        
                        predict_btn = gr.Button("🚀 Analyze Sentiment", variant="primary")
                    
                    # Right column: outputs
                    with gr.Column(scale=1):
                        prediction_output = gr.Markdown(label="Results")
                        prediction_plot = gr.Plot(label="Probability Chart")
                
                # Example handlers: each lambda takes no inputs and returns a
                # fixed sample string that is written into text_input.
                example1_btn.click(
                    lambda: "This product is absolutely amazing! Best purchase ever!",
                    outputs=text_input
                )
                example2_btn.click(
                    lambda: "Terrible quality, broke immediately. Waste of money!",
                    outputs=text_input
                )
                example3_btn.click(
                    lambda: "It's okay, nothing special but does the job.",
                    outputs=text_input
                )
                
                # Prediction handler: predict_text returns (markdown, figure)
                predict_btn.click(
                    predict_text,
                    inputs=[text_input, model_dropdown],
                    outputs=[prediction_output, prediction_plot]
                )
            
            # Batch Processing Tab
            with gr.Tab("📁 Batch Processing"):
                gr.Markdown("### Upload a file to process multiple texts")
                
                with gr.Row():
                    with gr.Column():
                        file_upload = gr.File(
                            label="Upload File (.txt or .csv)",
                            file_types=[".txt", ".csv"]
                        )
                        
                        batch_model = gr.Dropdown(
                            choices=get_available_models(),
                            value=get_available_models()[0] if get_available_models() else None,
                            label="Model for Batch Processing"
                        )
                        
                        # Passed to process_file as max_texts (10-500 in
                        # steps of 10, default 100)
                        max_texts = gr.Slider(
                            minimum=10,
                            maximum=500,
                            value=100,
                            step=10,
                            label="Max Texts to Process"
                        )
                        
                        process_btn = gr.Button("📊 Process File", variant="primary")
                    
                    with gr.Column():
                        batch_output = gr.Markdown(label="Processing Results")
                        download_file = gr.File(label="Download Results")
                
                # Process handler: process_file returns (summary, csv path)
                process_btn.click(
                    process_file,
                    inputs=[file_upload, batch_model, max_texts],
                    outputs=[batch_output, download_file]
                )
            
            # Model Comparison Tab
            with gr.Tab("⚖️ Model Comparison"):
                gr.Markdown("### Compare predictions from different models")
                
                with gr.Row():
                    with gr.Column():
                        comparison_input = gr.Textbox(
                            lines=4,
                            placeholder="Enter text to compare models...",
                            label="Text for Comparison"
                        )
                        
                        compare_btn = gr.Button("🔍 Compare Models", variant="primary")
                        
                        with gr.Row():
                            comp_ex1 = gr.Button("Mixed Example 1", size="sm")
                            comp_ex2 = gr.Button("Mixed Example 2", size="sm")
                    
                    with gr.Column():
                        comparison_output = gr.Markdown(label="Comparison Results")
                
                # Declared at tab level, after the row above
                comparison_plot = gr.Plot(label="Model Comparison")
                
                # Example handlers: fill comparison_input with a sample sentence
                comp_ex1.click(
                    lambda: "This movie was okay but not great.",
                    outputs=comparison_input
                )
                comp_ex2.click(
                    lambda: "The product is fine, I guess.",
                    outputs=comparison_input
                )
                
                # Compare handler: compare_models_func returns (markdown, figure)
                compare_btn.click(
                    compare_models_func,
                    inputs=comparison_input,
                    outputs=[comparison_output, comparison_plot]
                )
            
            # Model Info Tab
            with gr.Tab("📊 Model Info"):
                model_info = gr.Markdown(
                    value=get_model_info(),
                    label="Model Information"
                )
                
                # Re-runs get_model_info to rebuild the text; it does not
                # call load_models again.
                refresh_btn = gr.Button("🔄 Refresh", size="sm")
                refresh_btn.click(get_model_info, outputs=model_info)
        
        # Footer
        gr.HTML("""
        <div style="text-align: center; margin-top: 2rem; padding: 1rem; border-top: 1px solid #eee; color: #666;">
            <p><strong>🤖 ML Text Classification App</strong></p>
            <p>Built with Gradio | By Maaz Amjad</p>
            <p><small>Part of Introduction to Large Language Models course</small></p>
        </div>
        """)
    
    return app

# ============================================================================
# MAIN
# ============================================================================

if __name__ == "__main__":
    # Report the model status on the console before the server starts
    if MODELS is not None:
        loaded_names = get_available_models()
        print(f"✅ Successfully loaded {len(loaded_names)} model(s): {', '.join(loaded_names)}")
    else:
        print("⚠️ Warning: No models loaded!")

    # Build the UI, then serve it with the settings below
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "debug": True,
    }
    app = create_app()
    app.launch(**launch_options)