# GRADIO ML CLASSIFICATION APP - SIMPLIFIED VERSION # ================================================= import gradio as gr import pandas as pd import numpy as np import joblib import matplotlib.pyplot as plt import warnings import tempfile import os from typing import Tuple, List, Optional warnings.filterwarnings('ignore') # ============================================================================ # MODEL LOADING # ============================================================================ def load_models(): """Load all available ML models""" models = {} try: # Load pipeline try: models['pipeline'] = joblib.load('models/sentiment_analysis_pipeline.pkl') models['pipeline_available'] = True except: models['pipeline_available'] = False # Load vectorizer try: models['vectorizer'] = joblib.load('models/tfidf_vectorizer.pkl') models['vectorizer_available'] = True except: models['vectorizer_available'] = False # Load LR model try: models['logistic_regression'] = joblib.load('models/logistic_regression_model.pkl') models['lr_available'] = True except: models['lr_available'] = False # Load NB model try: models['naive_bayes'] = joblib.load('models/multinomial_nb_model.pkl') models['nb_available'] = True except: models['nb_available'] = False # Check if we have working models pipeline_ready = models['pipeline_available'] individual_ready = models['vectorizer_available'] and (models['lr_available'] or models['nb_available']) return models if (pipeline_ready or individual_ready) else None except Exception as e: print(f"Error loading models: {e}") return None # Load models globally MODELS = load_models() # ============================================================================ # CORE FUNCTIONS # ============================================================================ def get_available_models(): """Get available model names""" if MODELS is None: return ["No models available"] available = [] if MODELS.get('pipeline_available') or (MODELS.get('vectorizer_available') and MODELS.get('lr_available')): available.append("Logistic Regression") if MODELS.get('vectorizer_available') and MODELS.get('nb_available'): available.append("Multinomial Naive Bayes") return available if available else ["No models available"] def make_prediction(text, model_choice): """Make prediction using selected model""" if MODELS is None or not text.strip(): return None, None, "Please enter text and ensure models are loaded" try: if model_choice == "Logistic Regression": if MODELS.get('pipeline_available'): prediction = MODELS['pipeline'].predict([text])[0] probabilities = MODELS['pipeline'].predict_proba([text])[0] elif MODELS.get('vectorizer_available') and MODELS.get('lr_available'): X = MODELS['vectorizer'].transform([text]) prediction = MODELS['logistic_regression'].predict(X)[0] probabilities = MODELS['logistic_regression'].predict_proba(X)[0] else: return None, None, "Logistic Regression model not available" elif model_choice == "Multinomial Naive Bayes": if MODELS.get('vectorizer_available') and MODELS.get('nb_available'): X = MODELS['vectorizer'].transform([text]) prediction = MODELS['naive_bayes'].predict(X)[0] probabilities = MODELS['naive_bayes'].predict_proba(X)[0] else: return None, None, "Naive Bayes model not available" # Convert prediction class_names = ['Negative', 'Positive'] prediction_label = class_names[prediction] if isinstance(prediction, int) else str(prediction) return prediction_label, probabilities, "Success" except Exception as e: return None, None, f"Error: {str(e)}" def create_plot(probabilities): """Create probability plot""" fig, ax = plt.subplots(figsize=(8, 5)) classes = ['Negative', 'Positive'] colors = ['#ff6b6b', '#51cf66'] bars = ax.bar(classes, probabilities, color=colors, alpha=0.8) # Add labels for bar, prob in zip(bars, probabilities): height = bar.get_height() ax.text(bar.get_x() + bar.get_width()/2., height + 0.01, f'{prob:.1%}', ha='center', va='bottom', fontweight='bold') ax.set_ylim(0, 1.1) ax.set_ylabel('Probability') ax.set_title('Sentiment Prediction Probabilities') ax.grid(axis='y', alpha=0.3) plt.tight_layout() return fig # ============================================================================ # INTERFACE FUNCTIONS # ============================================================================ def predict_text(text, model_choice): """Single text prediction interface""" prediction, probabilities, status = make_prediction(text, model_choice) if prediction and probabilities is not None: confidence = max(probabilities) # Format results result = f"**Prediction:** {prediction} Sentiment\n" result += f"**Confidence:** {confidence:.1%}\n\n" result += f"**Detailed Probabilities:**\n" result += f"- Negative: {probabilities[0]:.1%}\n" result += f"- Positive: {probabilities[1]:.1%}\n\n" # Interpretation if confidence >= 0.8: result += "**High Confidence:** The model is very confident about this prediction." elif confidence >= 0.6: result += "**Medium Confidence:** The model is reasonably confident." else: result += "**Low Confidence:** The model is uncertain about this prediction." # Create plot plot = create_plot(probabilities) return result, plot else: return f"Error: {status}", None def process_file(file, model_choice, max_texts): """Process uploaded file""" if file is None: return "Please upload a file!", None if MODELS is None: return "No models loaded!", None try: # Read file if file.name.endswith('.txt'): with open(file.name, 'r', encoding='utf-8') as f: content = f.read() texts = [line.strip() for line in content.split('\n') if line.strip()] elif file.name.endswith('.csv'): df = pd.read_csv(file.name) texts = df.iloc[:, 0].astype(str).tolist() else: return "Unsupported file format! Use .txt or .csv", None if not texts: return "No text found in file!", None # Limit texts if len(texts) > max_texts: texts = texts[:max_texts] # Process texts results = [] for i, text in enumerate(texts): if text.strip(): prediction, probabilities, _ = make_prediction(text, model_choice) if prediction and probabilities is not None: results.append({ 'Index': i + 1, 'Text': text[:100] + "..." if len(text) > 100 else text, 'Prediction': prediction, 'Confidence': f"{max(probabilities):.1%}", 'Negative_Prob': f"{probabilities[0]:.1%}", 'Positive_Prob': f"{probabilities[1]:.1%}" }) if results: # Create summary positive_count = sum(1 for r in results if r['Prediction'] == 'Positive') negative_count = len(results) - positive_count avg_confidence = np.mean([float(r['Confidence'].strip('%')) for r in results]) summary = f"**Processing Complete!**\n\n" summary += f"**Summary Statistics:**\n" summary += f"- Total Processed: {len(results)}\n" summary += f"- Positive: {positive_count} ({positive_count/len(results):.1%})\n" summary += f"- Negative: {negative_count} ({negative_count/len(results):.1%})\n" summary += f"- Average Confidence: {avg_confidence:.1f}%\n" # Create CSV for download results_df = pd.DataFrame(results) # Save to temporary file with tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False) as f: results_df.to_csv(f, index=False) temp_file = f.name return summary, temp_file else: return "No valid texts could be processed!", None except Exception as e: return f"Error processing file: {str(e)}", None def compare_models_func(text): """Compare predictions from different models""" if MODELS is None: return "No models loaded!", None if not text.strip(): return "Please enter text to compare!", None available_models = get_available_models() if len(available_models) < 2: return "Need at least 2 models for comparison.", None results = [] all_probs = [] for model_name in available_models: prediction, probabilities, _ = make_prediction(text, model_name) if prediction and probabilities is not None: results.append({ 'Model': model_name, 'Prediction': prediction, 'Confidence': f"{max(probabilities):.1%}", 'Negative': f"{probabilities[0]:.1%}", 'Positive': f"{probabilities[1]:.1%}" }) all_probs.append(probabilities) if results: # Create comparison text comparison_text = "**Model Comparison Results:**\n\n" for result in results: comparison_text += f"**{result['Model']}:**\n" comparison_text += f"- Prediction: {result['Prediction']}\n" comparison_text += f"- Confidence: {result['Confidence']}\n" comparison_text += f"- Negative: {result['Negative']}, Positive: {result['Positive']}\n\n" # Agreement analysis predictions = [r['Prediction'] for r in results] if len(set(predictions)) == 1: comparison_text += f"**Agreement:** All models agree on {predictions[0]} sentiment!" else: comparison_text += "**Disagreement:** Models have different predictions." # Create comparison plot fig, axes = plt.subplots(1, len(results), figsize=(6*len(results), 5)) if len(results) == 1: axes = [axes] for i, (result, probs) in enumerate(zip(results, all_probs)): ax = axes[i] classes = ['Negative', 'Positive'] colors = ['#ff6b6b', '#51cf66'] bars = ax.bar(classes, probs, color=colors, alpha=0.8) # Add labels for bar, prob in zip(bars, probs): height = bar.get_height() ax.text(bar.get_x() + bar.get_width()/2., height + 0.02, f'{prob:.0%}', ha='center', va='bottom', fontweight='bold') ax.set_ylim(0, 1.1) ax.set_title(f"{result['Model']}\n{result['Prediction']}") ax.grid(axis='y', alpha=0.3) plt.tight_layout() return comparison_text, fig else: return "Failed to get predictions!", None def get_model_info(): """Get model information""" if MODELS is None: return """ **No models loaded!** Please ensure you have model files in the 'models/' directory: - sentiment_analysis_pipeline.pkl (complete pipeline), OR - tfidf_vectorizer.pkl + logistic_regression_model.pkl, OR - tfidf_vectorizer.pkl + multinomial_nb_model.pkl """ info = "**Models loaded successfully!**\n\n" info += "**Available Models:**\n\n" if MODELS.get('pipeline_available') or (MODELS.get('vectorizer_available') and MODELS.get('lr_available')): info += "**Logistic Regression**\n" info += "- Type: Linear Classification\n" info += "- Features: TF-IDF vectors\n" info += "- Strengths: Fast, interpretable\n\n" if MODELS.get('vectorizer_available') and MODELS.get('nb_available'): info += "**Multinomial Naive Bayes**\n" info += "- Type: Probabilistic Classification\n" info += "- Features: TF-IDF vectors\n" info += "- Strengths: Works well with small data\n\n" info += "**File Status:**\n" files = [ ("sentiment_analysis_pipeline.pkl", MODELS.get('pipeline_available', False)), ("tfidf_vectorizer.pkl", MODELS.get('vectorizer_available', False)), ("logistic_regression_model.pkl", MODELS.get('lr_available', False)), ("multinomial_nb_model.pkl", MODELS.get('nb_available', False)) ] for filename, status in files: status_icon = "✅" if status else "❌" info += f"- {filename}: {status_icon}\n" return info # ============================================================================ # GRADIO INTERFACE # ============================================================================ def create_app(): """Create Gradio interface""" with gr.Blocks(title="ML Text Classification") as app: # Header gr.HTML("""
Advanced Sentiment Analysis with Multiple ML Models
🤖 ML Text Classification App
Built with Gradio | By Maaz Amjad
Part of Introduction to Large Language Models course