sent_analysis / app.py
maazamjad's picture
Update app.py
9cbecf4 verified
# GRADIO ML CLASSIFICATION APP - SIMPLIFIED VERSION
# =================================================
import gradio as gr
import pandas as pd
import numpy as np
import joblib
import matplotlib.pyplot as plt
import warnings
import tempfile
import os
from typing import Tuple, List, Optional
# Ignore every warning issued through the ``warnings`` module from here on
# (applies process-wide, including warnings raised by imported libraries).
warnings.filterwarnings('ignore')
# ============================================================================
# MODEL LOADING
# ============================================================================
def load_models():
    """Load every model artifact that can be found under ``models/``.

    Each artifact is loaded independently, so one missing or broken file does
    not prevent the others from being used.  For every artifact the returned
    dict always holds a boolean availability flag and, only when loading
    succeeded, the loaded object itself:

    - ``pipeline`` / ``pipeline_available``
    - ``vectorizer`` / ``vectorizer_available``
    - ``logistic_regression`` / ``lr_available``
    - ``naive_bayes`` / ``nb_available``

    Returns:
        The dict described above when either the complete pipeline, or the
        vectorizer together with at least one classifier, could be loaded;
        ``None`` otherwise.
    """
    # NOTE(security): joblib.load unpickles the file, and unpickling can run
    # arbitrary code.  Only ship model files that come from a trusted source.
    artifacts = (
        ('pipeline', 'pipeline_available', 'models/sentiment_analysis_pipeline.pkl'),
        ('vectorizer', 'vectorizer_available', 'models/tfidf_vectorizer.pkl'),
        ('logistic_regression', 'lr_available', 'models/logistic_regression_model.pkl'),
        ('naive_bayes', 'nb_available', 'models/multinomial_nb_model.pkl'),
    )

    models = {}
    for key, flag, path in artifacts:
        try:
            models[key] = joblib.load(path)
            models[flag] = True
        except FileNotFoundError:
            # A missing file is a normal configuration (e.g. pipeline only),
            # so it is recorded without any noise.
            models[flag] = False
        except Exception as e:
            # Anything else (corrupt pickle, version mismatch, ...) is worth
            # reporting, but must not stop the remaining artifacts loading.
            # ``Exception`` (not a bare except) lets Ctrl-C / SystemExit through.
            print(f"Could not load {path}: {e}")
            models[flag] = False

    # Check if we have a working combination of artifacts
    pipeline_ready = models['pipeline_available']
    individual_ready = models['vectorizer_available'] and (
        models['lr_available'] or models['nb_available']
    )
    return models if (pipeline_ready or individual_ready) else None
# Load the models once, at import time, and share them module-wide.
# MODELS is None when load_models() found no usable combination of files.
MODELS = load_models()
# ============================================================================
# CORE FUNCTIONS
# ============================================================================
def get_available_models():
    """Return the display names of the models that can currently be used.

    "Logistic Regression" is offered when the full pipeline is available or
    when both the vectorizer and the LR classifier are; "Multinomial Naive
    Bayes" needs the vectorizer and the NB classifier.  When nothing is
    usable the single placeholder entry "No models available" is returned.
    """
    placeholder = ["No models available"]
    if MODELS is None:
        return placeholder

    has_vectorizer = MODELS.get('vectorizer_available')
    candidates = (
        (
            "Logistic Regression",
            MODELS.get('pipeline_available') or (has_vectorizer and MODELS.get('lr_available')),
        ),
        (
            "Multinomial Naive Bayes",
            has_vectorizer and MODELS.get('nb_available'),
        ),
    )
    names = [name for name, usable in candidates if usable]
    return names or placeholder
def make_prediction(text, model_choice):
    """Classify ``text`` with the model named by ``model_choice``.

    Args:
        text: The text to classify.  ``None``, empty and whitespace-only
            values are rejected.
        model_choice: "Logistic Regression" or "Multinomial Naive Bayes".

    Returns:
        A ``(label, probabilities, status)`` tuple.  On success ``label`` is
        a string, ``probabilities`` is the first row returned by the model's
        ``predict_proba`` and ``status`` is "Success".  On failure the first
        two items are ``None`` and ``status`` describes the problem.
    """
    # The text is validated before MODELS is consulted; both failures share
    # the same message, so the order does not change what callers see.
    if text is None or not text.strip() or MODELS is None:
        return None, None, "Please enter text and ensure models are loaded"

    try:
        if model_choice == "Logistic Regression":
            if MODELS.get('pipeline_available'):
                # The pipeline takes raw text and vectorizes internally.
                model, features = MODELS['pipeline'], [text]
            elif MODELS.get('vectorizer_available') and MODELS.get('lr_available'):
                model = MODELS['logistic_regression']
                features = MODELS['vectorizer'].transform([text])
            else:
                return None, None, "Logistic Regression model not available"
        elif model_choice == "Multinomial Naive Bayes":
            if MODELS.get('vectorizer_available') and MODELS.get('nb_available'):
                model = MODELS['naive_bayes']
                features = MODELS['vectorizer'].transform([text])
            else:
                return None, None, "Naive Bayes model not available"
        else:
            # Previously an unknown choice fell through and surfaced as an
            # unbound-local error; report it explicitly instead.
            return None, None, f"Unknown model: {model_choice}"

        prediction = model.predict(features)[0]
        probabilities = model.predict_proba(features)[0]

        # Convert prediction to a readable label.  NumPy integer scalars are
        # not instances of the built-in ``int``, so they are checked for
        # explicitly; otherwise the label would come out as "0"/"1".
        # NOTE(review): assumes class 0 is negative and class 1 is positive,
        # as the rest of this file does - confirm against the training labels.
        class_names = ['Negative', 'Positive']
        if isinstance(prediction, (int, np.integer)) and 0 <= prediction < len(class_names):
            prediction_label = class_names[int(prediction)]
        else:
            prediction_label = str(prediction)
        return prediction_label, probabilities, "Success"
    except Exception as e:
        return None, None, f"Error: {str(e)}"
def create_plot(probabilities):
    """Draw a two-bar chart of the negative/positive probabilities.

    Args:
        probabilities: Two values, plotted in order as the 'Negative' and
            'Positive' bars.

    Returns:
        A matplotlib ``Figure`` containing the chart.
    """
    # The figure is built directly instead of through pyplot.  pyplot keeps a
    # global reference to every figure it creates until plt.close() is
    # called, so a long-running server that makes one figure per request
    # would otherwise grow without bound.
    from matplotlib.figure import Figure

    fig = Figure(figsize=(8, 5))
    ax = fig.subplots()
    classes = ['Negative', 'Positive']
    colors = ['#ff6b6b', '#51cf66']
    bars = ax.bar(classes, probabilities, color=colors, alpha=0.8)

    # Add a percentage label just above each bar
    for bar, prob in zip(bars, probabilities):
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width()/2., height + 0.01,
                f'{prob:.1%}', ha='center', va='bottom', fontweight='bold')

    # Leave headroom above 1.0 so the label of a full-height bar still fits
    ax.set_ylim(0, 1.1)
    ax.set_ylabel('Probability')
    ax.set_title('Sentiment Prediction Probabilities')
    ax.grid(axis='y', alpha=0.3)
    fig.tight_layout()
    return fig
# ============================================================================
# INTERFACE FUNCTIONS
# ============================================================================
def predict_text(text, model_choice):
    """Single text prediction interface.

    Runs ``make_prediction`` and formats its result for display.

    Args:
        text: The text to classify.
        model_choice: Display name of the model to use.

    Returns:
        A ``(markdown, plot)`` pair.  On failure ``markdown`` is an error
        message and ``plot`` is ``None``.
    """
    prediction, probabilities, status = make_prediction(text, model_choice)

    if not prediction or probabilities is None:
        # make_prediction already prefixes exception messages with "Error:",
        # so the prefix is only added when it is missing.
        message = status if status.startswith("Error") else f"Error: {status}"
        return message, None

    confidence = max(probabilities)

    # Format results
    result = f"**Prediction:** {prediction} Sentiment\n"
    result += f"**Confidence:** {confidence:.1%}\n\n"
    result += "**Detailed Probabilities:**\n"
    result += f"- Negative: {probabilities[0]:.1%}\n"
    result += f"- Positive: {probabilities[1]:.1%}\n\n"

    # Interpretation, bucketed on the highest class probability
    if confidence >= 0.8:
        result += "**High Confidence:** The model is very confident about this prediction."
    elif confidence >= 0.6:
        result += "**Medium Confidence:** The model is reasonably confident."
    else:
        result += "**Low Confidence:** The model is uncertain about this prediction."

    return result, create_plot(probabilities)
def _read_texts(path):
    """Return the non-blank texts in a .txt/.csv file, or None if unsupported."""
    lowered = path.lower()  # accept upper-case extensions such as ".CSV"
    if lowered.endswith('.txt'):
        with open(path, 'r', encoding='utf-8') as f:
            stripped = (line.strip() for line in f)
            return [line for line in stripped if line]
    if lowered.endswith('.csv'):
        df = pd.read_csv(path)
        # Drop empty cells first; otherwise astype(str) turns them into the
        # literal text "nan", which would then be classified like real input.
        first_column = df.iloc[:, 0].dropna().astype(str)
        return [value for value in first_column.tolist() if value.strip()]
    return None


def process_file(file, model_choice, max_texts):
    """Classify every text in an uploaded .txt or .csv file.

    A .txt file contributes one text per non-blank line; a .csv file
    contributes the non-empty cells of its first column.

    Args:
        file: The uploaded file, either a path string or an object whose
            ``name`` attribute holds the path; ``None`` when nothing was
            uploaded.
        model_choice: Display name of the model to use.
        max_texts: Maximum number of texts to process.

    Returns:
        A ``(summary_markdown, csv_path)`` pair.  ``csv_path`` points to a
        temporary CSV holding the per-text results, or is ``None`` when
        processing failed, in which case the first item is the error message.
    """
    if file is None:
        return "Please upload a file!", None
    if MODELS is None:
        return "No models loaded!", None

    try:
        # Accept both a plain path and an object exposing the path as ``.name``.
        path = str(getattr(file, 'name', file))
        texts = _read_texts(path)
        if texts is None:
            return "Unsupported file format! Use .txt or .csv", None
        if not texts:
            return "No text found in file!", None

        # Limit texts; int() because a slider value may arrive as a float,
        # which is not a valid slice bound.
        texts = texts[:int(max_texts)]

        # Process texts, keeping the raw confidences for the summary
        results = []
        confidences = []
        for index, text in enumerate(texts, start=1):
            prediction, probabilities, _ = make_prediction(text, model_choice)
            if not prediction or probabilities is None:
                continue
            confidence = max(probabilities)
            confidences.append(confidence)
            results.append({
                'Index': index,
                'Text': text[:100] + "..." if len(text) > 100 else text,
                'Prediction': prediction,
                'Confidence': f"{confidence:.1%}",
                'Negative_Prob': f"{probabilities[0]:.1%}",
                'Positive_Prob': f"{probabilities[1]:.1%}"
            })

        if not results:
            return "No valid texts could be processed!", None

        # Create summary
        total = len(results)
        positive_count = sum(1 for r in results if r['Prediction'] == 'Positive')
        negative_count = total - positive_count
        # Averaged from the raw values rather than re-parsed from the
        # already-rounded percentage strings.
        avg_confidence = float(np.mean(confidences)) * 100

        summary = "**Processing Complete!**\n\n"
        summary += "**Summary Statistics:**\n"
        summary += f"- Total Processed: {total}\n"
        summary += f"- Positive: {positive_count} ({positive_count/total:.1%})\n"
        summary += f"- Negative: {negative_count} ({negative_count/total:.1%})\n"
        summary += f"- Average Confidence: {avg_confidence:.1f}%\n"

        # Save the results to a temporary CSV for download.  The encoding is
        # explicit so non-ASCII text does not depend on the platform default,
        # and newline='' stops the csv writer's line endings being translated.
        with tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False,
                                         encoding='utf-8', newline='') as f:
            pd.DataFrame(results).to_csv(f, index=False)
            temp_file = f.name

        return summary, temp_file
    except Exception as e:
        return f"Error processing file: {str(e)}", None
def compare_models_func(text):
    """Compare the predictions of all available models on one text.

    Args:
        text: The text to classify with every available model.

    Returns:
        A ``(markdown, figure)`` pair.  ``markdown`` lists each model's
        prediction and says whether the models agree; ``figure`` holds one
        probability bar chart per model.  On failure ``markdown`` is an error
        message and ``figure`` is ``None``.
    """
    if MODELS is None:
        return "No models loaded!", None
    # ``text is None`` is checked explicitly so a missing value gives the
    # friendly message instead of an AttributeError.
    if text is None or not text.strip():
        return "Please enter text to compare!", None

    available_models = get_available_models()
    if len(available_models) < 2:
        return "Need at least 2 models for comparison.", None

    results = []
    all_probs = []
    for model_name in available_models:
        prediction, probabilities, _ = make_prediction(text, model_name)
        if prediction and probabilities is not None:
            results.append({
                'Model': model_name,
                'Prediction': prediction,
                'Confidence': f"{max(probabilities):.1%}",
                'Negative': f"{probabilities[0]:.1%}",
                'Positive': f"{probabilities[1]:.1%}"
            })
            all_probs.append(probabilities)

    if not results:
        return "Failed to get predictions!", None

    # Create comparison text
    parts = ["**Model Comparison Results:**\n\n"]
    for result in results:
        parts.append(f"**{result['Model']}:**\n")
        parts.append(f"- Prediction: {result['Prediction']}\n")
        parts.append(f"- Confidence: {result['Confidence']}\n")
        parts.append(f"- Negative: {result['Negative']}, Positive: {result['Positive']}\n\n")

    # Agreement analysis
    predictions = [r['Prediction'] for r in results]
    if len(set(predictions)) == 1:
        parts.append(f"**Agreement:** All models agree on {predictions[0]} sentiment!")
    else:
        parts.append("**Disagreement:** Models have different predictions.")
    comparison_text = "".join(parts)

    # Create comparison plot.  The figure is built directly instead of
    # through pyplot, which keeps every figure it creates alive until
    # plt.close() is called and would leak one figure per request.
    from matplotlib.figure import Figure

    fig = Figure(figsize=(6*len(results), 5))
    # squeeze=False always gives a 2-D array of axes, so a single result
    # needs no special case.
    axes = fig.subplots(1, len(results), squeeze=False)[0]
    classes = ['Negative', 'Positive']
    colors = ['#ff6b6b', '#51cf66']
    for ax, result, probs in zip(axes, results, all_probs):
        bars = ax.bar(classes, probs, color=colors, alpha=0.8)
        # Add a percentage label just above each bar
        for bar, prob in zip(bars, probs):
            height = bar.get_height()
            ax.text(bar.get_x() + bar.get_width()/2., height + 0.02,
                    f'{prob:.0%}', ha='center', va='bottom', fontweight='bold')
        ax.set_ylim(0, 1.1)
        ax.set_title(f"{result['Model']}\n{result['Prediction']}")
        ax.grid(axis='y', alpha=0.3)
    fig.tight_layout()
    return comparison_text, fig
def get_model_info():
    """Build the Markdown text describing which models and files are loaded.

    When no models are loaded, a help message listing the accepted file
    combinations is returned instead.  Otherwise the text contains one
    section per usable model followed by a per-file load status list.
    """
    if MODELS is None:
        return """
**No models loaded!**
Please ensure you have model files in the 'models/' directory:
- sentiment_analysis_pipeline.pkl (complete pipeline), OR
- tfidf_vectorizer.pkl + logistic_regression_model.pkl, OR
- tfidf_vectorizer.pkl + multinomial_nb_model.pkl
"""

    has_vectorizer = MODELS.get('vectorizer_available')
    pieces = [
        "**Models loaded successfully!**\n\n",
        "**Available Models:**\n\n",
    ]

    # Same availability rules as the model dropdowns
    if MODELS.get('pipeline_available') or (has_vectorizer and MODELS.get('lr_available')):
        pieces.extend([
            "**Logistic Regression**\n",
            "- Type: Linear Classification\n",
            "- Features: TF-IDF vectors\n",
            "- Strengths: Fast, interpretable\n\n",
        ])
    if has_vectorizer and MODELS.get('nb_available'):
        pieces.extend([
            "**Multinomial Naive Bayes**\n",
            "- Type: Probabilistic Classification\n",
            "- Features: TF-IDF vectors\n",
            "- Strengths: Works well with small data\n\n",
        ])

    # One status line per model file, keyed by its availability flag
    pieces.append("**File Status:**\n")
    file_flags = (
        ("sentiment_analysis_pipeline.pkl", 'pipeline_available'),
        ("tfidf_vectorizer.pkl", 'vectorizer_available'),
        ("logistic_regression_model.pkl", 'lr_available'),
        ("multinomial_nb_model.pkl", 'nb_available'),
    )
    for filename, flag in file_flags:
        marker = "✅" if MODELS.get(flag, False) else "❌"
        pieces.append(f"- {filename}: {marker}\n")

    return "".join(pieces)
# ============================================================================
# GRADIO INTERFACE
# ============================================================================
def create_app():
"""Build and return the Gradio Blocks UI for the sentiment app.

The interface has four tabs: single prediction, batch file processing,
model comparison and model info.  The model dropdown choices are read
from get_available_models() when the UI is built.

Returns:
    The constructed ``gr.Blocks`` application (not launched here).
"""
with gr.Blocks(title="ML Text Classification") as app:
# Header (static HTML banner)
gr.HTML("""
<div style="text-align: center; margin-bottom: 2rem;">
<h1 style="color: #1f77b4; font-size: 2.5rem;">🤖 ML Text Classification App</h1>
<p style="font-size: 1.2rem; color: #666;">Advanced Sentiment Analysis with Multiple ML Models</p>
</div>
""")
# Main interface with tabs
with gr.Tabs():
# Single Prediction Tab: one text in, Markdown result + probability chart out
with gr.Tab("🔮 Single Prediction"):
gr.Markdown("### Enter text and select a model for sentiment analysis")
with gr.Row():
with gr.Column(scale=1):
# Defaults to the first entry returned by get_available_models()
model_dropdown = gr.Dropdown(
choices=get_available_models(),
value=get_available_models()[0] if get_available_models() else None,
label="Choose Model"
)
text_input = gr.Textbox(
lines=5,
placeholder="Enter your text here...",
label="Text Input"
)
with gr.Row():
example1_btn = gr.Button("Good Example", size="sm")
example2_btn = gr.Button("Bad Example", size="sm")
example3_btn = gr.Button("Neutral Example", size="sm")
predict_btn = gr.Button("🚀 Analyze Sentiment", variant="primary")
with gr.Column(scale=1):
prediction_output = gr.Markdown(label="Results")
prediction_plot = gr.Plot(label="Probability Chart")
# Example handlers: each button writes a fixed sample sentence into text_input
example1_btn.click(
lambda: "This product is absolutely amazing! Best purchase ever!",
outputs=text_input
)
example2_btn.click(
lambda: "Terrible quality, broke immediately. Waste of money!",
outputs=text_input
)
example3_btn.click(
lambda: "It's okay, nothing special but does the job.",
outputs=text_input
)
# Prediction handler: predict_text(text, model) -> (markdown, plot)
predict_btn.click(
predict_text,
inputs=[text_input, model_dropdown],
outputs=[prediction_output, prediction_plot]
)
# Batch Processing Tab: classify the texts of an uploaded .txt/.csv file
with gr.Tab("📁 Batch Processing"):
gr.Markdown("### Upload a file to process multiple texts")
with gr.Row():
with gr.Column():
file_upload = gr.File(
label="Upload File (.txt or .csv)",
file_types=[".txt", ".csv"]
)
batch_model = gr.Dropdown(
choices=get_available_models(),
value=get_available_models()[0] if get_available_models() else None,
label="Model for Batch Processing"
)
# Upper bound on how many texts process_file will classify
max_texts = gr.Slider(
minimum=10,
maximum=500,
value=100,
step=10,
label="Max Texts to Process"
)
process_btn = gr.Button("📊 Process File", variant="primary")
with gr.Column():
batch_output = gr.Markdown(label="Processing Results")
download_file = gr.File(label="Download Results")
# Process handler: process_file(file, model, max_texts) -> (summary, csv path)
process_btn.click(
process_file,
inputs=[file_upload, batch_model, max_texts],
outputs=[batch_output, download_file]
)
# Model Comparison Tab: run one text through every available model
with gr.Tab("⚖️ Model Comparison"):
gr.Markdown("### Compare predictions from different models")
with gr.Row():
with gr.Column():
comparison_input = gr.Textbox(
lines=4,
placeholder="Enter text to compare models...",
label="Text for Comparison"
)
compare_btn = gr.Button("🔍 Compare Models", variant="primary")
with gr.Row():
comp_ex1 = gr.Button("Mixed Example 1", size="sm")
comp_ex2 = gr.Button("Mixed Example 2", size="sm")
with gr.Column():
comparison_output = gr.Markdown(label="Comparison Results")
comparison_plot = gr.Plot(label="Model Comparison")
# Example handlers: each button writes a fixed sample sentence into comparison_input
comp_ex1.click(
lambda: "This movie was okay but not great.",
outputs=comparison_input
)
comp_ex2.click(
lambda: "The product is fine, I guess.",
outputs=comparison_input
)
# Compare handler: compare_models_func(text) -> (markdown, plot)
compare_btn.click(
compare_models_func,
inputs=comparison_input,
outputs=[comparison_output, comparison_plot]
)
# Model Info Tab: shows get_model_info(); the button re-runs it on demand
with gr.Tab("📊 Model Info"):
model_info = gr.Markdown(
value=get_model_info(),
label="Model Information"
)
refresh_btn = gr.Button("🔄 Refresh", size="sm")
refresh_btn.click(get_model_info, outputs=model_info)
# Footer (static HTML credits)
gr.HTML("""
<div style="text-align: center; margin-top: 2rem; padding: 1rem; border-top: 1px solid #eee; color: #666;">
<p><strong>🤖 ML Text Classification App</strong></p>
<p>Built with Gradio | By Maaz Amjad</p>
<p><small>Part of Introduction to Large Language Models course</small></p>
</div>
""")
return app
# ============================================================================
# MAIN
# ============================================================================
if __name__ == "__main__":
    # Report at startup which models, if any, could be loaded
    if MODELS is not None:
        available = get_available_models()
        print(f"✅ Successfully loaded {len(available)} model(s): {', '.join(available)}")
    else:
        print("⚠️ Warning: No models loaded!")

    # Build the UI and serve it on all interfaces, port 7860
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "debug": True,
    }
    app = create_app()
    app.launch(**launch_options)