# GRADIO ML CLASSIFICATION APP - SIMPLIFIED VERSION
# =================================================
import gradio as gr
import pandas as pd
import numpy as np
import joblib
import matplotlib.pyplot as plt
import warnings
import tempfile

warnings.filterwarnings('ignore')

# ============================================================================
# MODEL LOADING
# ============================================================================
def load_models():
    """Load all available ML models from the models/ directory."""
    models = {}
    try:
        # Load the complete pipeline (vectorizer + classifier in one object)
        try:
            models['pipeline'] = joblib.load('models/sentiment_analysis_pipeline.pkl')
            models['pipeline_available'] = True
        except Exception:
            models['pipeline_available'] = False

        # Load the standalone TF-IDF vectorizer
        try:
            models['vectorizer'] = joblib.load('models/tfidf_vectorizer.pkl')
            models['vectorizer_available'] = True
        except Exception:
            models['vectorizer_available'] = False

        # Load the Logistic Regression model
        try:
            models['logistic_regression'] = joblib.load('models/logistic_regression_model.pkl')
            models['lr_available'] = True
        except Exception:
            models['lr_available'] = False

        # Load the Multinomial Naive Bayes model
        try:
            models['naive_bayes'] = joblib.load('models/multinomial_nb_model.pkl')
            models['nb_available'] = True
        except Exception:
            models['nb_available'] = False

        # At least one usable configuration is required: the full pipeline,
        # or the vectorizer plus at least one classifier
        pipeline_ready = models['pipeline_available']
        individual_ready = models['vectorizer_available'] and (models['lr_available'] or models['nb_available'])
        return models if (pipeline_ready or individual_ready) else None
    except Exception as e:
        print(f"Error loading models: {e}")
        return None
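
# A minimal sketch (an assumption, not the original training code) of how the
# expected .pkl artifacts could be produced with scikit-learn:
#
#   from sklearn.feature_extraction.text import TfidfVectorizer
#   from sklearn.linear_model import LogisticRegression
#   from sklearn.pipeline import Pipeline
#
#   pipe = Pipeline([("tfidf", TfidfVectorizer()),
#                    ("clf", LogisticRegression(max_iter=1000))])
#   pipe.fit(train_texts, train_labels)  # hypothetical training data
#   joblib.dump(pipe, "models/sentiment_analysis_pipeline.pkl")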

# Load models globally
MODELS = load_models()

# ============================================================================
# CORE FUNCTIONS
# ============================================================================
def get_available_models():
    """Get the names of models that are ready to serve predictions."""
    if MODELS is None:
        return ["No models available"]
    available = []
    if MODELS.get('pipeline_available') or (MODELS.get('vectorizer_available') and MODELS.get('lr_available')):
        available.append("Logistic Regression")
    if MODELS.get('vectorizer_available') and MODELS.get('nb_available'):
        available.append("Multinomial Naive Bayes")
    return available if available else ["No models available"]

def make_prediction(text, model_choice):
    """Make a prediction using the selected model.

    Returns (label, probabilities, status); label and probabilities are
    None on failure.
    """
    if MODELS is None or not text.strip():
        return None, None, "Please enter text and ensure models are loaded"
    try:
        if model_choice == "Logistic Regression":
            if MODELS.get('pipeline_available'):
                prediction = MODELS['pipeline'].predict([text])[0]
                probabilities = MODELS['pipeline'].predict_proba([text])[0]
            elif MODELS.get('vectorizer_available') and MODELS.get('lr_available'):
                X = MODELS['vectorizer'].transform([text])
                prediction = MODELS['logistic_regression'].predict(X)[0]
                probabilities = MODELS['logistic_regression'].predict_proba(X)[0]
            else:
                return None, None, "Logistic Regression model not available"
        elif model_choice == "Multinomial Naive Bayes":
            if MODELS.get('vectorizer_available') and MODELS.get('nb_available'):
                X = MODELS['vectorizer'].transform([text])
                prediction = MODELS['naive_bayes'].predict(X)[0]
                probabilities = MODELS['naive_bayes'].predict_proba(X)[0]
            else:
                return None, None, "Naive Bayes model not available"
        else:
            return None, None, f"Unknown model: {model_choice}"

        # Map an integer class label to its name; pass string labels through.
        # (np.integer covers the int64 labels scikit-learn typically returns.)
        class_names = ['Negative', 'Positive']
        if isinstance(prediction, (int, np.integer)):
            prediction_label = class_names[prediction]
        else:
            prediction_label = str(prediction)
        return prediction_label, probabilities, "Success"
    except Exception as e:
        return None, None, f"Error: {str(e)}"
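
# Example usage (output values are illustrative, not from a real run):
#   label, probs, status = make_prediction("Great product!", "Logistic Regression")
#   # label -> "Positive", probs -> array([0.08, 0.92]), status -> "Success"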

def create_plot(probabilities):
    """Create a bar chart of the two class probabilities."""
    fig, ax = plt.subplots(figsize=(8, 5))
    classes = ['Negative', 'Positive']
    colors = ['#ff6b6b', '#51cf66']
    bars = ax.bar(classes, probabilities, color=colors, alpha=0.8)

    # Label each bar with its probability
    for bar, prob in zip(bars, probabilities):
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width() / 2., height + 0.01,
                f'{prob:.1%}', ha='center', va='bottom', fontweight='bold')

    ax.set_ylim(0, 1.1)
    ax.set_ylabel('Probability')
    ax.set_title('Sentiment Prediction Probabilities')
    ax.grid(axis='y', alpha=0.3)
    plt.tight_layout()
    return fig
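
# Note: create_plot assumes predict_proba returns columns in the order
# [Negative, Positive], i.e. the classifier was trained with classes_ == [0, 1].
# If in doubt, check the trained model, e.g.:
#   print(MODELS['logistic_regression'].classes_)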

# ============================================================================
# INTERFACE FUNCTIONS
# ============================================================================
def predict_text(text, model_choice):
    """Single-text prediction interface."""
    prediction, probabilities, status = make_prediction(text, model_choice)
    if prediction and probabilities is not None:
        confidence = max(probabilities)

        # Format results
        result = f"**Prediction:** {prediction} Sentiment\n"
        result += f"**Confidence:** {confidence:.1%}\n\n"
        result += "**Detailed Probabilities:**\n"
        result += f"- Negative: {probabilities[0]:.1%}\n"
        result += f"- Positive: {probabilities[1]:.1%}\n\n"

        # Interpretation
        if confidence >= 0.8:
            result += "**High Confidence:** The model is very confident about this prediction."
        elif confidence >= 0.6:
            result += "**Medium Confidence:** The model is reasonably confident."
        else:
            result += "**Low Confidence:** The model is uncertain about this prediction."

        # Create plot
        plot = create_plot(probabilities)
        return result, plot
    else:
        return f"Error: {status}", None

def process_file(file, model_choice, max_texts):
    """Process an uploaded .txt or .csv file of texts."""
    if file is None:
        return "Please upload a file!", None
    if MODELS is None:
        return "No models loaded!", None
    try:
        # Read file: .txt is one text per line; .csv uses the first column
        if file.name.endswith('.txt'):
            with open(file.name, 'r', encoding='utf-8') as f:
                content = f.read()
            texts = [line.strip() for line in content.split('\n') if line.strip()]
        elif file.name.endswith('.csv'):
            df = pd.read_csv(file.name)
            texts = df.iloc[:, 0].astype(str).tolist()
        else:
            return "Unsupported file format! Use .txt or .csv", None

        if not texts:
            return "No text found in file!", None

        # Limit the number of texts to process
        if len(texts) > max_texts:
            texts = texts[:max_texts]

        # Classify each text
        results = []
        for i, text in enumerate(texts):
            if text.strip():
                prediction, probabilities, _ = make_prediction(text, model_choice)
                if prediction and probabilities is not None:
                    results.append({
                        'Index': i + 1,
                        'Text': text[:100] + "..." if len(text) > 100 else text,
                        'Prediction': prediction,
                        'Confidence': f"{max(probabilities):.1%}",
                        'Negative_Prob': f"{probabilities[0]:.1%}",
                        'Positive_Prob': f"{probabilities[1]:.1%}"
                    })

        if results:
            # Create summary
            positive_count = sum(1 for r in results if r['Prediction'] == 'Positive')
            negative_count = len(results) - positive_count
            avg_confidence = np.mean([float(r['Confidence'].strip('%')) for r in results])

            summary = "**Processing Complete!**\n\n"
            summary += "**Summary Statistics:**\n"
            summary += f"- Total Processed: {len(results)}\n"
            summary += f"- Positive: {positive_count} ({positive_count/len(results):.1%})\n"
            summary += f"- Negative: {negative_count} ({negative_count/len(results):.1%})\n"
            summary += f"- Average Confidence: {avg_confidence:.1f}%\n"

            # Create CSV for download, saved to a temporary file
            results_df = pd.DataFrame(results)
            with tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False) as f:
                results_df.to_csv(f, index=False)
                temp_file = f.name
            return summary, temp_file
        else:
            return "No valid texts could be processed!", None
    except Exception as e:
        return f"Error processing file: {str(e)}", None
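
# Expected input formats (assumed from the parsing above):
#   reviews.txt — one text per line
#   reviews.csv — texts read from the FIRST column; other columns are ignored:
#       text,rating
#       "Loved it, would buy again",5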

def compare_models_func(text):
    """Compare predictions from all available models on one text."""
    if MODELS is None:
        return "No models loaded!", None
    if not text.strip():
        return "Please enter text to compare!", None

    available_models = get_available_models()
    if len(available_models) < 2:
        return "Need at least 2 models for comparison.", None

    results = []
    all_probs = []
    for model_name in available_models:
        prediction, probabilities, _ = make_prediction(text, model_name)
        if prediction and probabilities is not None:
            results.append({
                'Model': model_name,
                'Prediction': prediction,
                'Confidence': f"{max(probabilities):.1%}",
                'Negative': f"{probabilities[0]:.1%}",
                'Positive': f"{probabilities[1]:.1%}"
            })
            all_probs.append(probabilities)

    if results:
        # Create comparison text
        comparison_text = "**Model Comparison Results:**\n\n"
        for result in results:
            comparison_text += f"**{result['Model']}:**\n"
            comparison_text += f"- Prediction: {result['Prediction']}\n"
            comparison_text += f"- Confidence: {result['Confidence']}\n"
            comparison_text += f"- Negative: {result['Negative']}, Positive: {result['Positive']}\n\n"

        # Agreement analysis
        predictions = [r['Prediction'] for r in results]
        if len(set(predictions)) == 1:
            comparison_text += f"**Agreement:** All models agree on {predictions[0]} sentiment!"
        else:
            comparison_text += "**Disagreement:** Models have different predictions."

        # One probability bar chart per model, side by side
        fig, axes = plt.subplots(1, len(results), figsize=(6 * len(results), 5))
        if len(results) == 1:
            axes = [axes]
        for i, (result, probs) in enumerate(zip(results, all_probs)):
            ax = axes[i]
            classes = ['Negative', 'Positive']
            colors = ['#ff6b6b', '#51cf66']
            bars = ax.bar(classes, probs, color=colors, alpha=0.8)

            # Label each bar with its probability
            for bar, prob in zip(bars, probs):
                height = bar.get_height()
                ax.text(bar.get_x() + bar.get_width() / 2., height + 0.02,
                        f'{prob:.0%}', ha='center', va='bottom', fontweight='bold')

            ax.set_ylim(0, 1.1)
            ax.set_title(f"{result['Model']}\n{result['Prediction']}")
            ax.grid(axis='y', alpha=0.3)
        plt.tight_layout()
        return comparison_text, fig
    else:
        return "Failed to get predictions!", None

def get_model_info():
    """Summarize which models and model files are available."""
    if MODELS is None:
        return """
**No models loaded!**

Please ensure you have model files in the 'models/' directory:
- sentiment_analysis_pipeline.pkl (complete pipeline), OR
- tfidf_vectorizer.pkl + logistic_regression_model.pkl, OR
- tfidf_vectorizer.pkl + multinomial_nb_model.pkl
"""

    info = "**Models loaded successfully!**\n\n"
    info += "**Available Models:**\n\n"
    if MODELS.get('pipeline_available') or (MODELS.get('vectorizer_available') and MODELS.get('lr_available')):
        info += "**Logistic Regression**\n"
        info += "- Type: Linear Classification\n"
        info += "- Features: TF-IDF vectors\n"
        info += "- Strengths: Fast, interpretable\n\n"
    if MODELS.get('vectorizer_available') and MODELS.get('nb_available'):
        info += "**Multinomial Naive Bayes**\n"
        info += "- Type: Probabilistic Classification\n"
        info += "- Features: TF-IDF vectors\n"
        info += "- Strengths: Works well with small data\n\n"

    info += "**File Status:**\n"
    files = [
        ("sentiment_analysis_pipeline.pkl", MODELS.get('pipeline_available', False)),
        ("tfidf_vectorizer.pkl", MODELS.get('vectorizer_available', False)),
        ("logistic_regression_model.pkl", MODELS.get('lr_available', False)),
        ("multinomial_nb_model.pkl", MODELS.get('nb_available', False))
    ]
    for filename, status in files:
        status_icon = "✅" if status else "❌"
        info += f"- {filename}: {status_icon}\n"
    return info
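
# Expected on-disk layout (inferred from the filenames above):
#   models/
#       sentiment_analysis_pipeline.pkl
#       tfidf_vectorizer.pkl
#       logistic_regression_model.pkl
#       multinomial_nb_model.pkl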

# ============================================================================
# GRADIO INTERFACE
# ============================================================================
def create_app():
    """Create the Gradio interface."""
    with gr.Blocks(title="ML Text Classification") as app:
        # Header
        gr.HTML("""
        <div style="text-align: center; margin-bottom: 2rem;">
            <h1 style="color: #1f77b4; font-size: 2.5rem;">🤖 ML Text Classification App</h1>
            <p style="font-size: 1.2rem; color: #666;">Advanced Sentiment Analysis with Multiple ML Models</p>
        </div>
        """)

        # Main interface with tabs
        with gr.Tabs():
            # Single Prediction Tab
            with gr.Tab("🔮 Single Prediction"):
                gr.Markdown("### Enter text and select a model for sentiment analysis")
                with gr.Row():
                    with gr.Column(scale=1):
                        model_dropdown = gr.Dropdown(
                            choices=get_available_models(),
                            value=get_available_models()[0] if get_available_models() else None,
                            label="Choose Model"
                        )
                        text_input = gr.Textbox(
                            lines=5,
                            placeholder="Enter your text here...",
                            label="Text Input"
                        )
                        with gr.Row():
                            example1_btn = gr.Button("Good Example", size="sm")
                            example2_btn = gr.Button("Bad Example", size="sm")
                            example3_btn = gr.Button("Neutral Example", size="sm")
                        predict_btn = gr.Button("🚀 Analyze Sentiment", variant="primary")
                    with gr.Column(scale=1):
                        prediction_output = gr.Markdown(label="Results")
                        prediction_plot = gr.Plot(label="Probability Chart")

                # Example handlers fill the textbox with canned inputs
                example1_btn.click(
                    lambda: "This product is absolutely amazing! Best purchase ever!",
                    outputs=text_input
                )
                example2_btn.click(
                    lambda: "Terrible quality, broke immediately. Waste of money!",
                    outputs=text_input
                )
                example3_btn.click(
                    lambda: "It's okay, nothing special but does the job.",
                    outputs=text_input
                )

                # Prediction handler
                predict_btn.click(
                    predict_text,
                    inputs=[text_input, model_dropdown],
                    outputs=[prediction_output, prediction_plot]
                )

            # Batch Processing Tab
            with gr.Tab("📁 Batch Processing"):
                gr.Markdown("### Upload a file to process multiple texts")
                with gr.Row():
                    with gr.Column():
                        file_upload = gr.File(
                            label="Upload File (.txt or .csv)",
                            file_types=[".txt", ".csv"]
                        )
                        batch_model = gr.Dropdown(
                            choices=get_available_models(),
                            value=get_available_models()[0] if get_available_models() else None,
                            label="Model for Batch Processing"
                        )
                        max_texts = gr.Slider(
                            minimum=10,
                            maximum=500,
                            value=100,
                            step=10,
                            label="Max Texts to Process"
                        )
                        process_btn = gr.Button("📊 Process File", variant="primary")
                    with gr.Column():
                        batch_output = gr.Markdown(label="Processing Results")
                        download_file = gr.File(label="Download Results")

                # Process handler
                process_btn.click(
                    process_file,
                    inputs=[file_upload, batch_model, max_texts],
                    outputs=[batch_output, download_file]
                )

            # Model Comparison Tab
            with gr.Tab("⚖️ Model Comparison"):
                gr.Markdown("### Compare predictions from different models")
                with gr.Row():
                    with gr.Column():
                        comparison_input = gr.Textbox(
                            lines=4,
                            placeholder="Enter text to compare models...",
                            label="Text for Comparison"
                        )
                        compare_btn = gr.Button("🔍 Compare Models", variant="primary")
                        with gr.Row():
                            comp_ex1 = gr.Button("Mixed Example 1", size="sm")
                            comp_ex2 = gr.Button("Mixed Example 2", size="sm")
                    with gr.Column():
                        comparison_output = gr.Markdown(label="Comparison Results")
                        comparison_plot = gr.Plot(label="Model Comparison")

                # Example handlers
                comp_ex1.click(
                    lambda: "This movie was okay but not great.",
                    outputs=comparison_input
                )
                comp_ex2.click(
                    lambda: "The product is fine, I guess.",
                    outputs=comparison_input
                )

                # Compare handler
                compare_btn.click(
                    compare_models_func,
                    inputs=comparison_input,
                    outputs=[comparison_output, comparison_plot]
                )

            # Model Info Tab
            with gr.Tab("📊 Model Info"):
                model_info = gr.Markdown(
                    value=get_model_info(),
                    label="Model Information"
                )
                refresh_btn = gr.Button("🔄 Refresh", size="sm")
                refresh_btn.click(get_model_info, outputs=model_info)

        # Footer
        gr.HTML("""
        <div style="text-align: center; margin-top: 2rem; padding: 1rem; border-top: 1px solid #eee; color: #666;">
            <p><strong>🤖 ML Text Classification App</strong></p>
            <p>Built with Gradio | By Maaz Amjad</p>
            <p><small>Part of Introduction to Large Language Models course</small></p>
        </div>
        """)
    return app
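
# Optional: a request queue helps under concurrent traffic. A minimal sketch
# (queue() is a real Gradio API; the max_size value is an assumption):
#   app = create_app()
#   app.queue(max_size=16)
#   app.launch()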

# ============================================================================
# MAIN
# ============================================================================
if __name__ == "__main__":
    # Report model status at startup
    if MODELS is None:
        print("⚠️ Warning: No models loaded!")
    else:
        available = get_available_models()
        print(f"✅ Successfully loaded {len(available)} model(s): {', '.join(available)}")

    # Launch app
    app = create_app()
    app.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        debug=True
    )