|
import os
import re
import tempfile
import warnings
from io import StringIO

import gradio as gr
import joblib
import pandas as pd
|
|
|
|
|
# Suppress every UserWarning-category warning for the whole process.
warnings.filterwarnings('ignore', category=UserWarning)

# Load the trained artifacts once at import time; the prediction functions in
# this file read them as module-level globals.
# NOTE(review): joblib.load unpickles arbitrary objects - the .pkl files must
# come from a trusted source.
try:
    model = joblib.load('model.pkl')
    vectorizer = joblib.load('vectorizer.pkl')
    label_encoder = joblib.load('label_encoder.pkl')
    print("✅ Model files loaded successfully!")
except Exception as e:
    print(f"❌ Error loading model files: {e}")
    # The app cannot work without its artifacts: re-raise the active
    # exception unchanged so start-up fails with the original traceback.
    raise
|
|
|
|
|
def clean_text(text: str) -> str:
    """Normalise raw email text into lowercase, space-separated letters.

    Lowercases the input, then blanks out URLs, angle-bracket tags and every
    character that is not ``a``-``z`` or whitespace, and finally collapses
    whitespace runs into single spaces.

    Args:
        text: Raw text to clean. Any non-string value yields ``""``.

    Returns:
        The cleaned text with no leading or trailing whitespace.
    """
    if isinstance(text, str):
        cleaned = text.lower()
        # Order matters: URLs and tags are removed as whole units before the
        # letters-only filter would break them into stray word fragments.
        for noise_pattern in (r"http\S+|www\S+", r"<.*?>", r"[^a-z\s]"):
            cleaned = re.sub(noise_pattern, " ", cleaned)
        return re.sub(r"\s+", " ", cleaned).strip()
    return ""
|
|
|
|
|
def predict_single_email(email_text):
    """Classify one email and return a Markdown-formatted result.

    Args:
        email_text: Raw email text entered by the user. ``None`` and
            non-string values are treated like blank input.

    Returns:
        A Markdown string containing one of:
        - a prompt to enter text, when the input is blank;
        - the predicted sector, followed by up to three top-scoring sectors
          when the model's ``decision_function`` output can be ranked;
        - an error message if vectorising or predicting raises.
    """
    # None / non-string input has no usable .strip(); treat it as blank
    # instead of letting an AttributeError escape to the UI.
    if not isinstance(email_text, str) or not email_text.strip():
        return "Please enter an email text."

    try:
        email_vector = vectorizer.transform([clean_text(email_text)])

        prediction = model.predict(email_vector)[0]
        predicted_sector = label_encoder.inverse_transform([prediction])[0]
        headline = f"**Predicted Sector: {predicted_sector}**"

        try:
            # Raw decision scores for the single row, not probabilities.
            scores = model.decision_function(email_vector)[0]
            top_indices = scores.argsort()[-3:][::-1]

            # Score columns are ordered like model.classes_, so translate
            # column positions into encoded labels before decoding them.
            # Fall back to the positions themselves if the model exposes no
            # classes_ attribute.
            class_ids = getattr(model, "classes_", None)
            encoded = top_indices if class_ids is None else class_ids[top_indices]
            top_sectors = label_encoder.inverse_transform(encoded)
            top_scores = scores[top_indices]

            ranking = "".join(
                f"{rank}. {sector}: {score:.3f}\n"
                for rank, (sector, score) in enumerate(
                    zip(top_sectors, top_scores), start=1
                )
            )
            return f"{headline}\n\n**Top 3 Predictions:**\n{ranking}"
        except Exception:
            # Best effort: the ranking is optional (e.g. no decision_function,
            # or a scalar score that cannot be indexed). Show the headline
            # only, but do not swallow KeyboardInterrupt/SystemExit.
            return headline

    except Exception as e:
        return f"Error making prediction: {e}"
|
|
|
|
|
def predict_csv_file(file):
    """Classify every email in an uploaded CSV and write the results to disk.

    Args:
        file: The upload handed over by the file component: either a
            filesystem path (``str`` / ``os.PathLike``) or an object whose
            ``name`` attribute is that path. ``None`` means nothing was
            uploaded.

    Returns:
        A ``(path, message)`` tuple. On success ``path`` points to a CSV with
        the columns ``Emails`` and ``Predicted_Sector``. On any failure
        ``path`` is ``None`` and ``message`` explains why.
    """
    if file is None:
        return None, "Please upload a CSV file."

    try:
        # Accept both a plain path and a file-like wrapper exposing .name.
        csv_path = file if isinstance(file, (str, os.PathLike)) else file.name
        df = pd.read_csv(csv_path)

        if 'Emails' not in df.columns:
            return None, "Error: CSV file must contain an 'Emails' column."

        cleaned_emails = df['Emails'].apply(clean_text)
        predictions = model.predict(vectorizer.transform(cleaned_emails))
        df['Predicted_Sector'] = label_encoder.inverse_transform(predictions)

        # A fresh temporary directory per call keeps concurrent requests from
        # overwriting each other's results while preserving the download
        # name "predictions.csv".
        # NOTE(review): these directories are never removed here.
        output_dir = tempfile.mkdtemp(prefix="email_sectors_")
        output_filename = os.path.join(output_dir, "predictions.csv")
        df[['Emails', 'Predicted_Sector']].to_csv(output_filename, index=False)

        success_msg = (
            f"✅ Successfully processed {len(df)} emails. "
            "Download the results below."
        )
        return output_filename, success_msg

    except Exception as e:
        return None, f"Error processing CSV file: {e}"
|
|
|
|
|
# Sector names known to the label encoder, plus a comma-separated rendering
# of them for display in the UI header.
available_sectors = [*label_encoder.classes_]
sectors_text = ", ".join(available_sectors)
|
|
|
|
|
# Two-tab Gradio interface: one tab classifies a single pasted email, the
# other classifies every row of an uploaded CSV.
with gr.Blocks(title="Email Sector Classification", theme=gr.themes.Soft()) as demo:

    gr.Markdown("# 📧 Email Sector Classification")
    gr.Markdown("Classify emails into business sectors using machine learning.")

    gr.Markdown(f"**Available Sectors:** {sectors_text}")

    with gr.Tabs():

        with gr.Tab("Single Email Prediction"):
            gr.Markdown("### Enter an email to classify its sector")

            with gr.Row():
                with gr.Column(scale=2):
                    email_input = gr.Textbox(
                        label="Email Text",
                        placeholder="Enter your email content here...",
                        lines=8,
                        max_lines=15,
                    )
                    predict_btn = gr.Button("Predict Sector", variant="primary", size="lg")

                with gr.Column(scale=1):
                    prediction_output = gr.Markdown(label="Prediction Result")

            predict_btn.click(
                fn=predict_single_email,
                inputs=email_input,
                outputs=prediction_output,
            )

            gr.Markdown("### Example Emails")
            examples = [
                "We are looking for experienced software developers to join our tech team. Requirements include Python, JavaScript, and cloud technologies.",
                "Our hospital is seeking qualified nurses for the emergency department. Must have current RN license and BLS certification.",
                "Join our sales team! We offer competitive commission rates and comprehensive training for motivated individuals.",
                "We provide comprehensive financial planning services including investment management and retirement planning.",
            ]

            gr.Examples(
                examples=examples,
                inputs=email_input,
                outputs=prediction_output,
                fn=predict_single_email,
                cache_examples=True,
            )

        with gr.Tab("Batch CSV Processing"):
            gr.Markdown("### Upload a CSV file with emails to classify")
            gr.Markdown("**CSV Format:** Your file should have an 'Emails' column containing the email texts.")

            with gr.Row():
                with gr.Column():
                    file_input = gr.File(
                        label="Upload CSV File",
                        file_types=[".csv"],
                        file_count="single",
                    )
                    process_btn = gr.Button("Process CSV", variant="primary", size="lg")

                with gr.Column():
                    file_status = gr.Markdown()
                    download_file = gr.File(label="Download Results", visible=False)

            def process_and_update(file):
                """Run the batch prediction and toggle the download widget.

                Returns the status message and a file component update: the
                download is shown with the result path on success and hidden
                when no result file was produced.
                """
                result_file, status = predict_csv_file(file)
                if result_file:
                    return status, gr.File(value=result_file, visible=True)
                else:
                    return status, gr.File(visible=False)

            process_btn.click(
                fn=process_and_update,
                inputs=file_input,
                outputs=[file_status, download_file],
            )

            gr.Markdown("### CSV Format Example")
            # Assembled line by line so the fenced block starts at column 0
            # of the Markdown text regardless of how this source is indented.
            gr.Markdown(
                "```\n"
                "Emails\n"
                '"We are hiring software engineers with Python experience"\n'
                '"Our clinic needs registered nurses for patient care"\n'
                '"Looking for sales representatives in the automotive industry"\n'
                "```"
            )

    gr.Markdown("---")
    gr.Markdown("*Powered by scikit-learn and Gradio*")


# Start the web server only when this file is run as a script, not on import.
if __name__ == "__main__":
    demo.launch()