# MachineLearning / app.py
# Tzetha's picture
# added database
# 04da40f
import pandas as pd
import streamlit as st
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
# Load dataset
@st.cache_data
def load_data(path: str = "spam.csv") -> pd.DataFrame:
    """Load the labelled messages used to train the spam classifier.

    The CSV is read as latin-1, its ``v1``/``v2`` columns are renamed to
    ``label``/``text``, and the raw ``spam``/``ham`` labels are mapped to
    ``spam``/``legit``.

    Args:
        path: Path or URL of the CSV file to read. Defaults to ``spam.csv``.

    Returns:
        A DataFrame with exactly the columns ``text`` and ``label``. Rows
        whose text is missing, or whose label is neither ``spam`` nor
        ``ham``, are dropped.

    Raises:
        KeyError: If the file has no ``text``/``label`` columns after the
            rename.
    """
    raw = pd.read_csv(path, encoding="latin-1")
    renamed = raw.rename(columns={"v1": "label", "v2": "text"})  # Rename columns
    # Keep only the necessary columns, as an explicit copy so the label
    # rewrite below operates on an independent frame.
    data = renamed[["text", "label"]].copy()
    # Standardize labels. Series.map yields NaN for any value that is not a
    # key of the mapping.
    data["label"] = data["label"].map({"spam": "spam", "ham": "legit"})
    # Rows with NaN text or label cannot be used for training, so drop them
    # here instead of handing them to the model.
    return data.dropna(subset=["text", "label"])
@st.cache_resource
def fit_classifier(texts, labels):
    """Fit a fresh bag-of-words Naive Bayes pipeline on the given samples.

    Streamlit re-executes this script on every widget interaction; caching
    the fitted pipeline avoids retraining when the training data has not
    changed.

    Args:
        texts: Training documents (e.g. a pandas Series of strings).
        labels: Class label of each document, aligned with *texts*.

    Returns:
        The fitted ``Pipeline``. The cached object is reused across reruns,
        so callers must only predict with it and never refit it in place.
    """
    classifier = Pipeline([
        ('vectorizer', CountVectorizer()),
        ('tfidf', TfidfTransformer()),
        ('classifier', MultinomialNB()),
    ])
    classifier.fit(texts, labels)
    return classifier


# Load data
df = load_data()

# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    df['text'], df['label'], test_size=0.2, random_state=42
)

# Build and train the spam classifier model (cached across reruns)
model = fit_classifier(X_train, y_train)

# Streamlit UI
st.title("Spam Filter Email Classifier")
st.write("This app classifies emails as **spam** or **legit** based on trained data.")

# File uploader for a custom dataset
uploaded_file = st.file_uploader("Upload your own spam dataset (CSV format)", type=["csv"])
if uploaded_file is not None:
    try:
        df = pd.read_csv(uploaded_file)
    except (pd.errors.ParserError, pd.errors.EmptyDataError, UnicodeDecodeError) as exc:
        # A malformed upload should produce a message, not a crashed app.
        st.error(f"Could not read the uploaded CSV file: {exc}")
    else:
        if "text" in df.columns and "label" in df.columns:
            # Rows without text or label cannot be used for training.
            usable = df.dropna(subset=["text", "label"])
            try:
                custom_split = train_test_split(
                    usable['text'], usable['label'], test_size=0.2, random_state=42
                )
                # fit_classifier builds a new pipeline, so the default model
                # held in the cache is left untouched.
                custom_model = fit_classifier(custom_split[0], custom_split[2])
            except ValueError as exc:
                # Raised e.g. when too few usable rows remain to split or fit.
                st.error(f"Could not train on the uploaded dataset: {exc}")
            else:
                # Only swap in the custom data once training has succeeded.
                X_train, X_test, y_train, y_test = custom_split
                model = custom_model
                st.success("Custom dataset loaded and model retrained!")
        else:
            st.error("CSV file must contain 'text' and 'label' columns.")
# Text input for email classification
email_input = st.text_area("Enter email content:")
if st.button("Classify Email"):
    # Whitespace-only input contains no words to classify, so treat it the
    # same as an empty box and prompt the user instead of predicting.
    if email_input and email_input.strip():
        prediction = model.predict([email_input])[0]
        st.subheader(f"The email is classified as: **{prediction}**")
    else:
        st.write("Please enter an email to classify.")