Spaces:

Tzetha
/

MachineLearning

Sleeping

App Files Files Community

MachineLearning / app.py

Tzetha

added database

04da40f 10 months ago

raw

history blame contribute delete

2.08 kB

	import pandas as pd
	import streamlit as st
	from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
	from sklearn.model_selection import train_test_split
	from sklearn.naive_bayes import MultinomialNB
	from sklearn.pipeline import Pipeline

	# Load dataset
	@st.cache_data
	def load_data():
	    """Load the bundled spam dataset and normalise it for training.

	    Reads ``spam.csv`` using latin-1 decoding, renames the raw ``v1``/``v2``
	    columns to ``label``/``text`` and maps the raw labels ``spam``/``ham``
	    to ``spam``/``legit``.

	    Returns:
	        pandas.DataFrame: a frame with the columns ``text`` and ``label``.
	        Rows with a missing text, or with a label other than ``spam`` or
	        ``ham``, are dropped.
	    """
	    # Replace with your dataset path or URL
	    url = "spam.csv"

	    raw = pd.read_csv(url, encoding="latin-1")
	    renamed = raw.rename(columns={"v1": "label", "v2": "text"})

	    # .copy() detaches the two-column selection from its parent frame so the
	    # label assignment below cannot raise pandas' SettingWithCopyWarning.
	    df = renamed[["text", "label"]].copy()
	    df["label"] = df["label"].map({"spam": "spam", "ham": "legit"})

	    # map() turns every unexpected label into NaN; such rows (and rows with
	    # no text) cannot be used for training, so they are removed here.
	    return df.dropna(subset=["text", "label"])

	# Fetch the prepared dataset (columns: text, label)
	df = load_data()

	# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
	X_train, X_test, y_train, y_test = train_test_split(
	    df["text"],
	    df["label"],
	    test_size=0.2,
	    random_state=42,
	)

	# Spam classifier: token counts -> TF-IDF weighting -> multinomial naive Bayes.
	# fit() returns the pipeline itself, so the model is built and trained in one step.
	model = Pipeline(
	    steps=[
	        ("vectorizer", CountVectorizer()),
	        ("tfidf", TfidfTransformer()),
	        ("classifier", MultinomialNB()),
	    ]
	).fit(X_train, y_train)

	# Streamlit UI
	st.title("Spam Filter Email Classifier")
	st.write("This app classifies emails as spam or legit based on trained data.")

	# File uploader for a custom dataset
	uploaded_file = st.file_uploader("Upload your own spam dataset (CSV format)", type=["csv"])

	if uploaded_file:
	    # Local import: this is the only place an unfitted copy of the pipeline is needed.
	    from sklearn.base import clone

	    try:
	        df = pd.read_csv(uploaded_file)
	    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
	        # An empty, malformed or non-UTF-8 upload is reported instead of crashing the app.
	        st.error(f"Could not read the uploaded CSV file: {exc}")
	    else:
	        if "text" in df.columns and "label" in df.columns:
	            # Rows with a missing text or label would make training raise, and
	            # non-string text values break the vectorizer, so clean both up first.
	            df = df.dropna(subset=["text", "label"])
	            df = df.assign(text=df["text"].astype(str))
	            try:
	                X_train, X_test, y_train, y_test = train_test_split(
	                    df["text"], df["label"], test_size=0.2, random_state=42
	                )
	                # Fit an unfitted copy so a failure midway cannot leave the
	                # working model in a half-retrained state.
	                retrained = clone(model).fit(X_train, y_train)
	            except ValueError as exc:
	                # Raised e.g. for too few rows to split or an empty vocabulary.
	                st.error(f"Could not train on the uploaded dataset: {exc}")
	            else:
	                model = retrained
	                st.success("Custom dataset loaded and model retrained!")
	        else:
	            st.error("CSV file must contain 'text' and 'label' columns.")

	# Text input for email classification
	email_input = st.text_area("Enter email content:")
	if st.button("Classify Email"):
	    # Check the stripped text so that input consisting only of spaces or
	    # newlines is treated as empty instead of being classified.
	    if email_input and email_input.strip():
	        prediction = model.predict([email_input])[0]
	        st.subheader(f"The email is classified as: {prediction}")
	    else:
	        st.write("Please enter an email to classify.")