Spam_SMS_Detection

Sleeping

App Files Files Community

Spam_SMS_Detection / app.py

MLDeveloper

Update app.py

ed1b0c1 verified 2 months ago

raw

history blame contribute delete

3.71 kB

	import os
	import re
	import string

	import google.generativeai as genai
	import pandas as pd
	import streamlit as st
	from sklearn.feature_extraction.text import TfidfVectorizer
	from sklearn.metrics.pairwise import cosine_similarity
	from sklearn.model_selection import train_test_split
	from sklearn.naive_bayes import MultinomialNB

	# --- Set Gemini API Key ---
	# The key is read from the GEMINI_API_KEY environment variable instead of being
	# hard-coded: a key committed to a public repository can be read by anyone, so
	# any key that was previously stored in this file should be revoked.
	GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
	genai.configure(api_key=GEMINI_API_KEY)
	gemini_model = genai.GenerativeModel("gemini-pro")

	# Title & Intro
	# set_page_config stays the first Streamlit call made by this script.
	st.set_page_config(page_title="SMS Spam Detection", layout="centered")
	st.title("📩 SMS Spam Detection App")
	st.markdown("🔍 Enter an SMS message below to check if it's Spam or Not Spam (Ham)")

	# Tell the operator up front when the Gemini fallback cannot work.
	if not GEMINI_API_KEY:
	    st.warning("⚠️ GEMINI_API_KEY is not set. The Gemini fallback will not be available.")

	# --- Load CSV Dataset ---
	@st.cache_data
	def load_data():
	    """Download the SMS spam CSV and return it as a two-column DataFrame.

	    The file is read with latin-1 encoding. Only the ``v1`` and ``v2`` columns
	    are kept, renamed to ``label`` and ``message`` respectively. The result is
	    cached by Streamlit through the ``st.cache_data`` decorator.
	    """
	    source_url = "https://huggingface.co/spaces/MLDeveloper/Spam_SMS_Detection/resolve/main/spam.csv"
	    raw = pd.read_csv(source_url, encoding='latin-1')
	    return raw[['v1', 'v2']].rename(columns={'v1': 'label', 'v2': 'message'})

	df = load_data()

	# --- Preprocessing ---
	# Encode the text labels as integers: ham -> 0, spam -> 1.
	df['label'] = df['label'].map({'ham': 0, 'spam': 1})

	# --- Train Model ---
	@st.cache_resource
	def _fit_classifier(messages, labels):
	    """Fit the TF-IDF vectorizer and the Naive Bayes classifier.

	    Streamlit re-executes the whole script on every widget interaction, so
	    the fit is cached with ``st.cache_resource`` instead of being repeated
	    on each button click.

	    Args:
	        messages: Series of raw SMS texts.
	        labels: Series of integer labels (0 = ham, 1 = spam).

	    Returns:
	        A ``(vectorizer, train_matrix, model)`` tuple: the fitted
	        TfidfVectorizer, the TF-IDF matrix of the training split, and the
	        fitted MultinomialNB model.
	    """
	    # Same 80/20 split and seed as before; the held-out 20% is not used by this script.
	    train_messages, _, train_labels, _ = train_test_split(
	        messages, labels, test_size=0.2, random_state=42
	    )
	    # NOTE(review): training text is vectorized raw, while user input goes
	    # through clean_text before transform -- confirm whether the training
	    # messages should be cleaned the same way.
	    tfidf = TfidfVectorizer()
	    train_matrix = tfidf.fit_transform(train_messages)

	    classifier = MultinomialNB()
	    classifier.fit(train_matrix, train_labels)
	    return tfidf, train_matrix, classifier

	vectorizer, X_train_tfidf, model = _fit_classifier(df['message'], df['label'])

	# --- Clean Text Function ---
	def clean_text(text):
	    """Normalize an SMS message before it is vectorized.

	    The text is lower-cased, then URLs, @mentions, '#' signs, punctuation
	    (including underscores) and digits are removed, and leading/trailing
	    whitespace is stripped. Whitespace inside the message is left as is.

	    Args:
	        text: The raw message string.

	    Returns:
	        The cleaned message string.
	    """
	    text = text.lower()
	    # Alternation needs a bare '|'; the previous '\|' matched a literal pipe
	    # character, so URLs were never removed. 'http\S+' also covers https.
	    text = re.sub(r"http\S+|www\S+", '', text)
	    # '@\w+' removes a whole mention; the previous '\@w+' only matched '@'
	    # followed by literal 'w' characters.
	    text = re.sub(r'@\w+|#', '', text)
	    text = re.sub(r'[^\w\s]', '', text)
	    text = re.sub(r'\d+', '', text)
	    # '\w' keeps '_' above, so this pass is what removes underscores.
	    text = text.translate(str.maketrans('', '', string.punctuation))
	    return text.strip()

	# --- Predict Function ---
	def predict_spam(text):
	    """Classify a message with the local TF-IDF + Naive Bayes model.

	    The message is cleaned with ``clean_text``, transformed by the fitted
	    module-level ``vectorizer`` and passed to ``model.predict``.

	    Returns:
	        "Spam" when the predicted label is 1, otherwise "Not Spam (Ham)".
	    """
	    features = vectorizer.transform([clean_text(text)])
	    label = model.predict(features)[0]
	    if label == 1:
	        return "Spam"
	    return "Not Spam (Ham)"

	# --- Gemini Fallback ---
	def ask_gemini(text):
	    """Ask the Gemini model to classify a message.

	    Args:
	        text: The SMS message, embedded verbatim in the prompt.

	    Returns:
	        The model's reply with surrounding whitespace stripped, or a string
	        starting with "Error using Gemini: " if anything raised while
	        calling the model or reading its reply.
	    """
	    prompt = f"""You are an expert SMS spam detector.
	Classify the following message as 'Spam' or 'Not Spam (Ham)'.
	Message: "{text}"
	Reply with only: Spam or Not Spam (Ham)."""
	    try:
	        reply = gemini_model.generate_content(prompt)
	        answer = reply.text.strip()
	    except Exception as err:
	        # Any failure is reported to the caller as text rather than raised.
	        return f"Error using Gemini: {err}"
	    return answer

	# --- Input ---
	def _parse_gemini_reply(reply):
	    """Map the text returned by ask_gemini to 'spam', 'ham' or None.

	    None means the reply cannot be used as a verdict: either ask_gemini
	    returned its "Error using Gemini: ..." string, or the reply mentions
	    neither outcome.
	    """
	    verdict = reply.strip().lower()
	    if verdict.startswith("error using gemini"):
	        return None
	    # "Not Spam (Ham)" itself contains "spam", so the negative answer has to
	    # be tested first; a plain substring test would always report spam.
	    if "not spam" in verdict or verdict.startswith("ham"):
	        return "ham"
	    if "spam" in verdict:
	        return "spam"
	    return None

	user_input = st.text_area("✉️ Enter your SMS message here:")

	if st.button("Check Message"):
	    if not user_input.strip():
	        st.warning("⚠️ Please enter a message.")
	    else:
	        cleaned = clean_text(user_input)
	        input_vector = vectorizer.transform([cleaned])
	        similarities = cosine_similarity(input_vector, X_train_tfidf)
	        max_similarity = similarities.max()

	        gemini_verdict = None
	        # Check similarity threshold (e.g., < 0.3 = unknown message)
	        if max_similarity < 0.3:
	            st.info("🧠 Message not found in training data. Using Gemini for prediction...")
	            gemini_result = ask_gemini(user_input)
	            gemini_verdict = _parse_gemini_reply(gemini_result)
	            if gemini_verdict is None:
	                # Do not present an error or an unparseable reply as a verdict.
	                st.warning(f"⚠️ Gemini did not return a usable answer ({gemini_result}). Falling back to the local model.")

	        if gemini_verdict == "spam":
	            st.error("🚫 Gemini says: This message is SPAM.")
	        elif gemini_verdict == "ham":
	            st.success("✅ Gemini says: This message is NOT SPAM (HAM).")
	        else:
	            # Either the message resembles the training data, or Gemini gave no usable answer.
	            result = predict_spam(user_input)
	            if result == "Spam":
	                st.error("🚫 This message is classified as SPAM.")
	            else:
	                st.success("✅ This message is classified as NOT SPAM (HAM).")

	# --- Dataset preview ---
	with st.expander("📄 View sample dataset"):
	    st.dataframe(df.head())

	st.markdown("---")
	st.markdown("🔒 Note: This app is for educational purposes only.")