Spaces:
Sleeping
Sleeping
File size: 3,714 Bytes
8f460b5 1840ab8 308314b ed1b0c1 00b4891 4f6ca42 ed1b0c1 d481617 00b4891 308314b 00b4891 308314b 00b4891 7c07748 00b4891 4f6ca42 00b4891 308314b 00b4891 308314b 00b4891 308314b 00b4891 308314b 00b4891 308314b ed1b0c1 00b4891 308314b 00b4891 483b677 ed1b0c1 308314b ed1b0c1 f4ba322 00b4891 ef88b24 00b4891 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 |
import os
import re
import string

import google.generativeai as genai
import pandas as pd
import streamlit as st
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
# --- Set Gemini API Key ---
# The key is read from the GEMINI_API_KEY environment variable so that no
# credential is committed with the source. The key that used to be hard-coded
# on this line was published with the file and should be revoked.
# NOTE(review): when the variable is unset this passes api_key=None; confirm
# how the installed google-generativeai version resolves the key in that case.
genai.configure(api_key=os.environ.get("GEMINI_API_KEY"))
gemini_model = genai.GenerativeModel("gemini-pro")

# Title & Intro
# set_page_config is kept ahead of every other st.* rendering call.
st.set_page_config(page_title="SMS Spam Detection", layout="centered")
st.title("📩 SMS Spam Detection App")
st.markdown("🔍 Enter an SMS message below to check if it's **Spam** or **Not Spam (Ham)**")
# --- Load CSV Dataset ---
@st.cache_data
def load_data():
    """Fetch the spam CSV and return its two text columns under friendly names.

    The file is read from the Hugging Face URL with latin-1 decoding. Only the
    ``v1`` and ``v2`` columns are kept, renamed to ``label`` and ``message``.
    The result is memoized by ``st.cache_data``.
    """
    source_url = "https://huggingface.co/spaces/MLDeveloper/Spam_SMS_Detection/resolve/main/spam.csv"
    raw_frame = pd.read_csv(source_url, encoding='latin-1')
    selected = raw_frame[['v1', 'v2']]
    return selected.rename(columns={'v1': 'label', 'v2': 'message'})
df = load_data()

# --- Preprocessing ---
# Encode the textual labels numerically: ham -> 0, spam -> 1.
df['label'] = df['label'].map({'ham': 0, 'spam': 1})

# --- Train Model ---
# Fixed random_state keeps the 80/20 split identical on every script run.
X_train, X_test, y_train, y_test = train_test_split(
    df['message'], df['label'], test_size=0.2, random_state=42
)


@st.cache_resource
def _fit_spam_classifier(messages, labels):
    """Fit the TF-IDF vectorizer and the Naive Bayes classifier once.

    Streamlit re-executes this script on every widget interaction; without
    caching the model would be refit on each button click.

    Args:
        messages: Training message texts.
        labels: Matching 0/1 labels (0 = ham, 1 = spam).

    Returns:
        A ``(vectorizer, tfidf_matrix, classifier)`` tuple, where
        ``tfidf_matrix`` is the vectorized form of ``messages``.
    """
    fitted_vectorizer = TfidfVectorizer()
    tfidf_matrix = fitted_vectorizer.fit_transform(messages)
    classifier = MultinomialNB()
    classifier.fit(tfidf_matrix, labels)
    return fitted_vectorizer, tfidf_matrix, classifier


# NOTE(review): the training text is vectorized as-is, while user input goes
# through clean_text() before vectorizer.transform(); confirm whether the
# training messages should receive the same cleaning.
vectorizer, X_train_tfidf, model = _fit_spam_classifier(X_train, y_train)
# --- Clean Text Function ---
def clean_text(text):
    """Normalize a message before it is vectorized.

    Steps, in order: lowercase; drop URLs (tokens starting with ``http`` or
    ``www``); drop ``@mention`` tokens and ``#`` signs; drop every character
    that is neither a word character nor whitespace; drop digit runs; drop
    any remaining ASCII punctuation (in practice the underscore, which counts
    as a word character); trim surrounding whitespace.

    Args:
        text: The raw message string.

    Returns:
        The cleaned string. Interior whitespace is not collapsed.
    """
    text = text.lower()
    # "http\S+" already covers "https...", so no separate alternative is needed.
    text = re.sub(r"http\S+|www\S+", '', text)
    # "@\w+" removes the whole mention; the previous pattern "\@w+" only
    # matched "@" followed by literal "w" characters.
    text = re.sub(r'@\w+|#', '', text)
    text = re.sub(r'[^\w\s]', '', text)
    text = re.sub(r'\d+', '', text)
    text = text.translate(str.maketrans('', '', string.punctuation))
    return text.strip()
# --- Predict Function ---
def predict_spam(text):
    """Classify ``text`` with the local model.

    The message is cleaned with ``clean_text``, vectorized with the fitted
    ``vectorizer`` and passed to ``model.predict``.

    Returns:
        ``"Spam"`` when the predicted label is 1, otherwise
        ``"Not Spam (Ham)"``.
    """
    features = vectorizer.transform([clean_text(text)])
    predicted_label = model.predict(features)[0]
    if predicted_label == 1:
        return "Spam"
    return "Not Spam (Ham)"
# --- Gemini Fallback ---
def ask_gemini(text):
    """Ask the Gemini model to label ``text`` as spam or ham.

    Returns:
        The model's reply with surrounding whitespace stripped. If anything
        raises while generating or reading the reply, a string of the form
        ``"Error using Gemini: <message>"`` is returned instead.
    """
    prompt = f"""You are an expert SMS spam detector.
Classify the following message as 'Spam' or 'Not Spam (Ham)'.
Message: "{text}"
Reply with only: Spam or Not Spam (Ham)."""
    try:
        answer = gemini_model.generate_content(prompt).text.strip()
    except Exception as exc:
        # Failures are reported as text rather than raised.
        return f"Error using Gemini: {exc!s}"
    else:
        return answer
# --- Input ---
# Below this maximum cosine similarity to the training messages, the input is
# treated as unfamiliar and sent to Gemini instead of the local model.
SIMILARITY_THRESHOLD = 0.3


def _parse_gemini_verdict(reply):
    """Turn a free-text Gemini reply into ``"spam"``, ``"ham"`` or ``None``.

    A plain substring test for "spam" is wrong here because the expected ham
    answer, "Not Spam (Ham)", also contains "spam". "not spam" is therefore
    checked first. ``None`` means the reply is an ``ask_gemini`` error string
    or contains no recognizable verdict.
    """
    normalized = reply.strip().lower()
    if normalized.startswith("error using gemini"):
        return None
    if re.search(r"\bnot\s+spam\b", normalized):
        return "ham"
    if re.search(r"\bspam\b", normalized):
        return "spam"
    if re.search(r"\bham\b", normalized):
        return "ham"
    return None


user_input = st.text_area("✉️ Enter your SMS message here:")
if st.button("Check Message"):
    if not user_input.strip():
        st.warning("⚠️ Please enter a message.")
    else:
        cleaned = clean_text(user_input)
        input_vector = vectorizer.transform([cleaned])
        max_similarity = cosine_similarity(input_vector, X_train_tfidf).max()

        verdict = None
        if max_similarity < SIMILARITY_THRESHOLD:
            st.info("🧠 Message not found in training data. Using Gemini for prediction...")
            gemini_result = ask_gemini(user_input)
            verdict = _parse_gemini_verdict(gemini_result)
            if verdict == "spam":
                st.error("🚫 Gemini says: This message is **SPAM**.")
            elif verdict == "ham":
                st.success("✅ Gemini says: This message is **NOT SPAM (HAM)**.")
            else:
                # An error or unrecognized reply must not be shown as a verdict.
                st.warning(
                    f"⚠️ Gemini did not return a usable answer ({gemini_result}). "
                    "Falling back to the local model."
                )

        # Local model: used for familiar messages and as the Gemini fallback.
        if verdict is None:
            result = predict_spam(user_input)
            if result == "Spam":
                st.error("🚫 This message is classified as **SPAM**.")
            else:
                st.success("✅ This message is classified as **NOT SPAM (HAM)**.")

# --- Dataset preview ---
with st.expander("📄 View sample dataset"):
    st.dataframe(df.head())

st.markdown("---")
st.markdown("🔒 *Note: This app is for educational purposes only.*")
|