"""Streamlit app that classifies free text into one of the 20 Newsgroups topics.

The app loads a pickled classifier, a pickled TF-IDF vectorizer and the list
of target names from the working directory, normalises the user's text with
spaCy, and shows the predicted category using a human-friendly label.
"""

import pickle
import re

import spacy
import streamlit as st

# Kept as the first Streamlit command: the cached loaders below may render a
# spinner, and older Streamlit releases reject set_page_config() if anything
# has been rendered before it.
st.set_page_config(page_title="Newsgroups Text Classifier", layout="centered")

# Anything that is not a lowercase ASCII letter or whitespace becomes a space.
_NON_LETTER_RE = re.compile(r"[^a-z\s]")


@st.cache_resource
def _load_nlp():
    """Load the spaCy English pipeline once and reuse it across reruns."""
    return spacy.load("en_core_web_sm")


@st.cache_resource
def _load_artifacts():
    """Load the pickled model, vectorizer and target names once.

    Returns:
        A ``(model, vectorizer, target_names)`` tuple.
    """
    # SECURITY NOTE: pickle.load executes arbitrary code embedded in the file.
    # These artifacts must come from a trusted source (your own training run);
    # never point these paths at user-supplied files.
    with open("logreg_model.pkl", "rb") as f:
        loaded_model = pickle.load(f)
    with open("tfidf_vectorizer.pkl", "rb") as f:
        loaded_vectorizer = pickle.load(f)
    with open("target_names.pkl", "rb") as f:
        loaded_target_names = pickle.load(f)
    return loaded_model, loaded_vectorizer, loaded_target_names


# Streamlit re-executes this script on every widget interaction; the cached
# loaders make these assignments cheap after the first run.
nlp = _load_nlp()
model, vectorizer, target_names = _load_artifacts()

# Maps raw newsgroup identifiers to labels shown in the UI.
friendly_labels = {
    'alt.atheism': "🌌 Atheism Discussions",
    'comp.graphics': "💻 Computer Graphics",
    'comp.os.ms-windows.misc': "🪟 Windows OS Topics",
    'comp.sys.ibm.pc.hardware': "🖥️ IBM PC Hardware",
    'comp.sys.mac.hardware': "🍏 Mac Hardware",
    'comp.windows.x': "🧱 X Window System",
    'misc.forsale': "🛒 Miscellaneous For Sale",
    'rec.autos': "🚗 Automobiles",
    'rec.motorcycles': "🏍️ Motorcycles",
    'rec.sport.baseball': "⚾ Baseball",
    'rec.sport.hockey': "🏒 Hockey",
    'sci.crypt': "🔐 Cryptography",
    'sci.electronics': "🔌 Electronics",
    'sci.med': "🧬 Medical Science",
    'sci.space': "🚀 Space",
    'soc.religion.christian': "✝️ Christianity",
    'talk.politics.guns': "🔫 Gun Politics",
    'talk.politics.mideast': "🌍 Middle East Politics",
    'talk.politics.misc': "💬 General Politics",
    'talk.religion.misc': "🙏 Other Religious Topics",
}


def preprocess_text(text: str) -> str:
    """Normalise raw text into a space-separated string of lemmas.

    The text is lower-cased, every character other than ``a-z`` and
    whitespace is replaced by a space, and the result is run through the
    spaCy pipeline. Alphabetic, non-stop-word tokens are kept and replaced
    by their lemma.

    Args:
        text: Raw input text.

    Returns:
        The lemmas joined by single spaces; an empty string when no token
        survives the filtering.
    """
    text = _NON_LETTER_RE.sub(" ", text.lower())
    doc = nlp(text)
    tokens = [
        token.lemma_
        for token in doc
        if token.is_alpha and not token.is_stop
    ]
    return " ".join(tokens)


# Streamlit UI
# NOTE(review): the title literal starts with a space in the original file;
# it is preserved unchanged here.
st.title(" Newsgroups Text Classification")
st.markdown("Enter any piece of text and the model will predict its category.")

user_input = st.text_area("Enter your text here:", height=200)

if st.button("Classify"):
    if not user_input.strip():
        st.warning("Please enter some text.")
    else:
        preprocessed = preprocess_text(user_input)
        if not preprocessed:
            # Only digits, punctuation or stop words were entered: the
            # vectorizer would receive an empty document, so any prediction
            # would not reflect the user's text.
            st.warning(
                "No meaningful words found after preprocessing. "
                "Please enter more descriptive text."
            )
        else:
            vect_text = vectorizer.transform([preprocessed])
            prediction = model.predict(vect_text)[0]
            predicted_label = target_names[prediction]
            # Fall back to the raw newsgroup name if no friendly label exists.
            friendly_name = friendly_labels.get(predicted_label, predicted_label)
            st.success(f"🔍 Predicted Category: **{friendly_name}**")