import gradio as gr
import joblib
import os
import re

import emoji
import demoji
import numpy as np
import nltk

# Ensure both tokenizer resources are downloaded
nltk.download("punkt")
nltk.download("punkt_tab")
# ==========================================================
# 📦 Load all models
# ==========================================================
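# Optional pre-flight check (illustrative sketch, using the `os` import above):
# fail fast with a clear message if any pickled artifact is missing from the
# Space's working directory, rather than hitting a bare load error below.
_ARTIFACTS = [
    "tfidf_vectorizer_en.pkl", "label_encoder_en.pkl", "stacking_en.pkl",
    "tfidf_vectorizer_fa.pkl", "label_encoder_fa.pkl", "stacking_fa.pkl",
]
for _path in _ARTIFACTS:
    if not os.path.exists(_path):
        raise FileNotFoundError(f"Expected model artifact not found: {_path}")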
vectorizer_en = joblib.load("tfidf_vectorizer_en.pkl")
le_en = joblib.load("label_encoder_en.pkl")
stacking_en = joblib.load("stacking_en.pkl")

vectorizer_fa = joblib.load("tfidf_vectorizer_fa.pkl")
le_fa = joblib.load("label_encoder_fa.pkl")
stacking_fa = joblib.load("stacking_fa.pkl")
# ==========================================================
# 🧹 Text cleaning functions
# ==========================================================
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from hazm import Normalizer, Lemmatizer as HazmLemmatizer, word_tokenize as hazm_tokenize

# "punkt" is already downloaded above; fetch the remaining NLTK resources
nltk.download("stopwords")
nltk.download("wordnet")
# English preprocessing
lemmatizer = WordNetLemmatizer()
STOPWORDS = set(stopwords.words("english"))

RE_URL = re.compile(r"http\S+|www\.\S+")      # URLs
RE_HTML = re.compile(r"<.*?>")                # HTML tags
RE_NONALPHA = re.compile(r"[^a-zA-Z\s]")      # anything that is not a Latin letter or whitespace
def preprocess_english(text):
    text = str(text).lower()
    text = emoji.demojize(text)        # turn emojis into ":name:" tokens
    text = demoji.replace(text, "")    # drop any emoji characters demojize missed
    text = RE_URL.sub(" ", text)
    text = RE_HTML.sub(" ", text)
    text = RE_NONALPHA.sub(" ", text)
    text = re.sub(r"\s+", " ", text).strip()
    tokens = word_tokenize(text)
    tokens = [lemmatizer.lemmatize(t) for t in tokens if t not in STOPWORDS and len(t) > 2]
    return " ".join(tokens)
# Persian preprocessing
normalizer = Normalizer()
hazm_lemmatizer = HazmLemmatizer()

RE_URL_FA = re.compile(r"http\S+|www\.\S+")
RE_NONPERSIAN = re.compile(r"[^\u0600-\u06FFA-Za-z\s]")   # keep Persian/Arabic and Latin letters only
def preprocess_persian(text):
    text = str(text)
    text = normalizer.normalize(text)
    text = emoji.demojize(text)
    text = demoji.replace(text, "")
    text = RE_URL_FA.sub(" ", text)
    text = re.sub(r"@\w+|#\w+|\d+", " ", text)   # drop mentions, hashtags, and digits
    text = RE_NONPERSIAN.sub(" ", text)
    text = re.sub(r"\s+", " ", text).strip()
    tokens = hazm_tokenize(text)
    tokens = [hazm_lemmatizer.lemmatize(t) for t in tokens if len(t) > 1]
    return " ".join(tokens)
# ==========================================================
# 🔮 Prediction function
# ==========================================================
def predict_sentiment(comment, language):
    if language == "English":
        clean_text = preprocess_english(comment)
        X = vectorizer_en.transform([clean_text])
        pred = stacking_en.predict(X)[0]
        probs = stacking_en.predict_proba(X)[0]
        classes = le_en.classes_
    else:
        clean_text = preprocess_persian(comment)
        X = vectorizer_fa.transform([clean_text])
        pred = stacking_fa.predict(X)[0]
        probs = stacking_fa.predict_proba(X)[0]
        classes = le_fa.classes_

    # If the classifier was trained on label-encoded targets, map the integer
    # prediction back to its class name so the output shows a readable label.
    if isinstance(pred, (int, np.integer)):
        pred = classes[pred]

    result_str = f"🔹 **Predicted Sentiment:** {pred}\n\n"
    prob_table = "\n".join(f"{cls}: {round(p, 3)}" for cls, p in zip(classes, probs))
    return f"🗣️ **Input:** {comment}\n\n{result_str}**Prediction Probabilities:**\n{prob_table}"
# ==========================================================
# 🎨 Gradio UI
# ==========================================================
lang_dropdown = gr.Dropdown(["English", "Persian"], label="Select Language", value="English")
input_box = gr.Textbox(label="Enter your comment here")
output_box = gr.Markdown()

iface = gr.Interface(
    fn=predict_sentiment,
    inputs=[input_box, lang_dropdown],
    outputs=output_box,
    title="🌍 Multilingual Sentiment Analyzer (English + Persian)",
    description="Enter a comment in English or Persian to see the predicted sentiment and probabilities.",
    examples=[
        ["I loved the show! It was amazing!", "English"],
        ["برنامه خیلی عالی بود و مجری هم خوب بود", "Persian"],  # "The show was excellent and the host was good too."
        ["It was an average episode, not too bad.", "English"],
    ],
)
if __name__ == "__main__":
    iface.launch()