tunisian-arabiz / app.py
RamiIbrahim's picture
Update app.py
cc65dc4 verified
import gradio as gr
import joblib
from sklearn.feature_extraction.text import TfidfVectorizer
# Locations of the persisted artifacts, resolved relative to the working
# directory the app is started from.
MODEL_PATH = 'tunisian_arabiz_sentiment_analysis_model.pkl'
VECTORIZER_PATH = 'tfidf_vectorizer.pkl'

# Restore the saved classifier and the vectorizer that turns raw text into
# the features the classifier is given.
# NOTE: joblib.load is pickle-based and can execute arbitrary code while
# deserializing; only load artifact files that come from a trusted source.
model = joblib.load(MODEL_PATH)
vectorizer = joblib.load(VECTORIZER_PATH)
def predict_sentiment(text):
    """Classify a piece of text as positive or negative sentiment.

    Args:
        text: Raw user input. ``None`` and empty or whitespace-only strings
            are treated as "no input".

    Returns:
        A 3-tuple of strings ``(sentiment, confidence, explanation)``:

        * ``sentiment`` -- ``"Positive"`` when the model predicts label ``1``,
          ``"Negative"`` for any other label.
        * ``confidence`` -- the highest class probability, formatted with
          two decimals.
        * ``explanation`` -- a human-readable sentence combining both.

        When no usable input is given, a fixed placeholder tuple is returned
        and the model is not called.
    """
    # The UI may hand over None (e.g. a cleared textbox) instead of "";
    # calling .strip() on it would raise AttributeError, so check it first.
    if text is None or not text.strip():
        return (
            "No input provided",
            "N/A",
            "Please enter some text to get a sentiment prediction."
        )

    # The vectorizer expects an iterable of documents, hence the 1-item list.
    text_vectorized = vectorizer.transform([text])
    prediction = model.predict(text_vectorized)[0]
    probabilities = model.predict_proba(text_vectorized)[0]

    # The largest class probability is reported as the confidence.
    confidence = max(probabilities)
    sentiment = "Positive" if prediction == 1 else "Negative"

    return (
        sentiment,
        f"{confidence:.2f}",
        f"The model predicts this text is {sentiment.lower()} with {confidence:.2%} confidence."
    )
def get_example_predictions(examples):
    """Run ``predict_sentiment`` on the first element of every example row.

    Args:
        examples: Iterable of indexable rows; only ``row[0]`` (the text) is
            used.

    Returns:
        A list with one ``predict_sentiment`` result per row, in input order.
    """
    results = []
    for row in examples:
        sample_text = row[0]
        results.append(predict_sentiment(sample_text))
    return results
# Sample sentences offered to the user: four written in Latin script with
# digits (Arabiz) followed by two written in Arabic script.
_EXAMPLE_TEXTS = (
    "3jebni barcha el film hedha",
    "ma7abitch el mekla mte3 el restaurant",
    "el jaw fi tounes a7la 7aja",
    "ennes el kol te3ba w ma3andhomch flous",
    "كان جات الدنيا دنيا راني ساهرة في دار حماتي",
    "مبابي مانستعرف بيه مدريدي كان مانشوفو مركى هاتريك بمريول الريال",
)

# Each sentence is wrapped in its own one-element list (one row per sample).
examples = [[sentence] for sentence in _EXAMPLE_TEXTS]
# Classify every sample once, at start-up.
example_predictions = get_example_predictions(examples)

# Build one row per sample: the sample text followed by a short
# "<sentiment> (Confidence: <score>)" summary of its prediction.
formatted_examples = []
for ex, pred in zip(examples, example_predictions):
    summary = f"{pred[0]} (Confidence: {pred[1]})"
    formatted_examples.append([ex[0], summary])
# Create Gradio interface: a single text input and three outputs that line up
# with the (sentiment, confidence, explanation) tuple from predict_sentiment.
# NOTE(review): each row of formatted_examples holds two entries while only
# one input component is declared -- confirm gradio treats the extra column
# the way this app intends.
iface = gr.Interface(
    fn=predict_sentiment,
    inputs=gr.Textbox(lines=3, placeholder="أدخل النص هنا... / Enter your text here..."),
    outputs=[
        gr.Label(label="Predicted Sentiment"),
        gr.Label(label="Confidence Score"),
        gr.Textbox(label="Explanation")
    ],
    examples=formatted_examples,
    title="Tunisian Arabiz Sentiment Analysis",
    # HTML shown above the input; stray trailing quotes in the list items
    # were removed so the examples render cleanly.
    description="""
<p>This model predicts the sentiment of Tunisian text as either Positive or Negative. It works with both Tunisian Arabiz and standard Arabic script.</p>
<h4>What is Tunisian Arabiz? / ما هي العربيزية التونسية؟</h4>
<p>Tunisian Arabiz is a way of writing the Tunisian dialect using Latin characters and numbers. For example:</p>
<ul>
<li>"3ajbetni" means "I liked it"</li>
<li>"7aja" means "thing"</li>
<li>"a3tini 9ahwa" means "give me a coffee"</li>
</ul>
<p>Try the examples below or enter your own text!</p>
<p>!جرب الأمثلة أو أدخل نصك الخاص</p>
""",
    # HTML shown below the interface: model background and known limitations.
    article="""
<h3>About the Model</h3>
<p>This sentiment analysis model was trained on a combined dataset from TuniziDataset and the Tunisian Dialect Corpus.
It uses TF-IDF vectorization for feature extraction and Logistic Regression for classification.</p>
<p>The model accepts Tunisian Arabiz written with Latin and Arabic script.</p>
<h3>Limitations</h3>
<p>Due to dataset limitations, neutral sentiment data was removed to achieve maximum performance. </p>
<p>The model may not perform well on very colloquial expressions or new slang terms not present in the training data.
Sentiment can be nuanced and context-dependent, which may not always be captured accurately by this model.</p>
<a href="https://github.com/RamiIbrahim2002/Tunisian-Arabiz/tree/main">Github</a>
<center>
<h2>This model is open-source, and contributions of additional datasets are welcome to improve its capabilities.</h2>
<h2>هذا النموذج مفتوح المصدر، ونرحب بمساهمات مجموعات البيانات الإضافية لتحسين قدراته.</h2>
</center>
"""
)

# Launch the interface
iface.launch()