|
|
""" |
|
|
Bangla News Sentiment Analysis |
|
|
MSc IT Thesis Project |
|
|
""" |
|
|
import os |
|
|
|
|
|
import gradio as gr |
|
|
from transformers import pipeline |
|
|
import pandas as pd |
|
|
import torch |
|
|
|
|
|
|
|
|
# Device index handed to the pipeline below: 0 when torch reports CUDA as
# available, -1 otherwise (reported as 'GPU' / 'CPU' in the log line).
if torch.cuda.is_available():
    device = 0
else:
    device = -1
print(f"Using device: {'GPU' if device == 0 else 'CPU'}")

# Build the classifier once at import time; the analysis functions defined
# further down reuse this single instance for every request.
print("Loading model...")
sentiment_analyzer = pipeline(
    task="sentiment-analysis",
    model="tabularisai/multilingual-sentiment-analysis",
    device=device,
)
print("Model loaded successfully!")
|
|
|
|
|
|
|
|
def analyze_single_text(text):
    """Classify the sentiment of one piece of text for the result label.

    Args:
        text: Raw user input. ``None``, empty, or whitespace-only input is
            rejected without calling the model.

    Returns:
        A single-entry dict mapping a display label to the model's confidence
        score (a float), or ``{"Error": 1.0}`` when the input is blank or the
        model call fails.
    """
    if not text or not text.strip():
        return {"Error": 1.0}

    # Display names keyed by the lower-cased model label. The "very ..."
    # entries cover the five-class label set of the configured model; any
    # label not listed here is shown as its raw lower-cased name.
    label_map = {
        'very positive': '✅ Very Positive (অত্যন্ত ইতিবাচক)',
        'positive': '✅ Positive (ইতিবাচক)',
        'neutral': '➖ Neutral (নিরপেক্ষ)',
        'negative': '❌ Negative (নেতিবাচক)',
        'very negative': '❌ Very Negative (অত্যন্ত নেতিবাচক)',
    }

    try:
        # The 512-character slice bounds the work per request; a character
        # cap does not bound the token count, so tokenizer truncation is
        # requested as well to keep long inputs from failing in the model.
        result = sentiment_analyzer(
            text[:512], truncation=True, max_length=512
        )[0]
        sentiment = result['label'].strip().lower()
        confidence = float(result['score'])
    except Exception as e:
        # UI boundary: log the failure and show an error entry instead of
        # propagating the exception to the web interface.
        print(f"Error in analyze_single_text: {e}")
        return {"Error": 1.0}

    return {label_map.get(sentiment, sentiment): confidence}
|
|
|
|
|
|
|
|
def analyze_batch_file(file):
    """Run sentiment analysis over the ``text`` column of an uploaded CSV.

    Args:
        file: The upload to process. Either a path string or an object that
            exposes the path of the CSV as ``.name``. ``None`` means nothing
            was uploaded.

    Returns:
        A ``pandas.DataFrame``. On success it has the columns ``ID``,
        ``Text Preview``, ``Sentiment`` and ``Confidence``, one row per
        analysed input row (only the first 100 rows are analysed). Rows whose
        text cell is missing or blank are reported as ``EMPTY``; rows the
        model fails on are reported as ``ERROR``. When the file is missing,
        unreadable, or lacks a ``text`` column, a one-row frame with a single
        ``Error`` column describing the problem is returned instead.
    """
    if file is None:
        return pd.DataFrame({"Error": ["Please upload a file"]})

    max_rows = 100       # upper bound on rows analysed per upload
    max_chars = 512      # characters of each text passed to the model
    preview_chars = 80   # characters of each text echoed back in the table
    columns = ['ID', 'Text Preview', 'Sentiment', 'Confidence']

    try:
        # Accept both a plain path string and an upload object carrying the
        # path in ``.name``.
        csv_path = getattr(file, "name", file)
        df = pd.read_csv(csv_path)

        if 'text' not in df.columns:
            return pd.DataFrame({"Error": ["CSV must have 'text' column"]})

        results = []
        for row_id, raw in enumerate(df['text'][:max_rows], start=1):
            # Missing cells are read as NaN; str(NaN) would otherwise be
            # classified as the literal text "nan".
            text = "" if pd.isna(raw) else str(raw)
            if len(text) > preview_chars:
                preview = text[:preview_chars] + '...'
            else:
                preview = text

            if not text.strip():
                results.append({
                    'ID': row_id,
                    'Text Preview': preview,
                    'Sentiment': 'EMPTY',
                    'Confidence': 'N/A',
                })
                continue

            try:
                # Character slice bounds the input; tokenizer truncation
                # guards against texts that are still too long in tokens.
                result = sentiment_analyzer(
                    text[:max_chars], truncation=True, max_length=512
                )[0]
                row = {
                    'ID': row_id,
                    'Text Preview': preview,
                    'Sentiment': result['label'],
                    'Confidence': f"{result['score']:.2%}",
                }
            except Exception as e:
                # Best effort per row: record the failure in the table and
                # in the log, then keep processing the remaining rows.
                print(f"Error in analyze_batch_file (row {row_id}): {e}")
                row = {
                    'ID': row_id,
                    'Text Preview': preview,
                    'Sentiment': 'ERROR',
                    'Confidence': 'N/A',
                }
            results.append(row)

        # Passing the column list keeps the headers even when no rows exist.
        return pd.DataFrame(results, columns=columns)

    except Exception as e:
        print(f"Error in analyze_batch_file: {e}")
        return pd.DataFrame({"Error": [f"File processing failed: {str(e)}"]})
|
|
|
|
|
|
|
|
|
|
|
# Gradio UI: three tabs (single text, batch CSV, about) wired to the two
# analysis functions defined above.
with gr.Blocks(title="Bangla Sentiment Analysis") as demo:
    gr.Markdown("""
    # 🇧🇩 Bangla News Sentiment Analysis System
    ### Analyze sentiment of Bangla text using AI
    **MSc IT Thesis Project** | Model: Multilingual Sentiment Transformer
    """)

    with gr.Tab("📝 Single Text Analysis"):
        gr.Markdown("### Analyze sentiment of individual Bangla text")

        with gr.Row():
            with gr.Column():
                text_input = gr.Textbox(
                    lines=8,
                    placeholder="এখানে বাংলা টেক্সট লিখুন... (Enter Bangla text here)",
                    label="Input Text"
                )

                with gr.Row():
                    clear_btn = gr.Button("Clear")
                    analyze_btn = gr.Button("🔍 Analyze Sentiment")

            with gr.Column():
                output_label = gr.Label(
                    label="📊 Sentiment Result",
                    num_top_classes=3
                )

        gr.Markdown("### 💡 Try these examples:")
        gr.Examples(
            examples=[
                ["বাংলাদেশ ক্রিকেট দল দুর্দান্ত পারফরম্যান্স করেছে আজকের ম্যাচে!"],
                ["সরকারের এই সিদ্ধান্তে জনগণ অসন্তুষ্ট এবং ক্ষুব্ধ"],
                ["আজকের আবহাওয়া মোটামুটি ভালো থাকবে সারাদিন"],
                ["শিক্ষা ব্যবস্থায় উন্নতি প্রয়োজন কিন্তু পদক্ষেপ নেওয়া হচ্ছে"],
                ["এই রেস্তোরাঁর খাবারের মান অত্যন্ত খারাপ ছিল"],
                ["বাজারে সাধারণ শেয়ারধারীদের স্বার্থ রক্ষার সুযোগ নেই"]
            ],
            inputs=text_input
        )

        # Clear resets the result as well as the input, so a stale sentiment
        # is not left on screen next to an empty textbox.
        clear_btn.click(
            lambda: ("", None),
            outputs=[text_input, output_label]
        )
        analyze_btn.click(
            fn=analyze_single_text,
            inputs=text_input,
            outputs=output_label
        )

    with gr.Tab("📊 Batch Analysis"):
        gr.Markdown("""
        ### Analyze multiple texts at once
        Upload a CSV file with a column named **'text'** containing Bangla text

        *Only the first 100 rows are processed, and each text is truncated to 512 characters.*
        """)

        with gr.Row():
            with gr.Column():
                file_input = gr.File(
                    label="📁 Upload CSV File",
                    file_types=[".csv"]
                )
                batch_btn = gr.Button("🚀 Process Batch")

                gr.Markdown("""
                **CSV Format Example:**
                ```
                text
                প্রথম বাংলা টেক্সট
                দ্বিতীয় বাংলা টেক্সট
                তৃতীয় বাংলা টেক্সট
                ```
                """)

            with gr.Column():
                batch_output = gr.Dataframe(
                    label="Results"
                )

        batch_btn.click(
            fn=analyze_batch_file,
            inputs=file_input,
            outputs=batch_output
        )

    with gr.Tab("ℹ️ About"):
        gr.Markdown("""
        ## About This Project

        ### 🎯 Objective
        Develop an automated sentiment analysis system for Bangla news and social media text.

        ### 🤖 Model Information
        - **Model**: `tabularisai/multilingual-sentiment-analysis`
        - **Architecture**: Transformer-based (BERT family)
        - **Languages Supported**: 100+ including Bangla
        - **Sentiment Classes**: Very Positive, Positive, Neutral, Negative, Very Negative

        ### 📚 Dataset
        - Bangla news articles from major Bangladeshi newspapers
        - Social media comments and reviews
        - Manually validated samples

        ### 🛠️ Technology Stack
        - Python 3.12
        - Hugging Face Transformers
        - Gradio (Web Interface)
        - PyTorch

        ### 👨‍💻 Developer
        **[Rakib Hossain]**
        MSc in Information Technology
        Jahangirnagar University

        ### 📧 Contact
        Email: [rakibhoossain@gmail.com](mailto:rakibhoossain@gmail.com)
        GitHub: [rakibhoossain](https://github.com/rakibhoossain)

        ---
        *Last Updated: December 2025*
        """)
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    print("\n🚀 Starting Gradio interface...")
    print("=" * 50)

    # Honour an explicitly configured PORT. When the variable is unset,
    # server_port stays None so Gradio chooses its port exactly as it did
    # before this value was passed through.
    port_setting = os.environ.get("PORT")
    port = int(port_setting) if port_setting else None

    demo.launch(
        share=True,
        server_port=port,
        show_error=True
    )