|
from flask import Flask, request, jsonify, send_from_directory |
|
import pandas as pd |
|
import torch |
|
from transformers import BertTokenizer, BertForSequenceClassification |
|
from wordcloud import WordCloud |
|
import uuid |
|
import io |
|
import base64 |
|
import os |
|
from PIL import Image |
|
|
|
# Flask application exposing the sentiment model over HTTP.
app = Flask(__name__)

# Folder that holds generated word-cloud images.
UPLOAD_FOLDER = "uploads"
# The /uploads/<filename> view reads this key from app.config, so it has to be
# populated; without it the view fails with KeyError.
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
# The /analyze view writes PNG files into this folder, so it must exist.
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
|
|
|
|
|
@app.route('/uploads/<filename>')
def uploaded_file(filename):
    """Serve a file from the upload folder.

    Args:
        filename: File name taken from the URL path.

    Returns:
        The response built by ``send_from_directory``.
    """
    # Fall back to the module constant: a bare app.config['UPLOAD_FOLDER']
    # lookup raises KeyError when the key was never configured.
    folder = app.config.get('UPLOAD_FOLDER', UPLOAD_FOLDER)
    return send_from_directory(folder, filename)
|
|
|
|
|
# Tokenizer is loaded from the public multilingual BERT checkpoint, while the
# classifier weights come from the local directory below.
# NOTE(review): presumably the fine-tuned model was trained with this same
# tokenizer vocabulary; confirm against the training setup.
tokenizer = BertTokenizer.from_pretrained("bert-base-multilingual-cased")
model_path = "./src/emotion_final_model"

# Use the GPU when one is available, otherwise run on CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = BertForSequenceClassification.from_pretrained(model_path).to(device)
# Inference only: switch the model to evaluation mode.
model.eval()

# Maps the argmax index of the model logits to a sentiment label.
label_mapping = {0: "negative", 1: "neutral", 2: "positive"}
|
|
|
@app.route('/predict', methods=['POST'])
def predict():
    """Classify the sentiment of one piece of text.

    Expects a JSON object with a ``text`` field.

    Returns:
        ``{"sentiment": <label>}`` where the label comes from
        ``label_mapping``, or an ``{"error": ...}`` payload with HTTP 400 when
        the body is not a JSON object, ``text`` is missing/empty, or ``text``
        is not a string.
    """
    # silent=True returns None instead of raising when the body is missing or
    # is not valid JSON, so the request ends in a clean 400 below.
    data = request.get_json(silent=True)
    text = data.get('text') if isinstance(data, dict) else None

    if not text:
        return jsonify({"error": "No text provided"}), 400
    if not isinstance(text, str):
        # The tokenizer and the single-item .item() call below only handle one string.
        return jsonify({"error": "'text' must be a string"}), 400

    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
    inputs = {key: value.to(device) for key, value in inputs.items()}

    with torch.no_grad():
        outputs = model(**inputs)

    logits = outputs.logits
    predicted_class_idx = torch.argmax(logits, dim=-1).item()
    sentiment = label_mapping[predicted_class_idx]

    return jsonify({"sentiment": sentiment})
|
|
|
# Aspect name -> keywords (English and Urdu) whose presence in a review marks
# that aspect as mentioned. Keywords are lowercase because detect_aspects()
# lowercases the review before doing substring matching.
aspect_keywords = {
    "Quality": ["quality", "material", "durable", "performance", "sturdy", "broken", "defective", "معیار", "ٹوٹا ہوا", "خراب"],
    "Price": ["price", "cheap", "expensive", "value", "cost", "قیمت", "مہنگا", "سستا", "قیمت زیادہ"],
    "Delivery": ["delivery", "shipping", "arrived", "late", "courier", "ترسیل", "شپنگ", "تاخیر", "دیر سے پہنچا"],
    "Usability": ["easy to use", "setup", "installation", "instructions", "user-friendly", "آسان", "استعمال میں آسان", "سیٹ اپ", "تنصیب"],
    "Design": ["design", "style", "appearance", "color", "looks", "ڈیزائن", "خوبصورتی", "رنگ", "ساخت"],
    "Warranty/Support": ["warranty", "support", "return", "replacement", "service center", "وارنٹی", "واپسی", "تبادلہ", "سروس سینٹر"]
}
|
|
|
def detect_aspects(text, keywords=None):
    """Return the aspects whose keywords occur in *text*.

    Matching is a case-insensitive substring test, so a keyword also matches
    inside a longer word.

    Args:
        text: Review text. Non-string values are converted with ``str``;
            ``None`` yields an empty result.
        keywords: Optional mapping of aspect name to keyword list. Defaults to
            the module-level ``aspect_keywords``.

    Returns:
        List of matching aspect names, in the mapping's iteration order.
    """
    if text is None:
        return []
    if keywords is None:
        keywords = aspect_keywords

    # CSV columns can be parsed as numbers, which have no .lower().
    text_lower = str(text).lower()
    return [
        aspect
        for aspect, terms in keywords.items()
        if any(term in text_lower for term in terms)
    ]
|
|
|
def _classify_sentiment(text):
    """Return the label from ``label_mapping`` that the model predicts for one review string."""
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
    inputs = {k: v.to(device) for k, v in inputs.items()}

    with torch.no_grad():
        outputs = model(**inputs)

    predicted_class_idx = torch.argmax(outputs.logits, dim=-1).item()
    return label_mapping[predicted_class_idx]


def _render_wordcloud(all_text):
    """Render *all_text* as a PNG word cloud inside ``UPLOAD_FOLDER``.

    Returns:
        ``(path, base64_png)`` on success, or ``(None, None)`` when there is
        nothing to draw.
    """
    if not all_text.strip():
        return None, None

    try:
        wordcloud = WordCloud(
            width=800, height=400, background_color='white', font_path='src/urdu_font.ttf'
        ).generate(all_text)
    except ValueError:
        # generate() raises ValueError when no drawable words are left.
        return None, None

    # The folder is not guaranteed to exist before the first upload.
    os.makedirs(UPLOAD_FOLDER, exist_ok=True)
    wordcloud_path = os.path.join(UPLOAD_FOLDER, f"wordcloud{uuid.uuid4()}.png")
    wordcloud.to_file(wordcloud_path)

    with open(wordcloud_path, "rb") as image_file:
        encoded_image = base64.b64encode(image_file.read()).decode('utf-8')
    return wordcloud_path, encoded_image


@app.route("/analyze", methods=["POST"])
def analyze():
    """Run sentiment and aspect analysis over an uploaded CSV of reviews.

    Expects a multipart upload named ``file`` containing a CSV with a
    ``Review`` column. Missing values in that column are skipped.

    Returns:
        JSON with ``total_positive``, ``total_negative``, ``total_neutral``,
        ``aspect_summary`` (per-aspect counts), ``wordcloud_image_path`` and,
        when a word cloud could be drawn, ``wordcloud_image_base64``.
        HTTP 400 with an ``error`` message when the upload is missing, is not
        a readable CSV, or lacks the ``Review`` column.
    """
    if 'file' not in request.files:
        return jsonify({"error": "No file uploaded"}), 400
    file = request.files['file']

    try:
        df = pd.read_csv(file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError, ValueError) as exc:
        return jsonify({"error": f"Could not read CSV: {exc}"}), 400

    if 'Review' not in df.columns:
        return jsonify({"error": "CSV must contain a 'Review' column"}), 400

    # Log a short summary only; dumping the whole frame floods stdout and
    # leaks the uploaded reviews into the logs.
    app.logger.info("Analyzing %r (%d rows)", file.filename, len(df))

    totals = {"positive": 0, "negative": 0, "neutral": 0}
    aspect_summary = {
        aspect: {"positive": 0, "negative": 0, "neutral": 0, "total": 0}
        for aspect in aspect_keywords
    }
    review_texts = []

    for raw_review in df['Review'].dropna():
        # The column may have been parsed as numbers; the tokenizer needs str.
        text = str(raw_review)
        sentiment = _classify_sentiment(text)
        totals[sentiment] += 1
        review_texts.append(text)

        for aspect in detect_aspects(text):
            aspect_summary[aspect][sentiment] += 1
            aspect_summary[aspect]["total"] += 1

    wordcloud_path, encoded_image = _render_wordcloud(" ".join(review_texts))

    payload = {
        "total_positive": totals["positive"],
        "total_negative": totals["negative"],
        "total_neutral": totals["neutral"],
        "aspect_summary": aspect_summary,
        "wordcloud_image_path": wordcloud_path,
    }
    print(payload)

    if encoded_image is not None:
        # The dashboard looks for this key first, so the image no longer
        # depends on both processes sharing a working directory.
        payload["wordcloud_image_base64"] = encoded_image

    return jsonify(payload)
|
|
|
def run_flask():
    """Serve the Flask app on all interfaces, port 5000; blocks while the server runs."""
    # NOTE(review): 0.0.0.0 exposes the API beyond localhost although the
    # dashboard only calls 127.0.0.1 -- confirm this is intended.
    app.run(host="0.0.0.0", port=5000)


import threading

_FLASK_THREAD_NAME = "flask-api-server"

# Streamlit re-executes the script on every interaction. Starting a fresh
# server thread each time would try to bind port 5000 again, so only start one
# when no live thread with this name exists in the process.
if not any(
    t.name == _FLASK_THREAD_NAME and t.is_alive() for t in threading.enumerate()
):
    threading.Thread(target=run_flask, name=_FLASK_THREAD_NAME).start()
|
|
|
import streamlit as st |
|
import pandas as pd |
|
import plotly.express as px |
|
from io import BytesIO, StringIO |
|
from PIL import Image |
|
import random |
|
import requests |
|
import os |
|
import uuid |
|
import tempfile |
|
|
|
# Endpoint of the Flask /analyze route that the dashboard posts the CSV to.
API_URL = 'http://127.0.0.1:5000/analyze'

# Browser tab title and wide layout for the dashboard page.
st.set_page_config(
    page_title="Multilingual Sentiment Analyzer",
    layout="wide"
)
|
|
|
# Global CSS: force a light theme (white background, black text) and restyle
# the file-uploader widget. The markup starts at column 0 so Markdown does not
# treat it as an indented code block.
st.markdown("""
<style>
/* Light theme override */
html, body, .stApp {
    background-color: #ffffff !important;
    color: #000000 !important;
}

h1, h2, h3, h4, h5, h6, p, div, span, label, section, .markdown-text-container {
    color: #000000 !important;
}

.stFileUploader > div, .stFileUploader div div {
    background-color: #f9f9f9 !important;
    border: 1px solid #ccc !important;
    color: #000000 !important;
}
</style>
""", unsafe_allow_html=True)

# Centered page title and introduction.
st.markdown("""
<div style='text-align: center; padding-top: 10px;'>
<h1 style='font-size: 40px;'>🌍 Multilingual Sentiment Analysis Dashboard</h1>
<p style='font-size: 18px; color: #ccc; max-width: 720px; margin: auto;'>
Upload a CSV to explore sentiment Report. With sentiment analysis, you can catch early signals, reduce risk, and validate market fit — even across global audiences.
</p>
</div>
""", unsafe_allow_html=True)
|
|
|
|
|
|
|
|
|
def load_dummy_data():
    """Return the built-in demo reviews.

    Returns:
        A DataFrame with a single ``Review`` column holding eight sample
        reviews written in several languages.
    """
    return pd.DataFrame({
        "Review": [
            "La livraison était très rapide et le service excellent.",
            "The product quality was terrible, I want a refund.",
            "Servicio al cliente fue amable pero no resolvieron mi problema.",
            "Das Produkt kam beschädigt an und der Support war unhöflich.",
            "Great value for the price, I'm very happy!",
            "Muy mal embalaje, pero el envío fue rápido.",
            "客服很好,但产品描述不准确。",
            "Perfect fit, just as described. Will buy again!"
        ]
    })
|
|
|
|
|
|
|
|
|
# Column that holds the review text, and the switch that routes the analysis
# through the API instead of the random placeholder predictor.
text_column = 'Review'
enable_aspect = True

# NOTE(review): the name `uploaded_file` is also the name of the Flask view
# defined earlier in this file; the route is already registered by then, so
# rebinding the name here does not affect it.
with st.expander("📁 Upload Your CSV File", expanded=True):
    uploaded_file = st.file_uploader("Choose a CSV file with reviews", type=["csv"])

run_analysis = st.button("🚀 Run Analysis", type="primary")

# Pick the data to analyze: the upload when it is usable, the demo data otherwise.
if uploaded_file:
    try:
        df = pd.read_csv(uploaded_file)
    except Exception as e:
        st.error(f"Error reading CSV: {e}")
        df = load_dummy_data()
    else:
        if df.empty:
            st.error("The uploaded CSV file is empty.")
            df = load_dummy_data()
        elif text_column not in df.columns:
            # Everything downstream reads this column; reject the file here
            # instead of failing later during analysis.
            st.error(f"The uploaded CSV file has no '{text_column}' column.")
            df = load_dummy_data()
        else:
            st.success("✅ File uploaded successfully!")
else:
    st.info("Using built-in demo data. Upload a CSV to use your own.")
    df = load_dummy_data()

st.write("✅ App is running! Here's a sample:")
st.dataframe(df.head())

st.markdown("---")
st.markdown("### 🔎 Sentiment Analysis Results")
|
|
|
|
|
def fake_sentiment_predict(text):
    """Return a random placeholder prediction; *text* is ignored.

    Returns:
        ``(label, confidence)`` where the label is "Positive", "Negative" or
        "Neutral" and the confidence is a random value between 0.65 and 0.99
        rounded to two decimals.
    """
    label = random.choice(["Positive", "Negative", "Neutral"])
    confidence = round(random.uniform(0.65, 0.99), 2)
    return label, confidence
|
|
|
|
|
# Seed the frame offered for download on the first run of the session.
if 'analyzed_df' not in st.session_state:
    st.session_state.analyzed_df = df.copy()

# Sentiment counters; they stay at zero until an analysis fills them in.
positive, negative, neutral, total = 0, 0, 0, 0

# Placeholder path: random predictions, used only when the API-backed
# analysis is switched off.
if run_analysis and not enable_aspect:
    fake_results = [fake_sentiment_predict(text) for text in df[text_column]]
    # zip(*[]) produces nothing to unpack, so handle an empty column explicitly.
    sentiments, confidences = zip(*fake_results) if fake_results else ((), ())

    st.session_state.analyzed_df = df.copy()
    st.session_state.analyzed_df["Sentiment"] = list(sentiments)
    st.session_state.analyzed_df["Confidence"] = list(confidences)

    sentiment_counts = pd.Series(list(sentiments), dtype=object).value_counts()
    positive = sentiment_counts.get("Positive", 0)
    negative = sentiment_counts.get("Negative", 0)
    neutral = sentiment_counts.get("Neutral", 0)
    total = positive + negative + neutral
|
|
|
def percent(part, whole=None):
    """Format *part* as a whole-number percentage of *whole*.

    Args:
        part: Count to express as a percentage.
        whole: Denominator. Defaults to the module-level ``total`` at call time.

    Returns:
        A string such as ``"25%"``; ``"0%"`` when the denominator is zero.
    """
    if whole is None:
        whole = total
    return f"{round((part / whole) * 100)}%" if whole else "0%"
|
|
|
|
|
# Two columns: summary cards on the left, pie chart on the right.
card_col, chart_col = st.columns([1.2, 2])

with card_col:
    # Card styling plus the opening tag of the card container. The markup
    # starts at column 0 so Markdown does not treat it as a code block.
    # NOTE(review): max-width: 10px looks unintentionally small -- confirm.
    st.markdown("""
<style>
.card-container {
    max-width: 10px;
    margin: 0 auto;
}
.card {
    padding: 10px;
    border-radius: 12px;
    margin-bottom: 10px;
    font-size: 16px;
    font-weight: 500;
    line-height: 1.5;
    background-color: var(--secondary-background-color);
    border: 1px solid rgba(255,255,255,0.15);
    color: white;
    text-align: center;
}
.card strong {
    font-size: 20px;
    display: block;
    margin-top: 5px;
}
</style>
<div class="card-container">
""", unsafe_allow_html=True)

# Pie chart for counts that are already available at this point.
if total > 0:
    with chart_col:
        sentiment_labels = ["Positive", "Negative", "Neutral"]
        fig = px.pie(
            names=sentiment_labels,
            values=[positive, negative, neutral],
            # Without `color` the discrete map below is not applied to the slices.
            color=sentiment_labels,
            color_discrete_map={
                "Positive": "#66bb6a",
                "Negative": "#ef5350",
                "Neutral": "#42a5f5"
            }
        )
        fig.update_traces(
            textinfo='percent+label',
            hoverinfo='label+percent+value',
            pull=[0.03, 0.03, 0.03]
        )
        fig.update_layout(
            margin=dict(t=20, b=20, l=10, r=10),
            paper_bgcolor="rgba(0,0,0,0)",
            plot_bgcolor="rgba(0,0,0,0)",
            font_color="white"
        )
        st.plotly_chart(fig, use_container_width=True)
|
|
|
|
|
|
|
|
|
# API-backed analysis: send the reviews to the Flask /analyze endpoint and
# render its sentiment totals, aspect table, charts and word cloud.
if run_analysis and enable_aspect:
    st.subheader("📌 Aspect Sentiment Summary")

    with st.spinner("Fetching Aspect Report from API..."):
        try:
            # Upload the CSV straight from memory. Going through a temporary
            # file left the file behind whenever the request raised.
            unique_filename = f"temp_reviews_{uuid.uuid4()}.csv"
            csv_bytes = df.to_csv(index=False).encode("utf-8")
            files = {'file': (unique_filename, csv_bytes, 'text/csv')}

            # Bound the connect phase only; the analysis itself may take long.
            response = requests.post(API_URL, files=files, timeout=(10, None))

            if response.status_code == 200:
                response_json = response.json()

                # Keep the payload for the Excel export.
                st.session_state.api_response_json = response_json

                positive = response_json.get("total_positive", 0)
                negative = response_json.get("total_negative", 0)
                neutral = response_json.get("total_neutral", 0)
                total = positive + negative + neutral

                # Derive the fields the export expects when the API omits them.
                if "positive_percentage" not in response_json and total > 0:
                    response_json["positive_percentage"] = round((positive / total) * 100)
                    response_json["negative_percentage"] = round((negative / total) * 100)
                    response_json["neutral_percentage"] = round((neutral / total) * 100)

                if "total_reviews" not in response_json:
                    response_json["total_reviews"] = total

                # Summary cards. The markup starts at column 0 so Markdown
                # does not treat it as an indented code block.
                with card_col:
                    st.markdown(f"""
<div class="card" style="border-color:#bfbfbf;">
📊 <strong>Total Reviews</strong>
{total}
</div>
<div class="card" style="border-color:#66bb6a;">
✅ <strong>{positive} Positive</strong>
{percent(positive)} of total
</div>
<div class="card" style="border-color:#ef5350;">
❗ <strong>{negative} Negative</strong>
{percent(negative)} of total
</div>
<div class="card" style="border-color:#42a5f5;">
😐 <strong>{neutral} Neutral</strong>
{percent(neutral)} of total
</div>
</div>
""", unsafe_allow_html=True)

                with chart_col:
                    sentiment_labels = ["Positive", "Negative", "Neutral"]
                    fig = px.pie(
                        names=sentiment_labels,
                        values=[positive, negative, neutral],
                        # Without `color` the discrete map below is not applied.
                        color=sentiment_labels,
                        color_discrete_map={
                            "Positive": "#66bb6a",
                            "Negative": "#ef5350",
                            "Neutral": "#42a5f5"
                        }
                    )
                    fig.update_traces(
                        textinfo='percent+label',
                        hoverinfo='label+percent+value',
                        pull=[0.03, 0.03, 0.03]
                    )
                    fig.update_layout(
                        margin=dict(t=20, b=20, l=10, r=10),
                        paper_bgcolor="rgba(0,0,0,0)",
                        plot_bgcolor="rgba(0,0,0,0)",
                        font_color="white"
                    )
                    st.plotly_chart(fig, use_container_width=True)

                # Frame offered for download: per-review details when the API
                # returns them, otherwise the input plus any per-review lists.
                if "review_details" in response_json:
                    api_result_df = pd.DataFrame(response_json["review_details"])
                    st.session_state.analyzed_df = api_result_df
                else:
                    st.session_state.analyzed_df = df.copy()

                    if "sentiments" in response_json:
                        st.session_state.analyzed_df["Sentiment"] = response_json["sentiments"]

                    for key in ["confidences", "languages"]:
                        if key in response_json:
                            # "confidences" -> "Confidence", "languages" -> "Language"
                            column_name = key.rstrip("s").capitalize()
                            st.session_state.analyzed_df[column_name] = response_json[key]

                # One table row per aspect; a missing summary gives an empty table.
                aspect_rows = [
                    {
                        "Aspect": aspect,
                        "Positive": values["positive"],
                        "Negative": values["negative"],
                        "Neutral": values["neutral"],
                        "Total": values["total"]
                    }
                    for aspect, values in response_json.get("aspect_summary", {}).items()
                ]
                aspect_df = pd.DataFrame(aspect_rows)

                if not aspect_df.empty and aspect_df["Total"].sum() > 0:
                    st.session_state.aspect_dataframe = aspect_df
                    st.dataframe(aspect_df)

                    # Long format: one row per (aspect, sentiment) for the bar chart.
                    melted = aspect_df.melt(
                        id_vars="Aspect",
                        value_vars=["Positive", "Negative", "Neutral"],
                        var_name="Sentiment",
                        value_name="Count"
                    )

                    col1, col2 = st.columns([4, 2])

                    with col1:
                        st.markdown("### 📊 Sentiment by Aspect")
                        bar_chart = px.bar(
                            melted,
                            x="Aspect",
                            y="Count",
                            color="Sentiment",
                            barmode="group",
                            title=None,
                            color_discrete_map={
                                "Positive": "#66bb6a",
                                "Negative": "#ef5350",
                                "Neutral": "#42a5f5"
                            }
                        )

                        bar_chart.update_layout(
                            paper_bgcolor="rgba(0,0,0,0)",
                            plot_bgcolor="rgba(0,0,0,0)",
                            font_color="white",
                            xaxis=dict(gridcolor="rgba(255,255,255,0.1)"),
                            yaxis=dict(gridcolor="rgba(255,255,255,0.1)")
                        )
                        st.plotly_chart(bar_chart, use_container_width=True)

                    with col2:
                        st.markdown("### 🌀 Review Keywords")

                        # Prefer the inline image; fall back to a file on disk.
                        if "wordcloud_image_base64" in response_json:
                            import base64
                            st.markdown("<div style='padding-top:60px'></div>", unsafe_allow_html=True)
                            st.image(
                                BytesIO(base64.b64decode(response_json["wordcloud_image_base64"])),
                                caption="Keyword Cloud",
                                use_container_width=True
                            )
                        else:
                            try:
                                wordcloud_path = response_json.get("wordcloud_image_path")
                                if wordcloud_path and os.path.exists(wordcloud_path):
                                    image = Image.open(wordcloud_path)
                                    st.markdown("<div style='padding-top:60px'></div>", unsafe_allow_html=True)
                                    st.image(image, caption="Keywords", use_container_width=True)
                                elif os.path.exists("wordcloud.jpg"):
                                    image = Image.open("wordcloud.jpg")
                                    st.markdown("<div style='padding-top:60px'></div>", unsafe_allow_html=True)
                                    st.image(image, caption="Keywords", use_container_width=True)
                            except Exception as e:
                                st.warning(f"⚠ Word cloud image not found: {e}")
                else:
                    st.info("No aspects detected in the reviews.")
            else:
                st.error(f"API Error: {response.status_code} - {response.text}")
        except Exception as e:
            # UI boundary: show the failure instead of crashing the page.
            st.error(f"API call failed: {e}")
            import traceback
            st.code(traceback.format_exc(), language="python")
|
|
|
|
|
|
|
|
|
|
|
def generate_excel_report(df, aspect_data=None, response_json=None):
    """Build the Excel report and return it as bytes.

    Args:
        df: Frame written to the ``Sentiment_Report`` sheet.
        aspect_data: Optional frame written to ``Aspect_Analysis`` when it is
            not empty.
        response_json: Optional API payload. When truthy, a ``Summary`` sheet
            is built from its totals and percentages, and a ``Review_Details``
            sheet is added if it contains ``review_details``.

    Returns:
        The workbook content as ``bytes``.
    """
    output = BytesIO()
    # Sheet name -> header labels, recorded as each sheet is written so the
    # formatting pass never touches a variable that was not created.
    sheet_columns = {}

    with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
        df.to_excel(writer, index=False, sheet_name='Sentiment_Report')
        sheet_columns['Sentiment_Report'] = list(df.columns)

        if aspect_data is not None and not aspect_data.empty:
            aspect_data.to_excel(writer, index=False, sheet_name='Aspect_Analysis')
            sheet_columns['Aspect_Analysis'] = list(aspect_data.columns)

        if response_json:
            summary_data = {
                'Metric': ['Total Reviews', 'Positive', 'Negative', 'Neutral'],
                'Count': [
                    response_json.get('total_reviews', 0),
                    response_json.get('total_positive', 0),
                    response_json.get('total_negative', 0),
                    response_json.get('total_neutral', 0)
                ],
                'Percentage': [
                    '100%',
                    f"{response_json.get('positive_percentage', 0)}%",
                    f"{response_json.get('negative_percentage', 0)}%",
                    f"{response_json.get('neutral_percentage', 0)}%"
                ]
            }
            summary_df = pd.DataFrame(summary_data)
            summary_df.to_excel(writer, index=False, sheet_name='Summary')
            sheet_columns['Summary'] = list(summary_data)

            if 'review_details' in response_json:
                details_df = pd.DataFrame(response_json['review_details'])
                details_df.to_excel(writer, index=False, sheet_name='Review_Details')
                sheet_columns['Review_Details'] = list(details_df.columns)

        workbook = writer.book
        header_format = workbook.add_format({
            'bold': True,
            'text_wrap': True,
            'valign': 'top',
            'border': 1
        })

        for sheet_name, columns in sheet_columns.items():
            worksheet = writer.sheets[sheet_name]

            # Rewrite the header row with the custom format.
            for col_num, value in enumerate(columns):
                worksheet.write(0, col_num, value, header_format)

            try:
                worksheet.autofit()
            except AttributeError:
                # No autofit() on this worksheet object: size columns from
                # the header text instead.
                for col_num, value in enumerate(columns):
                    worksheet.set_column(col_num, col_num, max(10, len(str(value)) + 2))

    return output.getvalue()


# Download section, shown when an analysis was requested or produced counts.
if run_analysis or total > 0:
    st.subheader("📥 Download Analyzed File")

    if 'api_response_json' not in st.session_state:
        st.session_state.api_response_json = None
    if 'aspect_dataframe' not in st.session_state:
        st.session_state.aspect_dataframe = None

    # `response_json` and `aspect_df` exist only when this run's API call
    # succeeded, hence the name lookups.
    if run_analysis and enable_aspect and 'response_json' in locals():
        st.session_state.api_response_json = response_json
        if 'aspect_df' in locals() and not aspect_df.empty:
            st.session_state.aspect_dataframe = aspect_df

    st.download_button(
        label="📥 Download Results as Excel",
        data=generate_excel_report(
            st.session_state.analyzed_df,
            st.session_state.aspect_dataframe,
            st.session_state.api_response_json
        ),
        file_name="sentiment_analysis_report.xlsx",
        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
    )
|
|
|
|
|
# Page footer. The markup starts at column 0 so Markdown does not treat it as
# an indented code block.
st.markdown("""
<div style="text-align: center; margin-top: 50px; padding: 20px; color: #888; font-size: 14px;">
<p>Multilingual Sentiment Analysis Dashboard | Made with Streamlit</p>
</div>
""", unsafe_allow_html=True)