|
|
from flask import Flask, request, render_template_string, jsonify, send_from_directory |
|
|
import pandas as pd |
|
|
import re |
|
|
import os |
|
|
from transformers import XLMRobertaForSequenceClassification, XLMRobertaTokenizer |
|
|
from peft import PeftModel, PeftConfig |
|
|
import torch |
|
|
from collections import defaultdict |
|
|
from werkzeug.utils import secure_filename |
|
|
|
|
|
|
|
|
# Directory that receives uploaded CSV files; created at import time if absent.
UPLOAD_FOLDER = 'uploads'
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

# The WSGI application. The upload directory is published through its config
# so request handlers can look it up there.
flask_app = Flask(__name__)
flask_app.config.update(UPLOAD_FOLDER=UPLOAD_FOLDER)
|
|
|
|
|
|
|
|
# Hugging Face repository that provides both the tokenizer files and the LoRA
# adapter weights.
ADAPTER_REPO = "letijo03/lora-adapter-32"

tokenizer = XLMRobertaTokenizer.from_pretrained(ADAPTER_REPO, use_fast=True, trust_remote_code=True)

# Three-way classification head on top of the base encoder; the LoRA adapter
# is then attached to it.
base_model = XLMRobertaForSequenceClassification.from_pretrained("xlm-roberta-base", num_labels=3)
config = PeftConfig.from_pretrained(ADAPTER_REPO)
model = PeftModel.from_pretrained(base_model, ADAPTER_REPO)

# Pick the inference device once at import time and place the model on it.
# The module previously never defined `device`, so any code reading it failed
# with a NameError.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Inference only: switch off dropout and other training-time behaviour.
model.eval()
|
|
|
|
|
|
|
|
|
|
|
def generate_ngrams(text, n=2):
    """Return the lowercase word n-grams of *text* as space-joined strings.

    The text is lowercased and split on whitespace. Every run of ``n``
    consecutive words becomes one entry, in order of appearance. An empty
    list is returned when the text has fewer than ``n`` words.
    """
    tokens = text.lower().split()
    window_count = len(tokens) - n + 1
    if window_count <= 0:
        return []
    return [' '.join(tokens[start:start + n]) for start in range(window_count)]
|
|
|
|
|
def get_top_phrases(comments, top_n=5, min_occurrence=2, ngram_size=2):
    """Return the most frequent n-gram phrases across *comments*.

    Each comment is broken into n-grams of ``ngram_size`` words with
    ``generate_ngrams``. Phrases seen fewer than ``min_occurrence`` times are
    discarded. The result is a list of at most ``top_n`` ``(phrase, count)``
    tuples ordered by descending count; phrases with equal counts keep the
    order in which they were first seen.
    """
    tally = defaultdict(int)
    for text in comments:
        for gram in generate_ngrams(text, n=ngram_size):
            tally[gram] += 1

    frequent = [(gram, hits) for gram, hits in tally.items() if hits >= min_occurrence]
    # list.sort is stable, so ties stay in first-seen order.
    frequent.sort(key=lambda pair: pair[1], reverse=True)
    return frequent[:top_n]
|
|
|
|
|
def clean_data(df):
    """Return a cleaned copy of *df* that is ready for classification.

    Rows whose ``Comment`` is missing are dropped first. Previously they were
    passed through ``str()`` and survived as the literal comment ``"nan"``.
    The remaining comments are converted to ``str`` and stripped of every
    character that is not an ASCII letter, a digit or whitespace. Rows whose
    comment is empty or whitespace-only after that are removed.

    The input frame is not modified. The result is an independent copy, so
    callers can add columns to it without pandas chained-assignment warnings.

    Raises:
        KeyError: if *df* has no ``Comment`` column.
    """
    cleaned = df.dropna(subset=['Comment']).copy()
    cleaned['Comment'] = (
        cleaned['Comment'].astype(str).str.replace(r'[^a-zA-Z0-9\s]', '', regex=True)
    )
    has_text = cleaned['Comment'].str.strip() != ''
    return cleaned[has_text].copy()
|
|
|
|
|
def classify_sentiment_batch(texts):
    """Classify a batch of comment strings and return their class ids.

    The texts are tokenized together (padded to the longest item, truncated
    to 512 tokens), run through the module-level ``model`` without gradient
    tracking, and the arg-max over the logits is returned as a NumPy array
    with one integer per input text.

    An empty batch returns an empty integer array without calling the
    tokenizer or the model.
    """
    if len(texts) == 0:
        return torch.empty(0, dtype=torch.long).numpy()

    # Use whatever device the model's weights live on. The original code read
    # a global `device` that the module never defined (NameError).
    target_device = next(model.parameters()).device

    inputs = tokenizer(texts, return_tensors="pt", padding=True, truncation=True, max_length=512)
    inputs = {key: value.to(target_device) for key, value in inputs.items()}

    with torch.no_grad():
        outputs = model(**inputs)

    predictions = torch.argmax(outputs.logits, dim=-1)
    # Move to CPU before converting; .numpy() does not work on CUDA tensors.
    return predictions.cpu().numpy()
|
|
|
|
|
|
|
|
def generate_insights(df):
    """Build one insight sentence per sentiment class found in *df*.

    *df* must carry a ``Sentiment`` column holding the class ids 2, 1 and 0
    (Positive, Neutral, Negative) and a ``Comment`` column. The returned dict
    maps each label to either a fixed "no significant comments" message, when
    no row has that class, or the text produced by
    ``generate_contextual_insight`` for the non-null comments of that class.
    """
    label_by_class = {2: 'Positive', 1: 'Neutral', 0: 'Negative'}
    report = {}

    for class_id, label in label_by_class.items():
        matching = df.loc[df['Sentiment'] == class_id]

        if len(matching) == 0:
            report[label] = f"There are no significant comments for {label.lower()} sentiment."
            continue

        texts = matching['Comment'].dropna().tolist()
        report[label] = generate_contextual_insight(texts, label)

    return report
|
|
|
|
|
def generate_contextual_insight(comments, sentiment_label):
    """Dispatch *comments* to the insight generator for *sentiment_label*.

    Recognised labels are ``"Positive"``, ``"Neutral"`` and ``"Negative"``.
    Any other label yields ``None``.
    """
    # The lambdas keep the lookup lazy: only the selected generator is
    # resolved and called.
    builders = {
        "Positive": lambda: generate_positive_insight(comments),
        "Neutral": lambda: generate_neutral_insight(comments),
        "Negative": lambda: generate_negative_insight(comments),
    }
    build = builders.get(sentiment_label)
    if build is None:
        return None
    return build()
|
|
|
|
|
def generate_positive_insight(comments):
    """Compose the summary text for positive comments.

    A fixed opening sentence is always present. A follow-up sentence is
    appended for each keyword group that occurs, as a case-insensitive
    substring, in at least one comment.
    """
    keyword_rules = (
        (("fast", "quick"), "Many buyers praised fast shipping. "),
        (("packaging", "sealed"), "Well-packaged items were frequently mentioned. "),
        (("cheap", "affordable"), "Affordability and value for money stood out."),
    )

    parts = [
        "Positive comments show strong satisfaction, commonly highlighting fast delivery, good packaging, cheap prices, and quality products. "
    ]
    for keywords, sentence in keyword_rules:
        lowered_comments = (entry.lower() for entry in comments)
        if any(word in lowered for lowered in lowered_comments for word in keywords):
            parts.append(sentence)

    return "".join(parts)
|
|
|
|
|
def generate_neutral_insight(comments):
    """Compose the summary text for neutral comments.

    A fixed opening sentence is always present. A follow-up sentence is
    appended for each keyword group that occurs, as a case-insensitive
    substring, in at least one comment.
    """
    keyword_rules = (
        (("average", "normal"), "Some users found the product quality or service to be average. "),
        (("okay", "fine"), "Others simply stated that the item or service was acceptable, without notable issues. "),
        (("small issue", "minor defect"), "Minor imperfections like scratches or small delivery delays were sometimes noted."),
    )

    parts = [
        "Neutral comments are mostly factual, sharing moderate satisfaction without strong praise or complaint. "
    ]
    for keywords, sentence in keyword_rules:
        lowered_comments = (entry.lower() for entry in comments)
        if any(word in lowered for lowered in lowered_comments for word in keywords):
            parts.append(sentence)

    return "".join(parts)
|
|
|
|
|
def generate_negative_insight(comments):
    """Compose the summary text for negative comments.

    A fixed opening sentence is always present. A follow-up sentence is
    appended for each keyword group that occurs, as a case-insensitive
    substring, in at least one comment.
    """
    keyword_rules = (
        (("broken", "defective", "damage"), "Broken, defective, or damaged products were often mentioned. "),
        (("wrong item", "incorrect"), "Receiving the wrong item was a frequent complaint. "),
        (("late", "delay"), "Delivery delays frustrated several buyers. "),
        (("no response", "ignored", "no reply"), "Lack of seller support or poor customer service was also criticized."),
    )

    parts = [
        "Negative comments emphasize dissatisfaction, commonly about defective products, incorrect items, late deliveries, and unresponsive customer service. "
    ]
    for keywords, sentence in keyword_rules:
        lowered_comments = (entry.lower() for entry in comments)
        if any(word in lowered for lowered in lowered_comments for word in keywords):
            parts.append(sentence)

    return "".join(parts)
|
|
|
|
|
|
|
|
html_template = """ |
|
|
<!DOCTYPE html> |
|
|
<html lang="en"> |
|
|
<head> |
|
|
<meta charset="UTF-8"> |
|
|
<meta name="viewport" content="width=device-width, initial-scale=1.0"> |
|
|
<title>Shopee Comment Sentiment Analysis</title> |
|
|
|
|
|
<style> |
|
|
body { |
|
|
font-family: 'Poppins', sans-serif; |
|
|
background: linear-gradient(to right, #f8f9fa, #ffe0c3); |
|
|
margin: 0; |
|
|
padding: 0; |
|
|
display: flex; |
|
|
flex-direction: column; |
|
|
min-height: 100vh; |
|
|
} |
|
|
|
|
|
header { |
|
|
background: linear-gradient(90deg, #ff5722, #ff7043); |
|
|
color: white; |
|
|
padding: 1.5rem; |
|
|
text-align: center; |
|
|
font-size: 2rem; |
|
|
box-shadow: 0 4px 10px rgba(0, 0, 0, 0.2); |
|
|
} |
|
|
|
|
|
main { |
|
|
flex: 1; |
|
|
padding: 2rem; |
|
|
display: flex; |
|
|
flex-direction: column; |
|
|
align-items: center; |
|
|
} |
|
|
|
|
|
form { |
|
|
background: white; |
|
|
padding: 2rem; |
|
|
border-radius: 20px; |
|
|
box-shadow: 0 8px 24px rgba(0, 0, 0, 0.2); |
|
|
display: flex; |
|
|
flex-direction: column; |
|
|
align-items: center; |
|
|
gap: 1.5rem; |
|
|
width: 100%; |
|
|
max-width: 400px; |
|
|
transform: perspective(1000px) rotateX(2deg); |
|
|
} |
|
|
|
|
|
input[type="file"] { |
|
|
padding: 0.8rem; |
|
|
border: 2px solid #ff7043; |
|
|
border-radius: 12px; |
|
|
width: 100%; |
|
|
} |
|
|
|
|
|
button { |
|
|
background: linear-gradient(90deg, #ff7043, #ff5722); |
|
|
color: white; |
|
|
border: none; |
|
|
padding: 1rem 2rem; |
|
|
border-radius: 16px; |
|
|
font-size: 1.2rem; |
|
|
cursor: pointer; |
|
|
transition: transform 0.3s, background 0.3s; |
|
|
} |
|
|
|
|
|
button:hover { |
|
|
background: linear-gradient(90deg, #ff5722, #e64a19); |
|
|
transform: scale(1.05); |
|
|
} |
|
|
|
|
|
#loadingContainer { |
|
|
display: none; |
|
|
flex-direction: column; |
|
|
align-items: center; |
|
|
margin-top: 2rem; |
|
|
} |
|
|
|
|
|
.spinner { |
|
|
width: 60px; |
|
|
height: 60px; |
|
|
border: 8px solid #eee; |
|
|
border-top: 8px solid #ff5722; |
|
|
border-radius: 50%; |
|
|
animation: spin 1s linear infinite; |
|
|
} |
|
|
|
|
|
@keyframes spin { |
|
|
0% { transform: rotate(0deg); } |
|
|
100% { transform: rotate(360deg); } |
|
|
} |
|
|
|
|
|
#result { |
|
|
margin-top: 2rem; |
|
|
width: 100%; |
|
|
max-width: 700px; |
|
|
text-align: center; |
|
|
} |
|
|
|
|
|
.insights { |
|
|
margin-top: 2rem; |
|
|
padding: 2rem; |
|
|
background: white; |
|
|
border-radius: 16px; |
|
|
box-shadow: 0 6px 18px rgba(0, 0, 0, 0.15); |
|
|
text-align: left; |
|
|
} |
|
|
|
|
|
.insights h3 { |
|
|
margin-bottom: 1rem; |
|
|
color: #ff5722; |
|
|
} |
|
|
|
|
|
.insights p { |
|
|
font-size: 1.1rem; |
|
|
margin: 0.5rem 0; |
|
|
} |
|
|
|
|
|
footer { |
|
|
background: linear-gradient(90deg, #ff5722, #ff7043); |
|
|
color: white; |
|
|
text-align: center; |
|
|
padding: 1rem; |
|
|
font-size: 0.9rem; |
|
|
margin-top: auto; |
|
|
} |
|
|
</style> |
|
|
|
|
|
<script type="text/javascript" src="https://www.gstatic.com/charts/loader.js"></script> |
|
|
<script> |
|
|
google.charts.load('current', { 'packages': ['corechart'] }); |
|
|
|
|
|
document.addEventListener("DOMContentLoaded", function () { |
|
|
const form = document.getElementById("uploadForm"); |
|
|
const loadingContainer = document.getElementById("loadingContainer"); |
|
|
const resultDiv = document.getElementById("result"); |
|
|
|
|
|
form.onsubmit = async function (e) { |
|
|
e.preventDefault(); |
|
|
loadingContainer.style.display = "flex"; |
|
|
resultDiv.innerHTML = ""; |
|
|
|
|
|
const formData = new FormData(form); |
|
|
|
|
|
try { |
|
|
const response = await fetch('/analyze', { |
|
|
method: 'POST', |
|
|
body: formData |
|
|
}); |
|
|
const data = await response.json(); |
|
|
|
|
|
loadingContainer.style.display = "none"; |
|
|
|
|
|
if (data.error) { |
|
|
resultDiv.innerHTML = `<p style="color:red;">${data.error}</p>`; |
|
|
} else { |
|
|
const chartData = [ |
|
|
["Sentiment", "Count"], |
|
|
["Positive", data.chart_data.Positive || 0], |
|
|
["Neutral", data.chart_data.Neutral || 0], |
|
|
["Negative", data.chart_data.Negative || 0] |
|
|
]; |
|
|
drawPieChart(chartData); |
|
|
|
|
|
// Display insights |
|
|
const insightsDiv = document.createElement('div'); |
|
|
insightsDiv.classList.add('insights'); |
|
|
insightsDiv.innerHTML = ` |
|
|
<h3>Insights</h3> |
|
|
<p><strong>Positive:</strong> ${data.insights.Positive}</p> |
|
|
<p><strong>Neutral:</strong> ${data.insights.Neutral}</p> |
|
|
<p><strong>Negative:</strong> ${data.insights.Negative}</p> |
|
|
`; |
|
|
resultDiv.appendChild(insightsDiv); |
|
|
} |
|
|
} catch (error) { |
|
|
loadingContainer.style.display = "none"; |
|
|
resultDiv.innerHTML = `<p style="color:red;">An error occurred: ${error.message}</p>`; |
|
|
} |
|
|
}; |
|
|
}); |
|
|
|
|
|
function drawPieChart(chartData) { |
|
|
const data = google.visualization.arrayToDataTable(chartData); |
|
|
|
|
|
const options = { |
|
|
title: 'Sentiment Distribution', |
|
|
pieHole: 0.5, |
|
|
colors: ['#4caf50', '#ffc107', '#f44336'], |
|
|
legend: { position: 'bottom' }, |
|
|
backgroundColor: 'transparent' |
|
|
}; |
|
|
|
|
|
const chart = new google.visualization.PieChart(document.getElementById('result')); |
|
|
chart.draw(data, options); |
|
|
} |
|
|
</script> |
|
|
|
|
|
</head> |
|
|
|
|
|
<body> |
|
|
|
|
|
<header> |
|
|
Shopee Comment Sentiment Analysis |
|
|
</header> |
|
|
|
|
|
<main> |
|
|
<form id="uploadForm" enctype="multipart/form-data"> |
|
|
<input type="file" name="file" accept=".csv" required> |
|
|
<button type="submit">Analyze CSV</button> |
|
|
</form> |
|
|
|
|
|
<div id="loadingContainer"> |
|
|
<div class="spinner"></div> |
|
|
<p>Analyzing, please wait...</p> |
|
|
</div> |
|
|
|
|
|
<div id="result"></div> |
|
|
<div id="downloadLink"></div> |
|
|
</main> |
|
|
|
|
|
<footer> |
|
|
© 2025 Shopee Sentiment Analyzer. All rights reserved. |
|
|
</footer> |
|
|
|
|
|
</body> |
|
|
</html> |
|
|
""" |
|
|
|
|
|
@flask_app.route('/')
def index():
    """Serve the upload page rendered from ``html_template``."""
    page = render_template_string(html_template)
    return page
|
|
|
|
|
|
|
|
@flask_app.route('/analyze', methods=['POST'])
def analyze():
    """Classify the comments of an uploaded CSV and return a JSON summary.

    Expects a multipart form upload under the field name ``file`` whose CSV
    has a ``Comment`` column. The file is stored in the configured upload
    folder, cleaned with ``clean_data``, classified in batches of 32 with
    ``classify_sentiment_batch``, and summarised.

    Returns a JSON object with either:
      * ``message``, ``chart_data`` (counts per Positive/Neutral/Negative)
        and ``insights`` (one sentence per label), or
      * ``error`` describing what went wrong.
    """
    if 'file' not in request.files:
        return jsonify({'error': 'No file part'})

    file = request.files['file']
    if file.filename == '':
        return jsonify({'error': 'No selected file'})

    # secure_filename() may reduce a name made only of unsafe characters to
    # ''. Fall back to a fixed name so the save target is never the upload
    # directory itself.
    filename = secure_filename(file.filename) or 'upload.csv'
    filepath = os.path.join(flask_app.config['UPLOAD_FOLDER'], filename)

    try:
        # Saving happens inside the try so a disk error is reported as a JSON
        # error like every other failure, instead of an unhandled 500.
        file.save(filepath)

        df = pd.read_csv(filepath)
        if 'Comment' not in df.columns:
            return jsonify({'error': "CSV must contain a 'Comment' column."})

        # Work on an independent frame so adding the Sentiment column below
        # never writes into a slice of the frame that was read.
        df = clean_data(df).copy()

        batch_size = 32
        comments = df['Comment'].tolist()
        all_predictions = []

        # A plain range is used: the previous `tqdm(...)` wrapper referenced a
        # name that is never imported, which made every request fail.
        for start in range(0, len(comments), batch_size):
            batch = comments[start:start + batch_size]
            all_predictions.extend(classify_sentiment_batch(batch))

        df['Sentiment'] = all_predictions

        # Class ids: 2 = Positive, 1 = Neutral, 0 = Negative. Cast to plain
        # int so the values are always JSON-serialisable.
        sentiment = df['Sentiment']
        chart_data_counts = {
            "Positive": int((sentiment == 2).sum()),
            "Neutral": int((sentiment == 1).sum()),
            "Negative": int((sentiment == 0).sum()),
        }

        insights = generate_insights(df)

        return jsonify({
            'message': 'Analysis completed',
            'chart_data': chart_data_counts,
            'insights': insights,
        })

    except Exception as e:
        # Request boundary: keep the JSON error contract for the client, but
        # record the traceback server-side so failures are diagnosable.
        flask_app.logger.exception("Failed to analyze uploaded file %r", filename)
        return jsonify({'error': f'Error processing file: {str(e)}'})
|
|
|
|
|
|
|
|
@flask_app.route('/download/<path:filename>')
def download(filename):
    """Send *filename* from the ``static`` directory as a file attachment."""
    static_dir = 'static'
    response = send_from_directory(static_dir, filename, as_attachment=True)
    return response
|
|
|
|
|
# Wrap the Flask (WSGI) application in an ASGI adapter; `app` is the object
# handed to the ASGI server below.
from asgiref.wsgi import WsgiToAsgi

app = WsgiToAsgi(flask_app)


if __name__ == '__main__':
    import uvicorn

    # Listen on all interfaces; the port comes from the PORT environment
    # variable and defaults to 7860.
    listen_port = int(os.environ.get("PORT", 7860))
    uvicorn.run(app, host="0.0.0.0", port=listen_port)
|
|
|
|
|
|