from flask import Flask, request, render_template_string, jsonify, send_from_directory
import pandas as pd
import re
import os
from transformers import XLMRobertaForSequenceClassification, XLMRobertaTokenizer
from peft import PeftModel, PeftConfig
import torch
from collections import defaultdict
from tqdm import tqdm
from werkzeug.utils import secure_filename
# Initialize Flask app
flask_app = Flask(__name__)
UPLOAD_FOLDER = 'uploads'
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
flask_app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
# Load the tokenizer and the LoRA adapter on top of the base XLM-RoBERTa model (3 sentiment labels)
tokenizer = XLMRobertaTokenizer.from_pretrained("letijo03/lora-adapter-32", use_fast=True, trust_remote_code=True)
base_model = XLMRobertaForSequenceClassification.from_pretrained("xlm-roberta-base", num_labels=3)
config = PeftConfig.from_pretrained("letijo03/lora-adapter-32")
model = PeftModel.from_pretrained(base_model, "letijo03/lora-adapter-32")

# Select the inference device and move the model onto it; classify_sentiment_batch sends its inputs there
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()
# Helper Functions
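# Split a comment into lowercase word n-grams (default bigrams),
# e.g. generate_ngrams("item arrived quickly", n=2) -> ['item arrived', 'arrived quickly'].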
def generate_ngrams(text, n=2):
text = text.lower()
words = text.split()
if len(words) < n:
return []
return [' '.join(words[i:i+n]) for i in range(len(words) - n + 1)]
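# Count n-grams across all comments and return the top_n phrases seen at least
# min_occurrence times, most frequent first,
# e.g. get_top_phrases(["fast delivery", "fast delivery and cheap"]) -> [('fast delivery', 2)].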
def get_top_phrases(comments, top_n=5, min_occurrence=2, ngram_size=2):
phrase_counts = defaultdict(int)
for comment in comments:
phrases = generate_ngrams(comment, n=ngram_size)
for phrase in phrases:
phrase_counts[phrase] += 1
filtered_phrases = {phrase: count for phrase, count in phrase_counts.items() if count >= min_occurrence}
sorted_phrases = sorted(filtered_phrases.items(), key=lambda x: x[1], reverse=True)
return sorted_phrases[:top_n]
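# Keep only ASCII letters, digits, and whitespace in each comment, then drop rows left empty.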
def clean_data(df):
df['Comment'] = df['Comment'].apply(lambda x: re.sub(r'[^a-zA-Z0-9\s]', '', str(x)))
df = df[df['Comment'].str.strip() != '']
return df
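# Tokenize a batch of comments, run the model on `device`, and return the predicted
# label index per comment (0 = Negative, 1 = Neutral, 2 = Positive).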
def classify_sentiment_batch(texts):
# Batch sentiment classification
inputs = tokenizer(texts, return_tensors="pt", padding=True, truncation=True, max_length=512)
inputs = {key: value.to(device) for key, value in inputs.items()}
with torch.no_grad():
outputs = model(**inputs)
predictions = torch.argmax(outputs.logits, dim=-1)
return predictions.cpu().numpy()
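# Build one short, human-readable insight per sentiment class from the classified DataFrame.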
def generate_insights(df):
    insights = {}
    sentiment_mapping = {2: 'Positive', 1: 'Neutral', 0: 'Negative'}
    for sentiment_value, sentiment_label in sentiment_mapping.items():
        subset = df[df['Sentiment'] == sentiment_value]
        count = len(subset)
        if count == 0:
            insights[sentiment_label] = f"There are no significant comments for {sentiment_label.lower()} sentiment."
        else:
            comments = subset['Comment'].dropna().tolist()
            insights[sentiment_label] = generate_contextual_insight(comments, sentiment_label)
    return insights
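# Dispatch to the sentiment-specific insight builders below.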
def generate_contextual_insight(comments, sentiment_label):
if sentiment_label == "Positive":
return generate_positive_insight(comments)
elif sentiment_label == "Neutral":
return generate_neutral_insight(comments)
elif sentiment_label == "Negative":
return generate_negative_insight(comments)
def generate_positive_insight(comments):
positive_insight = "Positive comments show strong satisfaction, commonly highlighting fast delivery, good packaging, cheap prices, and quality products. "
if any("fast" in comment.lower() or "quick" in comment.lower() for comment in comments):
positive_insight += "Many buyers praised fast shipping. "
if any("packaging" in comment.lower() or "sealed" in comment.lower() for comment in comments):
positive_insight += "Well-packaged items were frequently mentioned. "
if any("cheap" in comment.lower() or "affordable" in comment.lower() for comment in comments):
positive_insight += "Affordability and value for money stood out."
return positive_insight
def generate_neutral_insight(comments):
neutral_insight = "Neutral comments are mostly factual, sharing moderate satisfaction without strong praise or complaint. "
if any("average" in comment.lower() or "normal" in comment.lower() for comment in comments):
neutral_insight += "Some users found the product quality or service to be average. "
if any("okay" in comment.lower() or "fine" in comment.lower() for comment in comments):
neutral_insight += "Others simply stated that the item or service was acceptable, without notable issues. "
if any("small issue" in comment.lower() or "minor defect" in comment.lower() for comment in comments):
neutral_insight += "Minor imperfections like scratches or small delivery delays were sometimes noted."
return neutral_insight
def generate_negative_insight(comments):
negative_insight = "Negative comments emphasize dissatisfaction, commonly about defective products, incorrect items, late deliveries, and unresponsive customer service. "
if any("broken" in comment.lower() or "defective" in comment.lower() or "damage" in comment.lower() for comment in comments):
negative_insight += "Broken, defective, or damaged products were often mentioned. "
if any("wrong item" in comment.lower() or "incorrect" in comment.lower() for comment in comments):
negative_insight += "Receiving the wrong item was a frequent complaint. "
if any("late" in comment.lower() or "delay" in comment.lower() for comment in comments):
negative_insight += "Delivery delays frustrated several buyers. "
if any("no response" in comment.lower() or "ignored" in comment.lower() or "no reply" in comment.lower() for comment in comments):
negative_insight += "Lack of seller support or poor customer service was also criticized."
return negative_insight
# Inline HTML template: upload form, loading spinner, sentiment donut chart (Google Charts), and insights panel
html_template = """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Shopee Comment Sentiment Analysis</title>
<style>
body {
font-family: 'Poppins', sans-serif;
background: linear-gradient(to right, #f8f9fa, #ffe0c3);
margin: 0;
padding: 0;
display: flex;
flex-direction: column;
min-height: 100vh;
}
header {
background: linear-gradient(90deg, #ff5722, #ff7043);
color: white;
padding: 1.5rem;
text-align: center;
font-size: 2rem;
box-shadow: 0 4px 10px rgba(0, 0, 0, 0.2);
}
main {
flex: 1;
padding: 2rem;
display: flex;
flex-direction: column;
align-items: center;
}
form {
background: white;
padding: 2rem;
border-radius: 20px;
box-shadow: 0 8px 24px rgba(0, 0, 0, 0.2);
display: flex;
flex-direction: column;
align-items: center;
gap: 1.5rem;
width: 100%;
max-width: 400px;
transform: perspective(1000px) rotateX(2deg);
}
input[type="file"] {
padding: 0.8rem;
border: 2px solid #ff7043;
border-radius: 12px;
width: 100%;
}
button {
background: linear-gradient(90deg, #ff7043, #ff5722);
color: white;
border: none;
padding: 1rem 2rem;
border-radius: 16px;
font-size: 1.2rem;
cursor: pointer;
transition: transform 0.3s, background 0.3s;
}
button:hover {
background: linear-gradient(90deg, #ff5722, #e64a19);
transform: scale(1.05);
}
#loadingContainer {
display: none;
flex-direction: column;
align-items: center;
margin-top: 2rem;
}
.spinner {
width: 60px;
height: 60px;
border: 8px solid #eee;
border-top: 8px solid #ff5722;
border-radius: 50%;
animation: spin 1s linear infinite;
}
@keyframes spin {
0% { transform: rotate(0deg); }
100% { transform: rotate(360deg); }
}
#result {
margin-top: 2rem;
width: 100%;
max-width: 700px;
text-align: center;
}
.insights {
margin-top: 2rem;
padding: 2rem;
background: white;
border-radius: 16px;
box-shadow: 0 6px 18px rgba(0, 0, 0, 0.15);
text-align: left;
}
.insights h3 {
margin-bottom: 1rem;
color: #ff5722;
}
.insights p {
font-size: 1.1rem;
margin: 0.5rem 0;
}
footer {
background: linear-gradient(90deg, #ff5722, #ff7043);
color: white;
text-align: center;
padding: 1rem;
font-size: 0.9rem;
margin-top: auto;
}
</style>
<script type="text/javascript" src="https://www.gstatic.com/charts/loader.js"></script>
<script>
google.charts.load('current', { 'packages': ['corechart'] });
document.addEventListener("DOMContentLoaded", function () {
const form = document.getElementById("uploadForm");
const loadingContainer = document.getElementById("loadingContainer");
const resultDiv = document.getElementById("result");
form.onsubmit = async function (e) {
e.preventDefault();
loadingContainer.style.display = "flex";
resultDiv.innerHTML = "";
const formData = new FormData(form);
try {
const response = await fetch('/analyze', {
method: 'POST',
body: formData
});
const data = await response.json();
loadingContainer.style.display = "none";
if (data.error) {
resultDiv.innerHTML = `<p style="color:red;">${data.error}</p>`;
} else {
const chartData = [
["Sentiment", "Count"],
["Positive", data.chart_data.Positive || 0],
["Neutral", data.chart_data.Neutral || 0],
["Negative", data.chart_data.Negative || 0]
];
drawPieChart(chartData);
// Display insights
const insightsDiv = document.createElement('div');
insightsDiv.classList.add('insights');
insightsDiv.innerHTML = `
<h3>Insights</h3>
<p><strong>Positive:</strong> ${data.insights.Positive}</p>
<p><strong>Neutral:</strong> ${data.insights.Neutral}</p>
<p><strong>Negative:</strong> ${data.insights.Negative}</p>
`;
resultDiv.appendChild(insightsDiv);
}
} catch (error) {
loadingContainer.style.display = "none";
resultDiv.innerHTML = `<p style="color:red;">An error occurred: ${error.message}</p>`;
}
};
});
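// Draw a donut chart of the sentiment counts into the #result container.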
function drawPieChart(chartData) {
const data = google.visualization.arrayToDataTable(chartData);
const options = {
title: 'Sentiment Distribution',
pieHole: 0.5,
colors: ['#4caf50', '#ffc107', '#f44336'],
legend: { position: 'bottom' },
backgroundColor: 'transparent'
};
const chart = new google.visualization.PieChart(document.getElementById('result'));
chart.draw(data, options);
}
</script>
</head>
<body>
<header>
Shopee Comment Sentiment Analysis
</header>
<main>
<form id="uploadForm" enctype="multipart/form-data">
<input type="file" name="file" accept=".csv" required>
<button type="submit">Analyze CSV</button>
</form>
<div id="loadingContainer">
<div class="spinner"></div>
<p>Analyzing, please wait...</p>
</div>
<div id="result"></div>
<div id="downloadLink"></div>
</main>
<footer>
&copy; 2025 Shopee Sentiment Analyzer. All rights reserved.
</footer>
</body>
</html>
"""
@flask_app.route('/')
def index():
return render_template_string(html_template)
# /analyze: classify each comment in an uploaded CSV (must contain a 'Comment' column) and return sentiment counts plus insights as JSON
@flask_app.route('/analyze', methods=['POST'])
def analyze():
if 'file' not in request.files:
return jsonify({'error': 'No file part'})
file = request.files['file']
if file.filename == '':
return jsonify({'error': 'No selected file'})
filename = secure_filename(file.filename)
filepath = os.path.join(flask_app.config['UPLOAD_FOLDER'], filename)
file.save(filepath)
try:
df = pd.read_csv(filepath)
if 'Comment' not in df.columns:
return jsonify({'error': "CSV must contain a 'Comment' column."})
df = clean_data(df)
# Use batch processing for sentiment classification
batch_size = 32 # You can adjust this based on available memory
comments = df['Comment'].tolist()
all_predictions = []
for i in tqdm(range(0, len(comments), batch_size)):
batch = comments[i:i+batch_size]
batch_predictions = classify_sentiment_batch(batch)
all_predictions.extend(batch_predictions)
df['Sentiment'] = all_predictions
# Sentiment counts
positive_count = len(df[df['Sentiment'] == 2])
neutral_count = len(df[df['Sentiment'] == 1])
negative_count = len(df[df['Sentiment'] == 0])
chart_data_counts = {
"Positive": positive_count,
"Neutral": neutral_count,
"Negative": negative_count
}
insights = generate_insights(df)
return jsonify({
'message': 'Analysis completed',
'chart_data': chart_data_counts,
'insights': insights
})
except Exception as e:
return jsonify({'error': f'Error processing file: {str(e)}'})
# Serve files from the "static" folder as attachment downloads.
@flask_app.route('/download/<path:filename>')
def download(filename):
return send_from_directory('static', filename, as_attachment=True)
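# Wrap the Flask WSGI app as ASGI so it can be served with uvicorn.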
from asgiref.wsgi import WsgiToAsgi
app = WsgiToAsgi(flask_app)
if __name__ == '__main__':
import uvicorn
uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PORT", 7860)))