from flask import Flask, request, render_template_string, jsonify, send_from_directory
import pandas as pd
import re
import os
from transformers import XLMRobertaForSequenceClassification, XLMRobertaTokenizer
from peft import PeftModel, PeftConfig
import torch
from collections import defaultdict
from tqdm import tqdm
from werkzeug.utils import secure_filename
# Initialize Flask app
flask_app = Flask(__name__)
UPLOAD_FOLDER = 'uploads'
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
flask_app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
# Load the tokenizer and the LoRA adapter on top of the base XLM-RoBERTa model (3 sentiment labels)
tokenizer = XLMRobertaTokenizer.from_pretrained("letijo03/lora-adapter-32", use_fast=True, trust_remote_code=True)
base_model = XLMRobertaForSequenceClassification.from_pretrained("xlm-roberta-base", num_labels=3)
config = PeftConfig.from_pretrained("letijo03/lora-adapter-32")
model = PeftModel.from_pretrained(base_model, "letijo03/lora-adapter-32")

# Select the inference device and move the model onto it; classify_sentiment_batch sends its inputs there
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()
# Helper Functions
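# Split a comment into lowercase word n-grams (default bigrams),
# e.g. generate_ngrams("item arrived quickly", n=2) -> ['item arrived', 'arrived quickly'].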
def generate_ngrams(text, n=2):
text = text.lower()
words = text.split()
if len(words) < n:
return []
return [' '.join(words[i:i+n]) for i in range(len(words) - n + 1)]
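# Count n-grams across all comments and return the top_n phrases seen at least
# min_occurrence times, most frequent first,
# e.g. get_top_phrases(["fast delivery", "fast delivery and cheap"]) -> [('fast delivery', 2)].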
def get_top_phrases(comments, top_n=5, min_occurrence=2, ngram_size=2):
phrase_counts = defaultdict(int)
for comment in comments:
phrases = generate_ngrams(comment, n=ngram_size)
for phrase in phrases:
phrase_counts[phrase] += 1
filtered_phrases = {phrase: count for phrase, count in phrase_counts.items() if count >= min_occurrence}
sorted_phrases = sorted(filtered_phrases.items(), key=lambda x: x[1], reverse=True)
return sorted_phrases[:top_n]
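# Keep only ASCII letters, digits, and whitespace in each comment, then drop rows left empty.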
def clean_data(df):
df['Comment'] = df['Comment'].apply(lambda x: re.sub(r'[^a-zA-Z0-9\s]', '', str(x)))
df = df[df['Comment'].str.strip() != '']
return df
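# Tokenize a batch of comments, run the model on `device`, and return the predicted
# label index per comment (0 = Negative, 1 = Neutral, 2 = Positive).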
def classify_sentiment_batch(texts):
# Batch sentiment classification
inputs = tokenizer(texts, return_tensors="pt", padding=True, truncation=True, max_length=512)
inputs = {key: value.to(device) for key, value in inputs.items()}
with torch.no_grad():
outputs = model(**inputs)
predictions = torch.argmax(outputs.logits, dim=-1)
return predictions.cpu().numpy()
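# Build one short, human-readable insight per sentiment class from the classified DataFrame.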
def generate_insights(df):
    insights = {}
    sentiment_mapping = {2: 'Positive', 1: 'Neutral', 0: 'Negative'}
    for sentiment_value, sentiment_label in sentiment_mapping.items():
        subset = df[df['Sentiment'] == sentiment_value]
        count = len(subset)
        if count == 0:
            insights[sentiment_label] = f"There are no significant comments for {sentiment_label.lower()} sentiment."
        else:
            comments = subset['Comment'].dropna().tolist()
            insights[sentiment_label] = generate_contextual_insight(comments, sentiment_label)
    return insights
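# Dispatch to the sentiment-specific insight builders below.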
def generate_contextual_insight(comments, sentiment_label):
if sentiment_label == "Positive":
return generate_positive_insight(comments)
elif sentiment_label == "Neutral":
return generate_neutral_insight(comments)
elif sentiment_label == "Negative":
return generate_negative_insight(comments)
def generate_positive_insight(comments):
positive_insight = "Positive comments show strong satisfaction, commonly highlighting fast delivery, good packaging, cheap prices, and quality products. "
if any("fast" in comment.lower() or "quick" in comment.lower() for comment in comments):
positive_insight += "Many buyers praised fast shipping. "
if any("packaging" in comment.lower() or "sealed" in comment.lower() for comment in comments):
positive_insight += "Well-packaged items were frequently mentioned. "
if any("cheap" in comment.lower() or "affordable" in comment.lower() for comment in comments):
positive_insight += "Affordability and value for money stood out."
return positive_insight
def generate_neutral_insight(comments):
neutral_insight = "Neutral comments are mostly factual, sharing moderate satisfaction without strong praise or complaint. "
if any("average" in comment.lower() or "normal" in comment.lower() for comment in comments):
neutral_insight += "Some users found the product quality or service to be average. "
if any("okay" in comment.lower() or "fine" in comment.lower() for comment in comments):
neutral_insight += "Others simply stated that the item or service was acceptable, without notable issues. "
if any("small issue" in comment.lower() or "minor defect" in comment.lower() for comment in comments):
neutral_insight += "Minor imperfections like scratches or small delivery delays were sometimes noted."
return neutral_insight
def generate_negative_insight(comments):
negative_insight = "Negative comments emphasize dissatisfaction, commonly about defective products, incorrect items, late deliveries, and unresponsive customer service. "
if any("broken" in comment.lower() or "defective" in comment.lower() or "damage" in comment.lower() for comment in comments):
negative_insight += "Broken, defective, or damaged products were often mentioned. "
if any("wrong item" in comment.lower() or "incorrect" in comment.lower() for comment in comments):
negative_insight += "Receiving the wrong item was a frequent complaint. "
if any("late" in comment.lower() or "delay" in comment.lower() for comment in comments):
negative_insight += "Delivery delays frustrated several buyers. "
if any("no response" in comment.lower() or "ignored" in comment.lower() or "no reply" in comment.lower() for comment in comments):
negative_insight += "Lack of seller support or poor customer service was also criticized."
return negative_insight
# Inline HTML template: upload form, loading spinner, sentiment donut chart (Google Charts), and insights panel
html_template = """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Shopee Comment Sentiment Analysis</title>
<style>
body {
font-family: 'Poppins', sans-serif;
background: linear-gradient(to right, #f8f9fa, #ffe0c3);
margin: 0;
padding: 0;
display: flex;
flex-direction: column;
min-height: 100vh;
}
header {
background: linear-gradient(90deg, #ff5722, #ff7043);
color: white;
padding: 1.5rem;
text-align: center;
font-size: 2rem;
box-shadow: 0 4px 10px rgba(0, 0, 0, 0.2);
}
main {
flex: 1;
padding: 2rem;
display: flex;
flex-direction: column;
align-items: center;
}
form {
background: white;
padding: 2rem;
border-radius: 20px;
box-shadow: 0 8px 24px rgba(0, 0, 0, 0.2);
display: flex;
flex-direction: column;
align-items: center;
gap: 1.5rem;
width: 100%;
max-width: 400px;
transform: perspective(1000px) rotateX(2deg);
}
input[type="file"] {
padding: 0.8rem;
border: 2px solid #ff7043;
border-radius: 12px;
width: 100%;
}
button {
background: linear-gradient(90deg, #ff7043, #ff5722);
color: white;
border: none;
padding: 1rem 2rem;
border-radius: 16px;
font-size: 1.2rem;
cursor: pointer;
transition: transform 0.3s, background 0.3s;
}
button:hover {
background: linear-gradient(90deg, #ff5722, #e64a19);
transform: scale(1.05);
}
#loadingContainer {
display: none;
flex-direction: column;
align-items: center;
margin-top: 2rem;
}
.spinner {
width: 60px;
height: 60px;
border: 8px solid #eee;
border-top: 8px solid #ff5722;
border-radius: 50%;
animation: spin 1s linear infinite;
}
@keyframes spin {
0% { transform: rotate(0deg); }
100% { transform: rotate(360deg); }
}
#result {
margin-top: 2rem;
width: 100%;
max-width: 700px;
text-align: center;
}
.insights {
margin-top: 2rem;
padding: 2rem;
background: white;
border-radius: 16px;
box-shadow: 0 6px 18px rgba(0, 0, 0, 0.15);
text-align: left;
}
.insights h3 {
margin-bottom: 1rem;
color: #ff5722;
}
.insights p {
font-size: 1.1rem;
margin: 0.5rem 0;
}
footer {
background: linear-gradient(90deg, #ff5722, #ff7043);
color: white;
text-align: center;
padding: 1rem;
font-size: 0.9rem;
margin-top: auto;
}
</style>
<script type="text/javascript" src="https://www.gstatic.com/charts/loader.js"></script>
<script>
google.charts.load('current', { 'packages': ['corechart'] });
document.addEventListener("DOMContentLoaded", function () {
const form = document.getElementById("uploadForm");
const loadingContainer = document.getElementById("loadingContainer");
const resultDiv = document.getElementById("result");
form.onsubmit = async function (e) {
e.preventDefault();
loadingContainer.style.display = "flex";
resultDiv.innerHTML = "";
const formData = new FormData(form);
try {
const response = await fetch('/analyze', {
method: 'POST',
body: formData
});
const data = await response.json();
loadingContainer.style.display = "none";
if (data.error) {
resultDiv.innerHTML = `<p style="color:red;">${data.error}</p>`;
} else {
const chartData = [
["Sentiment", "Count"],
["Positive", data.chart_data.Positive || 0],
["Neutral", data.chart_data.Neutral || 0],
["Negative", data.chart_data.Negative || 0]
];
drawPieChart(chartData);
// Display insights
const insightsDiv = document.createElement('div');
insightsDiv.classList.add('insights');
insightsDiv.innerHTML = `
<h3>Insights</h3>
<p><strong>Positive:</strong> ${data.insights.Positive}</p>
<p><strong>Neutral:</strong> ${data.insights.Neutral}</p>
<p><strong>Negative:</strong> ${data.insights.Negative}</p>
`;
resultDiv.appendChild(insightsDiv);
}
} catch (error) {
loadingContainer.style.display = "none";
resultDiv.innerHTML = `<p style="color:red;">An error occurred: ${error.message}</p>`;
}
};
});
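// Draw a donut chart of the sentiment counts into the #result container.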
function drawPieChart(chartData) {
const data = google.visualization.arrayToDataTable(chartData);
const options = {
title: 'Sentiment Distribution',
pieHole: 0.5,
colors: ['#4caf50', '#ffc107', '#f44336'],
legend: { position: 'bottom' },
backgroundColor: 'transparent'
};
const chart = new google.visualization.PieChart(document.getElementById('result'));
chart.draw(data, options);
}
</script>
</head>
<body>
<header>
Shopee Comment Sentiment Analysis
</header>
<main>
<form id="uploadForm" enctype="multipart/form-data">
<input type="file" name="file" accept=".csv" required>
<button type="submit">Analyze CSV</button>
</form>
<div id="loadingContainer">
<div class="spinner"></div>
<p>Analyzing, please wait...</p>
</div>
<div id="result"></div>
<div id="downloadLink"></div>
</main>
<footer>
&copy; 2025 Shopee Sentiment Analyzer. All rights reserved.
</footer>
</body>
</html>
"""
@flask_app.route('/')
def index():
return render_template_string(html_template)
# /analyze: classify each comment in an uploaded CSV (must contain a 'Comment' column) and return sentiment counts plus insights as JSON
@flask_app.route('/analyze', methods=['POST'])
def analyze():
if 'file' not in request.files:
return jsonify({'error': 'No file part'})
file = request.files['file']
if file.filename == '':
return jsonify({'error': 'No selected file'})
filename = secure_filename(file.filename)
filepath = os.path.join(flask_app.config['UPLOAD_FOLDER'], filename)
file.save(filepath)
try:
df = pd.read_csv(filepath)
if 'Comment' not in df.columns:
return jsonify({'error': "CSV must contain a 'Comment' column."})
df = clean_data(df)
# Use batch processing for sentiment classification
batch_size = 32 # You can adjust this based on available memory
comments = df['Comment'].tolist()
all_predictions = []
for i in tqdm(range(0, len(comments), batch_size)):
batch = comments[i:i+batch_size]
batch_predictions = classify_sentiment_batch(batch)
all_predictions.extend(batch_predictions)
df['Sentiment'] = all_predictions
# Sentiment counts
positive_count = len(df[df['Sentiment'] == 2])
neutral_count = len(df[df['Sentiment'] == 1])
negative_count = len(df[df['Sentiment'] == 0])
chart_data_counts = {
"Positive": positive_count,
"Neutral": neutral_count,
"Negative": negative_count
}
insights = generate_insights(df)
return jsonify({
'message': 'Analysis completed',
'chart_data': chart_data_counts,
'insights': insights
})
except Exception as e:
return jsonify({'error': f'Error processing file: {str(e)}'})
# Serve files from the "static" folder as attachment downloads.
@flask_app.route('/download/<path:filename>')
def download(filename):
return send_from_directory('static', filename, as_attachment=True)
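# Wrap the Flask WSGI app as ASGI so it can be served with uvicorn.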
from asgiref.wsgi import WsgiToAsgi
app = WsgiToAsgi(flask_app)
if __name__ == '__main__':
import uvicorn
uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PORT", 7860)))