Spaces:

vtdung23
/

Predict_Rating

Sleeping

App Files Files Community

Predict_Rating / app /services /visualization_service.py

vtdung23

Upload folder using huggingface_hub

c09e844 verified 21 days ago

raw

history blame contribute delete

4.02 kB

	"""
	Visualization Service
	WordCloud generation and data visualization utilities
	"""
	import os
	from typing import List, Dict
	from collections import Counter
	from wordcloud import WordCloud
	import matplotlib
	matplotlib.use('Agg') # Use non-GUI backend
	import matplotlib.pyplot as plt
	from datetime import datetime
	from pathlib import Path

	from app.config import WORDCLOUD_DIR


	class VisualizationService:
	    """Generate word-cloud images and simple statistics from review data."""

	    # Characters trimmed from both ends of a token before it is counted, so
	    # that "tốt," and "tốt" are treated as the same word.
	    _EDGE_PUNCTUATION = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~“”‘’…"

	    def __init__(self):
	        # Vietnamese stopwords (common words excluded from clouds and counts)
	        self.stopwords = {
	            'và', 'của', 'có', 'cho', 'với', 'từ', 'này', 'được',
	            'là', 'để', 'một', 'các', 'trong', 'không', 'đã', 'rất',
	            'cũng', 'nhưng', 'thì', 'bị', 'khi', 'nếu', 'như', 'về',
	            'tôi', 'bạn', 'mình', 'nó', 'họ', 'em', 'anh', 'chị',
	            'vì', 'nên', 'đến', 'lại', 'ra', 'đang', 'sẽ', 'đều',
	            'hay', 'thế', 'làm', 'rồi', 'đó', 'ở',
	        }

	    def generate_wordcloud(self, texts: List[str], filename: str = None) -> str:
	        """
	        Generate a word cloud image from a list of texts

	        Args:
	            texts: List of Vietnamese comments
	            filename: Optional custom filename; when omitted a timestamped
	                name of the form ``wordcloud_<timestamp>.png`` is used

	        Returns:
	            str: URL path ``/static/uploads/wordclouds/<filename>`` of the image

	        Raises:
	            ValueError: If the texts contain no non-whitespace characters
	        """
	        # Combine all texts
	        combined_text = ' '.join(texts)

	        # Fail early with a clear message instead of handing empty text
	        # to the renderer.
	        if not combined_text.strip():
	            raise ValueError("Cannot generate a word cloud from empty text")

	        # Generate filename if not provided. Microseconds are included so two
	        # clouds generated within the same second do not overwrite each other.
	        if filename is None:
	            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
	            filename = f"wordcloud_{timestamp}.png"

	        # Make sure the target directory exists before saving into it
	        output_dir = Path(WORDCLOUD_DIR)
	        output_dir.mkdir(parents=True, exist_ok=True)
	        filepath = output_dir / filename

	        # Create word cloud
	        wordcloud = WordCloud(
	            width=800,
	            height=400,
	            background_color='white',
	            stopwords=self.stopwords,
	            colormap='viridis',
	            max_words=100,
	            relative_scaling=0.5,
	            min_font_size=10,
	        ).generate(combined_text)

	        # Save to file; always close the figure, even when rendering or
	        # saving fails, so figures do not accumulate in pyplot's registry.
	        fig = plt.figure(figsize=(10, 5))
	        try:
	            plt.imshow(wordcloud, interpolation='bilinear')
	            plt.axis('off')
	            plt.tight_layout(pad=0)
	            fig.savefig(filepath, dpi=150, bbox_inches='tight')
	        finally:
	            plt.close(fig)

	        # Return relative URL path
	        return f"/static/uploads/wordclouds/{filename}"

	    def calculate_rating_distribution(self, ratings: List[int]) -> Dict[int, int]:
	        """
	        Calculate distribution of ratings

	        Args:
	            ratings: List of ratings (1-5)

	        Returns:
	            dict: {rating: count}, sorted by rating; ratings 1-5 are always
	            present (with count 0 when absent from the input)
	        """
	        # Start from zero for every rating 1-5, then add the observed counts
	        distribution = {rating: 0 for rating in range(1, 6)}
	        distribution.update(Counter(ratings))

	        return dict(sorted(distribution.items()))

	    def get_top_words(self, texts: List[str], top_n: int = 20) -> List[tuple]:
	        """
	        Get most frequent words from texts

	        Words are lowercased, stripped of leading/trailing punctuation, and
	        dropped when they are stopwords or have two characters or fewer.

	        Args:
	            texts: List of comments
	            top_n: Number of top words to return

	        Returns:
	            list: [(word, count), ...] ordered from most to least frequent
	        """
	        # Split on whitespace and trim punctuation from the token edges so
	        # that "tốt," and "tốt" are counted together.
	        words = (
	            token.strip(self._EDGE_PUNCTUATION)
	            for text in texts
	            for token in text.lower().split()
	        )

	        # Filter stopwords and very short tokens, then count
	        word_counts = Counter(
	            w for w in words if len(w) > 2 and w not in self.stopwords
	        )
	        return word_counts.most_common(top_n)


	# Shared module-level instance, created once when this module is imported
	viz_service: VisualizationService = VisualizationService()


	def get_viz_service() -> VisualizationService:
	    """Return the shared module-level ``VisualizationService`` instance."""
	    return viz_service