# Source: Predict_Rating/app/services/visualization_service.py
# Uploaded by vtdung23 -- "Upload folder using huggingface_hub"
# Commit c09e844 (verified)
"""
Visualization Service
WordCloud generation and data visualization utilities
"""
import os
import string
from collections import Counter
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional

import matplotlib
matplotlib.use('Agg')  # Use non-GUI backend; must run before pyplot is imported
import matplotlib.pyplot as plt
from wordcloud import WordCloud

from app.config import WORDCLOUD_DIR
class VisualizationService:
    """Service for generating word clouds and simple text/rating statistics."""

    def __init__(self):
        # Vietnamese stopwords (common function words excluded from the
        # word cloud and from the top-word counts). Kept as a mutable
        # instance attribute so callers can extend it.
        self.stopwords = {
            'và', 'của', 'có', 'cho', 'với', 'từ', 'này', 'được',
            'là', 'để', 'một', 'các', 'trong', 'không', 'đã', 'rất',
            'cũng', 'nhưng', 'thì', 'bị', 'khi', 'nếu', 'như', 'về',
            'tôi', 'bạn', 'mình', 'nó', 'họ', 'em', 'anh', 'chị',
            'vì', 'nên', 'đến', 'lại', 'ra', 'đang', 'sẽ', 'đều',
            'hay', 'thế', 'làm', 'rồi', 'đó', 'ở',
        }

    def generate_wordcloud(self, texts: List[str], filename: Optional[str] = None) -> str:
        """
        Generate a word cloud image from a list of texts.

        Args:
            texts: List of comments; empty/None entries are skipped.
            filename: Optional custom file name. When omitted, a
                timestamped name ``wordcloud_<timestamp>.png`` is used.

        Returns:
            str: URL path of the generated image, built as
            ``/static/uploads/wordclouds/<filename>``.

        Raises:
            ValueError: If ``texts`` contains no non-blank text.
        """
        combined_text = ' '.join(text for text in texts if text)
        # Fail fast with a clear message instead of erroring deep inside
        # WordCloud after the output path has already been prepared.
        if not combined_text.strip():
            raise ValueError("Cannot generate a word cloud from empty text")

        if filename is None:
            # Microsecond resolution so two requests within the same second
            # do not overwrite each other's image.
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
            filename = f"wordcloud_{timestamp}.png"

        # Make sure the target directory exists before saving.
        output_dir = Path(WORDCLOUD_DIR)
        output_dir.mkdir(parents=True, exist_ok=True)
        filepath = output_dir / filename

        wordcloud = WordCloud(
            width=800,
            height=400,
            background_color='white',
            stopwords=self.stopwords,
            colormap='viridis',
            max_words=100,
            relative_scaling=0.5,
            min_font_size=10,
        ).generate(combined_text)

        # Draw on an explicit figure (not pyplot's implicit "current figure")
        # and always close it, even if rendering or saving fails; otherwise
        # figures accumulate in memory for the life of the process.
        fig = plt.figure(figsize=(10, 5))
        try:
            ax = fig.add_subplot(1, 1, 1)
            ax.imshow(wordcloud, interpolation='bilinear')
            ax.axis('off')
            fig.tight_layout(pad=0)
            fig.savefig(filepath, dpi=150, bbox_inches='tight')
        finally:
            plt.close(fig)

        # NOTE(review): the URL prefix is hard-coded and assumed to be where
        # WORDCLOUD_DIR is served from -- confirm against app.config.
        return f"/static/uploads/wordclouds/{filename}"

    def calculate_rating_distribution(self, ratings: List[int]) -> Dict[int, int]:
        """
        Calculate the distribution of ratings.

        Args:
            ratings: List of ratings (expected 1-5).

        Returns:
            dict: ``{rating: count}`` sorted by rating. Ratings 1-5 are
            always present (count 0 when unseen); any other value found in
            ``ratings`` is kept as well.
        """
        distribution = dict.fromkeys(range(1, 6), 0)
        distribution.update(Counter(ratings))
        return dict(sorted(distribution.items()))

    def get_top_words(self, texts: List[str], top_n: int = 20) -> List[tuple]:
        """
        Get the most frequent words from texts.

        Words are lower-cased, split on whitespace and stripped of
        leading/trailing ASCII punctuation so that ``"pin,"`` and ``"pin"``
        count as the same word. Stopwords and words of two characters or
        fewer are ignored.

        Args:
            texts: List of comments; empty/None entries are skipped.
            top_n: Number of top words to return.

        Returns:
            list: ``[(word, count), ...]`` ordered by descending count.
        """
        tokens = (
            raw.strip(string.punctuation)
            for text in texts if text
            for raw in text.lower().split()
        )
        word_counts = Counter(
            token for token in tokens
            if len(token) > 2 and token not in self.stopwords
        )
        return word_counts.most_common(top_n)
# Singleton instance: created once at import time and returned by get_viz_service()
viz_service: VisualizationService = VisualizationService()
def get_viz_service() -> VisualizationService:
    """Return the shared module-level ``VisualizationService`` instance.

    Intended as a dependency provider; every call hands back the same
    ``viz_service`` object rather than constructing a new one.
    """
    return viz_service