| """ | |
| Visualization Service | |
| WordCloud generation and data visualization utilities | |
| """ | |
| import os | |
| from typing import List, Dict | |
| from collections import Counter | |
| from wordcloud import WordCloud | |
| import matplotlib | |
| matplotlib.use('Agg') # Use non-GUI backend | |
| import matplotlib.pyplot as plt | |
| from datetime import datetime | |
| from pathlib import Path | |
| from app.config import WORDCLOUD_DIR | |


class VisualizationService:
    """Service for generating visualizations"""

    def __init__(self):
        # Vietnamese stopwords (common words to exclude)
        self.stopwords = {
            'và', 'của', 'có', 'cho', 'với', 'từ', 'này', 'được',
            'là', 'để', 'một', 'các', 'trong', 'không', 'đã', 'rất',
            'cũng', 'nhưng', 'thì', 'bị', 'khi', 'nếu', 'như', 'về',
            'tôi', 'bạn', 'mình', 'nó', 'họ', 'em', 'anh', 'chị',
            'vì', 'nên', 'đến', 'lại', 'ra', 'đang', 'sẽ', 'đều',
            'hay', 'thế', 'làm', 'rồi', 'đó', 'ở'
        }

    def generate_wordcloud(self, texts: List[str], filename: Optional[str] = None) -> str:
        """
        Generate a word cloud from a list of texts

        Args:
            texts: List of Vietnamese comments
            filename: Optional custom filename

        Returns:
            str: URL path to the generated word cloud image
        """
        # Combine all texts
        combined_text = ' '.join(texts)

        # Generate filename if not provided
        if filename is None:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            filename = f"wordcloud_{timestamp}.png"
        filepath = WORDCLOUD_DIR / filename

        # Create word cloud
        wordcloud = WordCloud(
            width=800,
            height=400,
            background_color='white',
            stopwords=self.stopwords,
            colormap='viridis',
            max_words=100,
            relative_scaling=0.5,
            min_font_size=10
        ).generate(combined_text)

        # Save to file
        plt.figure(figsize=(10, 5))
        plt.imshow(wordcloud, interpolation='bilinear')
        plt.axis('off')
        plt.tight_layout(pad=0)
        plt.savefig(filepath, dpi=150, bbox_inches='tight')
        plt.close()

        # Return URL path relative to the static mount
        # (assumes WORDCLOUD_DIR is served under /static/uploads/wordclouds)
        return f"/static/uploads/wordclouds/{filename}"

    def calculate_rating_distribution(self, ratings: List[int]) -> Dict[int, int]:
        """
        Calculate distribution of ratings

        Args:
            ratings: List of ratings (1-5)

        Returns:
            dict: {rating: count}
        """
        distribution = Counter(ratings)

        # Ensure all ratings 1-5 are present
        for rating in range(1, 6):
            if rating not in distribution:
                distribution[rating] = 0

        return dict(sorted(distribution.items()))

    def get_top_words(self, texts: List[str], top_n: int = 20) -> List[Tuple[str, int]]:
        """
        Get the most frequent words from texts

        Args:
            texts: List of comments
            top_n: Number of top words to return

        Returns:
            list: [(word, count), ...]
        """
        # Combine and split texts
        words = []
        for text in texts:
            words.extend(text.lower().split())

        # Filter out stopwords and very short tokens
        filtered_words = [w for w in words if w not in self.stopwords and len(w) > 2]

        # Count and return top words
        word_counts = Counter(filtered_words)
        return word_counts.most_common(top_n)


# Singleton instance
viz_service = VisualizationService()


def get_viz_service() -> VisualizationService:
    """Dependency to get visualization service"""
    return viz_service
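

# --- Usage sketch (illustrative only) ---
# A minimal example of how this service might be exercised, assuming the
# `wordcloud` and `matplotlib` packages are installed and that
# `app.config.WORDCLOUD_DIR` points to an existing, writable directory.
# The sample comments and ratings below are made up for demonstration.
# In a FastAPI app (which the dependency-style getter above suggests),
# `get_viz_service` would typically be injected with `Depends(get_viz_service)`.
if __name__ == "__main__":
    sample_comments = [
        "Sản phẩm rất tốt, giao hàng nhanh",   # "Great product, fast delivery"
        "Chất lượng kém, không như mô tả",     # "Poor quality, not as described"
        "Giá hợp lý, sẽ mua lại lần sau",      # "Fair price, will buy again"
    ]
    sample_ratings = [5, 1, 4, 4, 5]

    service = get_viz_service()
    print(service.calculate_rating_distribution(sample_ratings))  # {1: 1, 2: 0, 3: 0, 4: 2, 5: 2}
    print(service.get_top_words(sample_comments, top_n=5))
    print(service.generate_wordcloud(sample_comments))  # e.g. /static/uploads/wordclouds/wordcloud_<timestamp>.png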