Spaces:
Sleeping
Sleeping
| """ | |
| Report Service | |
| Generate PDF reports for batch predictions | |
| """ | |
| import io | |
| from typing import List, Dict | |
| from datetime import datetime | |
| from pathlib import Path | |
| from reportlab.lib.pagesizes import letter, A4 | |
| from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle | |
| from reportlab.lib.units import inch | |
| from reportlab.platypus import ( | |
| SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, | |
| PageBreak, Image, Preformatted | |
| ) | |
| from reportlab.lib import colors | |
| from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_RIGHT | |
| from reportlab.pdfbase import pdfmetrics | |
| from reportlab.pdfbase.ttfonts import TTFont | |
| import matplotlib | |
| matplotlib.use('Agg') | |
| import matplotlib.pyplot as plt | |
| from io import BytesIO | |
| from PIL import Image as PILImage | |
| from app.config import WORDCLOUD_DIR | |
class ReportService:
    """Build PDF reports summarising a batch of rating predictions.

    Rendering is done with ReportLab. DejaVu Sans is preferred because it
    covers Vietnamese characters; if those font files cannot be registered the
    service uses ReportLab's standard Helvetica fonts instead of referencing
    an unregistered font name.
    """

    # Font names in effect until (and unless) the DejaVu fonts register.
    font_name = 'Helvetica'
    font_name_bold = 'Helvetica-Bold'

    def __init__(self):
        """Load the sample stylesheet, register fonts, then add custom styles."""
        self.styles = getSampleStyleSheet()
        # Fonts go first so the custom styles pick up whichever font names
        # registration settled on.
        self._setup_fonts()
        self._setup_custom_styles()

    def _setup_fonts(self):
        """Register the DejaVu fonts used for Vietnamese character support.

        On success ``font_name`` / ``font_name_bold`` are switched to the
        DejaVu names. On any failure a warning is printed and the Helvetica
        defaults stay in place (best-effort: a missing font must not prevent
        the service from being constructed).
        """
        try:
            pdfmetrics.registerFont(
                TTFont('DejaVu', '/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf')
            )
            pdfmetrics.registerFont(
                TTFont('DejaVuBold', '/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf')
            )
        except Exception as e:
            print(f"Warning: Could not load Vietnamese fonts: {e}")
            return
        # Only advertise the DejaVu names once BOTH variants are registered.
        self.font_name = 'DejaVu'
        self.font_name_bold = 'DejaVuBold'

    def _setup_custom_styles(self):
        """Add the 'CustomTitle', 'CustomHeading' and 'CustomNormal' styles."""
        accent = colors.HexColor('#4F46E5')
        self.styles.add(ParagraphStyle(
            name='CustomTitle',
            parent=self.styles['Heading1'],
            fontSize=24,
            textColor=accent,
            spaceAfter=30,
            alignment=TA_CENTER,
            fontName=self.font_name_bold,
        ))
        self.styles.add(ParagraphStyle(
            name='CustomHeading',
            parent=self.styles['Heading2'],
            fontSize=14,
            textColor=accent,
            spaceAfter=12,
            fontName=self.font_name_bold,
        ))
        self.styles.add(ParagraphStyle(
            name='CustomNormal',
            parent=self.styles['Normal'],
            fontSize=10,
            spaceAfter=6,
            fontName=self.font_name,
        ))

    @staticmethod
    def _coerce(value, cast, default=0):
        """Return ``cast(value)``, or ``default`` when the value cannot be cast."""
        try:
            return cast(value)
        except (TypeError, ValueError, OverflowError):
            return default

    @staticmethod
    def _normalize_distribution(distribution) -> Dict[int, int]:
        """Return ``distribution`` with keys and counts converted to ``int``.

        Keys may arrive as strings (e.g. after a JSON round trip). Entries
        whose key or count cannot be converted are skipped; ``None`` is
        treated as an empty distribution.
        """
        normalized = {}
        for key, value in (distribution or {}).items():
            try:
                normalized[int(key)] = int(value)
            except (ValueError, TypeError):
                continue
        return normalized

    @classmethod
    def _stars(cls, rating) -> str:
        """Return one U+2605 star per rating point ('' for invalid ratings)."""
        return '\u2605' * cls._coerce(rating, int)

    @staticmethod
    def _safe_markup(text) -> str:
        """Escape ``text`` for use inside a ReportLab ``Paragraph``.

        ``Paragraph`` parses its input as XML-like markup, so raw ``&``,
        ``<`` and ``>`` in user-supplied text must be escaped first.
        ``None`` becomes an empty string.
        """
        from xml.sax.saxutils import escape

        return escape('' if text is None else str(text))

    def _table_commands(self, header_size, body_size, body_background, grid_color):
        """Return the TableStyle commands shared by every table in the report.

        Covers header colours, header/body fonts and sizes, body background
        and the grid. Alignment and any extra padding are appended by callers.
        """
        return [
            ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#4F46E5')),
            ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
            ('FONTNAME', (0, 0), (-1, 0), self.font_name_bold),
            ('FONTSIZE', (0, 0), (-1, 0), header_size),
            ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
            ('BACKGROUND', (0, 1), (-1, -1), body_background),
            ('GRID', (0, 0), (-1, -1), 1, grid_color),
            ('FONTNAME', (0, 1), (-1, -1), self.font_name),
            ('FONTSIZE', (0, 1), (-1, -1), body_size),
        ]

    def generate_rating_distribution_chart(self) -> tuple:
        """Placeholder for a matplotlib rating-distribution chart.

        Not implemented yet: the method has no body and currently returns
        ``None`` rather than the ``(buffer, filename)`` tuple it is meant to
        produce.
        """
        pass

    def generate_pdf_report(
        self,
        predictions: List[Dict],
        distribution: Dict[int, int],
        wordcloud_path: str,
        username: str,
        filename: str = None
    ) -> bytes:
        """Generate a PDF report for a batch of predictions.

        The report contains a title, generation info, a summary table, the
        rating distribution for ratings 1-5, an optional word cloud image and
        a detailed per-comment table on a new page.

        Args:
            predictions: Prediction dicts read via the keys 'text', 'rating'
                and 'confidence'. Ratings/confidences that cannot be
                converted to numbers are rendered as 0.
            distribution: Rating distribution ``{rating: count}``; keys and
                counts may be ints or numeric strings.
            wordcloud_path: Word cloud image location. A value starting with
                '/' is treated as a URL path and resolved to the file of the
                same name inside ``WORDCLOUD_DIR``; anything else is used as
                a file path as-is. The image is only embedded if it exists.
            username: Name shown in the "Generated by" line.
            filename: Accepted for interface compatibility; not used here.

        Returns:
            bytes: The rendered PDF document.
        """
        predictions = predictions or []
        heading_style = self.styles['CustomHeading']
        normal_style = self.styles['CustomNormal']

        # Render into memory; nothing is written to disk.
        pdf_buffer = io.BytesIO()
        margin = 0.75 * inch
        doc = SimpleDocTemplate(
            pdf_buffer,
            pagesize=A4,
            rightMargin=margin,
            leftMargin=margin,
            topMargin=margin,
            bottomMargin=margin,
        )
        story = []

        # Title
        story.append(Paragraph("Prediction Report", self.styles['CustomTitle']))
        story.append(Spacer(1, 0.3 * inch))

        # Report info -- the username is user input, so escape it before it is
        # embedded in Paragraph markup.
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        info_text = (
            f"<b>Generated by:</b> {self._safe_markup(username)}"
            f"<br/><b>Date:</b> {timestamp}"
        )
        story.append(Paragraph(info_text, normal_style))
        story.append(Spacer(1, 0.3 * inch))

        # Summary section
        total_predictions = len(predictions)
        if total_predictions:
            confidence_sum = sum(
                self._coerce(p.get('confidence', 0), float) for p in predictions
            )
            avg_confidence = confidence_sum / total_predictions
        else:
            avg_confidence = 0
        story.append(Paragraph("Summary", heading_style))
        summary_data = [
            ['Metric', 'Value'],
            ['Total Predictions', str(total_predictions)],
            ['Average Confidence', f'{avg_confidence:.2%}'],
        ]
        summary_table = Table(summary_data, colWidths=[3 * inch, 2 * inch])
        summary_table.setStyle(TableStyle(
            self._table_commands(12, 10, colors.beige, colors.black)
            + [('ALIGN', (0, 0), (-1, -1), 'CENTER')]
        ))
        story.append(summary_table)
        story.append(Spacer(1, 0.3 * inch))

        # Rating Distribution section
        story.append(Paragraph("Rating Distribution", heading_style))
        normalized_dist = self._normalize_distribution(distribution)
        total = sum(normalized_dist.values())
        dist_data = [['Rating', 'Count', 'Percentage']]
        for rating in range(1, 6):
            count = normalized_dist.get(rating, 0)
            percentage = (count / total * 100) if total > 0 else 0
            dist_data.append([
                self._stars(rating),
                str(count),
                f"{percentage:.1f}%",
            ])
        dist_table = Table(dist_data, colWidths=[1.5 * inch] * 3)
        dist_table.setStyle(TableStyle(
            self._table_commands(11, 10, colors.lightgrey, colors.black)
            + [('ALIGN', (0, 0), (-1, -1), 'CENTER')]
        ))
        story.append(dist_table)
        story.append(Spacer(1, 0.3 * inch))

        # Word Cloud section
        if wordcloud_path:
            story.append(Paragraph("Word Cloud Analysis", heading_style))
            try:
                file_path = wordcloud_path
                if wordcloud_path.startswith('/'):
                    # URL path: map it to the file with the same name on disk.
                    file_path = str(WORDCLOUD_DIR / wordcloud_path.split('/')[-1])
                if Path(file_path).exists():
                    story.append(Image(file_path, width=5 * inch, height=2.5 * inch))
                    story.append(Spacer(1, 0.2 * inch))
                    story.append(Paragraph(
                        "<i>Larger words indicate higher frequency in the comments</i>",
                        normal_style,
                    ))
            except Exception as e:
                # Best-effort: a broken image must not abort the whole report.
                print(f"Warning: Could not include word cloud: {e}")
            story.append(Spacer(1, 0.3 * inch))

        # Detailed results start on their own page.
        story.append(PageBreak())
        story.append(Paragraph("Detailed Results", heading_style))
        story.append(Spacer(1, 0.2 * inch))

        results_data = [['Comment', 'Rating', 'Confidence']]
        for pred in predictions:
            # Wrapping the comment in a Paragraph lets ReportLab wrap long
            # text inside the cell; escape it because it is raw user text.
            comment_paragraph = Paragraph(
                self._safe_markup(pred.get('text', '')), normal_style
            )
            confidence = self._coerce(pred.get('confidence', 0), float)
            results_data.append([
                comment_paragraph,
                self._stars(pred.get('rating', 0)),
                f"{confidence:.2%}",
            ])

        # Wide comment column for wrapping; repeatRows keeps the header row
        # visible when the table spills onto further pages.
        results_table = Table(
            results_data,
            colWidths=[3.5 * inch, 0.8 * inch, 1.2 * inch],
            repeatRows=1,
        )
        results_table.setStyle(TableStyle(
            self._table_commands(10, 9, colors.lightgrey, colors.grey)
            + [
                ('ALIGN', (0, 0), (0, -1), 'LEFT'),
                ('ALIGN', (1, 0), (-1, -1), 'CENTER'),
                ('ROWBACKGROUNDS', (0, 1), (-1, -1), [colors.white, colors.lightgrey]),
                ('VALIGN', (0, 0), (-1, -1), 'TOP'),  # Top alignment for wrapped text
                ('LEFTPADDING', (0, 0), (-1, -1), 8),
                ('RIGHTPADDING', (0, 0), (-1, -1), 8),
                ('TOPPADDING', (0, 0), (-1, -1), 6),
                ('BOTTOMPADDING', (0, 0), (-1, -1), 6),
            ]
        ))
        story.append(results_table)

        # Build PDF and hand back its bytes.
        doc.build(story)
        return pdf_buffer.getvalue()
def get_report_service() -> ReportService:
    """Provide a ReportService for dependency injection.

    A new instance is constructed on every call.
    """
    service = ReportService()
    return service