Spaces:

vtdung23
/

Predict_Rating

Sleeping

App Files Files Community

Predict_Rating / app /services /report_service.py

vtdung23

Upload folder using huggingface_hub

c09e844 verified 20 days ago

raw

history blame contribute delete

11.5 kB

	"""
	Report Service
	Generate PDF reports for batch predictions
	"""
	import io
	from typing import List, Dict
	from datetime import datetime
	from pathlib import Path
	from reportlab.lib.pagesizes import letter, A4
	from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
	from reportlab.lib.units import inch
	from reportlab.platypus import (
	SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle,
	PageBreak, Image, Preformatted
	)
	from reportlab.lib import colors
	from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_RIGHT
	from reportlab.pdfbase import pdfmetrics
	from reportlab.pdfbase.ttfonts import TTFont
	import matplotlib
	matplotlib.use('Agg')
	import matplotlib.pyplot as plt
	from io import BytesIO
	from PIL import Image as PILImage

	from app.config import WORDCLOUD_DIR


	class ReportService:
	    """Generate PDF reports for batch rating predictions.

	    The service prefers the DejaVu Sans TrueType fonts (the original code
	    selects them for Vietnamese character support) and falls back to the
	    built-in Helvetica fonts when the DejaVu files cannot be loaded, so a
	    report can still be produced on hosts without those fonts.

	    Attributes set by ``__init__``:
	        styles: ReportLab sample style sheet extended with the
	            ``CustomTitle``, ``CustomHeading`` and ``CustomNormal`` styles.
	        font_name: Name of the regular font used for body text.
	        font_name_bold: Name of the bold font used for titles and headers.
	    """

	    # Preferred TrueType fonts: (registered name, file path).
	    _DEJAVU = ('DejaVu', '/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf')
	    _DEJAVU_BOLD = ('DejaVuBold', '/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf')
	    # Built-in PDF fonts used when the DejaVu files are unavailable.
	    _FALLBACK_FONT = 'Helvetica'
	    _FALLBACK_FONT_BOLD = 'Helvetica-Bold'
	    # Accent colour shared by titles, headings and table headers.
	    _ACCENT_COLOR = '#4F46E5'

	    def __init__(self):
	        self.styles = getSampleStyleSheet()
	        # Fonts must be resolved first: the custom styles reference them.
	        self._setup_fonts()
	        self._setup_custom_styles()

	    @staticmethod
	    def _register_ttf(name: str, path: str) -> None:
	        """Register a TrueType font unless *name* is already registered.

	        A new service is created per request, so skipping known fonts avoids
	        re-parsing the font file every time.
	        """
	        if name not in pdfmetrics.getRegisteredFontNames():
	            pdfmetrics.registerFont(TTFont(name, path))

	    def _setup_fonts(self):
	        """Select the fonts for the report, preferring DejaVu.

	        Sets ``self.font_name`` and ``self.font_name_bold``. Loading is
	        best-effort: any failure prints a warning and leaves a usable
	        fallback in place instead of raising.
	        """
	        self.font_name = self._FALLBACK_FONT
	        self.font_name_bold = self._FALLBACK_FONT_BOLD

	        regular_name, regular_path = self._DEJAVU
	        bold_name, bold_path = self._DEJAVU_BOLD

	        try:
	            self._register_ttf(regular_name, regular_path)
	        except Exception as e:
	            # If fonts not found, continue with default fonts
	            print(f"Warning: Could not load Vietnamese fonts: {e}")
	            return

	        self.font_name = regular_name
	        # Until the bold face is loaded, reuse the regular face so that
	        # headers stay in the same (Unicode-capable) font as the body.
	        self.font_name_bold = regular_name
	        try:
	            self._register_ttf(bold_name, bold_path)
	        except Exception as e:
	            print(f"Warning: Could not load Vietnamese fonts: {e}")
	        else:
	            self.font_name_bold = bold_name

	        # Map <b> markup inside DejaVu paragraphs to the bold face. No
	        # oblique face is registered, so <i> keeps the upright face.
	        pdfmetrics.registerFontFamily(
	            regular_name,
	            normal=regular_name,
	            bold=self.font_name_bold,
	            italic=regular_name,
	            boldItalic=self.font_name_bold,
	        )

	    def _setup_custom_styles(self):
	        """Add the custom title, heading and body paragraph styles."""
	        accent = colors.HexColor(self._ACCENT_COLOR)

	        self.styles.add(ParagraphStyle(
	            name='CustomTitle',
	            parent=self.styles['Heading1'],
	            fontSize=24,
	            textColor=accent,
	            spaceAfter=30,
	            alignment=TA_CENTER,
	            fontName=self.font_name_bold,
	        ))

	        self.styles.add(ParagraphStyle(
	            name='CustomHeading',
	            parent=self.styles['Heading2'],
	            fontSize=14,
	            textColor=accent,
	            spaceAfter=12,
	            fontName=self.font_name_bold,
	        ))

	        self.styles.add(ParagraphStyle(
	            name='CustomNormal',
	            parent=self.styles['Normal'],
	            fontSize=10,
	            spaceAfter=6,
	            fontName=self.font_name,
	        ))

	    def generate_rating_distribution_chart(self) -> tuple:
	        """Placeholder for a matplotlib rating-distribution chart.

	        Not implemented yet: the method takes no data and currently returns
	        ``None``. The intended return value is ``(buffer, filename)``.
	        """
	        return None

	    # ------------------------------------------------------------------
	    # Pure helpers (no ReportLab objects involved)
	    # ------------------------------------------------------------------

	    @staticmethod
	    def _escape_markup(text) -> str:
	        """Return *text* as a string that is safe inside Paragraph markup.

	        Paragraph text is parsed as XML-like markup, so a raw ``<`` or ``&``
	        in user-supplied text would otherwise be read as markup.
	        """
	        from xml.sax.saxutils import escape

	        return escape('' if text is None else str(text))

	    @staticmethod
	    def _stars(rating) -> str:
	        """Return one star character per rating point.

	        Accepts ints, floats and numeric strings (e.g. ``4.0`` or ``"4"``);
	        values that cannot be read as a number yield an empty string.
	        """
	        try:
	            # Use star character ★ instead of emoji
	            return "★" * int(float(rating))
	        except (TypeError, ValueError, OverflowError):
	            return ""

	    @staticmethod
	    def _normalize_distribution(distribution) -> Dict[int, int]:
	        """Return *distribution* with integer keys and integer counts.

	        Keys might come as strings from JSON. Entries whose key or count
	        cannot be converted to ``int`` are skipped on purpose.
	        """
	        normalized = {}
	        for key, value in (distribution or {}).items():
	            try:
	                normalized[int(key)] = int(value)
	            except (ValueError, TypeError):
	                continue
	        return normalized

	    @classmethod
	    def _distribution_rows(cls, distribution) -> List[List[str]]:
	        """Return ``[stars, count, percentage]`` rows for ratings 1 to 5.

	        Percentages are relative to the sum of all counts in the normalized
	        distribution and are ``0.0%`` when that sum is zero.
	        """
	        counts = cls._normalize_distribution(distribution)
	        total = sum(counts.values())

	        rows = []
	        for rating in range(1, 6):
	            count = counts.get(rating, 0)
	            percentage = (count / total * 100) if total > 0 else 0
	            rows.append([cls._stars(rating), str(count), f"{percentage:.1f}%"])
	        return rows

	    # ------------------------------------------------------------------
	    # Section builders (each returns a list of flowables)
	    # ------------------------------------------------------------------

	    def _table_style(self, header_font_size, body_font_size, body_background,
	                     grid_color, extra=()):
	        """Build the TableStyle shared by all report tables.

	        *extra* commands are appended last, so they override the shared ones.
	        """
	        commands = [
	            ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor(self._ACCENT_COLOR)),
	            ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
	            ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
	            ('FONTNAME', (0, 0), (-1, 0), self.font_name_bold),
	            ('FONTSIZE', (0, 0), (-1, 0), header_font_size),
	            ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
	            ('BACKGROUND', (0, 1), (-1, -1), body_background),
	            ('GRID', (0, 0), (-1, -1), 1, grid_color),
	            ('FONTNAME', (0, 1), (-1, -1), self.font_name),
	            ('FONTSIZE', (0, 1), (-1, -1), body_font_size),
	        ]
	        commands.extend(extra)
	        return TableStyle(commands)

	    def _build_header_section(self, username):
	        """Return the report title and the "generated by / date" block."""
	        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
	        info_text = (
	            f"<b>Generated by:</b> {self._escape_markup(username)}"
	            f"<br/><b>Date:</b> {timestamp}"
	        )
	        return [
	            Paragraph("Prediction Report", self.styles['CustomTitle']),
	            Spacer(1, 0.3*inch),
	            Paragraph(info_text, self.styles['CustomNormal']),
	            Spacer(1, 0.3*inch),
	        ]

	    def _build_summary_section(self, predictions):
	        """Return the summary table (prediction count, average confidence)."""
	        total_predictions = len(predictions)
	        if predictions:
	            avg_confidence = (
	                sum(p.get('confidence', 0) for p in predictions) / total_predictions
	            )
	        else:
	            avg_confidence = 0

	        summary_data = [
	            ['Metric', 'Value'],
	            ['Total Predictions', str(total_predictions)],
	            ['Average Confidence', f'{avg_confidence:.2%}'],
	        ]
	        summary_table = Table(summary_data, colWidths=[3*inch, 2*inch])
	        summary_table.setStyle(
	            self._table_style(12, 10, colors.beige, colors.black)
	        )
	        return [
	            Paragraph("Summary", self.styles['CustomHeading']),
	            summary_table,
	            Spacer(1, 0.3*inch),
	        ]

	    def _build_distribution_section(self, distribution):
	        """Return the rating distribution table for ratings 1 to 5."""
	        dist_data = [['Rating', 'Count', 'Percentage']]
	        dist_data.extend(self._distribution_rows(distribution))

	        dist_table = Table(dist_data, colWidths=[1.5*inch, 1.5*inch, 1.5*inch])
	        dist_table.setStyle(
	            self._table_style(11, 10, colors.lightgrey, colors.black)
	        )
	        return [
	            Paragraph("Rating Distribution", self.styles['CustomHeading']),
	            dist_table,
	            Spacer(1, 0.3*inch),
	        ]

	    def _build_wordcloud_section(self, wordcloud_path):
	        """Return the word cloud section, or an empty list without a path.

	        Including the image is best-effort: a failure prints a warning and
	        the report is generated without it.
	        """
	        if not wordcloud_path:
	            return []

	        section = [Paragraph("Word Cloud Analysis", self.styles['CustomHeading'])]
	        try:
	            file_path = wordcloud_path
	            if wordcloud_path.startswith('/'):
	                # A leading slash is treated as a URL path: only its last
	                # component is kept and looked up inside WORDCLOUD_DIR.
	                file_path = str(WORDCLOUD_DIR / wordcloud_path.split('/')[-1])

	            if Path(file_path).exists():
	                section.append(Image(file_path, width=5*inch, height=2.5*inch))
	                section.append(Spacer(1, 0.2*inch))
	                section.append(Paragraph(
	                    "<i>Larger words indicate higher frequency in the comments</i>",
	                    self.styles['CustomNormal']
	                ))
	        except Exception as e:
	            print(f"Warning: Could not include word cloud: {e}")

	        section.append(Spacer(1, 0.3*inch))
	        return section

	    def _build_results_section(self, predictions):
	        """Return the per-comment results table."""
	        results_data = [['Comment', 'Rating', 'Confidence']]
	        for pred in predictions:
	            confidence = pred.get('confidence', 0)
	            # A Paragraph cell lets ReportLab wrap long comments; the text is
	            # escaped because it is user input, not markup.
	            comment_paragraph = Paragraph(
	                self._escape_markup(pred.get('text', '')),
	                self.styles['CustomNormal']
	            )
	            results_data.append([
	                comment_paragraph,
	                self._stars(pred.get('rating', 0)),
	                f"{confidence:.2%}",
	            ])

	        # Wider comment column to leave room for wrapped text.
	        results_table = Table(
	            results_data, colWidths=[3.5*inch, 0.8*inch, 1.2*inch]
	        )
	        results_table.setStyle(self._table_style(
	            10, 9, colors.lightgrey, colors.grey,
	            extra=[
	                ('ALIGN', (0, 0), (0, -1), 'LEFT'),
	                ('ROWBACKGROUNDS', (0, 1), (-1, -1), [colors.white, colors.lightgrey]),
	                ('VALIGN', (0, 0), (-1, -1), 'TOP'),  # Top alignment for wrapped text
	                ('LEFTPADDING', (0, 0), (-1, -1), 8),
	                ('RIGHTPADDING', (0, 0), (-1, -1), 8),
	                ('TOPPADDING', (0, 0), (-1, -1), 6),
	                ('BOTTOMPADDING', (0, 0), (-1, -1), 6),
	            ],
	        ))
	        return [
	            Paragraph("Detailed Results", self.styles['CustomHeading']),
	            Spacer(1, 0.2*inch),
	            results_table,
	        ]

	    def generate_pdf_report(
	        self,
	        predictions: List[Dict],
	        distribution: Dict[int, int],
	        wordcloud_path: str,
	        username: str,
	        filename: str = None
	    ) -> bytes:
	        """
	        Generate comprehensive PDF report for batch predictions

	        The report contains a title block, a summary table, the rating
	        distribution, an optional word cloud and, on a new page, one table
	        row per prediction.

	        Args:
	            predictions: List of prediction results with 'text', 'rating', 'confidence'
	            distribution: Rating distribution dict {rating: count}; keys and
	                counts may be strings and are converted to integers
	            wordcloud_path: Path to generated wordcloud image (URL or file path);
	                a falsy value omits the section
	            username: Username for the report
	            filename: Optional custom filename (accepted for compatibility;
	                not used by this method)

	        Returns:
	            bytes: PDF file content
	        """
	        predictions = list(predictions or [])

	        # The PDF is assembled entirely in memory.
	        pdf_buffer = io.BytesIO()
	        doc = SimpleDocTemplate(
	            pdf_buffer,
	            pagesize=A4,
	            rightMargin=0.75*inch,
	            leftMargin=0.75*inch,
	            topMargin=0.75*inch,
	            bottomMargin=0.75*inch
	        )

	        story = []
	        story.extend(self._build_header_section(username))
	        story.extend(self._build_summary_section(predictions))
	        story.extend(self._build_distribution_section(distribution))
	        story.extend(self._build_wordcloud_section(wordcloud_path))
	        # Detailed results always start on their own page.
	        story.append(PageBreak())
	        story.extend(self._build_results_section(predictions))

	        doc.build(story)
	        return pdf_buffer.getvalue()


	def get_report_service() -> ReportService:
	    """Build and return a new ReportService (dependency-injection provider)."""
	    service = ReportService()
	    return service