Spaces:
Running
Running
"""
Simple PDF generation using reportlab with proper content structure.
"""
import io
import re
from typing import Dict, Any
from xml.sax.saxutils import escape
# Try to import all PDF dependencies at module level so that a missing package
# is detected once, at import time. SimplePDFGenerator.generate_pdf reads
# PDF_AVAILABLE / PDF_ERROR and raises ImportError only when a PDF is requested.
try:
    from reportlab.pdfgen import canvas
    from reportlab.lib.pagesizes import letter, A4
    from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
    from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
    from reportlab.lib.units import inch
    from reportlab.lib.colors import Color, black, blue, green, red
    from bs4 import BeautifulSoup
except ImportError as e:
    PDF_AVAILABLE = False
    # Keep the original import failure text so it can be shown to the caller.
    PDF_ERROR = str(e)
else:
    PDF_AVAILABLE = True
    # Always bind PDF_ERROR so readers never have to probe globals() for it.
    PDF_ERROR = None
class SimplePDFGenerator:
    """Build a short, fixed-layout SEO summary PDF with reportlab.

    Only the report URL is taken from the supplied HTML. Every section body is
    static boilerplate text; the closing paragraph points the reader to the
    complete HTML report for the actual metrics.
    """

    # The first pattern selects the text node that mentions a URL; the second
    # re-extracts the URL from that node while also stopping at ")" so a link
    # written inside parentheses does not keep the closing bracket.
    _URL_NODE_PATTERN = re.compile(r'https?://[^\s]+')
    _URL_PATTERN = re.compile(r'https?://[^\s\)]+')
    _DEFAULT_URL = "Unknown Website"
    _MISSING_DEPS_MESSAGE = "PDF generation requires reportlab and beautifulsoup4"

    _TECHNICAL_BULLETS = (
        "Core Web Vitals assessment",
        "Mobile and Desktop performance scores",
        "Page loading speed analysis",
        "Technical optimization opportunities",
    )
    _CONTENT_BULLETS = (
        "Page structure and metadata analysis",
        "Content quality and optimization assessment",
        "Internal linking structure review",
    )
    _KEYWORD_BULLETS = (
        "Current keyword rankings and performance",
        "Keyword opportunities and gaps",
        "Competitive keyword analysis",
        "Search volume and traffic potential",
    )
    _BACKLINK_BULLETS = (
        "Domain authority and trust metrics",
        "Backlink quality and diversity analysis",
        "Referring domains breakdown",
        "Link building opportunities",
    )
    _RECOMMENDATION_BULLETS = (
        "Optimize Core Web Vitals for better user experience",
        "Improve page loading speeds on mobile devices",
        "Enhance content structure and internal linking",
        "Focus on high-opportunity keyword targets",
        "Build high-quality backlinks from relevant domains",
    )

    def __init__(self):
        """Record whether the optional PDF dependencies were importable."""
        self.available = PDF_AVAILABLE

    def generate_pdf(self, html_content: str) -> bytes:
        """Render the summary report and return it as PDF bytes.

        Args:
            html_content: HTML of the full report. It is only scanned for the
                first ``http(s)://`` URL, which is shown under the title.
                ``None`` or an empty string yields the placeholder
                "Unknown Website".

        Returns:
            The complete PDF document as bytes.

        Raises:
            ImportError: If reportlab or beautifulsoup4 could not be imported.
        """
        if not self.available:
            # PDF_ERROR holds the original import failure text when present.
            raise ImportError(globals().get('PDF_ERROR') or self._MISSING_DEPS_MESSAGE)

        # Parse HTML only to locate the report URL.
        soup = BeautifulSoup(html_content or "", 'html.parser')
        url = self._extract_url(soup)

        buffer = io.BytesIO()
        doc = SimpleDocTemplate(
            buffer,
            pagesize=A4,
            topMargin=0.75 * inch,
            bottomMargin=0.75 * inch,
            leftMargin=0.75 * inch,
            rightMargin=0.75 * inch,
        )
        title_style, header_style, subheader_style, normal_style = self._build_styles()

        story = [
            Paragraph(self._title_markup(url), title_style),
            Spacer(1, 20),
        ]
        self._add_executive_summary(story, header_style, normal_style)
        self._add_technical_metrics(story, header_style, subheader_style, normal_style)
        self._add_content_metrics(story, header_style, normal_style)
        self._add_keywords_section(story, header_style, normal_style)
        self._add_backlinks_section(story, header_style, normal_style)
        self._add_recommendations(story, header_style, normal_style)

        doc.build(story)
        # getvalue() returns the whole buffer regardless of the stream position.
        return buffer.getvalue()

    @staticmethod
    def _build_styles():
        """Return the (title, header, subheader, normal) paragraph styles."""
        styles = getSampleStyleSheet()
        title_style = ParagraphStyle(
            'CustomTitle',
            parent=styles['Heading1'],
            fontSize=20,
            textColor=black,
            spaceAfter=20,
            alignment=1,  # Center
        )
        header_style = ParagraphStyle(
            'CustomHeader',
            parent=styles['Heading2'],
            fontSize=16,
            textColor=blue,
            spaceBefore=15,
            spaceAfter=10,
        )
        subheader_style = ParagraphStyle(
            'CustomSubHeader',
            parent=styles['Heading3'],
            fontSize=12,
            textColor=black,
            spaceBefore=8,
            spaceAfter=5,
        )
        return title_style, header_style, subheader_style, styles['Normal']

    @classmethod
    def _extract_url(cls, soup) -> str:
        """Return the first URL mentioned in the parsed HTML, or a placeholder."""
        node = soup.find(string=cls._URL_NODE_PATTERN)
        if node is None:
            return cls._DEFAULT_URL
        return cls._first_url(str(node)) or cls._DEFAULT_URL

    @classmethod
    def _first_url(cls, text: str):
        """Return the first http(s) URL in *text*, or None when there is none."""
        found = cls._URL_PATTERN.search(text)
        return found.group() if found else None

    @staticmethod
    def _title_markup(url: str) -> str:
        """Return the title paragraph markup with *url* safely escaped.

        Paragraph text is parsed as XML-like markup, so "&", "<" and ">" coming
        from the URL (e.g. a query string) must be escaped before insertion.
        """
        return f"SEO Analysis Report<br/>{escape(url)}"

    @staticmethod
    def _add_bullets(story, bullets, normal_style):
        """Append one bullet paragraph per entry of *bullets* to *story*."""
        story.extend(Paragraph(f"• {text}", normal_style) for text in bullets)

    def _add_executive_summary(self, story, header_style, normal_style):
        """Append the "Executive Summary" heading and its fixed intro text."""
        story.append(Paragraph("Executive Summary", header_style))
        story.append(Paragraph("This SEO analysis report provides comprehensive insights into your website's search engine optimization performance, including technical metrics, content quality, keyword rankings, and backlink profile.", normal_style))
        story.append(Spacer(1, 10))

    def _add_technical_metrics(self, story, header_style, subheader_style, normal_style):
        """Append the "Technical SEO Analysis" section with its sub-heading."""
        story.append(Paragraph("Technical SEO Analysis", header_style))
        story.append(Paragraph("Performance Metrics:", subheader_style))
        self._add_bullets(story, self._TECHNICAL_BULLETS, normal_style)
        story.append(Spacer(1, 10))

    def _add_content_metrics(self, story, header_style, normal_style):
        """Append the "Content Audit" section."""
        story.append(Paragraph("Content Audit", header_style))
        self._add_bullets(story, self._CONTENT_BULLETS, normal_style)
        story.append(Spacer(1, 10))

    def _add_keywords_section(self, story, header_style, normal_style):
        """Append the "Keywords Analysis" section."""
        story.append(Paragraph("Keywords Analysis", header_style))
        self._add_bullets(story, self._KEYWORD_BULLETS, normal_style)
        story.append(Spacer(1, 10))

    def _add_backlinks_section(self, story, header_style, normal_style):
        """Append the "Backlinks Profile" section."""
        story.append(Paragraph("Backlinks Profile", header_style))
        self._add_bullets(story, self._BACKLINK_BULLETS, normal_style)
        story.append(Spacer(1, 10))

    def _add_recommendations(self, story, header_style, normal_style):
        """Append the "Key Recommendations" section and the closing pointer."""
        story.append(Paragraph("Key Recommendations", header_style))
        self._add_bullets(story, self._RECOMMENDATION_BULLETS, normal_style)
        story.append(Spacer(1, 15))
        story.append(Paragraph("For detailed metrics and specific implementation guidance, please refer to the complete HTML report.", normal_style))
def create_browser_pdf_instructions() -> str:
    """Return Markdown instructions for saving the HTML report as a PDF.

    The text consists of a level-2 heading, six numbered steps and a closing
    sentence. Blank lines separate the three parts so that Markdown renderers
    do not fold the closing sentence into the last list item.

    Returns:
        The instructions as a Markdown string with a leading and a trailing
        newline.
    """
    steps = (
        '1. **Download the HTML report** using the button above',
        '2. **Open the HTML file** in your web browser (Chrome, Firefox, Edge)',
        '3. **Print the page**: Press Ctrl+P (Windows) or Cmd+P (Mac)',
        '4. **Select destination**: Choose "Save as PDF" or "Microsoft Print to PDF"',
        '5. **Adjust settings**: Select A4 size, include background graphics',
        '6. **Save**: Click Save and choose your location',
    )
    lines = [
        "",
        "## How to Create PDF from HTML Report:",
        "",
        *steps,
        # Without this blank line the sentence below becomes a lazy
        # continuation of step 6 when rendered as Markdown.
        "",
        "This will create a high-quality PDF with all charts and formatting preserved.",
        "",
    ]
    return "\n".join(lines)