# ThinklySEO / simple_pdf_generator.py
# Uploaded by yashgori20 (commit 9b4ad2b, message: "sdsswfsfv")
"""
Simple PDF generation using reportlab with proper content structure
"""
import io
import re
from typing import Dict, Any
# Try to import all PDF dependencies at module level
try:
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter, A4
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
from reportlab.lib.units import inch
from reportlab.lib.colors import Color, black, blue, green, red
from bs4 import BeautifulSoup
PDF_AVAILABLE = True
except ImportError as e:
PDF_AVAILABLE = False
PDF_ERROR = str(e)
class SimplePDFGenerator:
    """Build a fixed-outline SEO report PDF with reportlab.

    The document body is a static outline (executive summary, technical,
    content, keywords, backlinks, recommendations).  The only value taken
    from the supplied HTML is the first URL found in its text, which is
    shown under the report title.
    """

    # Message used when the import failure reason was not recorded.
    _FALLBACK_ERROR = "PDF generation requires reportlab and beautifulsoup4"
    # Title text used when no URL can be found in the HTML.
    _DEFAULT_SITE = "Unknown Website"

    def __init__(self):
        """Record whether the optional PDF dependencies imported successfully."""
        self.available = PDF_AVAILABLE

    def generate_pdf(self, html_content: str) -> bytes:
        """Render the SEO summary report and return it as PDF bytes.

        Args:
            html_content: HTML of the full report.  It is only scanned for
                the first URL appearing in a text node; ``None`` or an empty
                string is treated as an empty document.

        Returns:
            The bytes of the generated A4 PDF document.

        Raises:
            ImportError: If reportlab or beautifulsoup4 could not be imported
                when the module was loaded.
        """
        if not self.available:
            # PDF_ERROR only exists at module level when the import failed.
            raise ImportError(globals().get('PDF_ERROR', self._FALLBACK_ERROR))

        # Parse HTML and extract content
        soup = BeautifulSoup(html_content or "", 'html.parser')

        buffer = io.BytesIO()
        doc = SimpleDocTemplate(
            buffer,
            pagesize=A4,
            topMargin=0.75*inch,
            bottomMargin=0.75*inch,
            leftMargin=0.75*inch,
            rightMargin=0.75*inch
        )

        styles = getSampleStyleSheet()
        normal_style = styles['Normal']
        title_style = ParagraphStyle(
            'CustomTitle',
            parent=styles['Heading1'],
            fontSize=20,
            textColor=black,
            spaceAfter=20,
            alignment=1  # Center
        )
        header_style = ParagraphStyle(
            'CustomHeader',
            parent=styles['Heading2'],
            fontSize=16,
            textColor=blue,
            spaceBefore=15,
            spaceAfter=10
        )
        subheader_style = ParagraphStyle(
            'CustomSubHeader',
            parent=styles['Heading3'],
            fontSize=12,
            textColor=black,
            spaceBefore=8,
            spaceAfter=5
        )

        story = []

        # Title.  The URL comes from arbitrary page text, so it is escaped
        # before being placed inside Paragraph markup; otherwise characters
        # such as '<' or '&' would be interpreted as markup.
        url = self._escape_markup(self._extract_url(soup))
        story.append(Paragraph(f"SEO Analysis Report<br/>{url}", title_style))
        story.append(Spacer(1, 20))

        # Generate structured content from actual data instead of parsing HTML
        self._add_executive_summary(story, header_style, normal_style)
        self._add_technical_metrics(story, header_style, subheader_style, normal_style)
        self._add_content_metrics(story, header_style, normal_style)
        self._add_keywords_section(story, header_style, normal_style)
        self._add_backlinks_section(story, header_style, normal_style)
        self._add_recommendations(story, header_style, normal_style)

        doc.build(story)
        return buffer.getvalue()

    @staticmethod
    def _escape_markup(text: str) -> str:
        """Escape ``&``, ``<`` and ``>`` so *text* is safe inside Paragraph markup."""
        # Imported locally so the module's top-level import block is untouched.
        from xml.sax.saxutils import escape
        return escape(text)

    def _extract_url(self, soup) -> str:
        """Return the first URL found in the parsed HTML text, or a placeholder."""
        text_node = soup.find(string=re.compile(r'https?://[^\s]+'))
        if text_node:
            # Stop at ')' as well so a URL written inside parentheses is not
            # returned with the closing parenthesis attached.
            match = re.search(r'https?://[^\s\)]+', str(text_node))
            if match:
                return match.group()
        return self._DEFAULT_SITE

    @staticmethod
    def _add_bullets(story, items, normal_style):
        """Append one bullet-prefixed paragraph to *story* per entry in *items*."""
        story.extend(Paragraph(f"• {item}", normal_style) for item in items)

    def _add_executive_summary(self, story, header_style, normal_style):
        """Append the executive summary heading and its introductory paragraph."""
        story.append(Paragraph("Executive Summary", header_style))
        story.append(Paragraph(
            "This SEO analysis report provides comprehensive insights into "
            "your website's search engine optimization performance, "
            "including technical metrics, content quality, keyword rankings, "
            "and backlink profile.",
            normal_style,
        ))
        story.append(Spacer(1, 10))

    def _add_technical_metrics(self, story, header_style, subheader_style, normal_style):
        """Append the technical SEO heading, sub-heading and bullet list."""
        story.append(Paragraph("Technical SEO Analysis", header_style))
        story.append(Paragraph("Performance Metrics:", subheader_style))
        self._add_bullets(story, (
            "Core Web Vitals assessment",
            "Mobile and Desktop performance scores",
            "Page loading speed analysis",
            "Technical optimization opportunities",
        ), normal_style)
        story.append(Spacer(1, 10))

    def _add_content_metrics(self, story, header_style, normal_style):
        """Append the content audit heading and bullet list."""
        story.append(Paragraph("Content Audit", header_style))
        self._add_bullets(story, (
            "Page structure and metadata analysis",
            "Content quality and optimization assessment",
            "Internal linking structure review",
        ), normal_style)
        story.append(Spacer(1, 10))

    def _add_keywords_section(self, story, header_style, normal_style):
        """Append the keywords analysis heading and bullet list."""
        story.append(Paragraph("Keywords Analysis", header_style))
        self._add_bullets(story, (
            "Current keyword rankings and performance",
            "Keyword opportunities and gaps",
            "Competitive keyword analysis",
            "Search volume and traffic potential",
        ), normal_style)
        story.append(Spacer(1, 10))

    def _add_backlinks_section(self, story, header_style, normal_style):
        """Append the backlinks profile heading and bullet list."""
        story.append(Paragraph("Backlinks Profile", header_style))
        self._add_bullets(story, (
            "Domain authority and trust metrics",
            "Backlink quality and diversity analysis",
            "Referring domains breakdown",
            "Link building opportunities",
        ), normal_style)
        story.append(Spacer(1, 10))

    def _add_recommendations(self, story, header_style, normal_style):
        """Append the recommendations heading, bullet list and closing note."""
        story.append(Paragraph("Key Recommendations", header_style))
        self._add_bullets(story, (
            "Optimize Core Web Vitals for better user experience",
            "Improve page loading speeds on mobile devices",
            "Enhance content structure and internal linking",
            "Focus on high-opportunity keyword targets",
            "Build high-quality backlinks from relevant domains",
        ), normal_style)
        story.append(Spacer(1, 15))
        story.append(Paragraph(
            "For detailed metrics and specific implementation guidance, "
            "please refer to the complete HTML report.",
            normal_style,
        ))
def create_browser_pdf_instructions() -> str:
    """Return Markdown instructions for saving the HTML report as a PDF.

    The text walks the user through printing the downloaded HTML report to
    PDF from a web browser.  It starts and ends with a newline.
    """
    instruction_lines = (
        "",
        "## How to Create PDF from HTML Report:",
        "1. **Download the HTML report** using the button above",
        "2. **Open the HTML file** in your web browser (Chrome, Firefox, Edge)",
        "3. **Print the page**: Press Ctrl+P (Windows) or Cmd+P (Mac)",
        '4. **Select destination**: Choose "Save as PDF" or "Microsoft Print to PDF"',
        "5. **Adjust settings**: Select A4 size, include background graphics",
        "6. **Save**: Click Save and choose your location",
        "This will create a high-quality PDF with all charts and formatting preserved.",
        "",
    )
    return "\n".join(instruction_lines)