ThinklySEO / report_generator.py
yashgori20's picture
domne
8913f77
import json
from typing import Dict, Any, List
from datetime import datetime
import plotly.graph_objects as go
import plotly.express as px
from plotly.offline import plot
import plotly
import re
from utils import safe_pct
from benchmarks import BENCHMARKS, badge
class ReportGenerator:
def __init__(self):
    """Build the generator and keep the HTML page template on the instance."""
    template = self._get_report_template()
    self.report_template = template
def _markdown_to_html(self, markdown_text: str) -> str:
"""Convert simple markdown to HTML"""
if not markdown_text:
return ""
html = markdown_text
# Convert headers
html = re.sub(r'^### (.*?)$', r'<h3>\1</h3>', html, flags=re.MULTILINE)
html = re.sub(r'^## (.*?)$', r'<h2>\1</h2>', html, flags=re.MULTILINE)
html = re.sub(r'^# (.*?)$', r'<h1>\1</h1>', html, flags=re.MULTILINE)
# Convert bold text
html = re.sub(r'\*\*(.*?)\*\*', r'<strong>\1</strong>', html)
# Convert bullet points
html = re.sub(r'^- (.*?)$', r'<li>\1</li>', html, flags=re.MULTILINE)
html = re.sub(r'^β€’ (.*?)$', r'<li>\1</li>', html, flags=re.MULTILINE)
# Wrap consecutive <li> tags in <ul>
html = re.sub(r'(<li>.*?</li>(?:\s*<li>.*?</li>)*)', r'<ul>\1</ul>', html, flags=re.DOTALL)
# Convert double line breaks to paragraphs
paragraphs = html.split('\n\n')
html_paragraphs = []
for para in paragraphs:
para = para.strip()
if para:
# Don't wrap headers or lists in <p> tags
if not (para.startswith('<h') or para.startswith('<ul>') or para.startswith('<li>')):
para = f'<p>{para}</p>'
html_paragraphs.append(para)
html = '\n'.join(html_paragraphs)
# Convert remaining single line breaks to <br> tags within paragraphs
html = re.sub(r'(?<!>)\n(?!<)', '<br>', html)
# Clean up extra <br> tags around block elements
html = re.sub(r'<br>\s*(<h[1-6]>)', r'\1', html)
html = re.sub(r'(</h[1-6]>)\s*<br>', r'\1', html)
html = re.sub(r'<br>\s*(<ul>|<p>)', r'\1', html)
html = re.sub(r'(</ul>|</p>)\s*<br>', r'\1', html)
return html
def generate_html_report(self, url: str, technical_data: Dict[str, Any],
                         content_data: Dict[str, Any], competitor_data: List[Dict] = None,
                         keywords_data: Dict[str, Any] = None, backlinks_data: Dict[str, Any] = None,
                         llm_recommendations: Dict[str, Any] = None, include_charts: bool = True) -> str:
    """Assemble the full HTML SEO report.

    Each report section is rendered by its own ``_generate_*`` helper and
    the results are substituted into ``self.report_template``. Optional
    inputs that are missing or empty produce an empty section.

    Returns:
        The complete HTML document as a string.
    """
    # Entries are listed in the order the helpers are invoked.
    sections = {
        'charts': (
            self._generate_charts(technical_data, content_data, competitor_data,
                                  keywords_data, backlinks_data)
            if include_charts else ""
        ),
        'executive_summary': self._generate_executive_summary_with_badges(
            technical_data, content_data, keywords_data, backlinks_data
        ),
        'technical_section': self._generate_technical_section(technical_data),
        'content_section': self._generate_content_section(content_data),
        'keywords_section': (
            self._generate_keywords_section(keywords_data) if keywords_data else ""
        ),
        'backlinks_section': (
            self._generate_backlinks_section(backlinks_data) if backlinks_data else ""
        ),
        'llm_recommendations': (
            self._generate_recommendations_section(llm_recommendations)
            if llm_recommendations else ""
        ),
        'competitor_section': (
            self._generate_competitor_section(competitor_data, technical_data, content_data)
            if competitor_data else ""
        ),
        'recommendations': self._generate_recommendations(technical_data, content_data),
    }

    timestamp = datetime.now().strftime("%B %d, %Y at %I:%M %p")
    return self.report_template.format(url=url, generated_date=timestamp, **sections)
def _generate_charts(self, technical_data: Dict[str, Any], content_data: Dict[str, Any],
competitor_data: List[Dict] = None, keywords_data: Dict[str, Any] = None,
backlinks_data: Dict[str, Any] = None) -> str:
"""Generate interactive charts using Plotly"""
charts_html = ""
# Performance Scores Chart
if not technical_data.get('error'):
mobile_scores = technical_data.get('mobile', {})
desktop_scores = technical_data.get('desktop', {})
performance_fig = go.Figure()
categories = ['Performance', 'SEO', 'Accessibility', 'Best Practices']
mobile_values = [
mobile_scores.get('performance_score', 0),
mobile_scores.get('seo_score', 0),
mobile_scores.get('accessibility_score', 0),
mobile_scores.get('best_practices_score', 0)
]
desktop_values = [
desktop_scores.get('performance_score', 0),
desktop_scores.get('seo_score', 0),
desktop_scores.get('accessibility_score', 0),
desktop_scores.get('best_practices_score', 0)
]
performance_fig.add_trace(go.Bar(
name='Mobile',
x=categories,
y=mobile_values,
marker_color='#FF6B6B'
))
performance_fig.add_trace(go.Bar(
name='Desktop',
x=categories,
y=desktop_values,
marker_color='#4ECDC4'
))
performance_fig.update_layout(
title='PageSpeed Insights Scores',
xaxis_title='Categories',
yaxis_title='Score (0-100)',
barmode='group',
height=400,
showlegend=True
)
charts_html += f'<div class="chart-container">{plot(performance_fig, output_type="div", include_plotlyjs=False)}</div>'
# Core Web Vitals Chart
if not technical_data.get('error'):
cwv_data = technical_data.get('core_web_vitals', {})
mobile_cwv = cwv_data.get('mobile', {})
desktop_cwv = cwv_data.get('desktop', {})
cwv_fig = go.Figure()
metrics = ['LCP (s)', 'CLS', 'INP (ms)', 'FCP (s)']
mobile_cwv_values = [
mobile_cwv.get('lcp', 0),
mobile_cwv.get('cls', 0),
mobile_cwv.get('inp', 0),
mobile_cwv.get('fcp', 0)
]
desktop_cwv_values = [
desktop_cwv.get('lcp', 0),
desktop_cwv.get('cls', 0),
desktop_cwv.get('inp', 0),
desktop_cwv.get('fcp', 0)
]
cwv_fig.add_trace(go.Scatter(
name='Mobile',
x=metrics,
y=mobile_cwv_values,
mode='lines+markers',
line=dict(color='#FF6B6B', width=3),
marker=dict(size=8)
))
cwv_fig.add_trace(go.Scatter(
name='Desktop',
x=metrics,
y=desktop_cwv_values,
mode='lines+markers',
line=dict(color='#4ECDC4', width=3),
marker=dict(size=8)
))
cwv_fig.update_layout(
title='Core Web Vitals Performance',
xaxis_title='Metrics',
yaxis_title='Values',
height=400,
showlegend=True
)
charts_html += f'<div class="chart-container">{plot(cwv_fig, output_type="div", include_plotlyjs=False)}</div>'
# Metadata Completeness Chart
if not content_data.get('error'):
metadata = content_data.get('metadata_completeness', {})
completeness_fig = go.Figure(data=[go.Pie(
labels=['Title Tags', 'Meta Descriptions', 'H1 Tags'],
values=[
metadata.get('title_coverage', 0),
metadata.get('description_coverage', 0),
metadata.get('h1_coverage', 0)
],
hole=0.4,
marker_colors=['#FF6B6B', '#4ECDC4', '#45B7D1']
)])
completeness_fig.update_layout(
title='Metadata Completeness (%)',
height=400,
showlegend=True
)
charts_html += f'<div class="chart-container">{plot(completeness_fig, output_type="div", include_plotlyjs=False)}</div>'
# Content Freshness Chart
if not content_data.get('error'):
freshness = content_data.get('content_freshness', {})
freshness_fig = go.Figure(data=[go.Pie(
labels=['Fresh (<6 months)', 'Moderate (6-18 months)', 'Stale (>18 months)', 'Unknown Date'],
values=[
freshness.get('fresh_content', {}).get('count', 0),
freshness.get('moderate_content', {}).get('count', 0),
freshness.get('stale_content', {}).get('count', 0),
freshness.get('unknown_date', {}).get('count', 0)
],
marker_colors=['#2ECC71', '#F39C12', '#E74C3C', '#95A5A6']
)])
freshness_fig.update_layout(
title='Content Freshness Distribution',
height=400,
showlegend=True
)
charts_html += f'<div class="chart-container">{plot(freshness_fig, output_type="div", include_plotlyjs=False)}</div>'
return charts_html
def _generate_executive_summary(self, technical_data: Dict[str, Any], content_data: Dict[str, Any],
keywords_data: Dict[str, Any] = None, backlinks_data: Dict[str, Any] = None,
llm_recommendations: Dict[str, Any] = None) -> str:
"""Generate executive summary section"""
# Calculate overall health score
mobile_perf = technical_data.get('mobile', {}).get('performance_score', 0)
desktop_perf = technical_data.get('desktop', {}).get('performance_score', 0)
avg_performance = (mobile_perf + desktop_perf) / 2
metadata_avg = 0
if not content_data.get('error'):
metadata = content_data.get('metadata_completeness', {})
metadata_avg = (
metadata.get('title_coverage', 0) +
metadata.get('description_coverage', 0) +
metadata.get('h1_coverage', 0)
) / 3
overall_score = (avg_performance + metadata_avg) / 2
# Health status
if overall_score >= 80:
health_status = "Excellent"
health_color = "#2ECC71"
elif overall_score >= 60:
health_status = "Good"
health_color = "#F39C12"
elif overall_score >= 40:
health_status = "Fair"
health_color = "#FF6B6B"
else:
health_status = "Poor"
health_color = "#E74C3C"
# Quick wins
quick_wins = []
if not content_data.get('error'):
metadata = content_data.get('metadata_completeness', {})
if metadata.get('title_coverage', 0) < 90:
quick_wins.append(f"Complete missing title tags ({100 - metadata.get('title_coverage', 0):.1f}% of pages missing)")
if metadata.get('description_coverage', 0) < 90:
quick_wins.append(f"Add missing meta descriptions ({100 - metadata.get('description_coverage', 0):.1f}% of pages missing)")
if metadata.get('h1_coverage', 0) < 90:
quick_wins.append(f"Add missing H1 tags ({100 - metadata.get('h1_coverage', 0):.1f}% of pages missing)")
if mobile_perf < 70:
quick_wins.append(f"Improve mobile performance score (currently {mobile_perf:.1f}/100)")
quick_wins_html = "".join([f"<li>{win}</li>" for win in quick_wins[:5]])
return f"""
<div class="summary-card">
<div class="health-score">
<h3>Overall SEO Health</h3>
<div class="score-circle" style="border-color: {health_color}">
<span class="score-number" style="color: {health_color}">{overall_score:.0f}</span>
<span class="score-label">/ 100</span>
</div>
<p class="health-status" style="color: {health_color}">{health_status}</p>
</div>
<div class="key-metrics">
<div class="metric">
<h4>Performance Score</h4>
<p>Mobile: {mobile_perf:.1f}/100</p>
<p>Desktop: {desktop_perf:.1f}/100</p>
</div>
<div class="metric">
<h4>Content Analysis</h4>
<p>Pages Analyzed: {content_data.get('pages_analyzed', 0)}</p>
<p>Metadata Completeness: {metadata_avg:.1f}%</p>
</div>
</div>
</div>
<div class="quick-wins">
<h3>🎯 Quick Wins</h3>
<ul>
{quick_wins_html}
{'' if quick_wins else '<li>Great job! No immediate quick wins identified.</li>'}
</ul>
</div>
"""
def _generate_executive_summary_with_badges(self, technical_data: Dict[str, Any],
                                            content_data: Dict[str, Any],
                                            keywords_data: Dict[str, Any] = None,
                                            backlinks_data: Dict[str, Any] = None) -> str:
    """Render the executive summary card followed by the benchmark badges.

    The overall score is the mean of the mobile performance score and the
    metadata completeness percentage.

    ``content_data['meta_complete_pct']`` and ``content_data['avg_words']``
    are used when present. When either is absent the value is derived from
    the fields the other report sections read (the three
    ``metadata_completeness`` coverages, and
    ``content_metrics['avg_word_count']``), so the score no longer collapses
    to 0 for content data that only carries the detailed fields.
    NOTE(review): which of the two shapes the content module emits is not
    visible here - confirm against the producer.

    Returns:
        HTML for the health card and the badge grid.
    """
    mobile_score = technical_data.get('mobile', {}).get('performance_score', 0)
    cwv = technical_data.get('core_web_vitals', {}).get('mobile', {})
    lcp_value = cwv.get('lcp', 0)
    cls_value = cwv.get('cls', 0)

    meta_complete_pct = content_data.get('meta_complete_pct')
    if meta_complete_pct is None:
        coverage = content_data.get('metadata_completeness', {})
        meta_complete_pct = (
            coverage.get('title_coverage', 0)
            + coverage.get('description_coverage', 0)
            + coverage.get('h1_coverage', 0)
        ) / 3

    avg_words = content_data.get('avg_words')
    if avg_words is None:
        avg_words = content_data.get('content_metrics', {}).get('avg_word_count', 0)

    # Share of tracked keywords ranking in the top 10 (0 without real data).
    keywords_top10_pct = 0
    if keywords_data and not keywords_data.get('placeholder'):
        dist = keywords_data.get('position_distribution', {})
        total = keywords_data.get('total_keywords', 0)
        if total > 0:
            keywords_top10_pct = (dist.get('top_10', 0) / total) * 100

    domain_rating = backlinks_data.get('domain_rating', 0) if backlinks_data else 0
    referring_domains = backlinks_data.get('total_ref_domains', 0) if backlinks_data else 0

    badges_html = self._generate_benchmark_badges(
        mobile_score, lcp_value, cls_value, meta_complete_pct,
        avg_words, keywords_top10_pct, domain_rating, referring_domains
    )

    overall_score = (mobile_score + meta_complete_pct) / 2
    if overall_score >= 80:
        health_status, health_color = "Excellent", "#2ECC71"
    elif overall_score >= 60:
        health_status, health_color = "Good", "#F39C12"
    elif overall_score >= 40:
        health_status, health_color = "Fair", "#FF6B6B"
    else:
        health_status, health_color = "Poor", "#E74C3C"

    return f"""
    <div class="summary-card">
        <div class="health-score">
            <h3>Overall SEO Health</h3>
            <div class="score-circle" style="border-color: {health_color}">
                <span class="score-number" style="color: {health_color}">{overall_score:.0f}</span>
                <span class="score-label">/ 100</span>
            </div>
            <p class="health-status" style="color: {health_color}">{health_status}</p>
        </div>
    </div>
    <h3>📊 Benchmark Performance</h3>
    {badges_html}
    """
def _generate_benchmark_badges(self, mobile_score, lcp_value, cls_value, meta_complete_pct,
                               avg_words, keywords_top10_pct, domain_rating, referring_domains) -> str:
    """Render one pass/fail badge per benchmark for the executive summary.

    Each metric is compared with its threshold in ``BENCHMARKS`` and passed
    to ``badge``; the result is read via its ``'status'`` and ``'value'``
    keys. LCP and the average word count fail when they are not positive,
    and CLS fails when it is negative.

    Returns:
        A ``<div class="benchmark-badges">`` element holding eight badges.
    """
    # (label, displayed target, badge) for every benchmark, in display order.
    rows = [
        ("Mobile Performance",
         f"> {BENCHMARKS['mobile_score_min']}",
         badge(f"{mobile_score}", mobile_score >= BENCHMARKS['mobile_score_min'])),
        ("LCP",
         f"< {BENCHMARKS['lcp_max']}s",
         badge(f"{lcp_value:.1f}s", lcp_value <= BENCHMARKS['lcp_max'] if lcp_value > 0 else False)),
        ("CLS",
         f"< {BENCHMARKS['cls_max']}",
         badge(f"{cls_value:.3f}", cls_value <= BENCHMARKS['cls_max'] if cls_value >= 0 else False)),
        ("Meta Completeness",
         f"> {BENCHMARKS['meta_complete_min']}%",
         badge(f"{meta_complete_pct:.1f}%", meta_complete_pct >= BENCHMARKS['meta_complete_min'])),
        ("Content Length",
         f"{BENCHMARKS['avg_words_min']}-{BENCHMARKS['avg_words_max']}",
         badge(f"{avg_words} words",
               BENCHMARKS['avg_words_min'] <= avg_words <= BENCHMARKS['avg_words_max']
               if avg_words > 0 else False)),
        ("Top 10 Keywords",
         f"> {BENCHMARKS['keywords_top10_min']}%",
         badge(f"{keywords_top10_pct:.1f}%", keywords_top10_pct >= BENCHMARKS['keywords_top10_min'])),
        ("Domain Rating",
         f"> {BENCHMARKS['domain_rating_min']}",
         badge(f"DR {domain_rating}", domain_rating >= BENCHMARKS['domain_rating_min'])),
        ("Referring Domains",
         f"> {BENCHMARKS['referring_domains_min']}",
         badge(f"{referring_domains} domains", referring_domains >= BENCHMARKS['referring_domains_min'])),
    ]

    parts = ['<div class="benchmark-badges">']
    for label, target, badge_data in rows:
        passed = badge_data['status'] == 'pass'
        status_class = 'pass' if passed else 'fail'
        icon = '✓' if passed else '✗'
        value = badge_data['value']
        parts.append(f'''
        <div class="benchmark-badge {status_class}">
            <div class="badge-icon">{icon}</div>
            <div class="badge-content">
                <div class="badge-value">{value}</div>
                <div class="badge-label">{label}</div>
                <div class="badge-target">Target: {target}</div>
            </div>
        </div>
        ''')
    parts.append('</div>')
    return "".join(parts)
def _generate_technical_section(self, technical_data: Dict[str, Any]) -> str:
"""Generate technical SEO section"""
if technical_data.get('error'):
return f"""
<div class="error-message">
<h3>⚠️ Technical SEO Analysis</h3>
<p>Unable to complete technical analysis: {technical_data.get('error')}</p>
</div>
"""
mobile = technical_data.get('mobile', {})
desktop = technical_data.get('desktop', {})
cwv = technical_data.get('core_web_vitals', {})
opportunities = technical_data.get('opportunities', {}).get('opportunities', [])
# Core Web Vitals analysis
mobile_cwv = cwv.get('mobile', {})
cwv_analysis = []
lcp = mobile_cwv.get('lcp', 0)
if lcp > 2.5:
cwv_analysis.append(f"⚠️ LCP ({lcp:.2f}s) - Should be under 2.5s")
else:
cwv_analysis.append(f"βœ… LCP ({lcp:.2f}s) - Good")
cls = mobile_cwv.get('cls', 0)
if cls > 0.1:
cwv_analysis.append(f"⚠️ CLS ({cls:.3f}) - Should be under 0.1")
else:
cwv_analysis.append(f"βœ… CLS ({cls:.3f}) - Good")
# Opportunities list
opportunities_html = ""
for opp in opportunities[:5]:
opportunities_html += f"""
<div class="opportunity">
<h4>{opp.get('title', 'Optimization Opportunity')}</h4>
<p>{opp.get('description', '')}</p>
<span class="savings">Potential savings: {opp.get('potential_savings', 0):.0f}ms</span>
</div>
"""
return f"""
<div class="technical-metrics">
<div class="metric-row">
<div class="metric-card">
<h4>Mobile Performance</h4>
<div class="score">{mobile.get('performance_score', 0):.1f}/100</div>
</div>
<div class="metric-card">
<h4>Desktop Performance</h4>
<div class="score">{desktop.get('performance_score', 0):.1f}/100</div>
</div>
<div class="metric-card">
<h4>SEO Score</h4>
<div class="score">{mobile.get('seo_score', 0):.1f}/100</div>
</div>
<div class="metric-card">
<h4>Accessibility</h4>
<div class="score">{mobile.get('accessibility_score', 0):.1f}/100</div>
</div>
</div>
</div>
<div class="cwv-analysis">
<h3>Core Web Vitals Analysis</h3>
<ul>
{"".join([f"<li>{analysis}</li>" for analysis in cwv_analysis])}
</ul>
</div>
<div class="optimization-opportunities">
<h3>πŸ”§ Optimization Opportunities</h3>
{opportunities_html if opportunities_html else '<p>No major optimization opportunities identified.</p>'}
</div>
"""
def _generate_content_section(self, content_data: Dict[str, Any]) -> str:
"""Generate content audit section"""
if content_data.get('error'):
return f"""
<div class="error-message">
<h3>⚠️ Content Audit</h3>
<p>Unable to complete content analysis: {content_data.get('error')}</p>
</div>
"""
metadata = content_data.get('metadata_completeness', {})
content_metrics = content_data.get('content_metrics', {})
freshness = content_data.get('content_freshness', {})
return f"""
<div class="content-overview">
<div class="metric-row">
<div class="metric-card">
<h4>Pages Discovered</h4>
<div class="score">{content_data.get('total_pages_discovered', 0)}</div>
</div>
<div class="metric-card">
<h4>Pages Analyzed</h4>
<div class="score">{content_data.get('pages_analyzed', 0)}</div>
</div>
<div class="metric-card">
<h4>Avg. Word Count</h4>
<div class="score">{content_metrics.get('avg_word_count', 0):.0f}</div>
</div>
<div class="metric-card">
<h4>CTA Coverage</h4>
<div class="score">{content_metrics.get('cta_coverage', 0):.1f}%</div>
</div>
</div>
</div>
<div class="metadata-analysis">
<h3>πŸ“ Metadata Completeness</h3>
<div class="metadata-stats">
<div class="stat">
<span class="label">Title Tags:</span>
<span class="value">{metadata.get('title_coverage', 0):.1f}% complete</span>
<span class="benchmark">(Target: 90%+)</span>
</div>
<div class="stat">
<span class="label">Meta Descriptions:</span>
<span class="value">{metadata.get('description_coverage', 0):.1f}% complete</span>
<span class="benchmark">(Target: 90%+)</span>
</div>
<div class="stat">
<span class="label">H1 Tags:</span>
<span class="value">{metadata.get('h1_coverage', 0):.1f}% complete</span>
<span class="benchmark">(Target: 90%+)</span>
</div>
</div>
</div>
<div class="content-quality">
<h3>πŸ“Š Content Quality Metrics</h3>
<div class="quality-stats">
<div class="stat">
<span class="label">Average Word Count:</span>
<span class="value">{content_metrics.get('avg_word_count', 0):.0f} words</span>
<span class="benchmark">(Recommended: 800-1200)</span>
</div>
<div class="stat">
<span class="label">Call-to-Action Coverage:</span>
<span class="value">{content_metrics.get('cta_coverage', 0):.1f}% of pages</span>
<span class="benchmark">(Target: 80%+)</span>
</div>
</div>
</div>
<div class="content-freshness">
<h3>πŸ—“οΈ Content Freshness</h3>
<div class="freshness-stats">
<div class="stat">
<span class="label">Fresh Content (&lt;6 months):</span>
<span class="value">{freshness.get('fresh_content', {}).get('percentage', 0):.1f}%</span>
</div>
<div class="stat">
<span class="label">Moderate Age (6-18 months):</span>
<span class="value">{freshness.get('moderate_content', {}).get('percentage', 0):.1f}%</span>
</div>
<div class="stat">
<span class="label">Stale Content (&gt;18 months):</span>
<span class="value">{freshness.get('stale_content', {}).get('percentage', 0):.1f}%</span>
</div>
</div>
</div>
"""
def _generate_competitor_section(self, competitor_data: List[Dict],
primary_technical: Dict[str, Any],
primary_content: Dict[str, Any]) -> str:
"""Generate competitor comparison section"""
if not competitor_data:
return ""
comparison_html = """
<div class="competitor-comparison">
<h3>πŸ† Competitor Benchmarking</h3>
<table class="comparison-table">
<thead>
<tr>
<th>Domain</th>
<th>Mobile Perf.</th>
<th>Desktop Perf.</th>
<th>SEO Score</th>
<th>Content Pages</th>
</tr>
</thead>
<tbody>
"""
# Add primary site
primary_mobile = primary_technical.get('mobile', {}).get('performance_score', 0)
primary_desktop = primary_technical.get('desktop', {}).get('performance_score', 0)
primary_seo = primary_technical.get('mobile', {}).get('seo_score', 0)
primary_pages = primary_content.get('pages_analyzed', 0)
comparison_html += f"""
<tr class="primary-site">
<td><strong>Your Site</strong></td>
<td>{primary_mobile:.1f}</td>
<td>{primary_desktop:.1f}</td>
<td>{primary_seo:.1f}</td>
<td>{primary_pages}</td>
</tr>
"""
# Add competitors
for comp in competitor_data:
comp_technical = comp.get('technical', {})
comp_content = comp.get('content', {})
comp_mobile = comp_technical.get('mobile', {}).get('performance_score', 0)
comp_desktop = comp_technical.get('desktop', {}).get('performance_score', 0)
comp_seo = comp_technical.get('mobile', {}).get('seo_score', 0)
comp_pages = comp_content.get('pages_analyzed', 0)
domain = comp.get('url', '').replace('https://', '').replace('http://', '')
comparison_html += f"""
<tr>
<td>{domain}</td>
<td>{comp_mobile:.1f}</td>
<td>{comp_desktop:.1f}</td>
<td>{comp_seo:.1f}</td>
<td>{comp_pages}</td>
</tr>
"""
comparison_html += """
</tbody>
</table>
</div>
"""
return comparison_html
def _generate_recommendations(self, technical_data: Dict[str, Any], content_data: Dict[str, Any]) -> str:
"""Generate prioritized recommendations"""
recommendations = []
# Technical recommendations
if not technical_data.get('error'):
mobile = technical_data.get('mobile', {})
if mobile.get('performance_score', 0) < 70:
recommendations.append({
'priority': 'High',
'category': 'Technical SEO',
'title': 'Improve Mobile Performance',
'description': f'Mobile performance score is {mobile.get("performance_score", 0):.1f}/100. Focus on Core Web Vitals optimization.',
'timeline': '2-4 weeks'
})
# Content recommendations
if not content_data.get('error'):
metadata = content_data.get('metadata_completeness', {})
if metadata.get('title_coverage', 0) < 90:
recommendations.append({
'priority': 'High',
'category': 'Content',
'title': 'Complete Missing Title Tags',
'description': f'{100 - metadata.get("title_coverage", 0):.1f}% of pages are missing title tags. This directly impacts search visibility.',
'timeline': '1-2 weeks'
})
if metadata.get('description_coverage', 0) < 90:
recommendations.append({
'priority': 'Medium',
'category': 'Content',
'title': 'Add Missing Meta Descriptions',
'description': f'{100 - metadata.get("description_coverage", 0):.1f}% of pages are missing meta descriptions. Improve click-through rates from search results.',
'timeline': '2-3 weeks'
})
content_metrics = content_data.get('content_metrics', {})
if content_metrics.get('avg_word_count', 0) < 800:
recommendations.append({
'priority': 'Medium',
'category': 'Content',
'title': 'Increase Content Depth',
'description': f'Average word count is {content_metrics.get("avg_word_count", 0):.0f} words. Aim for 800-1200 words per page for better rankings.',
'timeline': '4-6 weeks'
})
# Sort by priority
priority_order = {'High': 0, 'Medium': 1, 'Low': 2}
recommendations.sort(key=lambda x: priority_order.get(x['priority'], 2))
recommendations_html = ""
for i, rec in enumerate(recommendations[:8], 1):
priority_color = {
'High': '#E74C3C',
'Medium': '#F39C12',
'Low': '#2ECC71'
}.get(rec['priority'], '#95A5A6')
recommendations_html += f"""
<div class="recommendation">
<div class="rec-header">
<span class="rec-number">{i}</span>
<span class="rec-priority" style="background-color: {priority_color}">{rec['priority']}</span>
<span class="rec-category">{rec['category']}</span>
</div>
<h4>{rec['title']}</h4>
<p>{rec['description']}</p>
<div class="rec-timeline">Timeline: {rec['timeline']}</div>
</div>
"""
return f"""
<div class="recommendations-section">
<h3>🎯 Prioritized Recommendations</h3>
<div class="recommendations-list">
{recommendations_html if recommendations_html else '<p>Great job! No immediate recommendations identified.</p>'}
</div>
</div>
"""
def _generate_keywords_section(self, keywords_data: Dict[str, Any]) -> str:
"""Generate keywords analysis section"""
if keywords_data.get('placeholder'):
return f"""
<div class="placeholder-section">
<h3>πŸ” Keyword Rankings</h3>
<div class="placeholder-content">
<p><strong>No keyword data available.</strong></p>
<p>{keywords_data.get('message', 'Connect Google Search Console or SERP API to unlock keyword insights.')}</p>
</div>
</div>
"""
total = keywords_data.get('total_keywords', 0)
pos_dist = keywords_data.get('position_distribution', {})
best_keywords = keywords_data.get('best_keywords', [])
opportunity_keywords = keywords_data.get('opportunity_keywords', [])
worst_keywords = keywords_data.get('worst_keywords', {})
# Create position distribution chart
pos_chart = ""
if pos_dist:
import plotly.graph_objects as go
from plotly.offline import plot
labels = ['Top 3', 'Top 10', 'Top 50', 'Beyond 50']
values = [
pos_dist.get('top_3', 0),
pos_dist.get('top_10', 0) - pos_dist.get('top_3', 0),
pos_dist.get('top_50', 0) - pos_dist.get('top_10', 0),
pos_dist.get('beyond_50', 0)
]
fig = go.Figure(data=[go.Pie(labels=labels, values=values, hole=0.4)])
fig.update_layout(title="Keyword Position Distribution", height=400)
pos_chart = plot(fig, include_plotlyjs=False, output_type='div')
best_keywords_html = ""
if best_keywords:
best_keywords_html = "<h4>πŸ† Top Performing Keywords</h4><table class='data-table'><tr><th>Keyword</th><th>Position</th><th>Clicks</th><th>Impressions</th></tr>"
for kw in best_keywords[:10]:
best_keywords_html += f"""
<tr>
<td>{kw.get('keyword', '')}</td>
<td>{kw.get('position', 0)}</td>
<td>{kw.get('clicks', 0)}</td>
<td>{kw.get('impressions', 0)}</td>
</tr>
"""
best_keywords_html += "</table>"
opportunity_html = ""
if opportunity_keywords:
opportunity_html = "<h4>πŸš€ Opportunity Keywords</h4><table class='data-table'><tr><th>Keyword</th><th>Position</th><th>Impressions</th><th>CTR</th></tr>"
for kw in opportunity_keywords[:10]:
opportunity_html += f"""
<tr>
<td>{kw.get('keyword', '')}</td>
<td>{kw.get('position', 0)}</td>
<td>{kw.get('impressions', 0)}</td>
<td>{kw.get('ctr', 0)}%</td>
</tr>
"""
opportunity_html += "</table>"
# Worst performing keywords
worst_keywords_html = ""
if worst_keywords.get('by_ctr') or worst_keywords.get('by_position'):
worst_keywords_html = "<h4>⚠️ Worst Performing Keywords</h4>"
if worst_keywords.get('by_ctr'):
worst_keywords_html += "<h5>By CTR (Low Click-Through Rate)</h5>"
worst_keywords_html += "<table class='data-table'><tr><th>Keyword</th><th>Position</th><th>Impressions</th><th>CTR</th></tr>"
for kw in worst_keywords['by_ctr'][:10]:
worst_keywords_html += f"""
<tr>
<td>{kw.get('keyword', '')}</td>
<td>{kw.get('rank', 0)}</td>
<td>{kw.get('impressions', 0)}</td>
<td>{kw.get('estimated_ctr', 0):.2f}%</td>
</tr>
"""
worst_keywords_html += "</table>"
if worst_keywords.get('by_position'):
worst_keywords_html += "<h5>By Position (Poor Rankings)</h5>"
worst_keywords_html += "<table class='data-table'><tr><th>Keyword</th><th>Position</th><th>Impressions</th></tr>"
for kw in worst_keywords['by_position'][:10]:
worst_keywords_html += f"""
<tr>
<td>{kw.get('keyword', '')}</td>
<td>{kw.get('rank', 0)}</td>
<td>{kw.get('impressions', 0)}</td>
</tr>
"""
worst_keywords_html += "</table>"
return f"""
<div class="card">
<h3>πŸ” Keyword Rankings Analysis</h3>
<div class="metrics-grid">
<div class="metric-card">
<div class="metric-value">{total}</div>
<div class="metric-label">Total Keywords</div>
</div>
<div class="metric-card">
<div class="metric-value">{pos_dist.get('top_10', 0)}</div>
<div class="metric-label">Top 10 Rankings</div>
</div>
<div class="metric-card">
<div class="metric-value">{len(opportunity_keywords)}</div>
<div class="metric-label">Opportunities</div>
</div>
<div class="metric-card">
<div class="metric-value">{keywords_data.get('data_source', 'Unknown')}</div>
<div class="metric-label">Data Source</div>
</div>
</div>
{pos_chart}
{best_keywords_html}
{worst_keywords_html}
{opportunity_html}
</div>
"""
def _generate_backlinks_section(self, backlinks_data: Dict[str, Any]) -> str:
"""Generate backlinks analysis section"""
if backlinks_data.get('placeholder'):
return f"""
<div class="placeholder-section">
<h3>πŸ”— Backlink Profile</h3>
<div class="placeholder-content">
<p><strong>No backlink data available.</strong></p>
<p>{backlinks_data.get('message', 'Add RapidAPI key to unlock comprehensive backlink insights.')}</p>
</div>
</div>
"""
total_backlinks = backlinks_data.get('total_backlinks', 0)
total_ref_domains = backlinks_data.get('total_ref_domains', 0)
domain_rating = backlinks_data.get('domain_rating', 0)
monthly_changes = backlinks_data.get('monthly_changes', {})
referring_domains = backlinks_data.get('referring_domains', [])
anchor_distribution = backlinks_data.get('anchor_distribution', [])
new_backlinks = backlinks_data.get('new_backlinks_30d', 0)
lost_backlinks = backlinks_data.get('lost_backlinks_30d')
data_source = backlinks_data.get('data_source', 'Unknown')
# Create anchor text distribution chart
anchor_chart = ""
if anchor_distribution:
import plotly.graph_objects as go
from plotly.offline import plot
anchors = [a.get('anchor_text', '')[:30] for a in anchor_distribution[:10]]
counts = [a.get('backlinks', 0) for a in anchor_distribution[:10]]
fig = go.Figure(data=[go.Bar(x=anchors, y=counts)])
fig.update_layout(title="Top Anchor Text Distribution", height=400, xaxis={'tickangle': 45})
anchor_chart = plot(fig, include_plotlyjs=False, output_type='div')
ref_domains_html = ""
if referring_domains:
ref_domains_html = "<h4>🏒 Top Referring Domains</h4><table class='data-table'><tr><th>Domain</th><th>Domain Rating</th><th>Backlinks</th><th>First Seen</th></tr>"
for rd in referring_domains[:10]:
ref_domains_html += f"""
<tr>
<td>{rd.get('domain', '')}</td>
<td>{rd.get('domain_rating', 0)}</td>
<td>{rd.get('backlinks', 0)}</td>
<td>{rd.get('first_seen', 'N/A')}</td>
</tr>
"""
ref_domains_html += "</table>"
lost_display = "N/A (future work)" if lost_backlinks is None else str(lost_backlinks)
return f"""
<div class="card">
<h3>πŸ”— Backlink Profile Analysis</h3>
<p class="data-source-label">Source: {data_source}</p>
<div class="metrics-grid">
<div class="metric-card">
<div class="metric-value">{total_backlinks:,}</div>
<div class="metric-label">Total Backlinks</div>
</div>
<div class="metric-card">
<div class="metric-value">{total_ref_domains:,}</div>
<div class="metric-label">Referring Domains</div>
</div>
<div class="metric-card">
<div class="metric-value">{domain_rating}</div>
<div class="metric-label">Domain Rating</div>
</div>
<div class="metric-card">
<div class="metric-value">{new_backlinks}</div>
<div class="metric-label">New Links (30d)</div>
</div>
<div class="metric-card">
<div class="metric-value">{lost_display}</div>
<div class="metric-label">Lost Links (30d)</div>
</div>
</div>
{anchor_chart}
{ref_domains_html}
</div>
"""
def _generate_recommendations_section(self, llm_recommendations: Dict[str, Any]) -> str:
"""Generate LLM-powered recommendations section with markdown rendering"""
if not llm_recommendations:
return ""
recommendations_markdown = llm_recommendations.get('recommendations_markdown', '')
executive_insights = llm_recommendations.get('executive_insights', [])
priority_actions = llm_recommendations.get('priority_actions', [])
# Skip executive insights and priority actions - show only markdown
insights_html = ""
priority_html = ""
# Convert markdown recommendations to HTML
recommendations_html = ""
if recommendations_markdown:
recommendations_html = f"""
<div class='llm-recommendations'>
<h4>πŸ€– AI-Generated Recommendations</h4>
<div class="markdown-content">
{self._markdown_to_html(recommendations_markdown)}
</div>
</div>
"""
return f"""
<div class="card">
<h3>🧠 Smart Recommendations</h3>
<p class="data-source">Generated by {llm_recommendations.get('data_source', 'AI Analysis')}</p>
{insights_html}
{priority_html}
{recommendations_html}
</div>
"""
def _get_report_template(self) -> str:
"""Get the HTML template for the report"""
return """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>SEO Report - {url}</title>
<script src="https://cdn.plot.ly/plotly-latest.min.js"></script>
<style>
* {{
margin: 0;
padding: 0;
box-sizing: border-box;
}}
body {{
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
line-height: 1.6;
color: #333;
background-color: #f8f9fa;
}}
.report-container {{
max-width: 1200px;
margin: 0 auto;
padding: 20px;
}}
.report-header {{
background: #f8f9fa;
color: #333;
border: 2px solid #e9ecef;
padding: 40px;
border-radius: 10px;
margin-bottom: 30px;
text-align: center;
}}
.report-header h1 {{
font-size: 2.5rem;
margin-bottom: 10px;
}}
.report-header p {{
font-size: 1.1rem;
opacity: 0.9;
}}
.section {{
background: white;
margin-bottom: 30px;
padding: 30px;
border-radius: 10px;
box-shadow: 0 2px 10px rgba(0,0,0,0.1);
}}
.section h2 {{
color: #2c3e50;
margin-bottom: 20px;
font-size: 1.8rem;
border-bottom: 3px solid #3498db;
padding-bottom: 10px;
}}
.summary-card {{
display: flex;
justify-content: space-between;
align-items: center;
margin-bottom: 30px;
padding: 20px;
background: #f8f9fa;
border: 2px solid #28a745;
border-radius: 10px;
color: #333;
}}
.health-score {{
text-align: center;
}}
.score-circle {{
width: 120px;
height: 120px;
border: 6px solid;
border-radius: 50%;
display: flex;
flex-direction: column;
align-items: center;
justify-content: center;
margin: 10px auto;
}}
.score-number {{
font-size: 2rem;
font-weight: bold;
}}
.score-label {{
font-size: 0.9rem;
opacity: 0.8;
}}
.health-status {{
font-size: 1.2rem;
font-weight: bold;
margin-top: 10px;
}}
.key-metrics {{
display: flex;
gap: 30px;
}}
.metric {{
text-align: center;
}}
.metric h4 {{
margin-bottom: 10px;
font-size: 1rem;
opacity: 0.9;
}}
.metric p {{
font-size: 1.1rem;
margin-bottom: 5px;
}}
.quick-wins {{
background: #fff3cd;
border: 1px solid #ffeeba;
border-radius: 8px;
padding: 20px;
}}
.quick-wins h3 {{
color: #856404;
margin-bottom: 15px;
}}
.quick-wins ul {{
list-style-type: none;
}}
.quick-wins li {{
color: #856404;
margin-bottom: 8px;
position: relative;
padding-left: 20px;
}}
.quick-wins li:before {{
content: "β†’";
position: absolute;
left: 0;
color: #ffc107;
font-weight: bold;
}}
.metric-row {{
display: grid;
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
gap: 20px;
margin-bottom: 30px;
}}
.metric-card {{
background: #fff;
border: 2px solid #6c757d;
color: #333;
padding: 20px;
border-radius: 10px;
text-align: center;
}}
.metric-card h4 {{
font-size: 0.9rem;
margin-bottom: 10px;
opacity: 0.9;
}}
.metric-card .score {{
font-size: 2rem;
font-weight: bold;
}}
.chart-container {{
margin: 30px 0;
background: white;
border-radius: 10px;
padding: 20px;
box-shadow: 0 2px 5px rgba(0,0,0,0.1);
}}
.cwv-analysis ul, .metadata-stats, .quality-stats, .freshness-stats {{
list-style: none;
}}
.stat {{
display: flex;
justify-content: space-between;
align-items: center;
padding: 10px 0;
border-bottom: 1px solid #eee;
}}
.stat:last-child {{
border-bottom: none;
}}
.stat .label {{
font-weight: 600;
color: #2c3e50;
}}
.stat .value {{
font-weight: bold;
color: #3498db;
}}
.stat .benchmark {{
font-size: 0.85rem;
color: #7f8c8d;
}}
.opportunity {{
background: #f8f9fa;
border-left: 4px solid #ff6b6b;
padding: 15px;
margin-bottom: 15px;
border-radius: 5px;
}}
.opportunity h4 {{
color: #2c3e50;
margin-bottom: 8px;
}}
.savings {{
display: inline-block;
background: #ff6b6b;
color: white;
padding: 4px 8px;
border-radius: 4px;
font-size: 0.8rem;
margin-top: 8px;
}}
.comparison-table {{
width: 100%;
border-collapse: collapse;
margin-top: 20px;
}}
.comparison-table th,
.comparison-table td {{
padding: 12px;
text-align: left;
border-bottom: 1px solid #ddd;
}}
.comparison-table th {{
background: #f8f9fa;
font-weight: bold;
color: #2c3e50;
}}
.primary-site {{
background: #e8f5e8;
font-weight: bold;
}}
.placeholder-sections {{
display: grid;
grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
gap: 20px;
}}
.placeholder-section {{
border: 2px dashed #ddd;
border-radius: 10px;
padding: 20px;
text-align: center;
background: #fafafa;
}}
.placeholder-section h3 {{
color: #7f8c8d;
margin-bottom: 15px;
}}
.placeholder-content p {{
color: #7f8c8d;
font-style: italic;
margin-bottom: 15px;
}}
.placeholder-content ul {{
list-style: none;
color: #95a5a6;
}}
.placeholder-content li {{
margin-bottom: 8px;
}}
.recommendations-section {{
background: #f8f9fa;
border: 2px solid #007bff;
color: #333;
border-radius: 10px;
padding: 30px;
}}
.recommendations-section h3 {{
margin-bottom: 25px;
font-size: 1.8rem;
}}
.recommendation {{
background: white;
color: #333;
border-radius: 8px;
padding: 20px;
margin-bottom: 20px;
}}
.rec-header {{
display: flex;
align-items: center;
gap: 10px;
margin-bottom: 10px;
}}
.rec-number {{
background: #3498db;
color: white;
width: 30px;
height: 30px;
border-radius: 50%;
display: flex;
align-items: center;
justify-content: center;
font-weight: bold;
}}
.rec-priority {{
color: white;
padding: 4px 8px;
border-radius: 4px;
font-size: 0.8rem;
font-weight: bold;
}}
.rec-category {{
background: #ecf0f1;
color: #2c3e50;
padding: 4px 8px;
border-radius: 4px;
font-size: 0.8rem;
}}
.rec-timeline {{
color: #7f8c8d;
font-size: 0.9rem;
margin-top: 10px;
font-weight: bold;
}}
.error-message {{
background: #f8d7da;
border: 1px solid #f5c6cb;
color: #721c24;
padding: 20px;
border-radius: 8px;
text-align: center;
}}
.markdown-content {{
line-height: 1.6;
color: #2c3e50;
}}
.markdown-content h1 {{
color: #2c3e50;
border-bottom: 2px solid #3498db;
padding-bottom: 10px;
margin-top: 30px;
margin-bottom: 20px;
}}
.markdown-content h2 {{
color: #34495e;
margin-top: 25px;
margin-bottom: 15px;
font-size: 1.3em;
}}
.markdown-content h3 {{
color: #34495e;
margin-top: 20px;
margin-bottom: 10px;
font-size: 1.1em;
}}
.markdown-content strong {{
color: #2c3e50;
font-weight: 600;
}}
.markdown-content ul {{
margin: 15px 0;
padding-left: 20px;
}}
.markdown-content li {{
margin-bottom: 8px;
line-height: 1.5;
}}
.llm-recommendations {{
background: #f8f9fa;
border-left: 4px solid #3498db;
padding: 20px;
margin: 20px 0;
border-radius: 0 8px 8px 0;
}}
@media (max-width: 768px) {{
.report-container {{
padding: 10px;
}}
.section {{
padding: 20px;
}}
.summary-card {{
flex-direction: column;
text-align: center;
gap: 20px;
}}
.key-metrics {{
flex-direction: column;
gap: 15px;
}}
.metric-row {{
grid-template-columns: 1fr;
}}
}}
/* Benchmark badges */
.benchmark-badges {{
display: grid;
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
gap: 15px;
margin-bottom: 30px;
padding: 20px;
background: #f8f9fa;
border-radius: 10px;
border: 2px solid #e9ecef;
}}
.benchmark-badge {{
display: flex;
align-items: center;
background: white;
padding: 15px;
border-radius: 8px;
border: 2px solid;
}}
.benchmark-badge.pass {{
border-color: #28a745;
background: #f8fff8;
}}
.benchmark-badge.fail {{
border-color: #dc3545;
background: #fff8f8;
}}
.badge-icon {{
font-size: 1.2rem;
margin-right: 12px;
font-weight: bold;
}}
.benchmark-badge.pass .badge-icon {{
color: #28a745;
}}
.benchmark-badge.fail .badge-icon {{
color: #dc3545;
}}
.badge-content {{
flex: 1;
}}
.badge-value {{
font-weight: bold;
font-size: 1rem;
margin-bottom: 2px;
}}
.badge-label {{
font-size: 0.85rem;
color: #666;
margin-bottom: 2px;
}}
.badge-target {{
font-size: 0.75rem;
color: #888;
}}
/* Data source labels */
.data-source-label {{
font-size: 0.9rem;
color: #6c757d;
font-style: italic;
margin-bottom: 15px;
}}
/* Benchmark target labels */
.benchmark-target {{
font-size: 0.8rem;
color: #6c757d;
margin-bottom: 10px;
font-style: italic;
}}
/* Stale pages section */
.stale-pages-section {{
margin: 20px 0;
padding: 20px;
background: #fff3cd;
border: 1px solid #ffeeba;
border-radius: 8px;
}}
.stale-pages-list {{
max-height: 300px;
overflow-y: auto;
}}
.stale-page-item {{
padding: 8px 0;
border-bottom: 1px solid #f0f0f0;
font-size: 0.9rem;
}}
.stale-page-item:last-child {{
border-bottom: none;
}}
.stale-page-item .url {{
color: #007bff;
margin-right: 10px;
}}
.stale-page-item .date {{
color: #6c757d;
font-size: 0.8rem;
}}
.more-pages {{
padding: 10px;
text-align: center;
font-style: italic;
color: #6c757d;
}}
/* hreflang section */
.hreflang-section {{
margin: 20px 0;
padding: 20px;
background: #d1ecf1;
border: 1px solid #bee5eb;
border-radius: 8px;
}}
.hreflang-summary {{
font-weight: bold;
margin-bottom: 15px;
color: #0c5460;
}}
.hreflang-percentage {{
font-size: 1.2rem;
color: #0c5460;
}}
.hreflang-samples .sample-item {{
padding: 5px 0;
font-size: 0.9rem;
color: #0c5460;
}}
.hreflang-samples .url {{
color: #007bff;
margin-right: 10px;
}}
</style>
</head>
<body>
<div class="report-container">
<div class="report-header">
<h1>πŸ” SEO Analysis Report</h1>
<p>{url}</p>
<p>Generated on {generated_date}</p>
</div>
<div class="section">
<h2>πŸ“Š Executive Summary</h2>
{executive_summary}
</div>
<div class="section">
<h2>πŸ“ˆ Performance Charts</h2>
{charts}
</div>
<div class="section">
<h2>⚑ Technical SEO</h2>
{technical_section}
</div>
<div class="section">
<h2>πŸ“ Content Audit</h2>
{content_section}
</div>
<div class="section">
<h2>πŸ” Keywords Analysis</h2>
{keywords_section}
</div>
<div class="section">
<h2>πŸ”— Backlinks Profile</h2>
{backlinks_section}
</div>
{competitor_section}
<div class="section">
{recommendations}
</div>
{llm_recommendations}
</div>
</body>
</html>
"""