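"""Flask app for generating SEO reports.

Orchestrates the technical SEO, content audit, keywords, and backlinks
modules, optionally pulls keyword data from Google Search Console via
OAuth, and renders/stores HTML reports for viewing and download.
"""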
from flask import Flask, render_template, request, jsonify, send_file, redirect, url_for, session
import validators
import os
import tempfile
import uuid
from urllib.parse import urlparse
from typing import Dict, Any, List
# Load environment variables from .env file
try:
    from dotenv import load_dotenv
    load_dotenv()
except ImportError:
    print("python-dotenv not installed. Using system environment variables only.")
from modules.technical_seo import TechnicalSEOModule
from modules.content_audit import ContentAuditModule
from modules.keywords import KeywordsModule
from modules.backlinks import BacklinksModule
from report_generator import ReportGenerator
from simple_pdf_generator import SimplePDFGenerator
from llm_recommendations import LLMRecommendations
from gsc_client import GSCClient
from utils import safe_pct
from benchmarks import BENCHMARKS, badge
app = Flask(__name__, static_folder='static')
app.secret_key = os.getenv('FLASK_SECRET_KEY', 'seo_report_generator_2024')

technical_module = TechnicalSEOModule(api_key=os.getenv('GOOGLE_API_KEY'))
content_module = ContentAuditModule()
keywords_module = KeywordsModule()
backlinks_module = BacklinksModule()
report_gen = ReportGenerator()
pdf_gen = SimplePDFGenerator()
llm_recommendations = LLMRecommendations()
try:
    gsc_client = GSCClient()
except ImportError as e:
    print(f"GSC client not available: {e}")
    gsc_client = None
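# In-memory report store keyed by a per-report UUID. Contents are lost on
# process restart; no persistence layer is used here.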
reports_store = {}
def _transform_keywords_data(new_data: Dict[str, Any]) -> Dict[str, Any]:
    """Map the keywords module's result payload onto the flat shape the
    report templates expect."""
    if not new_data or new_data.get('placeholder'):
        return {
            'placeholder': True,
            'message': 'No keyword data available',
            'total_keywords': 0,
            'position_distribution': {'top_3': 0, 'top_10': 0, 'top_50': 0, 'beyond_50': 0},
            'best_keywords': [],
            'opportunity_keywords': [],
            'data_source': 'Analysis failed'
        }

    totals = new_data.get('totals', {})
    distribution = new_data.get('distribution', {})
    movement = new_data.get('movement', {})
    best_keywords = new_data.get('best_keywords', [])
    declining_keywords = new_data.get('declining_keywords', [])
    opportunities = new_data.get('opportunities', [])
    data_sources = new_data.get('data_sources', {})

    pos_dist = {
        'top_3': distribution.get('top3', 0),
        'top_10': distribution.get('top10', 0),
        'top_50': distribution.get('top50', 0),
        'beyond_50': totals.get('keywords', 0) - distribution.get('top50', 0)
    }

    transformed_best_keywords = []
    for kw in best_keywords:
        transformed_best_keywords.append({
            'keyword': kw.get('keyword', ''),
            'position': kw.get('rank', 0),
            'clicks': 0,
            'impressions': kw.get('volume', 0),
            'url': kw.get('url', ''),
            'estimated_traffic': kw.get('estimated_traffic', 0),
            'trend': kw.get('trend', 'stable')
        })

    transformed_opportunities = []
    for opp in opportunities:
        transformed_opportunities.append({
            'keyword': opp.get('keyword', ''),
            'position': 0,
            'impressions': opp.get('volume', 0),
            'ctr': 0,
            'competitor_rank': opp.get('competitor_rank', 0),
            'priority_score': opp.get('priority_score', 0),
            'competitor_domain': opp.get('competitor_domain', '')
        })

    return {
        'total_keywords': totals.get('keywords', 0),
        'estimated_traffic': totals.get('estimated_traffic', 0),
        'position_distribution': pos_dist,
        'movement': movement,
        'best_keywords': transformed_best_keywords,
        'declining_keywords': declining_keywords,
        'opportunity_keywords': transformed_opportunities,
        'competitor_summary': new_data.get('competitor_summary', []),
        'data_source': f"{data_sources.get('positions', 'Unknown')} + {data_sources.get('volume', 'Unknown')}",
        'enrichment_rate': data_sources.get('enrichment_rate', 0),
        'meta': new_data.get('meta', {}),
        'placeholder': False
    }
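# Illustrative example of the mapping above (hypothetical module payload):
#   _transform_keywords_data({'totals': {'keywords': 120},
#                             'distribution': {'top3': 5, 'top10': 20, 'top50': 60}})
# returns, among other keys:
#   'position_distribution': {'top_3': 5, 'top_10': 20, 'top_50': 60, 'beyond_50': 60}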
@app.route('/')
def index():
    return render_template('index.html')
@app.route('/generate', methods=['POST'])  # route path assumed; decorator not preserved in the source
def generate_report():
    try:
        # get_json(silent=True) avoids a raw 500 when the body is not valid JSON
        data = request.get_json(silent=True) or {}
        url = data.get('url', '').strip()
        competitors = data.get('competitors', [])

        if not url:
            return jsonify({'error': 'Website URL is required'}), 400
        if not validators.url(url):
            return jsonify({'error': 'Please enter a valid URL'}), 400

        report_id = str(uuid.uuid4())

        # Keep only valid competitor URLs; derive bare domains for the keyword modules
        competitor_domains = []
        competitor_list = []
        for comp in competitors:
            comp = comp.strip()
            if comp and validators.url(comp):
                competitor_list.append(comp)
                domain = urlparse(comp).netloc.replace('www.', '')
                competitor_domains.append(domain)

        technical_data = technical_module.analyze(url)
        content_data = content_module.analyze(url)

        # Use GSC only if authenticated and the connected property matches this domain
        use_gsc = False
        if gsc_client and 'gsc_tokens' in session and gsc_client.property_url:
            domain = urlparse(url).netloc.replace('www.', '')
            property_domain = urlparse(gsc_client.property_url).netloc.replace('www.', '')
            if domain == property_domain:
                use_gsc = True

        # Analyze keywords
        if use_gsc:
            keywords_result = _analyze_with_gsc(url, competitor_domains)
        else:
            keywords_result = keywords_module.analyze(url, competitor_domains=competitor_domains)

        if not keywords_result.success:
            keywords_data = {
                'placeholder': True,
                'message': f'Keywords analysis failed: {keywords_result.error}',
                'total_keywords': 0,
                'position_distribution': {'top_3': 0, 'top_10': 0, 'top_50': 0, 'beyond_50': 0},
                'best_keywords': [],
                'opportunity_keywords': [],
                'data_source': 'Analysis failed'
            }
        else:
            keywords_data = _transform_keywords_data(keywords_result.data)

        print(f"DEBUG: Starting backlinks analysis for {url}")
        backlinks_result = backlinks_module.analyze(url)
        backlinks_data = backlinks_result.data
        print(f"DEBUG: Backlinks analysis result - Success: {backlinks_result.success}")
        print(f"DEBUG: Backlinks data keys: {list(backlinks_data.keys())}")
        if backlinks_data.get('total_backlinks'):
            print(f"DEBUG: Total backlinks found: {backlinks_data.get('total_backlinks')}")
        if backlinks_data.get('placeholder'):
            print(f"DEBUG: Using placeholder data: {backlinks_data.get('message')}")

        llm_rec_data = llm_recommendations.generate_recommendations(
            url, technical_data, content_data, keywords_data, backlinks_data
        )

        # Run the same modules in quick-scan mode for each competitor
        competitor_data = []
        for comp_url in competitor_list:
            comp_technical = technical_module.analyze(comp_url)
            comp_content = content_module.analyze(comp_url, quick_scan=True)
            comp_keywords_result = keywords_module.analyze(comp_url, competitor_domains=[], quick_scan=True)
            if comp_keywords_result.success:
                comp_keywords = _transform_keywords_data(comp_keywords_result.data)
            else:
                comp_keywords = {
                    'placeholder': True,
                    'message': f'Keywords analysis failed: {comp_keywords_result.error}',
                    'total_keywords': 0,
                    'position_distribution': {'top_3': 0, 'top_10': 0, 'top_50': 0, 'beyond_50': 0},
                    'best_keywords': [],
                    'opportunity_keywords': [],
                    'data_source': 'Analysis failed'
                }
            comp_backlinks_result = backlinks_module.analyze(comp_url, quick_scan=True)
            comp_backlinks = comp_backlinks_result.data
            competitor_data.append({
                'url': comp_url,
                'technical': comp_technical,
                'content': comp_content,
                'keywords': comp_keywords,
                'backlinks': comp_backlinks
            })

        report_html = report_gen.generate_html_report(
            url=url,
            technical_data=technical_data,
            content_data=content_data,
            competitor_data=competitor_data,
            keywords_data=keywords_data,
            backlinks_data=backlinks_data,
            llm_recommendations=llm_rec_data,
            include_charts=True
        )

        reports_store[report_id] = {
            'url': url,
            'html': report_html,
            'technical_data': technical_data,
            'content_data': content_data,
            'keywords_data': keywords_data,
            'backlinks_data': backlinks_data,
            'llm_recommendations': llm_rec_data,
            'competitor_data': competitor_data
        }

        return jsonify({
            'success': True,
            'report_id': report_id,
            'redirect_url': f'/report/{report_id}'
        })
    except Exception as e:
        return jsonify({'error': f'Error generating report: {str(e)}'}), 500
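# Example request (the '/generate' path above is an assumption):
#   curl -X POST http://localhost:7860/generate \
#     -H 'Content-Type: application/json' \
#     -d '{"url": "https://example.com", "competitors": ["https://example.org"]}'
# On success the JSON response carries report_id and a /report/<id> redirect URL.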
@app.route('/report/<report_id>')
def view_report(report_id):
    if report_id not in reports_store:
        return redirect(url_for('index'))
    report_data = reports_store[report_id]
    return render_template('report.html',
                           report_html=report_data['html'],
                           report_id=report_id,
                           url=report_data['url'])
@app.route('/download/html/<report_id>')  # route path assumed; decorator not preserved in the source
def download_html(report_id):
    if report_id not in reports_store:
        return jsonify({'error': 'Report not found'}), 404
    report_data = reports_store[report_id]
    with tempfile.NamedTemporaryFile(mode='w', suffix='.html', delete=False) as f:
        f.write(report_data['html'])
        temp_path = f.name
    filename = f"seo_report_{report_data['url'].replace('https://', '').replace('http://', '').replace('/', '_')}.html"
    return send_file(temp_path, as_attachment=True, download_name=filename, mimetype='text/html')
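# Note: the NamedTemporaryFile above uses delete=False so send_file can stream
# it after the context manager closes it; nothing in this module deletes the
# file afterwards, so OS tempdir cleanup is relied upon.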
# PDF download removed - now using browser print functionality
def _analyze_with_gsc(url: str, competitor_domains: List[str]):
    """Analyze keywords using GSC as the primary source, falling back to the
    standard keywords module if tokens are missing or the API call fails."""
    try:
        gsc_tokens = session.get('gsc_tokens', {})
        if not gsc_tokens.get('access_token'):
            return keywords_module.analyze(url, competitor_domains=competitor_domains)

        # Fetch GSC data using the updated method
        gsc_data = gsc_client.get_search_analytics(gsc_tokens)
        transformed_data = gsc_client.transform_gsc_data(gsc_data, urlparse(url).netloc)

        # Update the session with potentially refreshed tokens
        session['gsc_tokens'] = gsc_tokens

        from modules.keywords import ModuleResult
        return ModuleResult(success=True, data=transformed_data)
    except Exception as e:
        print(f"GSC analysis failed: {e}")
        return keywords_module.analyze(url, competitor_domains=competitor_domains)
@app.route('/gsc/auth')  # route path assumed; decorator not preserved in the source
def gsc_auth_start():
    """Start the GSC OAuth flow."""
    if not gsc_client:
        return jsonify({'error': 'Google Search Console integration not available. '
                                 'Install: pip install google-api-python-client google-auth-oauthlib google-auth'}), 500
    try:
        auth_url = gsc_client.get_auth_url()
        return redirect(auth_url)
    except Exception as e:
        return jsonify({'error': f'OAuth setup failed: {str(e)}'}), 500
@app.route('/gsc/callback')  # route path assumed; must match the registered OAuth redirect URI
def gsc_auth_callback():
    """Handle the GSC OAuth callback."""
    auth_code = request.args.get('code')
    error = request.args.get('error')
    if error:
        return redirect(url_for('index', error=f'OAuth error: {error}'))
    if not auth_code:
        return redirect(url_for('index', error='No authorization code received'))
    try:
        tokens = gsc_client.exchange_code(auth_code)
        session['gsc_tokens'] = tokens
        return redirect(url_for('index', success='Google Search Console connected successfully'))
    except Exception as e:
        return redirect(url_for('index', error=f'Token exchange failed: {str(e)}'))
@app.route('/gsc/status')  # route path assumed; decorator not preserved in the source
def gsc_auth_status():
    """Check GSC authentication status."""
    # Guard against the GSCClient import having failed at startup
    if not gsc_client:
        return jsonify({'authenticated': False, 'property_url': None, 'client_configured': False})
    has_tokens = 'gsc_tokens' in session
    property_url = gsc_client.property_url
    return jsonify({
        'authenticated': has_tokens,
        'property_url': property_url,
        'client_configured': bool(gsc_client.client_id and gsc_client.client_secret)
    })
if __name__ == '__main__':
    app.run(debug=False, host='0.0.0.0', port=7860)
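# Port 7860 is the Hugging Face Spaces convention; binding 0.0.0.0 makes the
# app reachable from outside the container.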