# dashboard.py — Streamlit front-end for the playwright-scraper-clean API
# (HF Space: apexherbert200/playwright-scraper-clean, commit dd2c937,
#  "Worked on get_page function")
# enhanced_dashboard.py
import base64
import json
import time
from datetime import datetime
from urllib.parse import urlparse

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import requests
import streamlit as st
# Page configuration
st.set_page_config(
page_title="Website Intelligence Dashboard",
page_icon="πŸš€",
layout="wide",
initial_sidebar_state="expanded"
)
# Custom CSS for better styling
st.markdown("""
<style>
.main-header {
font-size: 3rem;
color: #1f77b4;
text-align: center;
margin-bottom: 2rem;
}
.metric-card {
background-color: #f0f2f6;
padding: 1rem;
border-radius: 0.5rem;
border-left: 4px solid #1f77b4;
}
.success-metric {
border-left-color: #28a745;
}
.warning-metric {
border-left-color: #ffc107;
}
.danger-metric {
border-left-color: #dc3545;
}
.sidebar-info {
background-color: #e8f4fd;
padding: 1rem;
border-radius: 0.5rem;
margin-bottom: 1rem;
}
</style>
""", unsafe_allow_html=True)
# API Configuration
API_BASE = "https://apexherbert200-playwright-scraper-clean.hf.space"
# Sidebar configuration
st.sidebar.markdown('<div class="sidebar-info"><h3>πŸš€ Website Intelligence</h3><p>Comprehensive website analysis and monitoring platform</p></div>', unsafe_allow_html=True)
# API endpoint selection
analysis_type = st.sidebar.selectbox(
"Choose Analysis Type",
["Complete Analysis", "SEO Only", "Performance Only", "Metadata Only", "Screenshot Only"]
)
# Advanced options
st.sidebar.markdown("### βš™οΈ Advanced Options")
screenshot_width = st.sidebar.slider("Screenshot Width", 800, 1920, 1200)
screenshot_height = st.sidebar.slider("Screenshot Height", 600, 1080, 800)
full_page_screenshot = st.sidebar.checkbox("Full Page Screenshot", value=True)
# Main dashboard
st.markdown('<h1 class="main-header">πŸš€ Website Intelligence Dashboard</h1>', unsafe_allow_html=True)
# URL input with validation
col1, col2 = st.columns([3, 1])
with col1:
url = st.text_input(
"🌐 Enter Website URL",
value="https://www.example.com",
placeholder="https://www.yourwebsite.com"
)
with col2:
st.markdown("<br>", unsafe_allow_html=True)
analyze_button = st.button("πŸ” Analyze Website", type="primary")
# URL validation
def validate_url(url):
if not url:
return False, "Please enter a URL"
if not url.startswith(('http://', 'https://')):
return False, "URL must start with http:// or https://"
return True, ""
# API request function with error handling
def make_api_request(endpoint, params):
try:
response = requests.get(f"{API_BASE}/{endpoint}", params=params)
response.raise_for_status()
return response.json(), None
except requests.exceptions.Timeout:
return None, "Request timed out. Please try again."
except requests.exceptions.ConnectionError:
return None, "Connection error. Please check your internet connection."
except requests.exceptions.HTTPError as e:
return None, f"HTTP error: {e.response.status_code}"
except Exception as e:
return None, f"Unexpected error: {str(e)}"
# Main analysis logic
if analyze_button:
is_valid, error_msg = validate_url(url)
if not is_valid:
st.error(f"❌ {error_msg}")
else:
# Progress tracking
progress_bar = st.progress(0)
status_text = st.empty()
# Initialize data containers
seo_data = None
perf_data = None
meta_data = None
screenshot_data = None
try:
# Metadata Analysis
if analysis_type in ["Complete Analysis", "Metadata Only"]:
status_text.text("πŸ“„ Analyzing metadata...")
progress_bar.progress(20)
meta_data, error = make_api_request("metadata", {"url": url})
if error:
st.error(f"Metadata error: {error}")
# SEO Analysis
if analysis_type in ["Complete Analysis", "SEO Only"]:
status_text.text("πŸ” Performing SEO audit...")
progress_bar.progress(40)
seo_data, error = make_api_request("seo", {"url": url})
if error:
st.error(f"SEO error: {error}")
# Performance Analysis
if analysis_type in ["Complete Analysis", "Performance Only"]:
status_text.text("⚑ Measuring performance...")
progress_bar.progress(60)
perf_data, error = make_api_request("performance", {"url": url})
if error:
st.error(f"Performance error: {error}")
# Screenshot
if analysis_type in ["Complete Analysis", "Screenshot Only"]:
status_text.text("πŸ“Έ Capturing screenshot...")
progress_bar.progress(80)
screenshot_params = {
"url": url,
"width": screenshot_width,
"height": screenshot_height,
"full_page": full_page_screenshot
}
screenshot_response, error = make_api_request("screenshot", screenshot_params)
if error:
st.error(f"Screenshot error: {error}")
else:
screenshot_data = screenshot_response.get("screenshot")
progress_bar.progress(100)
status_text.text("βœ… Analysis complete!")
time.sleep(1)
progress_bar.empty()
status_text.empty()
except Exception as e:
st.error(f"❌ Analysis failed: {str(e)}")
st.stop()
# Display Results
st.markdown("---")
# Overview Section
if any([meta_data, seo_data, perf_data]):
st.header("πŸ“Š Website Overview")
col1, col2, col3, col4 = st.columns(4)
with col1:
if meta_data and meta_data.get('title'):
st.metric("πŸ“„ Page Title", "βœ… Found" if meta_data['title'] else "❌ Missing")
with col2:
if seo_data:
h1_count = seo_data.get('h1_count', 0)
h1_status = "βœ… Good" if h1_count == 1 else f"⚠️ {h1_count} H1s"
st.metric("🏷️ H1 Tags", h1_status)
with col3:
if seo_data:
missing_alts = len(seo_data.get('missing_image_alts', []))
alt_status = "βœ… All Good" if missing_alts == 0 else f"❌ {missing_alts} Missing"
st.metric("πŸ–ΌοΈ Image Alt Tags", alt_status)
with col4:
if perf_data and perf_data.get('page_load_time_ms'):
load_time = perf_data['page_load_time_ms']
if load_time < 2000:
load_status = "πŸš€ Fast"
elif load_time < 4000:
load_status = "⚠️ Moderate"
else:
load_status = "🐌 Slow"
st.metric("⚑ Load Time", f"{load_time:.0f}ms", delta=load_status)
# Metadata Section
if meta_data:
st.header("πŸ“„ Metadata Analysis")
col1, col2 = st.columns(2)
with col1:
st.subheader("Basic Information")
st.write(f"**Title:** {meta_data.get('title', 'Not found')}")
st.write(f"**Description:** {meta_data.get('description', 'Not found')}")
st.write(f"**Canonical URL:** {meta_data.get('canonical', 'Not found')}")
if meta_data.get('favicon'):
st.write(f"**Favicon:** βœ… Found")
st.image(meta_data['favicon'], width=32)
with col2:
st.subheader("Social Media")
og_data = meta_data.get('og', {})
twitter_data = meta_data.get('twitter', {})
if og_data.get('og:title'):
st.write(f"**OG Title:** {og_data['og:title']}")
if og_data.get('og:description'):
st.write(f"**OG Description:** {og_data['og:description']}")
if twitter_data.get('twitter:title'):
st.write(f"**Twitter Title:** {twitter_data['twitter:title']}")
# SEO Section
if seo_data:
st.header("πŸ” SEO Analysis")
col1, col2, col3 = st.columns(3)
with col1:
st.markdown('<div class="metric-card">', unsafe_allow_html=True)
st.metric("H1 Tags Count", seo_data.get('h1_count', 0))
if seo_data.get('h1_count', 0) != 1:
st.warning("⚠️ Should have exactly 1 H1 tag")
st.markdown('</div>', unsafe_allow_html=True)
with col2:
st.markdown('<div class="metric-card">', unsafe_allow_html=True)
internal_links = seo_data.get('internal_links', 0)
external_links = seo_data.get('external_links', 0)
st.metric("Internal Links", internal_links)
st.metric("External Links", external_links)
st.markdown('</div>', unsafe_allow_html=True)
with col3:
st.markdown('<div class="metric-card">', unsafe_allow_html=True)
missing_alts = seo_data.get('missing_image_alts', [])
st.metric("Missing Alt Tags", len(missing_alts))
if missing_alts:
st.warning(f"⚠️ {len(missing_alts)} images missing alt text")
st.markdown('</div>', unsafe_allow_html=True)
# SEO Details
st.subheader("SEO Details")
col1, col2 = st.columns(2)
with col1:
st.write(f"**Robots Meta:** {seo_data.get('robots_meta', 'Not found')}")
st.write(f"**Has Canonical:** {'βœ… Yes' if seo_data.get('has_canonical') else '❌ No'}")
st.write(f"**Meta Keywords:** {seo_data.get('meta_keywords', 'Not found')}")
with col2:
if missing_alts:
st.write("**Images Missing Alt Text:**")
for img in missing_alts[:5]: # Show first 5
st.write(f"- {img}")
if len(missing_alts) > 5:
st.write(f"... and {len(missing_alts) - 5} more")
# Performance Section
if perf_data:
st.header("⚑ Performance Metrics")
# Create performance chart
metrics = []
values = []
colors = []
if perf_data.get('page_load_time_ms'):
metrics.append('Page Load Time (ms)')
values.append(perf_data['page_load_time_ms'])
colors.append('#1f77b4')
if perf_data.get('first_contentful_paint'):
metrics.append('First Contentful Paint (ms)')
values.append(perf_data['first_contentful_paint'])
colors.append('#ff7f0e')
if perf_data.get('largest_contentful_paint'):
metrics.append('Largest Contentful Paint (ms)')
values.append(perf_data['largest_contentful_paint'])
colors.append('#2ca02c')
if metrics:
fig = px.bar(
x=metrics,
y=values,
title="Performance Metrics",
color=metrics,
color_discrete_sequence=colors
)
fig.update_layout(showlegend=False)
st.plotly_chart(fig, use_container_width=True)
# Performance details
col1, col2 = st.columns(2)
with col1:
st.subheader("Core Web Vitals")
if perf_data.get('first_contentful_paint'):
fcp = perf_data['first_contentful_paint']
fcp_status = "🟒 Good" if fcp < 1800 else "🟑 Needs Improvement" if fcp < 3000 else "πŸ”΄ Poor"
st.metric("First Contentful Paint", f"{fcp:.0f}ms", delta=fcp_status)
if perf_data.get('largest_contentful_paint'):
lcp = perf_data['largest_contentful_paint']
lcp_status = "🟒 Good" if lcp < 2500 else "🟑 Needs Improvement" if lcp < 4000 else "πŸ”΄ Poor"
st.metric("Largest Contentful Paint", f"{lcp:.0f}ms", delta=lcp_status)
with col2:
st.subheader("Additional Metrics")
if perf_data.get('cumulative_layout_shift'):
cls = perf_data['cumulative_layout_shift']
cls_status = "🟒 Good" if cls < 0.1 else "🟑 Needs Improvement" if cls < 0.25 else "πŸ”΄ Poor"
st.metric("Cumulative Layout Shift", f"{cls:.3f}", delta=cls_status)
if perf_data.get('page_load_time_ms'):
load_time = perf_data['page_load_time_ms']
st.metric("Total Load Time", f"{load_time:.0f}ms")
# Screenshot Section
if screenshot_data:
st.header("πŸ“Έ Website Screenshot")
try:
screenshot_bytes = base64.b64decode(screenshot_data)
st.image(screenshot_bytes, caption=f"Screenshot of {url}", use_column_width=True)
# Download button for screenshot
st.download_button(
label="πŸ“₯ Download Screenshot",
data=screenshot_bytes,
file_name=f"screenshot_{url.replace('https://', '').replace('http://', '').replace('/', '_')}.png",
mime="image/png"
)
except Exception as e:
st.error(f"Failed to display screenshot: {str(e)}")
# Footer
st.markdown("---")
st.markdown("""
<div style='text-align: center; color: #666; padding: 2rem;'>
<p>πŸš€ <strong>Website Intelligence Dashboard</strong> | Powered by Advanced Web Analysis APIs</p>
<p>Built with ❀️ using Streamlit | © 2024</p>
</div>
""", unsafe_allow_html=True)
# Sidebar additional info
st.sidebar.markdown("---")
st.sidebar.markdown("### πŸ“Š Analysis Features")
st.sidebar.markdown("""
- **SEO Audit**: H1 tags, meta data, links analysis
- **Performance**: Core Web Vitals, load times
- **Metadata**: Social media tags, canonical URLs
- **Screenshots**: Visual website capture
- **Real-time**: Live website analysis
""")
st.sidebar.markdown("### πŸ”§ API Status")
try:
health_response = requests.get(f"{API_BASE}/health", timeout=5)
if health_response.status_code == 200:
st.sidebar.success("🟒 API Online")
else:
st.sidebar.error("πŸ”΄ API Issues")
except:
st.sidebar.warning("🟑 API Status Unknown")