# dashboard.py — Streamlit front-end for the playwright-scraper-clean API
# (HF Space: apexherbert200/playwright-scraper-clean, commit dd2c937,
#  "Worked on get_page function")
# enhanced_dashboard.py
import base64
import json
import time
from datetime import datetime
from urllib.parse import urlparse

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import requests
import streamlit as st
# Page configuration
st.set_page_config(
page_title="Website Intelligence Dashboard",
page_icon="πŸš€",
layout="wide",
initial_sidebar_state="expanded"
)
# Custom CSS for better styling
st.markdown("""
<style>
.main-header {
font-size: 3rem;
color: #1f77b4;
text-align: center;
margin-bottom: 2rem;
}
.metric-card {
background-color: #f0f2f6;
padding: 1rem;
border-radius: 0.5rem;
border-left: 4px solid #1f77b4;
}
.success-metric {
border-left-color: #28a745;
}
.warning-metric {
border-left-color: #ffc107;
}
.danger-metric {
border-left-color: #dc3545;
}
.sidebar-info {
background-color: #e8f4fd;
padding: 1rem;
border-radius: 0.5rem;
margin-bottom: 1rem;
}
</style>
""", unsafe_allow_html=True)
# API Configuration
API_BASE = "https://apexherbert200-playwright-scraper-clean.hf.space"
# Sidebar configuration
st.sidebar.markdown('<div class="sidebar-info"><h3>πŸš€ Website Intelligence</h3><p>Comprehensive website analysis and monitoring platform</p></div>', unsafe_allow_html=True)
# API endpoint selection
analysis_type = st.sidebar.selectbox(
"Choose Analysis Type",
["Complete Analysis", "SEO Only", "Performance Only", "Metadata Only", "Screenshot Only"]
)
# Advanced options
st.sidebar.markdown("### βš™οΈ Advanced Options")
screenshot_width = st.sidebar.slider("Screenshot Width", 800, 1920, 1200)
screenshot_height = st.sidebar.slider("Screenshot Height", 600, 1080, 800)
full_page_screenshot = st.sidebar.checkbox("Full Page Screenshot", value=True)
# Main dashboard
st.markdown('<h1 class="main-header">πŸš€ Website Intelligence Dashboard</h1>', unsafe_allow_html=True)
# URL input with validation
col1, col2 = st.columns([3, 1])
with col1:
url = st.text_input(
"🌐 Enter Website URL",
value="https://www.example.com",
placeholder="https://www.yourwebsite.com"
)
with col2:
st.markdown("<br>", unsafe_allow_html=True)
analyze_button = st.button("πŸ” Analyze Website", type="primary")
# URL validation
def validate_url(url):
if not url:
return False, "Please enter a URL"
if not url.startswith(('http://', 'https://')):
return False, "URL must start with http:// or https://"
return True, ""
# API request function with error handling
def make_api_request(endpoint, params):
try:
response = requests.get(f"{API_BASE}/{endpoint}", params=params)
response.raise_for_status()
return response.json(), None
except requests.exceptions.Timeout:
return None, "Request timed out. Please try again."
except requests.exceptions.ConnectionError:
return None, "Connection error. Please check your internet connection."
except requests.exceptions.HTTPError as e:
return None, f"HTTP error: {e.response.status_code}"
except Exception as e:
return None, f"Unexpected error: {str(e)}"
# Main analysis logic
if analyze_button:
is_valid, error_msg = validate_url(url)
if not is_valid:
st.error(f"❌ {error_msg}")
else:
# Progress tracking
progress_bar = st.progress(0)
status_text = st.empty()
# Initialize data containers
seo_data = None
perf_data = None
meta_data = None
screenshot_data = None
try:
# Metadata Analysis
if analysis_type in ["Complete Analysis", "Metadata Only"]:
status_text.text("πŸ“„ Analyzing metadata...")
progress_bar.progress(20)
meta_data, error = make_api_request("metadata", {"url": url})
if error:
st.error(f"Metadata error: {error}")
# SEO Analysis
if analysis_type in ["Complete Analysis", "SEO Only"]:
status_text.text("πŸ” Performing SEO audit...")
progress_bar.progress(40)
seo_data, error = make_api_request("seo", {"url": url})
if error:
st.error(f"SEO error: {error}")
# Performance Analysis
if analysis_type in ["Complete Analysis", "Performance Only"]:
status_text.text("⚑ Measuring performance...")
progress_bar.progress(60)
perf_data, error = make_api_request("performance", {"url": url})
if error:
st.error(f"Performance error: {error}")
# Screenshot
if analysis_type in ["Complete Analysis", "Screenshot Only"]:
status_text.text("πŸ“Έ Capturing screenshot...")
progress_bar.progress(80)
screenshot_params = {
"url": url,
"width": screenshot_width,
"height": screenshot_height,
"full_page": full_page_screenshot
}
screenshot_response, error = make_api_request("screenshot", screenshot_params)
if error:
st.error(f"Screenshot error: {error}")
else:
screenshot_data = screenshot_response.get("screenshot")
progress_bar.progress(100)
status_text.text("βœ… Analysis complete!")
time.sleep(1)
progress_bar.empty()
status_text.empty()
except Exception as e:
st.error(f"❌ Analysis failed: {str(e)}")
st.stop()
# Display Results
st.markdown("---")
# Overview Section
if any([meta_data, seo_data, perf_data]):
st.header("πŸ“Š Website Overview")
col1, col2, col3, col4 = st.columns(4)
with col1:
if meta_data and meta_data.get('title'):
st.metric("πŸ“„ Page Title", "βœ… Found" if meta_data['title'] else "❌ Missing")
with col2:
if seo_data:
h1_count = seo_data.get('h1_count', 0)
h1_status = "βœ… Good" if h1_count == 1 else f"⚠️ {h1_count} H1s"
st.metric("🏷️ H1 Tags", h1_status)
with col3:
if seo_data:
missing_alts = len(seo_data.get('missing_image_alts', []))
alt_status = "βœ… All Good" if missing_alts == 0 else f"❌ {missing_alts} Missing"
st.metric("πŸ–ΌοΈ Image Alt Tags", alt_status)
with col4:
if perf_data and perf_data.get('page_load_time_ms'):
load_time = perf_data['page_load_time_ms']
if load_time < 2000:
load_status = "πŸš€ Fast"
elif load_time < 4000:
load_status = "⚠️ Moderate"
else:
load_status = "🐌 Slow"
st.metric("⚑ Load Time", f"{load_time:.0f}ms", delta=load_status)
# Metadata Section
if meta_data:
st.header("πŸ“„ Metadata Analysis")
col1, col2 = st.columns(2)
with col1:
st.subheader("Basic Information")
st.write(f"**Title:** {meta_data.get('title', 'Not found')}")
st.write(f"**Description:** {meta_data.get('description', 'Not found')}")
st.write(f"**Canonical URL:** {meta_data.get('canonical', 'Not found')}")
if meta_data.get('favicon'):
st.write(f"**Favicon:** βœ… Found")
st.image(meta_data['favicon'], width=32)
with col2:
st.subheader("Social Media")
og_data = meta_data.get('og', {})
twitter_data = meta_data.get('twitter', {})
if og_data.get('og:title'):
st.write(f"**OG Title:** {og_data['og:title']}")
if og_data.get('og:description'):
st.write(f"**OG Description:** {og_data['og:description']}")
if twitter_data.get('twitter:title'):
st.write(f"**Twitter Title:** {twitter_data['twitter:title']}")
# SEO Section
if seo_data:
st.header("πŸ” SEO Analysis")
col1, col2, col3 = st.columns(3)
with col1:
st.markdown('<div class="metric-card">', unsafe_allow_html=True)
st.metric("H1 Tags Count", seo_data.get('h1_count', 0))
if seo_data.get('h1_count', 0) != 1:
st.warning("⚠️ Should have exactly 1 H1 tag")
st.markdown('</div>', unsafe_allow_html=True)
with col2:
st.markdown('<div class="metric-card">', unsafe_allow_html=True)
internal_links = seo_data.get('internal_links', 0)
external_links = seo_data.get('external_links', 0)
st.metric("Internal Links", internal_links)
st.metric("External Links", external_links)
st.markdown('</div>', unsafe_allow_html=True)
with col3:
st.markdown('<div class="metric-card">', unsafe_allow_html=True)
missing_alts = seo_data.get('missing_image_alts', [])
st.metric("Missing Alt Tags", len(missing_alts))
if missing_alts:
st.warning(f"⚠️ {len(missing_alts)} images missing alt text")
st.markdown('</div>', unsafe_allow_html=True)
# SEO Details
st.subheader("SEO Details")
col1, col2 = st.columns(2)
with col1:
st.write(f"**Robots Meta:** {seo_data.get('robots_meta', 'Not found')}")
st.write(f"**Has Canonical:** {'βœ… Yes' if seo_data.get('has_canonical') else '❌ No'}")
st.write(f"**Meta Keywords:** {seo_data.get('meta_keywords', 'Not found')}")
with col2:
if missing_alts:
st.write("**Images Missing Alt Text:**")
for img in missing_alts[:5]: # Show first 5
st.write(f"- {img}")
if len(missing_alts) > 5:
st.write(f"... and {len(missing_alts) - 5} more")
# Performance Section
if perf_data:
st.header("⚑ Performance Metrics")
# Create performance chart
metrics = []
values = []
colors = []
if perf_data.get('page_load_time_ms'):
metrics.append('Page Load Time (ms)')
values.append(perf_data['page_load_time_ms'])
colors.append('#1f77b4')
if perf_data.get('first_contentful_paint'):
metrics.append('First Contentful Paint (ms)')
values.append(perf_data['first_contentful_paint'])
colors.append('#ff7f0e')
if perf_data.get('largest_contentful_paint'):
metrics.append('Largest Contentful Paint (ms)')
values.append(perf_data['largest_contentful_paint'])
colors.append('#2ca02c')
if metrics:
fig = px.bar(
x=metrics,
y=values,
title="Performance Metrics",
color=metrics,
color_discrete_sequence=colors
)
fig.update_layout(showlegend=False)
st.plotly_chart(fig, use_container_width=True)
# Performance details
col1, col2 = st.columns(2)
with col1:
st.subheader("Core Web Vitals")
if perf_data.get('first_contentful_paint'):
fcp = perf_data['first_contentful_paint']
fcp_status = "🟒 Good" if fcp < 1800 else "🟑 Needs Improvement" if fcp < 3000 else "πŸ”΄ Poor"
st.metric("First Contentful Paint", f"{fcp:.0f}ms", delta=fcp_status)
if perf_data.get('largest_contentful_paint'):
lcp = perf_data['largest_contentful_paint']
lcp_status = "🟒 Good" if lcp < 2500 else "🟑 Needs Improvement" if lcp < 4000 else "πŸ”΄ Poor"
st.metric("Largest Contentful Paint", f"{lcp:.0f}ms", delta=lcp_status)
with col2:
st.subheader("Additional Metrics")
if perf_data.get('cumulative_layout_shift'):
cls = perf_data['cumulative_layout_shift']
cls_status = "🟒 Good" if cls < 0.1 else "🟑 Needs Improvement" if cls < 0.25 else "πŸ”΄ Poor"
st.metric("Cumulative Layout Shift", f"{cls:.3f}", delta=cls_status)
if perf_data.get('page_load_time_ms'):
load_time = perf_data['page_load_time_ms']
st.metric("Total Load Time", f"{load_time:.0f}ms")
# Screenshot Section
if screenshot_data:
st.header("πŸ“Έ Website Screenshot")
try:
screenshot_bytes = base64.b64decode(screenshot_data)
st.image(screenshot_bytes, caption=f"Screenshot of {url}", use_column_width=True)
# Download button for screenshot
st.download_button(
label="πŸ“₯ Download Screenshot",
data=screenshot_bytes,
file_name=f"screenshot_{url.replace('https://', '').replace('http://', '').replace('/', '_')}.png",
mime="image/png"
)
except Exception as e:
st.error(f"Failed to display screenshot: {str(e)}")
# Footer
st.markdown("---")
st.markdown("""
<div style='text-align: center; color: #666; padding: 2rem;'>
<p>πŸš€ <strong>Website Intelligence Dashboard</strong> | Powered by Advanced Web Analysis APIs</p>
<p>Built with ❀️ using Streamlit | © 2024</p>
</div>
""", unsafe_allow_html=True)
# Sidebar additional info
st.sidebar.markdown("---")
st.sidebar.markdown("### πŸ“Š Analysis Features")
st.sidebar.markdown("""
- **SEO Audit**: H1 tags, meta data, links analysis
- **Performance**: Core Web Vitals, load times
- **Metadata**: Social media tags, canonical URLs
- **Screenshots**: Visual website capture
- **Real-time**: Live website analysis
""")
st.sidebar.markdown("### πŸ”§ API Status")
try:
health_response = requests.get(f"{API_BASE}/health", timeout=5)
if health_response.status_code == 200:
st.sidebar.success("🟒 API Online")
else:
st.sidebar.error("πŸ”΄ API Issues")
except:
st.sidebar.warning("🟑 API Status Unknown")