|
|
|
import streamlit as st |
|
import requests |
|
import base64 |
|
import json |
|
import pandas as pd |
|
import plotly.express as px |
|
import plotly.graph_objects as go |
|
from datetime import datetime |
|
import time |
|
|
|
|
|
# Browser-tab title/icon, page layout and initial sidebar state for the app.
_PAGE_SETTINGS = {
    "page_title": "Website Intelligence Dashboard",
    "page_icon": "π",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_SETTINGS)
|
|
|
|
|
# Stylesheet for the CSS classes referenced by the HTML snippets in this script
# (main-header, metric-card, sidebar-info) plus the coloured metric variants.
_DASHBOARD_CSS = """
<style>
.main-header {
    font-size: 3rem;
    color: #1f77b4;
    text-align: center;
    margin-bottom: 2rem;
}
.metric-card {
    background-color: #f0f2f6;
    padding: 1rem;
    border-radius: 0.5rem;
    border-left: 4px solid #1f77b4;
}
.success-metric {
    border-left-color: #28a745;
}
.warning-metric {
    border-left-color: #ffc107;
}
.danger-metric {
    border-left-color: #dc3545;
}
.sidebar-info {
    background-color: #e8f4fd;
    padding: 1rem;
    border-radius: 0.5rem;
    margin-bottom: 1rem;
}
</style>
"""

st.markdown(_DASHBOARD_CSS, unsafe_allow_html=True)
|
|
|
|
|
# Base URL that every API endpoint path in this script is appended to.
API_BASE = "https://apexherbert200-playwright-scraper-clean.hf.space"

# Sidebar: intro card, analysis-type selector and screenshot options.
with st.sidebar:
    st.markdown(
        '<div class="sidebar-info">'
        '<h3>π Website Intelligence</h3>'
        '<p>Comprehensive website analysis and monitoring platform</p>'
        '</div>',
        unsafe_allow_html=True,
    )

    # The selected label decides which API endpoints are queried later on.
    analysis_type = st.selectbox(
        label="Choose Analysis Type",
        options=[
            "Complete Analysis",
            "SEO Only",
            "Performance Only",
            "Metadata Only",
            "Screenshot Only",
        ],
    )

    st.markdown("### βοΈ Advanced Options")
    screenshot_width = st.slider(
        "Screenshot Width", min_value=800, max_value=1920, value=1200
    )
    screenshot_height = st.slider(
        "Screenshot Height", min_value=600, max_value=1080, value=800
    )
    full_page_screenshot = st.checkbox("Full Page Screenshot", value=True)
|
|
|
|
|
# Page heading, styled through the .main-header CSS class.
st.markdown(
    '<h1 class="main-header">π Website Intelligence Dashboard</h1>',
    unsafe_allow_html=True,
)

# Input row: a wide column for the URL field and a narrow one for the button.
col1, col2 = st.columns([3, 1])

url = col1.text_input(
    "π Enter Website URL",
    value="https://www.example.com",
    placeholder="https://www.yourwebsite.com",
)

# A line break is emitted above the button, in the narrow column.
col2.markdown("<br>", unsafe_allow_html=True)
analyze_button = col2.button("π Analyze Website", type="primary")
|
|
|
|
|
def validate_url(url):
    """Check that ``url`` looks like an absolute HTTP(S) URL with a host.

    Args:
        url: Text entered by the user; may be empty or ``None``.

    Returns:
        tuple[bool, str]: ``(True, "")`` when the URL is acceptable, otherwise
        ``(False, message)`` where ``message`` is a user-facing explanation.
    """
    # Function-scope import so the block does not depend on the file header.
    from urllib.parse import urlparse

    # Treat None, "" and whitespace-only input alike.
    if not url or not url.strip():
        return False, "Please enter a URL"

    if not url.startswith(("http://", "https://")):
        return False, "URL must start with http:// or https://"

    # A bare scheme such as "https://" passes the prefix test above but has
    # nothing to connect to, so also require a host component.
    try:
        host = urlparse(url).hostname
    except ValueError:
        # urlparse rejects some malformed authorities (e.g. a broken IPv6 literal).
        host = None
    if not host:
        return False, "URL must include a host name, e.g. https://www.example.com"

    return True, ""
|
|
|
|
|
def make_api_request(endpoint, params, timeout=60):
    """Send a GET request to one API endpoint and decode its JSON body.

    Args:
        endpoint: Path appended to ``API_BASE`` (for example ``"seo"``).
        params: Query-string parameters forwarded to ``requests.get``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, which would freeze the
            page and make the ``Timeout`` handler below unreachable.

    Returns:
        tuple: ``(data, None)`` on success, where ``data`` is the decoded
        JSON body, or ``(None, message)`` with a user-facing error message.
        No exception escapes this function.
    """
    try:
        response = requests.get(
            f"{API_BASE}/{endpoint}", params=params, timeout=timeout
        )
        response.raise_for_status()
    except requests.exceptions.Timeout:
        return None, "Request timed out. Please try again."
    except requests.exceptions.ConnectionError:
        return None, "Connection error. Please check your internet connection."
    except requests.exceptions.HTTPError as e:
        return None, f"HTTP error: {e.response.status_code}"
    except Exception as e:
        return None, f"Unexpected error: {str(e)}"

    # Decode separately so a non-JSON body gets its own, clearer message.
    try:
        return response.json(), None
    except ValueError:
        return None, "Invalid response from server (expected JSON)."
|
|
|
|
|
# NOTE(review): emoji in the labels below were restored from mis-decoded text
# where the surviving characters plus context identify the glyph; confirm them
# against the original file. Labels that still start with a bare "π" could not
# be recovered and are left exactly as found.
#
# NOTE(review): results are rendered only inside ``if analyze_button:``;
# presumably any later widget interaction (such as the download button) reruns
# the script with the button unset and clears them. Persisting the payloads in
# st.session_state would avoid that; verify before changing.

# Text endpoints: (endpoint, single-endpoint option, status message,
# progress percentage, label used in error messages).
_TEXT_STAGES = (
    ("metadata", "Metadata Only", "π Analyzing metadata...", 20, "Metadata"),
    ("seo", "SEO Only", "π Performing SEO audit...", 40, "SEO"),
    ("performance", "Performance Only", "⚡ Measuring performance...", 60, "Performance"),
)

# Timing values drawn in the bar chart: (payload key, axis label, bar colour).
_TIMING_SERIES = (
    ("page_load_time_ms", "Page Load Time (ms)", "#1f77b4"),
    ("first_contentful_paint", "First Contentful Paint (ms)", "#ff7f0e"),
    ("largest_contentful_paint", "Largest Contentful Paint (ms)", "#2ca02c"),
)


def _rating(value, good_below, fair_below):
    """Return the traffic-light label for ``value`` given two ascending limits."""
    if value < good_below:
        return "🟢 Good"
    if value < fair_below:
        return "🟡 Needs Improvement"
    return "🔴 Poor"


def _run_analysis(target_url, mode, screenshot_params):
    """Query the endpoints selected by ``mode`` while showing progress.

    Args:
        target_url: URL to analyse.
        mode: Value of the "Choose Analysis Type" selector.
        screenshot_params: Query parameters for the screenshot endpoint.

    Returns:
        dict: Keys ``"metadata"``, ``"seo"``, ``"performance"`` (dict payloads)
        and ``"screenshot"`` (the payload's ``"screenshot"`` value). An entry
        is ``None`` when its endpoint was skipped or failed.
    """
    progress_bar = st.progress(0)
    status_text = st.empty()
    results = {"metadata": None, "seo": None, "performance": None, "screenshot": None}

    stages = [
        (endpoint, only_option, message, percent, label, {"url": target_url})
        for endpoint, only_option, message, percent, label in _TEXT_STAGES
    ]
    stages.append(
        ("screenshot", "Screenshot Only", "📸 Capturing screenshot...", 80,
         "Screenshot", screenshot_params)
    )

    try:
        for endpoint, only_option, message, percent, label, params in stages:
            if mode not in ("Complete Analysis", only_option):
                continue
            status_text.text(message)
            progress_bar.progress(percent)
            payload, error = make_api_request(endpoint, params)
            if error:
                st.error(f"{label} error: {error}")
            elif not isinstance(payload, dict):
                # The renderers below call .get() on the payload.
                st.error(f"{label} error: unexpected response format")
            elif endpoint == "screenshot":
                results[endpoint] = payload.get("screenshot")
            else:
                results[endpoint] = payload

        progress_bar.progress(100)
        status_text.text("✅ Analysis complete!")
        time.sleep(1)  # leave the completion message visible for a moment
    except Exception as e:
        st.error(f"❌ Analysis failed: {str(e)}")
        st.stop()
    finally:
        # Remove the progress widgets on failure as well as on success.
        progress_bar.empty()
        status_text.empty()

    return results


def _render_overview(meta_data, seo_data, perf_data):
    """Show the four headline metrics summarising whatever data is available."""
    st.header("π Website Overview")
    col1, col2, col3, col4 = st.columns(4)

    with col1:
        # Render whenever metadata was fetched so a missing title is reported
        # instead of silently leaving the slot empty.
        if meta_data:
            st.metric(
                "π Page Title",
                "✅ Found" if meta_data.get("title") else "❌ Missing",
            )

    with col2:
        if seo_data:
            h1_count = seo_data.get("h1_count", 0)
            h1_status = "✅ Good" if h1_count == 1 else f"⚠️ {h1_count} H1s"
            st.metric("🏷️ H1 Tags", h1_status)

    with col3:
        if seo_data:
            missing_alts = len(seo_data.get("missing_image_alts") or [])
            alt_status = (
                "✅ All Good" if missing_alts == 0 else f"❌ {missing_alts} Missing"
            )
            st.metric("🖼️ Image Alt Tags", alt_status)

    with col4:
        load_time = perf_data.get("page_load_time_ms") if perf_data else None
        if load_time is not None:
            if load_time < 2000:
                load_status = "π Fast"
            elif load_time < 4000:
                load_status = "⚠️ Moderate"
            else:
                load_status = "π Slow"
            # The delta slot carries a status label rather than a numeric
            # change, so the default "positive change" colouring is disabled.
            st.metric(
                "⚡ Load Time",
                f"{load_time:.0f}ms",
                delta=load_status,
                delta_color="off",
            )


def _render_metadata(meta_data):
    """Show title, description, canonical URL, favicon and social tags."""
    st.header("π Metadata Analysis")
    col1, col2 = st.columns(2)

    with col1:
        st.subheader("Basic Information")
        # ``or`` also covers keys that are present but empty or None.
        st.write(f"**Title:** {meta_data.get('title') or 'Not found'}")
        st.write(f"**Description:** {meta_data.get('description') or 'Not found'}")
        st.write(f"**Canonical URL:** {meta_data.get('canonical') or 'Not found'}")
        if meta_data.get("favicon"):
            st.write("**Favicon:** ✅ Found")
            st.image(meta_data["favicon"], width=32)

    with col2:
        st.subheader("Social Media")
        og_data = meta_data.get("og") or {}
        twitter_data = meta_data.get("twitter") or {}

        if og_data.get("og:title"):
            st.write(f"**OG Title:** {og_data['og:title']}")
        if og_data.get("og:description"):
            st.write(f"**OG Description:** {og_data['og:description']}")
        if twitter_data.get("twitter:title"):
            st.write(f"**Twitter Title:** {twitter_data['twitter:title']}")


def _render_seo(seo_data):
    """Show heading, link and image-alt statistics plus the SEO detail list."""
    st.header("π SEO Analysis")
    h1_count = seo_data.get("h1_count", 0)
    missing_alts = seo_data.get("missing_image_alts") or []

    col1, col2, col3 = st.columns(3)

    # NOTE(review): presumably each st.markdown call is rendered as its own
    # element, so the opening/closing <div> pairs below may not actually wrap
    # the metrics between them; kept as-is, verify in the running app.
    with col1:
        st.markdown('<div class="metric-card">', unsafe_allow_html=True)
        st.metric("H1 Tags Count", h1_count)
        if h1_count != 1:
            st.warning("⚠️ Should have exactly 1 H1 tag")
        st.markdown('</div>', unsafe_allow_html=True)

    with col2:
        st.markdown('<div class="metric-card">', unsafe_allow_html=True)
        st.metric("Internal Links", seo_data.get("internal_links", 0))
        st.metric("External Links", seo_data.get("external_links", 0))
        st.markdown('</div>', unsafe_allow_html=True)

    with col3:
        st.markdown('<div class="metric-card">', unsafe_allow_html=True)
        st.metric("Missing Alt Tags", len(missing_alts))
        if missing_alts:
            st.warning(f"⚠️ {len(missing_alts)} images missing alt text")
        st.markdown('</div>', unsafe_allow_html=True)

    st.subheader("SEO Details")
    col1, col2 = st.columns(2)

    with col1:
        st.write(f"**Robots Meta:** {seo_data.get('robots_meta') or 'Not found'}")
        st.write(
            f"**Has Canonical:** {'✅ Yes' if seo_data.get('has_canonical') else '❌ No'}"
        )
        st.write(f"**Meta Keywords:** {seo_data.get('meta_keywords') or 'Not found'}")

    with col2:
        if missing_alts:
            st.write("**Images Missing Alt Text:**")
            # Only the first five entries are listed; the rest are summarised.
            for img in missing_alts[:5]:
                st.write(f"- {img}")
            if len(missing_alts) > 5:
                st.write(f"... and {len(missing_alts) - 5} more")


def _render_performance(perf_data):
    """Show the timing bar chart and the individual performance metrics."""
    st.header("⚡ Performance Metrics")

    # ``is not None`` (rather than truthiness) keeps legitimate zero values.
    present = [
        (label, perf_data[key], colour)
        for key, label, colour in _TIMING_SERIES
        if perf_data.get(key) is not None
    ]
    if present:
        metrics = [label for label, _, _ in present]
        values = [value for _, value, _ in present]
        colors = [colour for _, _, colour in present]
        fig = px.bar(
            x=metrics,
            y=values,
            title="Performance Metrics",
            color=metrics,
            color_discrete_sequence=colors,
        )
        fig.update_layout(showlegend=False)
        st.plotly_chart(fig, use_container_width=True)

    col1, col2 = st.columns(2)

    with col1:
        st.subheader("Core Web Vitals")
        fcp = perf_data.get("first_contentful_paint")
        if fcp is not None:
            st.metric(
                "First Contentful Paint",
                f"{fcp:.0f}ms",
                delta=_rating(fcp, 1800, 3000),
                delta_color="off",
            )

        lcp = perf_data.get("largest_contentful_paint")
        if lcp is not None:
            st.metric(
                "Largest Contentful Paint",
                f"{lcp:.0f}ms",
                delta=_rating(lcp, 2500, 4000),
                delta_color="off",
            )

    with col2:
        st.subheader("Additional Metrics")
        layout_shift = perf_data.get("cumulative_layout_shift")
        # A layout shift of exactly 0 is a valid score and must still be shown.
        if layout_shift is not None:
            st.metric(
                "Cumulative Layout Shift",
                f"{layout_shift:.3f}",
                delta=_rating(layout_shift, 0.1, 0.25),
                delta_color="off",
            )

        load_time = perf_data.get("page_load_time_ms")
        if load_time is not None:
            st.metric("Total Load Time", f"{load_time:.0f}ms")


def _render_screenshot(screenshot_data, target_url):
    """Decode the base64 screenshot, display it and offer it for download."""
    st.header("📸 Website Screenshot")
    try:
        screenshot_bytes = base64.b64decode(screenshot_data)
        # NOTE(review): newer Streamlit releases reportedly deprecate
        # use_column_width in favour of use_container_width; confirm the
        # pinned version before switching.
        st.image(
            screenshot_bytes,
            caption=f"Screenshot of {target_url}",
            use_column_width=True,
        )

        # File name is the URL without its scheme, with "/" replaced by "_".
        file_stem = (
            target_url.replace('https://', '').replace('http://', '').replace('/', '_')
        )
        st.download_button(
            label="📥 Download Screenshot",
            data=screenshot_bytes,
            file_name=f"screenshot_{file_stem}.png",
            mime="image/png",
        )
    except Exception as e:
        st.error(f"Failed to display screenshot: {str(e)}")


# Main flow: validate the URL, fetch the selected analyses, render each section.
if analyze_button:
    is_valid, error_msg = validate_url(url)

    if not is_valid:
        st.error(f"❌ {error_msg}")
    else:
        analysis_results = _run_analysis(
            url,
            analysis_type,
            {
                "url": url,
                "width": screenshot_width,
                "height": screenshot_height,
                "full_page": full_page_screenshot,
            },
        )
        meta_data = analysis_results["metadata"]
        seo_data = analysis_results["seo"]
        perf_data = analysis_results["performance"]
        screenshot_data = analysis_results["screenshot"]

        st.markdown("---")

        if any([meta_data, seo_data, perf_data]):
            _render_overview(meta_data, seo_data, perf_data)
        if meta_data:
            _render_metadata(meta_data)
        if seo_data:
            _render_seo(seo_data)
        if perf_data:
            _render_performance(perf_data)
        if screenshot_data:
            _render_screenshot(screenshot_data, url)
|
|
|
|
|
# Page footer and the static feature list in the sidebar.
# NOTE(review): "©" and "❤️" were restored from mis-decoded text; the labels
# that still start with a bare "π" could not be recovered and are left as found.
st.markdown("---")
st.markdown(
    """
<div style='text-align: center; color: #666; padding: 2rem;'>
<p>π <strong>Website Intelligence Dashboard</strong> | Powered by Advanced Web Analysis APIs</p>
<p>Built with ❤️ using Streamlit | © 2024</p>
</div>
""",
    unsafe_allow_html=True,
)

st.sidebar.markdown("---")
st.sidebar.markdown("### π Analysis Features")
st.sidebar.markdown(
    """
- **SEO Audit**: H1 tags, meta data, links analysis
- **Performance**: Core Web Vitals, load times
- **Metadata**: Social media tags, canonical URLs
- **Screenshots**: Visual website capture
- **Real-time**: Live website analysis
"""
)
|
|
|
# Sidebar health indicator: probe the API's /health endpoint on each script run.
# NOTE(review): status glyphs were restored from mis-decoded text; confirm them
# against the original file.
st.sidebar.markdown("### 🔧 API Status")
try:
    health_response = requests.get(f"{API_BASE}/health", timeout=5)
except requests.exceptions.RequestException:
    # Only network-level failures mean "unknown"; a bare ``except`` would also
    # swallow unrelated errors and interpreter/framework control-flow exceptions.
    st.sidebar.warning("🟡 API Status Unknown")
else:
    if health_response.status_code == 200:
        st.sidebar.success("🟢 API Online")
    else:
        st.sidebar.error("🔴 API Issues")
|
|