Guest1 committed on
Commit
d22875e
Β·
0 Parent(s):

πŸš€ Initial Clean Deployment

Browse files
.env.example ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ # Supabase Configuration
2
+ # Required for the database and validation workers
3
+ SUPABASE_URL="https://your-project.supabase.co"
4
+ SUPABASE_PUBLISHABLE_KEY="your-public-key"
5
+ SUPABASE_SECRET_KEY="your-private-secret-key"
6
+
7
+ # IMPORTANT:
8
+ # 1. On Hugging Face, go to Settings > Variables and Secrets
9
+ # 2. Add these as 'Secrets' there.
10
+ # 3. NEVER hardcode your real keys in your code!
.github/workflows/deploy.yml ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Sync to Hugging Face Hub
2
+ on:
3
+ push:
4
+ branches: [main]
5
+ workflow_dispatch:
6
+
7
+ jobs:
8
+ deploy:
9
+ runs-on: ubuntu-latest
10
+ steps:
11
+ - name: Checkout Repository
12
+ uses: actions/checkout@v3
13
+ with:
14
+ fetch-depth: 0
15
+ lfs: true
16
+
17
+ - name: Push to Hugging Face
18
+ env:
19
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
20
+ run: |
21
+ git remote add hf https://PinkAlpaca:$HF_TOKEN@huggingface.co/spaces/PinkAlpaca/RandomWeb
22
+ git push -f hf main
.gitignore ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ *.pyc
3
+ *.pyo
4
+ .env
5
+ .env.local
6
+ .DS_Store
7
+ *.log
8
+ node_modules/
9
+ .vscode/
10
+ .idea/
Dockerfile ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.12-slim
2
+
3
+ # Install system dependencies
4
+ RUN apt-get update && \
5
+ apt-get install -y --no-install-recommends nginx curl && \
6
+ rm -rf /var/lib/apt/lists/*
7
+
8
+ # Create non-root user (required by HF Spaces)
9
+ RUN useradd -m -u 1000 appuser
10
+
11
+ # Set working directory
12
+ WORKDIR /app
13
+
14
+ # Install Python dependencies
15
+ COPY backend/requirements.txt ./requirements.txt
16
+ RUN pip install --no-cache-dir -r requirements.txt
17
+
18
+ # Copy application code
19
+ COPY backend/ ./backend/
20
+ COPY frontend/ ./frontend/
21
+
22
+ # Copy Nginx config
23
+ COPY nginx.conf /etc/nginx/nginx.conf
24
+
25
+ # Copy startup script
26
+ COPY run.sh ./run.sh
27
+ RUN chmod +x ./run.sh
28
+
29
+ # Create Nginx temp directories writable by appuser
30
+ RUN mkdir -p /tmp/nginx-client-body /tmp/nginx-proxy /tmp/nginx-fastcgi /tmp/nginx-uwsgi /tmp/nginx-scgi && \
31
+ chown -R appuser:appuser /tmp/nginx-* && \
32
+ chown -R appuser:appuser /var/lib/nginx && \
33
+ chown -R appuser:appuser /var/log/nginx && \
34
+ chown -R appuser:appuser /app && \
35
+ touch /tmp/nginx.pid && \
36
+ chown appuser:appuser /tmp/nginx.pid
37
+
38
+ # Switch to non-root user
39
+ USER appuser
40
+
41
+ EXPOSE 7860
42
+
43
+ CMD ["./run.sh"]
README.md ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: RandomWeb
3
+ emoji: 🌐
4
+ colorFrom: blue
5
+ colorTo: purple
6
+ sdk: docker
7
+ app_port: 7860
8
+ pinned: true
9
+ ---
10
+
11
+ # 🌐 RandomWeb β€” Discover Random Websites
12
+
13
+ A random website discovery platform that indexes the entire web using:
14
+
15
+ - **Certificate Transparency Logs** β€” Real-time domain discovery via CertStream
16
+ - **Common Crawl** β€” Batch import from the largest public web archive
17
+ - **BFS Recursive Crawler** β€” Breadth-first link extraction and traversal
18
+ - **Polite Validation** β€” Rate-limited, robots.txt-compliant URL verification
19
+
20
+ ## Features
21
+
22
+ - 🎲 **Random Button** β€” Instant redirect to a random live website
23
+ - πŸ” **Search** β€” Find specific indexed websites
24
+ - βž• **Submit URLs** β€” Add websites to the index
25
+ - πŸ“Š **Live Counter** β€” Real-time count of active indexed sites (via Supabase Realtime)
26
+
27
+ ## Architecture
28
+
29
+ - **Backend**: Python / FastAPI with async workers
30
+ - **Frontend**: Vanilla HTML/CSS/JS with Supabase JS client
31
+ - **Database**: Supabase (PostgreSQL) with RLS and Realtime
32
+ - **Deployment**: Docker on Hugging Face Spaces
33
+
34
+ ## Links
35
+
36
+ - [GitHub Repository](https://github.com/guestcoder0906/RandomWeb)
backend/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # RandomWeb Backend
backend/api/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # API Routes
backend/api/routes.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ RandomWeb β€” REST API Routes
3
+ Endpoints for random redirect, search, URL submission, and stats.
4
+ """
5
+ import logging
6
+ import re
7
+ from urllib.parse import urlparse
8
+
9
+ from fastapi import APIRouter, HTTPException, Query
10
+ from pydantic import BaseModel, field_validator
11
+
12
+ from backend.db import (
13
+ get_random_active_url,
14
+ search_websites,
15
+ get_active_count,
16
+ get_total_count,
17
+ url_exists,
18
+ )
19
+ from backend.workers.validator import enqueue_url
20
+
21
+ logger = logging.getLogger("randomweb.api")
22
+ router = APIRouter(prefix="/api")
23
+
24
+
25
+ # ─── Models ──────────────────────────────────────────────────
26
class SubmitRequest(BaseModel):
    """Request payload for POST /api/submit."""

    url: str  # normalized by the validator to always carry a scheme

    @field_validator("url")
    @classmethod
    def validate_url(cls, v: str) -> str:
        """Normalize and validate a submitted URL.

        - Trims surrounding whitespace; rejects empty input.
        - Prepends "https://" when no scheme is present. The scheme test
          is case-insensitive (URL schemes are case-insensitive per
          RFC 3986); previously "HTTP://x" was double-prefixed into
          "https://HTTP://x".
        - Requires a dotted hostname and caps total length at 2000 chars.

        Raises:
            ValueError: on empty input, malformed host, or overlong URL.
        """
        v = v.strip()
        if not v:
            raise ValueError("URL cannot be empty")

        # Case-insensitive scheme detection; add https:// only when absent.
        if not v.lower().startswith(("http://", "https://")):
            v = f"https://{v}"

        parsed = urlparse(v)
        if not parsed.netloc or "." not in parsed.netloc:
            raise ValueError("Invalid URL format")

        if len(v) > 2000:
            raise ValueError("URL too long")

        return v
48
+
49
+
50
class SubmitResponse(BaseModel):
    """Response body for POST /api/submit."""

    success: bool  # True for new submissions and for duplicates alike
    message: str   # human-readable outcome description
    url: str       # the (validator-normalized) URL that was processed


class RandomResponse(BaseModel):
    """Response body for GET /api/random."""

    url: str  # a randomly selected active website URL


class StatsResponse(BaseModel):
    """Response body for GET /api/stats."""

    active_count: int  # sites currently marked active in the index
    total_count: int   # all sites ever indexed


class SearchResult(BaseModel):
    """One row in the GET /api/search response list."""

    url: str
    domain: str
    is_active: bool
69
+
70
+
71
+ # ─── Endpoints ───────────────────────────────────────────────
72
@router.get("/random", response_model=RandomResponse)
async def get_random():
    """Return a single random active website URL for the redirect button."""
    random_url = get_random_active_url()
    if random_url:
        return {"url": random_url}
    # Nothing indexed yet — surface a 404 so the frontend can show a notice.
    raise HTTPException(
        status_code=404,
        detail="No active websites found yet. The system is still indexing.",
    )
82
+
83
+
84
@router.get("/search", response_model=list[SearchResult])
async def search(
    q: str = Query(..., min_length=1, max_length=200, description="Search query"),
    limit: int = Query(20, ge=1, le=100, description="Max results"),
):
    """Search the index for websites whose URL or domain matches *q*."""
    return search_websites(q, limit=limit)
92
+
93
+
94
@router.post("/submit", response_model=SubmitResponse)
async def submit_url(request: SubmitRequest):
    """Accept a user-submitted URL and queue it for validation.

    Duplicate submissions are reported as success without re-queuing.
    """
    candidate = request.url
    logger.info("User submitted URL: %s", candidate)

    # Already indexed? Treat it as a successful, idempotent submission.
    if url_exists(candidate):
        return SubmitResponse(
            success=True,
            message="This URL is already in our index.",
            url=candidate,
        )

    # Hand off to the async validation worker.
    await enqueue_url(candidate, source="user_submit")
    return SubmitResponse(
        success=True,
        message="URL submitted successfully! It will be validated and added if accessible.",
        url=candidate,
    )
116
+
117
+
118
@router.get("/stats", response_model=StatsResponse)
async def get_stats():
    """Report index statistics: active and total site counts."""
    active = get_active_count()
    total = get_total_count()
    return StatsResponse(active_count=active, total_count=total)
125
+
126
+
127
@router.get("/health")
async def health():
    """Simple liveness endpoint; always reports ok when the app is up."""
    return {"status": "ok"}
backend/config.py ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ RandomWeb β€” Configuration
3
+ Loads environment variables and defines constants for all workers.
4
+ """
5
+ import os
6
+
7
+ # ─── Supabase ────────────────────────────────────────────────
8
+ SUPABASE_URL = os.getenv("SUPABASE_URL", "")
9
+ SUPABASE_SECRET_KEY = os.getenv("SUPABASE_SECRET_KEY", "")
10
+ SUPABASE_PUBLISHABLE_KEY = os.getenv("SUPABASE_PUBLISHABLE_KEY", "")
11
+
12
+ # ─── Crawler Settings ───────────────────────────────────────
13
+ USER_AGENT = "RandomWeb/1.0 (+https://github.com/guestcoder0906/RandomWeb; polite-bot)"
14
+ REQUEST_TIMEOUT = 10 # seconds
15
+ MAX_GLOBAL_CONCURRENCY = 20 # max simultaneous outbound connections
16
+ PER_DOMAIN_RATE_LIMIT = 1.0 # requests per second per domain
17
+ CRAWL_DELAY_DEFAULT = 1.0 # fallback crawl delay if robots.txt doesn't specify
18
+ MAX_CRAWL_DEPTH = 3 # BFS depth limit per seed
19
+ MAX_LINKS_PER_PAGE = 50 # max links to extract per page
20
+ MAX_QUEUE_SIZE = 100_000 # max URLs in crawler queue
21
+
22
+ # ─── Validator Settings ──────────────────────────────────────
23
+ VALIDATION_BATCH_SIZE = 50 # URLs per validation batch
24
+ VALIDATION_CONCURRENCY = 10 # concurrent validation requests
25
+ RECHECK_INTERVAL_DAYS = 365 # re-verify every year
26
+
27
+ # ─── CertStream ──────────────────────────────────────────────
28
+ CERTSTREAM_URL = "wss://certstream.calidog.io/"
29
+ CT_LOG_BATCH_SIZE = 100 # queue batch size before flushing to validation
30
+ CT_LOG_RECONNECT_DELAY = 5 # initial reconnect delay in seconds
31
+ CT_LOG_MAX_RECONNECT_DELAY = 300 # max reconnect delay
32
+
33
+ # ─── Common Crawl ────────────────────────────────────────────
34
+ COMMON_CRAWL_INDEX_URL = "https://index.commoncrawl.org/collinfo.json"
35
+ COMMON_CRAWL_SAMPLE_SIZE = 10_000 # URLs per crawl import batch
36
+ COMMON_CRAWL_RESCAN_HOURS = 168 # re-import weekly (7 * 24)
37
+
38
+ # ─── Scheduler ───────────────────────────────────────────────
39
+ SCHEDULER_INTERVAL_SECONDS = 3600 # run re-verification check every hour
40
+ SCHEDULER_BATCH_SIZE = 100 # URLs per re-verification batch
41
+
42
+ # ─── Blocked TLDs / Patterns ────────────────────────────────
43
+ BLOCKED_TLDS = {
44
+ ".local", ".internal", ".test", ".example",
45
+ ".invalid", ".localhost", ".onion",
46
+ }
47
+
48
+ # ─── Top 100 Seed Websites ──────────────────────────────────
49
+ SEED_WEBSITES = [
50
+ "https://google.com",
51
+ "https://youtube.com",
52
+ "https://facebook.com",
53
+ "https://instagram.com",
54
+ "https://chatgpt.com",
55
+ "https://x.com",
56
+ "https://reddit.com",
57
+ "https://wikipedia.org",
58
+ "https://whatsapp.com",
59
+ "https://bing.com",
60
+ "https://tiktok.com",
61
+ "https://yahoo.co.jp",
62
+ "https://yandex.ru",
63
+ "https://yahoo.com",
64
+ "https://amazon.com",
65
+ "https://gemini.google.com",
66
+ "https://linkedin.com",
67
+ "https://bet.br",
68
+ "https://baidu.com",
69
+ "https://naver.com",
70
+ "https://netflix.com",
71
+ "https://pinterest.com",
72
+ "https://live.com",
73
+ "https://bilibili.com",
74
+ "https://pornhub.com",
75
+ "https://temu.com",
76
+ "https://dzen.ru",
77
+ "https://office.com",
78
+ "https://microsoft.com",
79
+ "https://xhamster.com",
80
+ "https://twitch.tv",
81
+ "https://xvideos.com",
82
+ "https://canva.com",
83
+ "https://weather.com",
84
+ "https://vk.com",
85
+ "https://globo.com",
86
+ "https://fandom.com",
87
+ "https://news.yahoo.co.jp",
88
+ "https://t.me",
89
+ "https://samsung.com",
90
+ "https://mail.ru",
91
+ "https://duckduckgo.com",
92
+ "https://nytimes.com",
93
+ "https://stripchat.com",
94
+ "https://xnxx.com",
95
+ "https://ebay.com",
96
+ "https://zoom.us",
97
+ "https://xhamster44.desi",
98
+ "https://discord.com",
99
+ "https://eporner.com",
100
+ "https://github.com",
101
+ "https://booking.com",
102
+ "https://spotify.com",
103
+ "https://cricbuzz.com",
104
+ "https://instructure.com",
105
+ "https://docomo.ne.jp",
106
+ "https://roblox.com",
107
+ "https://aliexpress.com",
108
+ "https://bbc.com",
109
+ "https://bbc.co.uk",
110
+ "https://ozon.ru",
111
+ "https://apple.com",
112
+ "https://imdb.com",
113
+ "https://telegram.org",
114
+ "https://brave.com",
115
+ "https://amazon.in",
116
+ "https://chaturbate.com",
117
+ "https://msn.com",
118
+ "https://walmart.com",
119
+ "https://amazon.co.jp",
120
+ "https://paypal.com",
121
+ "https://cnn.com",
122
+ "https://ya.ru",
123
+ "https://indeed.com",
124
+ "https://etsy.com",
125
+ "https://rakuten.co.jp",
126
+ "https://amazon.de",
127
+ "https://espn.com",
128
+ "https://hbomax.com",
129
+ "https://usps.com",
130
+ "https://music.youtube.com",
131
+ "https://ok.ru",
132
+ "https://wildberries.ru",
133
+ "https://office365.com",
134
+ "https://disneyplus.com",
135
+ "https://douyin.com",
136
+ "https://namu.wiki",
137
+ "https://adobe.com",
138
+ "https://shein.com",
139
+ "https://qq.com",
140
+ "https://amazon.co.uk",
141
+ "https://quora.com",
142
+ "https://faphouse.com",
143
+ "https://rutube.ru",
144
+ "https://theguardian.com",
145
+ "https://scribd.com",
146
+ "https://grok.com",
147
+ "https://zillow.com",
148
+ "https://dcinside.com",
149
+ "https://onlyfans.com",
150
+ ]
backend/db.py ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ RandomWeb β€” Database Helpers
3
+ Supabase client initialization and common query functions.
4
+ """
5
+ import logging
6
+ from datetime import datetime, timedelta, timezone
7
+ from urllib.parse import urlparse
8
+ from typing import Optional
9
+
10
+ from supabase import create_client, Client
11
+
12
+ from backend.config import (
13
+ SUPABASE_URL,
14
+ SUPABASE_SECRET_KEY,
15
+ SUPABASE_PUBLISHABLE_KEY,
16
+ RECHECK_INTERVAL_DAYS,
17
+ )
18
+
19
+ logger = logging.getLogger("randomweb.db")
20
+
21
+ # ─── Client Initialization ──────────────────────────────────
22
+ _client: Optional[Client] = None
23
+
24
+
25
def get_client() -> Client:
    """Lazily build and cache the module-wide Supabase client.

    Prefers the secret key (write access) and falls back to the
    publishable key.

    Raises:
        ValueError: when neither key is configured.
    """
    global _client
    if _client is not None:
        return _client

    # Priority: Secret Key (for writes) -> Publishable Key (fallback)
    key = SUPABASE_SECRET_KEY or SUPABASE_PUBLISHABLE_KEY
    if not key:
        logger.critical("❌ No Supabase API key found!")
        raise ValueError("SUPABASE_SECRET_KEY and SUPABASE_PUBLISHABLE_KEY are both empty.")

    _client = create_client(SUPABASE_URL, key)

    # "sb_"-prefixed keys are the newer managed format; anything else is a legacy JWT.
    key_type = "Managed (New)" if key.startswith("sb_") else "Legacy (JWT)"
    logger.info("βœ… Supabase client initialized (Type: %s) for %s", key_type, SUPABASE_URL)
    return _client
43
+
44
+
45
def extract_domain(url: str) -> str:
    """Return the host portion of *url*.

    For scheme-less inputs like "example.com/page", urlparse places
    everything in .path, so fall back to the first path segment.
    """
    pieces = urlparse(url)
    if pieces.netloc:
        return pieces.netloc
    return pieces.path.split("/")[0]
49
+
50
+
51
+ # ─── Insert / Upsert ────────────────────────────────────────
52
def upsert_website(
    url: str,
    source: str = "unknown",
    status: Optional[int] = None,
    is_active: bool = False,
) -> bool:
    """Insert or refresh a single website row (conflict key: url).

    Active sites get a next_check timestamp RECHECK_INTERVAL_DAYS in the
    future; inactive sites have next_check cleared.

    Returns:
        True on success; False on any error (logged, never raised).
    """
    try:
        checked_at = datetime.now(timezone.utc)
        if is_active:
            next_check = (checked_at + timedelta(days=RECHECK_INTERVAL_DAYS)).isoformat()
        else:
            next_check = None

        row = {
            "url": url,
            "domain": extract_domain(url),
            "source": source,
            "status": status,
            "is_active": is_active,
            "last_checked": checked_at.isoformat(),
            "next_check": next_check,
        }
        get_client().table("websites").upsert(row, on_conflict="url").execute()
        return True
    except Exception as e:
        logger.error("Failed to upsert %s: %s", url, e)
        return False
85
+
86
+
87
def bulk_upsert_websites(records: list[dict]) -> int:
    """Upsert many website rows at once (conflict key: url).

    Returns:
        The number of records sent on success; 0 for empty input or on
        any database error (logged, never raised).
    """
    if not records:
        return 0
    try:
        get_client().table("websites").upsert(records, on_conflict="url").execute()
    except Exception as e:
        logger.error("Bulk upsert failed (%d records): %s", len(records), e)
        return 0
    return len(records)
99
+
100
+
101
+ # ─── Queries ─────────────────────────────────────────────────
102
def get_random_active_url() -> Optional[str]:
    """Fetch one random active URL via the get_random_active_website RPC.

    Returns None when no rows are available or on any error (logged).
    """
    try:
        rows = get_client().rpc("get_random_active_website").execute().data
        if rows:
            return rows[0]["url"]
    except Exception as e:
        logger.error("Failed to get random URL: %s", e)
    return None
112
+
113
+
114
def search_websites(query: str, limit: int = 20) -> list[dict]:
    """Search active websites by URL or domain substring.

    Args:
        query: Raw user search text. Characters that are structurally
            significant in PostgREST filter syntax (commas and
            parentheses) are stripped before interpolation — previously
            a query like "a,b" corrupted the .or_() expression and
            failed the whole request.
        limit: Maximum number of rows to return.

    Returns:
        Row dicts with url/domain/is_active; [] on any error (logged).
    """
    # Commas separate conditions and parentheses group them inside a
    # PostgREST "or" filter, so they must not pass through verbatim.
    sanitized = query.replace(",", "").replace("(", "").replace(")", "")
    try:
        result = (
            get_client()
            .table("websites")
            .select("url, domain, is_active")
            .or_(f"url.ilike.%{sanitized}%,domain.ilike.%{sanitized}%")
            .eq("is_active", True)
            .limit(limit)
            .execute()
        )
        return result.data or []
    except Exception as e:
        logger.error("Search failed for '%s': %s", query, e)
        return []
130
+
131
+
132
def get_active_count() -> int:
    """Read the active website count from the stats singleton row (id=1).

    Returns 0 when the row is missing or on any error (logged).
    """
    try:
        rows = (
            get_client().table("stats").select("active_count").eq("id", 1).execute().data
        )
        return rows[0]["active_count"] if rows else 0
    except Exception as e:
        logger.error("Failed to get active count: %s", e)
        return 0
142
+
143
+
144
def get_total_count() -> int:
    """Read the total indexed-site count from the stats singleton row (id=1).

    Returns 0 when the row is missing or on any error (logged).
    """
    try:
        rows = (
            get_client().table("stats").select("total_count").eq("id", 1).execute().data
        )
        return rows[0]["total_count"] if rows else 0
    except Exception as e:
        logger.error("Failed to get total count: %s", e)
        return 0
154
+
155
+
156
def url_exists(url: str) -> bool:
    """Return True when *url* already has a row in the websites table.

    Errors are logged and reported as "not present" (False).
    """
    try:
        lookup = (
            get_client()
            .table("websites")
            .select("id")
            .eq("url", url)
            .limit(1)
        )
        return bool(lookup.execute().data)
    except Exception as e:
        logger.error("Failed to check URL existence: %s", e)
        return False
171
+
172
+
173
def get_urls_needing_recheck(limit: int = 100) -> list[dict]:
    """List active sites whose next_check timestamp has already passed.

    Args:
        limit: Maximum number of rows to return.

    Returns:
        Row dicts (id, url, domain) due for re-verification; [] on error.
    """
    cutoff = datetime.now(timezone.utc).isoformat()
    try:
        due = (
            get_client()
            .table("websites")
            .select("id, url, domain")
            .eq("is_active", True)
            .lte("next_check", cutoff)
            .limit(limit)
            .execute()
        )
        return due.data or []
    except Exception as e:
        logger.error("Failed to get recheck URLs: %s", e)
        return []
backend/main.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ RandomWeb β€” Main Application
3
+ FastAPI app with background workers for URL discovery, validation, and re-verification.
4
+ """
5
+ import asyncio
6
+ import logging
7
+ from contextlib import asynccontextmanager
8
+
9
+ from fastapi import FastAPI
10
+ from fastapi.middleware.cors import CORSMiddleware
11
+
12
+ from backend.api.routes import router
13
+ from backend.config import SEED_WEBSITES, SUPABASE_URL, SUPABASE_SECRET_KEY
14
+ from backend.db import get_client, extract_domain
15
+ from backend.workers.validator import run_validator, enqueue_url
16
+ from backend.workers.ct_log import run_ct_log_worker
17
+ from backend.workers.common_crawl import run_common_crawl_importer
18
+ from backend.workers.crawler import run_crawler
19
+ from backend.workers.scheduler import run_scheduler
20
+
21
+ # ─── Logging ─────────────────────────────────────────────────
22
+ logging.basicConfig(
23
+ level=logging.INFO,
24
+ format="%(asctime)s [%(name)s] %(levelname)s: %(message)s",
25
+ datefmt="%Y-%m-%d %H:%M:%S",
26
+ )
27
+ logger = logging.getLogger("randomweb")
28
+
29
+
30
async def seed_top_websites():
    """Queue every configured seed website for validation."""
    logger.info("Seeding %d top websites...", len(SEED_WEBSITES))
    for seed in SEED_WEBSITES:
        await enqueue_url(seed, source="seed")
    logger.info("All seed websites queued for validation")
36
+
37
+
38
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Manage background workers lifecycle.

    Runs at app startup (before `yield`): verifies the Supabase
    connection, then launches all discovery/validation workers as
    asyncio tasks. After `yield` (app shutdown): cancels every task and
    awaits them so cancellation completes before the process exits.
    """
    logger.info("=" * 60)
    logger.info("RandomWeb starting up")
    logger.info("Supabase URL: %s", SUPABASE_URL)
    logger.info("Secret key configured: %s", "Yes" if SUPABASE_SECRET_KEY else "No")
    logger.info("=" * 60)

    # Initialize Supabase client; a failure is logged but does not abort
    # startup — workers will surface their own errors.
    try:
        get_client()
        logger.info("Supabase client connected")
    except Exception as e:
        logger.error("Failed to connect to Supabase: %s", e)

    # Launch background workers
    tasks = []

    # 1. Validation worker (must start first — other workers enqueue to it)
    tasks.append(asyncio.create_task(run_validator(), name="validator"))

    # 2. Seed top websites
    tasks.append(asyncio.create_task(seed_top_websites(), name="seeder"))

    # 3. CT Log worker
    tasks.append(asyncio.create_task(run_ct_log_worker(), name="ct_log"))

    # 4. Common Crawl importer
    tasks.append(asyncio.create_task(run_common_crawl_importer(), name="common_crawl"))

    # 5. BFS Crawler
    tasks.append(asyncio.create_task(run_crawler(), name="crawler"))

    # 6. Re-verification scheduler
    tasks.append(asyncio.create_task(run_scheduler(), name="scheduler"))

    logger.info("All %d background workers launched", len(tasks))

    yield

    # Shutdown: cancel all tasks, then gather with return_exceptions so
    # CancelledError from each worker is absorbed rather than raised here.
    logger.info("Shutting down background workers...")
    for task in tasks:
        task.cancel()
    await asyncio.gather(*tasks, return_exceptions=True)
    logger.info("All workers stopped")
85
+
86
+
87
+ # ─── FastAPI App ─────────────────────────────────────────────
88
+ app = FastAPI(
89
+ title="RandomWeb",
90
+ description="Discover random websites from across the internet",
91
+ version="1.0.0",
92
+ lifespan=lifespan,
93
+ )
94
+
95
+ # CORS β€” allow frontend
96
+ app.add_middleware(
97
+ CORSMiddleware,
98
+ allow_origins=["*"],
99
+ allow_credentials=True,
100
+ allow_methods=["*"],
101
+ allow_headers=["*"],
102
+ )
103
+
104
+ # Mount API routes
105
+ app.include_router(router)
backend/requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ fastapi==0.115.6
2
+ uvicorn[standard]==0.34.0
3
+ supabase==2.11.0
4
+ aiohttp==3.11.11
5
+ aiolimiter==1.2.1
6
+ protego==0.3.1
7
+ beautifulsoup4==4.12.3
8
+ websockets==14.1
9
+ pydantic==2.10.4
backend/workers/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Background Workers
backend/workers/common_crawl.py ADDED
@@ -0,0 +1,187 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ RandomWeb β€” Common Crawl CDX Importer
3
+ Fetches URLs from the Common Crawl CDX Index API to seed the database
4
+ with a broad sample of the internet.
5
+ """
6
+ import asyncio
7
+ import logging
8
+ import random
9
+ from typing import Optional
10
+ from urllib.parse import urlparse
11
+
12
+ import aiohttp
13
+
14
+ from backend.config import (
15
+ COMMON_CRAWL_INDEX_URL,
16
+ COMMON_CRAWL_SAMPLE_SIZE,
17
+ COMMON_CRAWL_RESCAN_HOURS,
18
+ USER_AGENT,
19
+ REQUEST_TIMEOUT,
20
+ )
21
+ from backend.workers.validator import enqueue_url
22
+
23
+ logger = logging.getLogger("randomweb.common_crawl")
24
+
25
+ # Sample TLDs to query for broad coverage
26
+ SAMPLE_QUERIES = [
27
+ "*.com", "*.org", "*.net", "*.io", "*.co",
28
+ "*.edu", "*.gov", "*.dev", "*.app", "*.info",
29
+ "*.me", "*.tv", "*.co.uk", "*.de", "*.fr",
30
+ "*.jp", "*.ru", "*.br", "*.in", "*.ca",
31
+ "*.au", "*.nl", "*.it", "*.es", "*.ch",
32
+ "*.se", "*.no", "*.fi", "*.dk", "*.pl",
33
+ ]
34
+
35
+
36
+ async def _get_latest_crawl_index(
37
+ session: aiohttp.ClientSession,
38
+ ) -> Optional[str]:
39
+ """Fetch the latest Common Crawl index URL."""
40
+ try:
41
+ async with session.get(
42
+ COMMON_CRAWL_INDEX_URL,
43
+ timeout=aiohttp.ClientTimeout(total=30),
44
+ headers={"User-Agent": USER_AGENT},
45
+ ) as resp:
46
+ if resp.status != 200:
47
+ logger.error("Failed to fetch crawl index: HTTP %d", resp.status)
48
+ return None
49
+
50
+ data = await resp.json()
51
+ if data and len(data) > 0:
52
+ # Latest crawl is first in the list
53
+ cdx_api = data[0].get("cdx-api")
54
+ crawl_id = data[0].get("id", "unknown")
55
+ logger.info("Latest Common Crawl: %s", crawl_id)
56
+ return cdx_api
57
+
58
+ except Exception as e:
59
+ logger.error("Failed to get crawl index: %s", e)
60
+
61
+ return None
62
+
63
+
64
+ async def _query_cdx_for_domains(
65
+ session: aiohttp.ClientSession,
66
+ cdx_api: str,
67
+ query: str,
68
+ limit: int = 500,
69
+ ) -> list[str]:
70
+ """Query the CDX API for URLs matching a pattern."""
71
+ urls = []
72
+ try:
73
+ params = {
74
+ "url": query,
75
+ "output": "json",
76
+ "fl": "url",
77
+ "limit": str(limit),
78
+ "filter": "status:200",
79
+ }
80
+
81
+ async with session.get(
82
+ cdx_api,
83
+ params=params,
84
+ timeout=aiohttp.ClientTimeout(total=60),
85
+ headers={"User-Agent": USER_AGENT},
86
+ ) as resp:
87
+ if resp.status != 200:
88
+ logger.debug("CDX query failed for %s: HTTP %d", query, resp.status)
89
+ return urls
90
+
91
+ text = await resp.text()
92
+ lines = text.strip().split("\n")
93
+
94
+ for line in lines:
95
+ line = line.strip()
96
+ if not line or line.startswith("["):
97
+ continue
98
+ try:
99
+ # Lines can be JSON or plain URL
100
+ if line.startswith("{"):
101
+ import json
102
+ data = json.loads(line)
103
+ url = data.get("url", "")
104
+ elif line.startswith('"'):
105
+ url = line.strip('"')
106
+ else:
107
+ url = line
108
+
109
+ if url and url.startswith("http"):
110
+ # Normalize to homepage
111
+ parsed = urlparse(url)
112
+ normalized = f"https://{parsed.netloc}"
113
+ urls.append(normalized)
114
+ except Exception:
115
+ continue
116
+
117
+ except asyncio.TimeoutError:
118
+ logger.debug("CDX query timed out for %s", query)
119
+ except Exception as e:
120
+ logger.debug("CDX query error for %s: %s", query, e)
121
+
122
+ return urls
123
+
124
+
125
async def run_common_crawl_importer():
    """
    Main Common Crawl import loop.
    Fetches a broad sample of URLs from the CDX API and queues them.
    Runs once on startup, then rescans weekly
    (COMMON_CRAWL_RESCAN_HOURS). Never returns; intended to run as a
    long-lived asyncio task.
    """
    logger.info("Common Crawl importer starting")

    while True:
        try:
            async with aiohttp.ClientSession() as session:
                cdx_api = await _get_latest_crawl_index(session)
                if not cdx_api:
                    logger.warning("No CDX API available, retrying in 1 hour")
                    await asyncio.sleep(3600)
                    continue

                logger.info("Importing from CDX API: %s", cdx_api)
                total_queued = 0
                seen_domains = set()  # de-duplicate by host within this run

                # Shuffle queries for variety
                queries = SAMPLE_QUERIES.copy()
                random.shuffle(queries)

                # Spread the sample budget across all TLD queries, but
                # never request fewer than 50 results per query.
                per_query_limit = max(
                    50, COMMON_CRAWL_SAMPLE_SIZE // len(queries)
                )

                for query in queries:
                    if total_queued >= COMMON_CRAWL_SAMPLE_SIZE:
                        break

                    urls = await _query_cdx_for_domains(
                        session, cdx_api, query, limit=per_query_limit
                    )

                    for url in urls:
                        domain = urlparse(url).netloc
                        if domain and domain not in seen_domains:
                            seen_domains.add(domain)
                            await enqueue_url(url, source="common_crawl")
                            total_queued += 1

                            if total_queued >= COMMON_CRAWL_SAMPLE_SIZE:
                                break

                    # Be polite to the CDX API
                    await asyncio.sleep(2)

                logger.info(
                    "Common Crawl import complete: %d URLs queued", total_queued
                )

        except Exception as e:
            logger.error("Common Crawl importer error: %s", e)

        # Wait before next rescan
        logger.info(
            "Next Common Crawl rescan in %d hours",
            COMMON_CRAWL_RESCAN_HOURS,
        )
        await asyncio.sleep(COMMON_CRAWL_RESCAN_HOURS * 3600)
backend/workers/crawler.py ADDED
@@ -0,0 +1,305 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ RandomWeb β€” BFS Recursive Crawler
3
+ Breadth-first crawler that extracts and queues all hyperlinks from indexed pages
4
+ to continuously expand the known network graph.
5
+ """
6
+ import asyncio
7
+ import logging
8
+ import re
9
+ from collections import deque
10
+ from typing import Optional
11
+ from urllib.parse import urljoin, urlparse
12
+
13
+ import aiohttp
14
+ from aiolimiter import AsyncLimiter
15
+ from bs4 import BeautifulSoup
16
+ from protego import Protego
17
+
18
+ from backend.config import (
19
+ USER_AGENT,
20
+ REQUEST_TIMEOUT,
21
+ MAX_GLOBAL_CONCURRENCY,
22
+ PER_DOMAIN_RATE_LIMIT,
23
+ CRAWL_DELAY_DEFAULT,
24
+ MAX_CRAWL_DEPTH,
25
+ MAX_LINKS_PER_PAGE,
26
+ MAX_QUEUE_SIZE,
27
+ BLOCKED_TLDS,
28
+ )
29
+ from backend.workers.validator import enqueue_url
30
+ from backend.db import get_client
31
+
32
+ logger = logging.getLogger("randomweb.crawler")
33
+
34
# ─── State ───────────────────────────────────────────────────
# BFS frontier; bounded so memory stays capped (deque drops the oldest
# entry once maxlen is reached).
_crawl_queue: deque = deque(maxlen=MAX_QUEUE_SIZE)
# URLs already taken off the queue this process lifetime.
_visited: set = set()
# Soft cap for _visited; run_crawler evicts half when exceeded.
_MAX_VISITED_CACHE = 1_000_000
# Per-domain robots.txt parsers; None marks a fetch miss/failure.
_robots_cache: dict[str, Optional[Protego]] = {}
# Per-domain rate limiters (lazily created in _get_domain_limiter).
_domain_limiters: dict[str, AsyncLimiter] = {}

# File extensions to skip — static/binary assets that are never HTML pages.
SKIP_EXTENSIONS = {
    ".jpg", ".jpeg", ".png", ".gif", ".svg", ".webp", ".ico",
    ".css", ".js", ".woff", ".woff2", ".ttf", ".eot",
    ".pdf", ".doc", ".docx", ".xls", ".xlsx", ".ppt", ".pptx",
    ".zip", ".rar", ".7z", ".tar", ".gz",
    ".mp3", ".mp4", ".avi", ".mkv", ".mov", ".flv",
    ".exe", ".msi", ".dmg", ".apk",
}
50
+
51
+
52
def _get_domain_limiter(domain: str) -> AsyncLimiter:
    """Return the rate limiter for *domain*, creating it on first use."""
    limiter = _domain_limiters.get(domain)
    if limiter is None:
        limiter = AsyncLimiter(PER_DOMAIN_RATE_LIMIT, 1.0)
        _domain_limiters[domain] = limiter
    return limiter
56
+
57
+
58
async def _fetch_robots(
    session: aiohttp.ClientSession, domain: str
) -> Optional[Protego]:
    """Fetch and parse https://<domain>/robots.txt, with caching.

    Returns a Protego parser on a 200 response; returns None when the
    file is missing, non-200, or unreachable.  Both outcomes are cached
    for the process lifetime, so each domain is fetched at most once.
    NOTE(review): ssl=False disables certificate verification —
    presumably a deliberate reach-over-strictness choice; confirm.
    """
    if domain in _robots_cache:
        return _robots_cache[domain]

    try:
        async with session.get(
            f"https://{domain}/robots.txt",
            timeout=aiohttp.ClientTimeout(total=REQUEST_TIMEOUT),
            headers={"User-Agent": USER_AGENT},
            allow_redirects=True,
            ssl=False,
        ) as resp:
            if resp.status == 200:
                text = await resp.text()
                parser = Protego.parse(text)
                _robots_cache[domain] = parser
                return parser
    except Exception:
        # Network failure — fall through to the "no robots" path below.
        pass

    # Cache the miss so every URL on this domain doesn't refetch.
    _robots_cache[domain] = None
    return None
82
+
83
+
84
def _normalize_url(base_url: str, href: str) -> Optional[str]:
    """Normalize and validate a discovered URL.

    Resolves *href* against *base_url*, keeps only http(s) URLs, drops
    blocked TLDs and non-HTML file extensions, strips the fragment, and
    removes a trailing slash when there is no query string.

    Returns:
        The cleaned absolute URL (< 2000 chars), or None when the link
        should be discarded.
    """
    try:
        # Resolve relative URLs
        full_url = urljoin(base_url, href)
        parsed = urlparse(full_url)

        # Only HTTP/HTTPS
        if parsed.scheme not in ("http", "https"):
            return None

        # Skip blocked TLDs
        domain = parsed.netloc.lower()
        for tld in BLOCKED_TLDS:
            if domain.endswith(tld):
                return None

        # Skip file extensions we don't want
        path_lower = parsed.path.lower()
        for ext in SKIP_EXTENSIONS:
            if path_lower.endswith(ext):
                return None

        # Rebuild without the fragment.
        clean = f"{parsed.scheme}://{parsed.netloc}{parsed.path}"
        if parsed.query:
            clean += f"?{parsed.query}"
        else:
            # Remove trailing slash for consistency — but only when there
            # is no query string.  BUGFIX: the old rstrip ran on the full
            # URL and corrupted query values ending in "/".
            clean = clean.rstrip("/")

        return clean if len(clean) < 2000 else None

    except Exception:
        return None
119
+
120
+
121
async def _crawl_page(
    session: aiohttp.ClientSession,
    url: str,
    depth: int,
    semaphore: asyncio.Semaphore,
) -> list[str]:
    """
    Fetch a page and extract all hyperlinks.
    Returns list of discovered URLs (empty on any failure, robots
    disallow, non-200 status, or non-HTML content).

    Concurrency is bounded by the shared *semaphore* plus a per-domain
    rate limiter; robots.txt disallow rules and crawl-delay are honoured.
    NOTE(review): the *depth* parameter is accepted but never read in
    this body — depth limiting happens in the caller; confirm intended.
    """
    domain = urlparse(url).netloc
    limiter = _get_domain_limiter(domain)

    async with semaphore:
        async with limiter:
            # Check robots.txt
            robots = await _fetch_robots(session, domain)
            if robots and not robots.can_fetch(url, USER_AGENT):
                return []

            # Respect crawl delay (robots override wins when declared).
            delay = CRAWL_DELAY_DEFAULT
            if robots:
                d = robots.crawl_delay(USER_AGENT)
                if d is not None:
                    delay = d
            if delay > 0:
                await asyncio.sleep(delay)

            discovered = []
            try:
                async with session.get(
                    url,
                    timeout=aiohttp.ClientTimeout(total=REQUEST_TIMEOUT),
                    headers={
                        "User-Agent": USER_AGENT,
                        "Accept": "text/html",
                    },
                    allow_redirects=True,
                    ssl=False,
                ) as resp:
                    if resp.status != 200:
                        return []

                    # Only parse HTML responses.
                    content_type = resp.headers.get("Content-Type", "")
                    if "text/html" not in content_type:
                        return []

                    # Limit response body to avoid memory issues
                    body = await resp.text(errors="ignore")
                    if len(body) > 5_000_000:  # 5MB limit
                        body = body[:5_000_000]

                    soup = BeautifulSoup(body, "html.parser")
                    links = soup.find_all("a", href=True)

                    count = 0
                    for link in links:
                        # Cap extracted links per page.
                        if count >= MAX_LINKS_PER_PAGE:
                            break

                        href = link.get("href", "").strip()
                        if not href:
                            continue

                        normalized = _normalize_url(url, href)
                        if normalized and normalized not in _visited:
                            discovered.append(normalized)
                            count += 1

            except asyncio.TimeoutError:
                logger.debug("Timeout crawling %s", url)
            except Exception as e:
                logger.debug("Error crawling %s: %s", url, e)

            return discovered
197
+
198
+
199
async def seed_from_database():
    """Prime the crawl queue with up to 1000 active URLs from Supabase."""
    try:
        query = (
            get_client()
            .table("websites")
            .select("url")
            .eq("is_active", True)
            .limit(1000)
        )
        result = query.execute()
        rows = result.data or []

        # Queue every not-yet-visited URL at BFS depth 0.
        for row in rows:
            seed_url = row["url"]
            if seed_url not in _visited:
                _crawl_queue.append({"url": seed_url, "depth": 0})

        if rows:
            logger.info("Seeded crawler with %d URLs from database", len(rows))
    except Exception as e:
        logger.error("Failed to seed from database: %s", e)
218
+
219
+
220
async def run_crawler():
    """
    Main BFS crawler loop.

    Continuously pops batches of URLs off the crawl queue, fetches each
    page politely, queues the homepage of every discovered link for
    validation, and pushes the discovered links back onto the queue with
    the parent's depth + 1 so MAX_CRAWL_DEPTH bounds the traversal.
    """
    logger.info("BFS Crawler starting")

    # Wait for initial seeds to be validated
    await asyncio.sleep(30)

    # Seed from database
    await seed_from_database()

    semaphore = asyncio.Semaphore(MAX_GLOBAL_CONCURRENCY)
    connector = aiohttp.TCPConnector(
        limit=MAX_GLOBAL_CONCURRENCY,
        ttl_dns_cache=300,
        force_close=False,
    )

    async with aiohttp.ClientSession(connector=connector) as session:
        while True:
            try:
                if not _crawl_queue:
                    # Re-seed periodically
                    await seed_from_database()
                    if not _crawl_queue:
                        logger.debug("Crawler queue empty, waiting...")
                        await asyncio.sleep(60)
                        continue

                # Process a batch
                batch_size = min(10, len(_crawl_queue))
                tasks = []
                task_depths = []  # parallel to `tasks`: depth of each fetched page

                for _ in range(batch_size):
                    if not _crawl_queue:
                        break

                    item = _crawl_queue.popleft()
                    url = item["url"]
                    depth = item["depth"]

                    if url in _visited:
                        continue
                    _visited.add(url)

                    # Evict old entries from visited cache
                    if len(_visited) > _MAX_VISITED_CACHE:
                        to_remove = list(_visited)[:_MAX_VISITED_CACHE // 2]
                        for v in to_remove:
                            _visited.discard(v)

                    if depth <= MAX_CRAWL_DEPTH:
                        tasks.append(_crawl_page(session, url, depth, semaphore))
                        task_depths.append(depth)

                if tasks:
                    results = await asyncio.gather(*tasks, return_exceptions=True)

                    # BUGFIX: pair each result with its page's depth.  The
                    # original hard-coded `current_depth = 1` for every
                    # discovery, so MAX_CRAWL_DEPTH never actually bounded
                    # the BFS.
                    for depth, result in zip(task_depths, results):
                        if not isinstance(result, list):
                            continue
                        child_depth = depth + 1
                        for discovered_url in result:
                            # Extract homepage for validation
                            parsed = urlparse(discovered_url)
                            homepage = f"https://{parsed.netloc}"
                            await enqueue_url(homepage, source="crawler")

                            # Add to crawl queue for further BFS
                            if (
                                len(_crawl_queue) < MAX_QUEUE_SIZE
                                and discovered_url not in _visited
                                and child_depth <= MAX_CRAWL_DEPTH
                            ):
                                _crawl_queue.append({
                                    "url": discovered_url,
                                    "depth": child_depth,
                                })

                # Small delay between batches
                await asyncio.sleep(0.5)

            except Exception as e:
                logger.error("Crawler loop error: %s", e)
                await asyncio.sleep(10)
backend/workers/ct_log.py ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ RandomWeb β€” Certificate Transparency Log Worker
3
+ Connects to CertStream WebSocket to discover newly registered domains in real-time.
4
+ """
5
+ import asyncio
6
+ import json
7
+ import logging
8
+ from urllib.parse import urlparse
9
+
10
+ import websockets
11
+
12
+ from backend.config import (
13
+ CERTSTREAM_URL,
14
+ CT_LOG_BATCH_SIZE,
15
+ CT_LOG_RECONNECT_DELAY,
16
+ CT_LOG_MAX_RECONNECT_DELAY,
17
+ BLOCKED_TLDS,
18
+ )
19
+ from backend.workers.validator import enqueue_url
20
+ from backend.db import url_exists
21
+
22
+ logger = logging.getLogger("randomweb.ct_log")
23
+
24
+ # ─── Domain Filtering ───────────────────────────────────────
25
+ _seen_domains: set = set()
26
+ _MAX_SEEN_CACHE = 500_000
27
+
28
+
29
def _is_valid_domain(domain: str) -> bool:
    """Return True for domains worth queueing.

    Rejects empty/too-short names, wildcard patterns, bare IPv4
    addresses, blocked TLDs, dot-less names, and names longer than the
    253-character DNS limit.
    """
    if not domain or len(domain) < 4:
        return False

    # Drop a single leading wildcard label; any remaining "*" is invalid.
    domain = domain[2:] if domain.startswith("*.") else domain
    if "*" in domain:
        return False

    # Bare IPv4 address: every dot-separated label is numeric.
    if all(label.isdigit() for label in domain.split(".")):
        return False

    # Blocked TLDs.
    if any(domain.endswith(tld) for tld in BLOCKED_TLDS):
        return False

    # Needs at least one dot and must fit within DNS length limits.
    return "." in domain and len(domain) <= 253
59
+
60
+
61
def _deduplicate(domain: str) -> bool:
    """Record *domain* in the seen-cache; True only the first time seen."""
    if domain in _seen_domains:
        return False

    # When the cache hits its cap, drop roughly half of it.  Set iteration
    # order is arbitrary, so this is only an approximation of FIFO eviction.
    if len(_seen_domains) >= _MAX_SEEN_CACHE:
        for stale in list(_seen_domains)[:_MAX_SEEN_CACHE // 2]:
            _seen_domains.discard(stale)

    _seen_domains.add(domain)
    return True
76
+
77
+
78
async def _process_message(message: dict):
    """Extract domains from one CertStream message and queue the new ones."""
    try:
        if message.get("message_type") != "certificate_update":
            return

        leaf = message.get("data", {}).get("leaf_cert", {})
        for raw in leaf.get("all_domains", []):
            # Drop a leading wildcard label, then canonicalize.
            candidate = raw[2:] if raw.startswith("*.") else raw
            candidate = candidate.lower().strip()

            if not _is_valid_domain(candidate):
                continue
            if not _deduplicate(candidate):
                continue

            await enqueue_url(f"https://{candidate}", source="ct_log")

    except Exception as e:
        logger.debug("Error processing CT message: %s", e)
107
+
108
+
109
async def run_ct_log_worker():
    """
    Main CT log worker loop.

    Connects to the CertStream WebSocket, parses certificate-update
    messages, and queues newly seen domains for validation.
    Auto-reconnects with exponential backoff: the delay doubles on each
    failure up to CT_LOG_MAX_RECONNECT_DELAY, and resets after a
    successful connection.
    """
    logger.info("CT Log worker starting — connecting to %s", CERTSTREAM_URL)
    reconnect_delay = CT_LOG_RECONNECT_DELAY

    while True:
        try:
            async with websockets.connect(
                CERTSTREAM_URL,
                ping_interval=30,
                ping_timeout=10,
                close_timeout=5,
                max_size=2**20,  # 1MB max message size
            ) as ws:
                logger.info("Connected to CertStream")
                reconnect_delay = CT_LOG_RECONNECT_DELAY  # Reset on success

                async for raw_message in ws:
                    try:
                        message = json.loads(raw_message)
                        await _process_message(message)
                    except json.JSONDecodeError:
                        # Malformed frame — skip it, keep the stream alive.
                        continue
                    except Exception as e:
                        logger.debug("Message processing error: %s", e)

        except websockets.exceptions.ConnectionClosed as e:
            logger.warning("CertStream connection closed: %s", e)
        except Exception as e:
            logger.warning("CertStream connection error: %s", e)

        # Exponential backoff reconnect
        logger.info("Reconnecting to CertStream in %ds...", reconnect_delay)
        await asyncio.sleep(reconnect_delay)
        reconnect_delay = min(reconnect_delay * 2, CT_LOG_MAX_RECONNECT_DELAY)
backend/workers/scheduler.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ RandomWeb β€” Re-verification Scheduler
3
+ Rolling yearly re-verification of indexed websites.
4
+ Politely re-checks active URLs and toggles visibility on failure.
5
+ """
6
+ import asyncio
7
+ import logging
8
+ from datetime import datetime, timezone
9
+
10
+ from backend.config import SCHEDULER_INTERVAL_SECONDS, SCHEDULER_BATCH_SIZE
11
+ from backend.db import get_urls_needing_recheck
12
+ from backend.workers.validator import enqueue_url
13
+
14
+ logger = logging.getLogger("randomweb.scheduler")
15
+
16
+
17
async def run_scheduler():
    """
    Background scheduler that continuously checks for URLs due re-verification.

    Every SCHEDULER_INTERVAL_SECONDS it asks the database for up to
    SCHEDULER_BATCH_SIZE URLs whose re-check is due and routes them
    through the validation queue with source="recheck".
    """
    logger.info("Re-verification scheduler started (interval: %ds)", SCHEDULER_INTERVAL_SECONDS)

    # Initial delay to let the system warm up
    await asyncio.sleep(120)

    while True:
        try:
            # NOTE(review): called synchronously on the event loop —
            # presumably a blocking Supabase query; confirm it is fast
            # enough not to stall other workers.
            urls = get_urls_needing_recheck(limit=SCHEDULER_BATCH_SIZE)

            if urls:
                logger.info("Re-verifying %d URLs", len(urls))
                for record in urls:
                    await enqueue_url(record["url"], source="recheck")
                    # Small delay between queuing to avoid flooding
                    await asyncio.sleep(0.1)

                logger.info("Queued %d URLs for re-verification", len(urls))
            else:
                logger.debug("No URLs due for re-verification")

        except Exception as e:
            logger.error("Scheduler error: %s", e)

        # Wait until next check
        await asyncio.sleep(SCHEDULER_INTERVAL_SECONDS)
backend/workers/validator.py ADDED
@@ -0,0 +1,259 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ RandomWeb β€” Polite Async HTTP Validator
3
+ Validates discovered URLs with rate limiting, robots.txt compliance,
4
+ clear user-agent identification, and timeout rules.
5
+ """
6
+ import asyncio
7
+ import logging
8
+ from datetime import datetime, timedelta, timezone
9
+ from typing import Optional
10
+ from urllib.parse import urlparse
11
+
12
+ import aiohttp
13
+ from aiolimiter import AsyncLimiter
14
+ from protego import Protego
15
+
16
+ from backend.config import (
17
+ USER_AGENT,
18
+ REQUEST_TIMEOUT,
19
+ VALIDATION_CONCURRENCY,
20
+ PER_DOMAIN_RATE_LIMIT,
21
+ CRAWL_DELAY_DEFAULT,
22
+ RECHECK_INTERVAL_DAYS,
23
+ )
24
+ from backend.db import get_client, extract_domain
25
+
26
+ logger = logging.getLogger("randomweb.validator")
27
+
28
+ # ─── Shared State ────────────────────────────────────────────
29
+ _validation_queue: asyncio.Queue = asyncio.Queue(maxsize=50_000)
30
+ _robots_cache: dict[str, Optional[Protego]] = {}
31
+ _domain_limiters: dict[str, AsyncLimiter] = {}
32
+ _semaphore: Optional[asyncio.Semaphore] = None
33
+
34
+
35
def get_validation_queue() -> asyncio.Queue:
    """Return the shared module-level validation queue."""
    return _validation_queue
37
+
38
+
39
async def enqueue_url(url: str, source: str = "unknown"):
    """Queue *url* for validation; drop it (with a warning) when full."""
    item = {"url": url, "source": source}
    try:
        _validation_queue.put_nowait(item)
    except asyncio.QueueFull:
        logger.warning("Validation queue full, dropping: %s", url)
45
+
46
+
47
def _get_domain_limiter(domain: str) -> AsyncLimiter:
    """Get or create a per-domain rate limiter."""
    try:
        return _domain_limiters[domain]
    except KeyError:
        limiter = AsyncLimiter(PER_DOMAIN_RATE_LIMIT, 1.0)
        _domain_limiters[domain] = limiter
        return limiter
54
+
55
+
56
async def _fetch_robots_txt(
    session: aiohttp.ClientSession, domain: str
) -> Optional[Protego]:
    """Fetch and parse robots.txt for a domain. Cached.

    Returns a Protego parser on a 200 response; None for missing,
    non-200, or unreachable robots.txt.  Both outcomes are cached for
    the process lifetime, so each domain is fetched at most once.
    NOTE(review): ssl=False disables certificate verification —
    presumably deliberate; confirm.
    """
    if domain in _robots_cache:
        return _robots_cache[domain]

    robots_url = f"https://{domain}/robots.txt"
    try:
        async with session.get(
            robots_url,
            timeout=aiohttp.ClientTimeout(total=REQUEST_TIMEOUT),
            headers={"User-Agent": USER_AGENT},
            allow_redirects=True,
            ssl=False,
        ) as resp:
            if resp.status == 200:
                text = await resp.text()
                parser = Protego.parse(text)
                _robots_cache[domain] = parser
                return parser
    except Exception:
        # Network failure — treat like a missing robots.txt below.
        pass

    # Cache the miss so we don't refetch on every URL of this domain.
    _robots_cache[domain] = None
    return None
82
+
83
+
84
async def _can_fetch(
    session: aiohttp.ClientSession, url: str
) -> tuple[bool, float]:
    """
    Consult robots.txt for *url*.

    Returns (allowed, crawl_delay).  When no robots.txt is available the
    URL is treated as allowed with the default crawl delay; a declared
    crawl-delay overrides the default.
    """
    robots = await _fetch_robots_txt(session, extract_domain(url))

    if robots is None:
        return True, CRAWL_DELAY_DEFAULT

    declared_delay = robots.crawl_delay(USER_AGENT)
    effective_delay = (
        CRAWL_DELAY_DEFAULT if declared_delay is None else declared_delay
    )
    return robots.can_fetch(url, USER_AGENT), effective_delay
103
+
104
+
105
async def validate_url(
    session: aiohttp.ClientSession,
    url: str,
    source: str = "unknown",
) -> Optional[dict]:
    """
    Validate a single URL. Returns a record dict if successful, else None.

    Steps:
      1. Rate-limit per domain and check robots.txt (honouring its
         crawl-delay when declared).
      2. Probe with HEAD; fall back to GET when HEAD raises OR when the
         server rejects the HEAD method (405/501).
      3. Return the website record (None only when robots.txt forbids
         the fetch).

    A final status of exactly 200 marks the site active; active sites
    get a next_check RECHECK_INTERVAL_DAYS in the future.
    """
    domain = extract_domain(url)
    limiter = _get_domain_limiter(domain)

    # Rate limit per domain
    async with limiter:
        # Check robots.txt
        allowed, delay = await _can_fetch(session, url)
        if not allowed:
            logger.debug("Blocked by robots.txt: %s", url)
            return None

        # Respect crawl delay
        if delay > 0:
            await asyncio.sleep(delay)

        now = datetime.now(timezone.utc).isoformat()
        status_code = None

        try:
            # Try HEAD first (lighter)
            async with session.head(
                url,
                timeout=aiohttp.ClientTimeout(total=REQUEST_TIMEOUT),
                headers={"User-Agent": USER_AGENT},
                allow_redirects=True,
                ssl=False,
            ) as resp:
                status_code = resp.status
        except Exception:
            status_code = None

        # BUGFIX: the original fell back to GET only when HEAD *raised*.
        # Many servers answer HEAD with 405 Method Not Allowed (or 501)
        # while serving GET fine, which wrongly marked live sites
        # inactive.  Retry with GET in those cases too.
        if status_code is None or status_code in (405, 501):
            try:
                async with session.get(
                    url,
                    timeout=aiohttp.ClientTimeout(total=REQUEST_TIMEOUT),
                    headers={"User-Agent": USER_AGENT},
                    allow_redirects=True,
                    ssl=False,
                ) as resp:
                    status_code = resp.status
            except Exception as e:
                logger.debug("Validation failed for %s: %s", url, e)
                status_code = None

        is_active = status_code == 200
        # Only active sites are scheduled for a future re-check.
        next_check = (
            (datetime.now(timezone.utc) + timedelta(days=RECHECK_INTERVAL_DAYS)).isoformat()
            if is_active
            else None
        )

        record = {
            "url": url,
            "domain": domain,
            "source": source,
            "status": status_code,
            "is_active": is_active,
            "last_checked": now,
            "next_check": next_check,
        }

        return record
178
+
179
+
180
async def _process_batch(
    session: aiohttp.ClientSession,
    batch: list[dict],
) -> list[dict]:
    """Validate a batch of URLs concurrently.

    Returns the successful result records.  None results (URLs skipped
    by robots.txt) are dropped; task exceptions are logged.
    """
    tasks = [
        validate_url(session, item["url"], item.get("source", "unknown"))
        for item in batch
    ]
    results = await asyncio.gather(*tasks, return_exceptions=True)

    records = []
    for result in results:
        if isinstance(result, Exception):
            logger.error("Validation task error: %s", result)
        elif isinstance(result, dict):
            # isinstance already excludes None (the old extra
            # `result is not None` check was redundant).
            records.append(result)

    return records
199
+
200
+
201
async def run_validator():
    """
    Main validation loop.

    Continuously drains the validation queue, validates URLs in batches
    of up to 50, and bulk-upserts the resulting records into the
    Supabase "websites" table (conflict key: "url").
    """
    # NOTE(review): _semaphore is initialized here but never read in this
    # function — presumably other code consumes the module global; confirm.
    global _semaphore
    _semaphore = asyncio.Semaphore(VALIDATION_CONCURRENCY)

    logger.info("Validation worker started")

    connector = aiohttp.TCPConnector(
        limit=VALIDATION_CONCURRENCY,  # cap total open connections
        ttl_dns_cache=300,             # reuse DNS lookups for 5 minutes
        force_close=False,             # allow keep-alive connection reuse
    )

    async with aiohttp.ClientSession(connector=connector) as session:
        while True:
            try:
                # Collect a batch
                batch = []
                try:
                    # Wait (up to 5s) for at least one item.
                    item = await asyncio.wait_for(
                        _validation_queue.get(), timeout=5.0
                    )
                    batch.append(item)
                except asyncio.TimeoutError:
                    # Queue idle — back off briefly and poll again.
                    await asyncio.sleep(1)
                    continue

                # Drain up to batch size (non-blocking).
                while len(batch) < 50 and not _validation_queue.empty():
                    try:
                        batch.append(_validation_queue.get_nowait())
                    except asyncio.QueueEmpty:
                        break

                if batch:
                    logger.info("Validating batch of %d URLs", len(batch))
                    records = await _process_batch(session, batch)

                    if records:
                        # Bulk upsert to Supabase
                        try:
                            get_client().table("websites").upsert(
                                records, on_conflict="url"
                            ).execute()
                            active = sum(1 for r in records if r["is_active"])
                            logger.info(
                                "Upserted %d records (%d active)",
                                len(records), active,
                            )
                        except Exception as e:
                            logger.error("Bulk upsert failed: %s", e)

            except Exception as e:
                # Never let the worker die; log, pause, and continue.
                logger.error("Validator loop error: %s", e)
                await asyncio.sleep(5)
deploy_hf.sh ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
# Deploy the current repository to a Hugging Face Space via git push.

# Configuration: Update these values
# ==========================================
HF_USERNAME="PinkAlpaca"
SPACE_NAME="RandomWeb"
# ==========================================

# Colors for output
GREEN='\033[0;32m'
BLUE='\033[0;34m'
RED='\033[0;31m'
NC='\033[0m' # No Color

echo -e "${BLUE}==========================================${NC}"
echo -e "${BLUE} Starting Hugging Face Deployment${NC}"
echo -e "${BLUE}==========================================${NC}"

# Check for git
if ! command -v git &> /dev/null; then
    echo -e "${RED}Error: git is not installed.${NC}"
    exit 1
fi

# Ensure local git repo is initialized
if [ ! -d ".git" ]; then
    echo "Initializing local git repository..."
    git init
    git add .
    git commit -m "Initial commit for HF deployment"
fi

# Confirm username is updated
if [ "$HF_USERNAME" == "UPDATE_WITH_YOUR_HF_USERNAME" ]; then
    echo -e "${RED}Error: Please edit this script and set your HF_USERNAME.${NC}"
    exit 1
fi

# Set remote URL
REMOTE_URL="https://huggingface.co/spaces/${HF_USERNAME}/${SPACE_NAME}"
echo -e "Target Space: ${REMOTE_URL}"

# Check if 'huggingface' remote exists, add if not
if ! git remote | grep -q "huggingface"; then
    echo "Adding Hugging Face remote..."
    git remote add huggingface "${REMOTE_URL}"
else
    echo "Hugging Face remote already exists. Updating URL..."
    git remote set-url huggingface "${REMOTE_URL}"
fi

# Stage all files
git add .

# Commit changes (--allow-empty so re-deploys without changes still work)
COMMIT_MSG="Deploy: $(date '+%Y-%m-%d %H:%M:%S')"
git commit -m "$COMMIT_MSG" --allow-empty

# Push to Hugging Face
echo -e "${GREEN}Pushing to Hugging Face...${NC}"
echo "--------------------------------------------------------"
echo "TIP: Use your Hugging Face Access Token as the password."
echo "--------------------------------------------------------"

# BUGFIX: test the push's exit status directly instead of inspecting $?
# afterwards — any command between the push and the check would clobber $?.
if git push huggingface main --force; then
    echo -e "${GREEN}SUCCESS! Your Space is building at: ${REMOTE_URL}${NC}"
    echo "View progress here: ${REMOTE_URL}?logs=build"
else
    echo -e "${RED}Deployment failed. Please check your credentials or network status.${NC}"
fi
frontend/app.js ADDED
@@ -0,0 +1,319 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * RandomWeb β€” Frontend Application Logic
3
+ * Handles random redirect, search, submission, and real-time counter.
4
+ */
5
+
6
+ // ─── Configuration ──────────────────────────────────────────
7
+ const SUPABASE_URL = 'https://oyxgydfmaocqxictnmou.supabase.co';
8
+ const SUPABASE_KEY = 'sb_publishable_9l3BSqU-mIdYLEgZB2Pv2Q_UUZXU385';
9
+ const API_BASE = '/api';
10
+
11
+ // ─── Supabase Client ────────────────────────────────────────
12
+ const supabase = window.supabase.createClient(SUPABASE_URL, SUPABASE_KEY);
13
+
14
+ // ─── DOM Elements ───────────────────────────────────────────
15
+ const randomBtn = document.getElementById('random-btn');
16
+ const btnText = randomBtn.querySelector('.btn-text');
17
+ const searchInput = document.getElementById('search-input');
18
+ const searchResults = document.getElementById('search-results');
19
+ const submitForm = document.getElementById('submit-form');
20
+ const submitInput = document.getElementById('submit-input');
21
+ const submitBtn = document.getElementById('submit-btn');
22
+ const submitFeedback = document.getElementById('submit-feedback');
23
+ const counterValue = document.getElementById('counter-value');
24
+ const headerActiveCount = document.getElementById('header-active-count');
25
+ const toastContainer = document.getElementById('toast-container');
26
+
27
+ // ─── State ──────────────────────────────────────────────────
28
+ let currentCount = 0;
29
+ let targetCount = 0;
30
+ let animationFrame = null;
31
+ let searchDebounceTimer = null;
32
+
33
+ // ─── Utility Functions ──────────────────────────────────────
34
/**
 * Compact display formatting: millions -> "1.23M", thousands -> "4.5K",
 * smaller values -> locale-formatted digits.
 */
function formatNumber(num) {
    const MILLION = 1_000_000;
    const THOUSAND = 1_000;

    if (num >= MILLION) return (num / MILLION).toFixed(2) + 'M';
    if (num >= THOUSAND) return (num / THOUSAND).toFixed(1) + 'K';
    return num.toLocaleString();
}
43
+
44
/** Full (non-abbreviated) locale formatting, e.g. "1,234,567" in en-US. */
function formatNumberFull(num) {
    return num.toLocaleString();
}
47
+
48
/**
 * Show a transient toast message of the given type ('info' by default).
 * The toast stays visible for 4s, then plays a 300ms exit animation
 * before being removed from the DOM.
 */
function showToast(message, type = 'info') {
    const VISIBLE_MS = 4000;
    const EXIT_MS = 300;

    const toast = document.createElement('div');
    toast.className = `toast toast-${type}`;
    toast.textContent = message;
    toastContainer.appendChild(toast);

    setTimeout(() => {
        toast.classList.add('toast-exiting');
        setTimeout(() => toast.remove(), EXIT_MS);
    }, VISIBLE_MS);
}
59
+
60
// ─── Animated Counter ───────────────────────────────────────
/**
 * Smoothly animate both counters (hero value + header badge) from the
 * currently displayed value to `target` with an ease-out cubic curve.
 * Cancels any in-flight animation first; duration scales with the size
 * of the jump, clamped to 300–1500 ms.
 */
function animateCounter(target) {
    targetCount = target;

    if (animationFrame) {
        cancelAnimationFrame(animationFrame);
    }

    const startCount = currentCount;
    const diff = target - startCount;
    const duration = Math.min(1500, Math.max(300, Math.abs(diff) * 10));
    const startTime = performance.now();

    function step(timestamp) {
        const elapsed = timestamp - startTime;
        const progress = Math.min(elapsed / duration, 1);

        // Ease-out cubic
        const eased = 1 - Math.pow(1 - progress, 3);
        currentCount = Math.round(startCount + diff * eased);

        counterValue.textContent = formatNumberFull(currentCount);
        headerActiveCount.textContent = formatNumber(currentCount);

        if (progress < 1) {
            animationFrame = requestAnimationFrame(step);
        } else {
            // Snap exactly to the target on the final frame.
            currentCount = target;
            counterValue.textContent = formatNumberFull(target);
            headerActiveCount.textContent = formatNumber(target);
        }
    }

    animationFrame = requestAnimationFrame(step);
}
95
+
96
// ─── Fetch Stats (Initial) ─────────────────────────────────
/**
 * Fetch the active-website count from the backend /api/stats endpoint
 * and animate the counters toward it.  When the API call fails, fall
 * back to reading the Supabase `stats` table (row id=1) directly.
 */
async function fetchStats() {
    try {
        const response = await fetch(`${API_BASE}/stats`);
        if (response.ok) {
            const data = await response.json();
            animateCounter(data.active_count);
        }
    } catch (err) {
        console.warn('Failed to fetch stats:', err);

        // Fallback: query Supabase directly
        try {
            const { data, error } = await supabase
                .from('stats')
                .select('active_count')
                .eq('id', 1)
                .single();

            if (!error && data) {
                animateCounter(data.active_count);
            }
        } catch (e) {
            console.warn('Supabase fallback also failed:', e);
        }
    }
}
123
+
124
// ─── Realtime Subscription ──────────────────────────────────
/**
 * Subscribe to Supabase realtime UPDATE events on stats row id=1 so the
 * counters animate whenever the backend bumps the active count.
 * NOTE(review): the channel handle is created but never stored or
 * returned, so there is no way to unsubscribe later — confirm intended.
 */
function setupRealtimeSubscription() {
    const channel = supabase
        .channel('stats-realtime')
        .on(
            'postgres_changes',
            {
                event: 'UPDATE',
                schema: 'public',
                table: 'stats',
                filter: 'id=eq.1',
            },
            (payload) => {
                const newCount = payload.new.active_count;
                // Only re-animate when the value actually changed.
                if (newCount !== undefined && newCount !== targetCount) {
                    animateCounter(newCount);
                }
            }
        )
        .subscribe((status) => {
            if (status === 'SUBSCRIBED') {
                console.log('Realtime subscription active');
            }
        });
}
149
+
150
+ // Also poll every 30 seconds as a fallback
151
+ setInterval(fetchStats, 30000);
152
+
153
// ─── Random Button ──────────────────────────────────────────
// Click handler: ask the backend for a random active site and open it
// in a new tab.  Falls back to a direct Supabase RPC when the API call
// fails; shows a toast when nothing is available or everything errors.
randomBtn.addEventListener('click', async () => {
    // Ignore clicks while a request is already in flight.
    if (randomBtn.classList.contains('loading')) return;

    randomBtn.classList.add('loading');
    btnText.textContent = 'Finding a website...';

    try {
        const response = await fetch(`${API_BASE}/random`);

        if (response.ok) {
            const data = await response.json();
            if (data.url) {
                btnText.textContent = 'Redirecting...';

                // Small delay for visual feedback
                setTimeout(() => {
                    window.open(data.url, '_blank', 'noopener,noreferrer');
                    randomBtn.classList.remove('loading');
                    btnText.textContent = 'Take Me Somewhere Random';
                }, 500);
                return;
            }
        }

        // API failed, try direct Supabase query
        const { data: websites, error } = await supabase
            .rpc('get_random_active_website');

        if (!error && websites && websites.length > 0) {
            btnText.textContent = 'Redirecting...';
            setTimeout(() => {
                window.open(websites[0].url, '_blank', 'noopener,noreferrer');
                randomBtn.classList.remove('loading');
                btnText.textContent = 'Take Me Somewhere Random';
            }, 500);
            return;
        }

        showToast('No active websites found yet. The system is still indexing.', 'info');
    } catch (err) {
        console.error('Random fetch error:', err);
        showToast('Failed to get a random website. Please try again.', 'error');
    }

    // Reset button state for the non-redirect paths.
    randomBtn.classList.remove('loading');
    btnText.textContent = 'Take Me Somewhere Random';
});
201
+
202
+ // ─── Search ─────────────────────────────────────────────────
203
+ searchInput.addEventListener('input', (e) => {
204
+ const query = e.target.value.trim();
205
+
206
+ clearTimeout(searchDebounceTimer);
207
+
208
+ if (query.length < 2) {
209
+ searchResults.innerHTML = '';
210
+ return;
211
+ }
212
+
213
+ searchDebounceTimer = setTimeout(() => performSearch(query), 300);
214
+ });
215
+
216
+ async function performSearch(query) {
217
+ try {
218
+ const response = await fetch(
219
+ `${API_BASE}/search?q=${encodeURIComponent(query)}&limit=15`
220
+ );
221
+
222
+ if (response.ok) {
223
+ const results = await response.json();
224
+ renderSearchResults(results);
225
+ return;
226
+ }
227
+
228
+ // Fallback to direct Supabase
229
+ const { data, error } = await supabase
230
+ .from('websites')
231
+ .select('url, domain, is_active')
232
+ .or(`url.ilike.%${query}%,domain.ilike.%${query}%`)
233
+ .eq('is_active', true)
234
+ .limit(15);
235
+
236
+ if (!error && data) {
237
+ renderSearchResults(data);
238
+ }
239
+ } catch (err) {
240
+ console.error('Search error:', err);
241
+ }
242
+ }
243
+
244
+ function renderSearchResults(results) {
245
+ if (!results || results.length === 0) {
246
+ searchResults.innerHTML = `
247
+ <div class="search-empty">
248
+ No matching websites found. Try a different search term.
249
+ </div>
250
+ `;
251
+ return;
252
+ }
253
+
254
+ searchResults.innerHTML = results
255
+ .map(
256
+ (r) => `
257
+ <a href="${escapeHtml(r.url)}" target="_blank" rel="noopener noreferrer"
258
+ class="search-result-item">
259
+ <div>
260
+ <div class="result-url">${escapeHtml(r.url)}</div>
261
+ <div class="result-domain">${escapeHtml(r.domain)}</div>
262
+ </div>
263
+ <span class="result-arrow">β†’</span>
264
+ </a>
265
+ `
266
+ )
267
+ .join('');
268
+ }
269
+
270
+ function escapeHtml(text) {
271
+ const div = document.createElement('div');
272
+ div.textContent = text;
273
+ return div.innerHTML;
274
+ }
275
+
276
+ // ─── Submit Form ────────────────────────────────────────────
277
+ submitForm.addEventListener('submit', async (e) => {
278
+ e.preventDefault();
279
+
280
+ const url = submitInput.value.trim();
281
+ if (!url) return;
282
+
283
+ submitBtn.disabled = true;
284
+ submitBtn.textContent = 'Submitting...';
285
+ submitFeedback.className = 'submit-feedback';
286
+ submitFeedback.style.display = 'none';
287
+
288
+ try {
289
+ const response = await fetch(`${API_BASE}/submit`, {
290
+ method: 'POST',
291
+ headers: { 'Content-Type': 'application/json' },
292
+ body: JSON.stringify({ url }),
293
+ });
294
+
295
+ const data = await response.json();
296
+
297
+ if (response.ok) {
298
+ submitFeedback.className = 'submit-feedback success';
299
+ submitFeedback.textContent = data.message || 'URL submitted successfully!';
300
+ submitInput.value = '';
301
+ } else {
302
+ submitFeedback.className = 'submit-feedback error';
303
+ submitFeedback.textContent =
304
+ data.detail || 'Failed to submit URL. Please check the format.';
305
+ }
306
+ } catch (err) {
307
+ submitFeedback.className = 'submit-feedback error';
308
+ submitFeedback.textContent = 'Network error. Please try again.';
309
+ }
310
+
311
+ submitBtn.disabled = false;
312
+ submitBtn.textContent = 'Submit URL';
313
+ });
314
+
315
+ // ─── Initialize ─────────────────────────────────────────────
316
+ document.addEventListener('DOMContentLoaded', () => {
317
+ fetchStats();
318
+ setupRealtimeSubscription();
319
+ });
frontend/index.html ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>RandomWeb β€” Discover Random Websites from Across the Internet</title>
7
+ <meta name="description" content="Explore the web randomly. RandomWeb discovers, indexes, and validates websites from across the entire internet. Click and go anywhere.">
8
+ <meta name="theme-color" content="#0a0e1a">
9
+ <meta property="og:title" content="RandomWeb β€” Discover Random Websites">
10
+ <meta property="og:description" content="One click. One random website. Explore the entire internet.">
11
+ <meta property="og:type" content="website">
12
+ <link rel="icon" type="image/svg+xml" href="data:image/svg+xml,<svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 100 100'><text y='.9em' font-size='90'>🌐</text></svg>">
13
+ <link rel="stylesheet" href="styles.css">
14
+ </head>
15
+ <body>
16
+
17
+ <!-- Animated background -->
18
+ <div class="bg-grid"></div>
19
+ <div class="orb orb-1"></div>
20
+ <div class="orb orb-2"></div>
21
+ <div class="orb orb-3"></div>
22
+
23
+ <div class="app">
24
+
25
+ <!-- ─── Header ──────────────────────────────── -->
26
+ <header class="header fade-in">
27
+ <div class="container">
28
+ <a href="/" class="logo" id="logo-link">
29
+ <div class="logo-icon">🌐</div>
30
+ <span class="logo-text">RandomWeb</span>
31
+ </a>
32
+ <div class="header-stats">
33
+ <span class="pulse-dot"></span>
34
+ <span id="header-active-count">β€”</span> sites indexed
35
+ </div>
36
+ </div>
37
+ </header>
38
+
39
+ <!-- ─── Main Content ────────────────────────── -->
40
+ <main>
41
+
42
+ <!-- Hero Section -->
43
+ <section class="hero">
44
+ <div class="container">
45
+ <div class="hero-badge slide-up">
46
+ <span class="dot"></span>
47
+ <span>Live β€” Indexing the web in real-time</span>
48
+ </div>
49
+
50
+ <h1 class="slide-up slide-up-delay-1">
51
+ Discover the<br>
52
+ <span class="gradient-text">Entire Web</span>
53
+ </h1>
54
+
55
+ <p class="slide-up slide-up-delay-2">
56
+ One click takes you to a random website from our ever-growing index.
57
+ Powered by Certificate Transparency logs, Common Crawl, and recursive crawling.
58
+ </p>
59
+
60
+ <div class="random-btn-wrapper slide-up slide-up-delay-3">
61
+ <button class="random-btn" id="random-btn" type="button">
62
+ <span class="btn-icon">🎲</span>
63
+ <span class="btn-text">Take Me Somewhere Random</span>
64
+ </button>
65
+ </div>
66
+ </div>
67
+ </section>
68
+
69
+ <!-- Search Section -->
70
+ <section class="search-section slide-up slide-up-delay-3">
71
+ <div class="container">
72
+ <div class="glass-card" style="padding: var(--space-xl);">
73
+ <h2>πŸ” Search the Index</h2>
74
+ <div class="search-box">
75
+ <span class="search-icon">βŒ•</span>
76
+ <input
77
+ type="text"
78
+ id="search-input"
79
+ placeholder="Search for websites... (e.g., github.com, news)"
80
+ autocomplete="off"
81
+ spellcheck="false"
82
+ >
83
+ </div>
84
+ <div class="search-results" id="search-results"></div>
85
+ </div>
86
+ </div>
87
+ </section>
88
+
89
+ <!-- Submit Section -->
90
+ <section class="submit-section slide-up slide-up-delay-4">
91
+ <div class="container">
92
+ <div class="glass-card" style="padding: var(--space-xl);">
93
+ <h2>βž• Submit a Website</h2>
94
+ <p class="subtitle">
95
+ Know a website that's not in our index? Submit it and we'll validate and add it.
96
+ </p>
97
+ <form class="submit-form" id="submit-form">
98
+ <input
99
+ type="text"
100
+ id="submit-input"
101
+ placeholder="Enter a URL (e.g., https://example.com)"
102
+ autocomplete="off"
103
+ spellcheck="false"
104
+ required
105
+ >
106
+ <button type="submit" class="submit-btn" id="submit-btn">
107
+ Submit URL
108
+ </button>
109
+ </form>
110
+ <div class="submit-feedback" id="submit-feedback"></div>
111
+ </div>
112
+ </div>
113
+ </section>
114
+
115
+ </main>
116
+
117
+ <!-- ─── Footer ──────────────────────────────── -->
118
+ <footer class="footer">
119
+ <div class="container">
120
+
121
+ <div class="live-counter">
122
+ <div class="counter-label">
123
+ <span class="live-dot"></span>
124
+ Active Websites Indexed
125
+ </div>
126
+ <div class="counter-value" id="counter-value">0</div>
127
+ <div class="counter-subtext">
128
+ and growing every second
129
+ </div>
130
+ </div>
131
+
132
+ <div class="footer-links">
133
+ <a href="https://github.com/guestcoder0906/RandomWeb" target="_blank" rel="noopener">GitHub</a>
134
+ <span class="divider"></span>
135
+ <span style="color: var(--text-muted);">Built with 🌐 by RandomWeb</span>
136
+ </div>
137
+
138
+ </div>
139
+ </footer>
140
+
141
+ </div>
142
+
143
+ <!-- Toast container -->
144
+ <div class="toast-container" id="toast-container"></div>
145
+
146
+ <!-- Supabase Client (CDN) -->
147
+ <script src="https://cdn.jsdelivr.net/npm/@supabase/supabase-js@2/dist/umd/supabase.min.js"></script>
148
+
149
+ <!-- App Logic -->
150
+ <script src="app.js"></script>
151
+
152
+ </body>
153
+ </html>
frontend/styles.css ADDED
@@ -0,0 +1,801 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* ============================================================
2
+ RandomWeb β€” Premium Dark Theme
3
+ Design system: Midnight navy base, cyan↔violet gradients,
4
+ glassmorphism panels, Inter + Outfit fonts, micro-animations
5
+ ============================================================ */
6
+
7
+ /* ─── Google Fonts ─────────────────────────────────────────── */
8
+ @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&family=Outfit:wght@400;500;600;700;800;900&family=JetBrains+Mono:wght@400;500&display=swap');
9
+
10
+ /* ─── CSS Custom Properties ────────────────────────────────── */
11
+ :root {
12
+ /* Core palette */
13
+ --bg-primary: #0a0e1a;
14
+ --bg-secondary: #111827;
15
+ --bg-card: rgba(17, 24, 39, 0.7);
16
+ --bg-glass: rgba(255, 255, 255, 0.03);
17
+ --bg-glass-hover: rgba(255, 255, 255, 0.06);
18
+
19
+ /* Accent gradients */
20
+ --gradient-primary: linear-gradient(135deg, #06b6d4, #8b5cf6);
21
+ --gradient-secondary: linear-gradient(135deg, #8b5cf6, #ec4899);
22
+ --gradient-glow: linear-gradient(135deg, rgba(6, 182, 212, 0.15), rgba(139, 92, 246, 0.15));
23
+ --gradient-hero: linear-gradient(180deg, #0a0e1a 0%, #111827 50%, #0a0e1a 100%);
24
+
25
+ /* Text */
26
+ --text-primary: #f1f5f9;
27
+ --text-secondary: #94a3b8;
28
+ --text-muted: #64748b;
29
+ --text-accent: #06b6d4;
30
+
31
+ /* Borders */
32
+ --border-subtle: rgba(255, 255, 255, 0.06);
33
+ --border-accent: rgba(6, 182, 212, 0.3);
34
+
35
+ /* Shadows */
36
+ --shadow-lg: 0 25px 50px -12px rgba(0, 0, 0, 0.5);
37
+ --shadow-glow-cyan: 0 0 40px rgba(6, 182, 212, 0.15);
38
+ --shadow-glow-violet: 0 0 40px rgba(139, 92, 246, 0.15);
39
+ --shadow-button: 0 0 30px rgba(6, 182, 212, 0.3), 0 0 60px rgba(139, 92, 246, 0.1);
40
+
41
+ /* Typography */
42
+ --font-body: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif;
43
+ --font-heading: 'Outfit', -apple-system, BlinkMacSystemFont, sans-serif;
44
+ --font-mono: 'JetBrains Mono', 'Fira Code', monospace;
45
+
46
+ /* Spacing */
47
+ --space-xs: 0.25rem;
48
+ --space-sm: 0.5rem;
49
+ --space-md: 1rem;
50
+ --space-lg: 1.5rem;
51
+ --space-xl: 2rem;
52
+ --space-2xl: 3rem;
53
+ --space-3xl: 4rem;
54
+ --space-4xl: 6rem;
55
+
56
+ /* Radius */
57
+ --radius-sm: 0.5rem;
58
+ --radius-md: 0.75rem;
59
+ --radius-lg: 1rem;
60
+ --radius-xl: 1.5rem;
61
+ --radius-full: 9999px;
62
+
63
+ /* Transitions */
64
+ --transition-fast: 150ms cubic-bezier(0.4, 0, 0.2, 1);
65
+ --transition-base: 250ms cubic-bezier(0.4, 0, 0.2, 1);
66
+ --transition-slow: 400ms cubic-bezier(0.4, 0, 0.2, 1);
67
+ --transition-spring: 500ms cubic-bezier(0.34, 1.56, 0.64, 1);
68
+ }
69
+
70
+ /* ─── Reset & Base ─────────────────────────────────────────── */
71
+ *,
72
+ *::before,
73
+ *::after {
74
+ margin: 0;
75
+ padding: 0;
76
+ box-sizing: border-box;
77
+ }
78
+
79
+ html {
80
+ scroll-behavior: smooth;
81
+ -webkit-font-smoothing: antialiased;
82
+ -moz-osx-font-smoothing: grayscale;
83
+ }
84
+
85
+ body {
86
+ font-family: var(--font-body);
87
+ background: var(--bg-primary);
88
+ color: var(--text-primary);
89
+ min-height: 100vh;
90
+ overflow-x: hidden;
91
+ line-height: 1.6;
92
+ }
93
+
94
+ /* ─── Animated Background ──────────────────────────────────── */
95
+ .bg-grid {
96
+ position: fixed;
97
+ inset: 0;
98
+ z-index: 0;
99
+ background-image:
100
+ radial-gradient(ellipse at 20% 50%, rgba(6, 182, 212, 0.08) 0%, transparent 50%),
101
+ radial-gradient(ellipse at 80% 20%, rgba(139, 92, 246, 0.08) 0%, transparent 50%),
102
+ radial-gradient(ellipse at 50% 80%, rgba(236, 72, 153, 0.05) 0%, transparent 50%);
103
+ pointer-events: none;
104
+ }
105
+
106
+ .bg-grid::before {
107
+ content: '';
108
+ position: absolute;
109
+ inset: 0;
110
+ background-image:
111
+ linear-gradient(rgba(255, 255, 255, 0.015) 1px, transparent 1px),
112
+ linear-gradient(90deg, rgba(255, 255, 255, 0.015) 1px, transparent 1px);
113
+ background-size: 60px 60px;
114
+ mask-image: radial-gradient(ellipse at center, black 30%, transparent 70%);
115
+ }
116
+
117
+ /* Floating orbs */
118
+ .orb {
119
+ position: fixed;
120
+ border-radius: 50%;
121
+ filter: blur(80px);
122
+ opacity: 0.4;
123
+ pointer-events: none;
124
+ z-index: 0;
125
+ animation: orbFloat 20s ease-in-out infinite;
126
+ }
127
+
128
+ .orb-1 {
129
+ width: 400px;
130
+ height: 400px;
131
+ background: rgba(6, 182, 212, 0.12);
132
+ top: -100px;
133
+ left: -100px;
134
+ animation-delay: 0s;
135
+ }
136
+
137
+ .orb-2 {
138
+ width: 350px;
139
+ height: 350px;
140
+ background: rgba(139, 92, 246, 0.12);
141
+ bottom: -100px;
142
+ right: -100px;
143
+ animation-delay: -7s;
144
+ }
145
+
146
+ .orb-3 {
147
+ width: 300px;
148
+ height: 300px;
149
+ background: rgba(236, 72, 153, 0.08);
150
+ top: 50%;
151
+ left: 50%;
152
+ transform: translate(-50%, -50%);
153
+ animation-delay: -14s;
154
+ }
155
+
156
+ @keyframes orbFloat {
157
+ 0%, 100% { transform: translate(0, 0) scale(1); }
158
+ 25% { transform: translate(30px, -40px) scale(1.05); }
159
+ 50% { transform: translate(-20px, 20px) scale(0.95); }
160
+ 75% { transform: translate(40px, 30px) scale(1.02); }
161
+ }
162
+
163
+ /* ─── Layout ──────────���────────────────────────────────────── */
164
+ .app {
165
+ position: relative;
166
+ z-index: 1;
167
+ min-height: 100vh;
168
+ display: flex;
169
+ flex-direction: column;
170
+ }
171
+
172
+ .container {
173
+ width: 100%;
174
+ max-width: 800px;
175
+ margin: 0 auto;
176
+ padding: 0 var(--space-lg);
177
+ }
178
+
179
+ /* ─── Header ───────────────────────────────────────────────── */
180
+ .header {
181
+ padding: var(--space-lg) 0;
182
+ border-bottom: 1px solid var(--border-subtle);
183
+ backdrop-filter: blur(20px);
184
+ -webkit-backdrop-filter: blur(20px);
185
+ background: rgba(10, 14, 26, 0.8);
186
+ position: sticky;
187
+ top: 0;
188
+ z-index: 100;
189
+ }
190
+
191
+ .header .container {
192
+ display: flex;
193
+ align-items: center;
194
+ justify-content: space-between;
195
+ }
196
+
197
+ .logo {
198
+ display: flex;
199
+ align-items: center;
200
+ gap: var(--space-sm);
201
+ text-decoration: none;
202
+ }
203
+
204
+ .logo-icon {
205
+ width: 36px;
206
+ height: 36px;
207
+ border-radius: var(--radius-md);
208
+ background: var(--gradient-primary);
209
+ display: flex;
210
+ align-items: center;
211
+ justify-content: center;
212
+ font-size: 1.1rem;
213
+ box-shadow: var(--shadow-glow-cyan);
214
+ }
215
+
216
+ .logo-text {
217
+ font-family: var(--font-heading);
218
+ font-weight: 700;
219
+ font-size: 1.25rem;
220
+ background: var(--gradient-primary);
221
+ -webkit-background-clip: text;
222
+ -webkit-text-fill-color: transparent;
223
+ background-clip: text;
224
+ }
225
+
226
+ .header-stats {
227
+ display: flex;
228
+ align-items: center;
229
+ gap: var(--space-sm);
230
+ font-size: 0.8rem;
231
+ color: var(--text-muted);
232
+ font-family: var(--font-mono);
233
+ }
234
+
235
+ .header-stats .pulse-dot {
236
+ width: 8px;
237
+ height: 8px;
238
+ background: #22c55e;
239
+ border-radius: 50%;
240
+ animation: pulse 2s ease-in-out infinite;
241
+ }
242
+
243
+ @keyframes pulse {
244
+ 0%, 100% { opacity: 1; box-shadow: 0 0 0 0 rgba(34, 197, 94, 0.4); }
245
+ 50% { opacity: 0.7; box-shadow: 0 0 0 6px rgba(34, 197, 94, 0); }
246
+ }
247
+
248
+ /* ─── Hero Section ─────────────────────────────────────────── */
249
+ .hero {
250
+ padding: var(--space-4xl) 0 var(--space-3xl);
251
+ text-align: center;
252
+ }
253
+
254
+ .hero-badge {
255
+ display: inline-flex;
256
+ align-items: center;
257
+ gap: var(--space-sm);
258
+ padding: var(--space-xs) var(--space-md);
259
+ background: var(--bg-glass);
260
+ border: 1px solid var(--border-subtle);
261
+ border-radius: var(--radius-full);
262
+ font-size: 0.8rem;
263
+ color: var(--text-secondary);
264
+ margin-bottom: var(--space-xl);
265
+ backdrop-filter: blur(10px);
266
+ }
267
+
268
+ .hero-badge .dot {
269
+ width: 6px;
270
+ height: 6px;
271
+ background: #22c55e;
272
+ border-radius: 50%;
273
+ animation: pulse 2s ease-in-out infinite;
274
+ }
275
+
276
+ .hero h1 {
277
+ font-family: var(--font-heading);
278
+ font-weight: 900;
279
+ font-size: clamp(2.5rem, 6vw, 4rem);
280
+ line-height: 1.1;
281
+ margin-bottom: var(--space-lg);
282
+ letter-spacing: -0.03em;
283
+ }
284
+
285
+ .hero h1 .gradient-text {
286
+ background: var(--gradient-primary);
287
+ -webkit-background-clip: text;
288
+ -webkit-text-fill-color: transparent;
289
+ background-clip: text;
290
+ }
291
+
292
+ .hero p {
293
+ font-size: 1.1rem;
294
+ color: var(--text-secondary);
295
+ max-width: 500px;
296
+ margin: 0 auto var(--space-2xl);
297
+ line-height: 1.7;
298
+ }
299
+
300
+ /* ─── Random Button ────────────────────────────────────────── */
301
+ .random-btn-wrapper {
302
+ display: flex;
303
+ justify-content: center;
304
+ margin-bottom: var(--space-3xl);
305
+ }
306
+
307
+ .random-btn {
308
+ position: relative;
309
+ display: inline-flex;
310
+ align-items: center;
311
+ gap: var(--space-md);
312
+ padding: 1.15rem 2.5rem;
313
+ background: var(--gradient-primary);
314
+ color: white;
315
+ font-family: var(--font-heading);
316
+ font-weight: 700;
317
+ font-size: 1.15rem;
318
+ border: none;
319
+ border-radius: var(--radius-xl);
320
+ cursor: pointer;
321
+ transition: all var(--transition-base);
322
+ box-shadow: var(--shadow-button);
323
+ text-decoration: none;
324
+ letter-spacing: 0.01em;
325
+ overflow: hidden;
326
+ }
327
+
328
+ .random-btn::before {
329
+ content: '';
330
+ position: absolute;
331
+ inset: 0;
332
+ background: linear-gradient(135deg, rgba(255,255,255,0.15), transparent);
333
+ opacity: 0;
334
+ transition: opacity var(--transition-base);
335
+ }
336
+
337
+ .random-btn:hover {
338
+ transform: translateY(-3px) scale(1.03);
339
+ box-shadow: 0 0 50px rgba(6, 182, 212, 0.4), 0 0 80px rgba(139, 92, 246, 0.2);
340
+ }
341
+
342
+ .random-btn:hover::before {
343
+ opacity: 1;
344
+ }
345
+
346
+ .random-btn:active {
347
+ transform: translateY(-1px) scale(0.98);
348
+ }
349
+
350
+ .random-btn .btn-icon {
351
+ font-size: 1.4rem;
352
+ animation: spinSlow 8s linear infinite;
353
+ }
354
+
355
+ @keyframes spinSlow {
356
+ from { transform: rotate(0deg); }
357
+ to { transform: rotate(360deg); }
358
+ }
359
+
360
+ .random-btn:hover .btn-icon {
361
+ animation-duration: 1s;
362
+ }
363
+
364
+ .random-btn.loading .btn-icon {
365
+ animation: spinSlow 0.6s linear infinite;
366
+ }
367
+
368
+ /* ─── Glass Card ───────────────────────────────────────────── */
369
+ .glass-card {
370
+ background: var(--bg-card);
371
+ border: 1px solid var(--border-subtle);
372
+ border-radius: var(--radius-lg);
373
+ backdrop-filter: blur(20px);
374
+ -webkit-backdrop-filter: blur(20px);
375
+ transition: all var(--transition-base);
376
+ }
377
+
378
+ .glass-card:hover {
379
+ border-color: var(--border-accent);
380
+ background: var(--bg-glass-hover);
381
+ }
382
+
383
+ /* ─── Search Section ───────────────────────────────────────── */
384
+ .search-section {
385
+ margin-bottom: var(--space-2xl);
386
+ }
387
+
388
+ .search-section h2 {
389
+ font-family: var(--font-heading);
390
+ font-weight: 700;
391
+ font-size: 1.3rem;
392
+ margin-bottom: var(--space-md);
393
+ display: flex;
394
+ align-items: center;
395
+ gap: var(--space-sm);
396
+ }
397
+
398
+ .search-box {
399
+ position: relative;
400
+ }
401
+
402
+ .search-box input {
403
+ width: 100%;
404
+ padding: 1rem 1rem 1rem 3rem;
405
+ background: var(--bg-glass);
406
+ border: 1px solid var(--border-subtle);
407
+ border-radius: var(--radius-lg);
408
+ color: var(--text-primary);
409
+ font-family: var(--font-body);
410
+ font-size: 1rem;
411
+ outline: none;
412
+ transition: all var(--transition-base);
413
+ }
414
+
415
+ .search-box input:focus {
416
+ border-color: var(--border-accent);
417
+ box-shadow: 0 0 0 3px rgba(6, 182, 212, 0.1);
418
+ background: var(--bg-glass-hover);
419
+ }
420
+
421
+ .search-box input::placeholder {
422
+ color: var(--text-muted);
423
+ }
424
+
425
+ .search-box .search-icon {
426
+ position: absolute;
427
+ left: 1rem;
428
+ top: 50%;
429
+ transform: translateY(-50%);
430
+ font-size: 1.1rem;
431
+ color: var(--text-muted);
432
+ pointer-events: none;
433
+ }
434
+
435
+ .search-results {
436
+ margin-top: var(--space-md);
437
+ display: flex;
438
+ flex-direction: column;
439
+ gap: var(--space-sm);
440
+ max-height: 400px;
441
+ overflow-y: auto;
442
+ }
443
+
444
+ .search-results::-webkit-scrollbar {
445
+ width: 6px;
446
+ }
447
+
448
+ .search-results::-webkit-scrollbar-track {
449
+ background: transparent;
450
+ }
451
+
452
+ .search-results::-webkit-scrollbar-thumb {
453
+ background: var(--border-subtle);
454
+ border-radius: 3px;
455
+ }
456
+
457
+ .search-result-item {
458
+ display: flex;
459
+ align-items: center;
460
+ justify-content: space-between;
461
+ padding: var(--space-md) var(--space-lg);
462
+ background: var(--bg-glass);
463
+ border: 1px solid var(--border-subtle);
464
+ border-radius: var(--radius-md);
465
+ transition: all var(--transition-fast);
466
+ cursor: pointer;
467
+ text-decoration: none;
468
+ color: var(--text-primary);
469
+ }
470
+
471
+ .search-result-item:hover {
472
+ border-color: var(--border-accent);
473
+ background: var(--bg-glass-hover);
474
+ transform: translateX(4px);
475
+ }
476
+
477
+ .search-result-item .result-url {
478
+ font-family: var(--font-mono);
479
+ font-size: 0.9rem;
480
+ color: var(--text-accent);
481
+ word-break: break-all;
482
+ }
483
+
484
+ .search-result-item .result-domain {
485
+ font-size: 0.8rem;
486
+ color: var(--text-muted);
487
+ margin-top: 2px;
488
+ }
489
+
490
+ .search-result-item .result-arrow {
491
+ font-size: 1.1rem;
492
+ color: var(--text-muted);
493
+ transition: all var(--transition-fast);
494
+ flex-shrink: 0;
495
+ margin-left: var(--space-md);
496
+ }
497
+
498
+ .search-result-item:hover .result-arrow {
499
+ color: var(--text-accent);
500
+ transform: translateX(4px);
501
+ }
502
+
503
+ .search-empty {
504
+ text-align: center;
505
+ padding: var(--space-xl);
506
+ color: var(--text-muted);
507
+ font-size: 0.9rem;
508
+ }
509
+
510
+ /* ─── Submit Section ───────────────────────────────────────── */
511
+ .submit-section {
512
+ margin-bottom: var(--space-3xl);
513
+ }
514
+
515
+ .submit-section h2 {
516
+ font-family: var(--font-heading);
517
+ font-weight: 700;
518
+ font-size: 1.3rem;
519
+ margin-bottom: var(--space-sm);
520
+ display: flex;
521
+ align-items: center;
522
+ gap: var(--space-sm);
523
+ }
524
+
525
+ .submit-section .subtitle {
526
+ color: var(--text-secondary);
527
+ font-size: 0.9rem;
528
+ margin-bottom: var(--space-lg);
529
+ }
530
+
531
+ .submit-form {
532
+ display: flex;
533
+ gap: var(--space-sm);
534
+ }
535
+
536
+ .submit-form input {
537
+ flex: 1;
538
+ padding: 0.85rem 1rem;
539
+ background: var(--bg-glass);
540
+ border: 1px solid var(--border-subtle);
541
+ border-radius: var(--radius-md);
542
+ color: var(--text-primary);
543
+ font-family: var(--font-body);
544
+ font-size: 0.95rem;
545
+ outline: none;
546
+ transition: all var(--transition-base);
547
+ }
548
+
549
+ .submit-form input:focus {
550
+ border-color: var(--border-accent);
551
+ box-shadow: 0 0 0 3px rgba(6, 182, 212, 0.1);
552
+ }
553
+
554
+ .submit-form input::placeholder {
555
+ color: var(--text-muted);
556
+ }
557
+
558
+ .submit-btn {
559
+ padding: 0.85rem 1.5rem;
560
+ background: var(--gradient-primary);
561
+ color: white;
562
+ font-family: var(--font-heading);
563
+ font-weight: 600;
564
+ font-size: 0.9rem;
565
+ border: none;
566
+ border-radius: var(--radius-md);
567
+ cursor: pointer;
568
+ transition: all var(--transition-base);
569
+ white-space: nowrap;
570
+ }
571
+
572
+ .submit-btn:hover {
573
+ transform: translateY(-2px);
574
+ box-shadow: 0 0 20px rgba(6, 182, 212, 0.3);
575
+ }
576
+
577
+ .submit-btn:active {
578
+ transform: translateY(0);
579
+ }
580
+
581
+ .submit-btn:disabled {
582
+ opacity: 0.5;
583
+ cursor: not-allowed;
584
+ transform: none;
585
+ }
586
+
587
+ .submit-feedback {
588
+ margin-top: var(--space-md);
589
+ padding: var(--space-md) var(--space-lg);
590
+ border-radius: var(--radius-md);
591
+ font-size: 0.9rem;
592
+ display: none;
593
+ animation: fadeSlideUp 0.3s ease-out;
594
+ }
595
+
596
+ .submit-feedback.success {
597
+ display: block;
598
+ background: rgba(34, 197, 94, 0.1);
599
+ border: 1px solid rgba(34, 197, 94, 0.2);
600
+ color: #86efac;
601
+ }
602
+
603
+ .submit-feedback.error {
604
+ display: block;
605
+ background: rgba(239, 68, 68, 0.1);
606
+ border: 1px solid rgba(239, 68, 68, 0.2);
607
+ color: #fca5a5;
608
+ }
609
+
610
+ @keyframes fadeSlideUp {
611
+ from { opacity: 0; transform: translateY(8px); }
612
+ to { opacity: 1; transform: translateY(0); }
613
+ }
614
+
615
+ /* ─── Footer ───────────────────────────────────────────────── */
616
+ .footer {
617
+ margin-top: auto;
618
+ border-top: 1px solid var(--border-subtle);
619
+ padding: var(--space-xl) 0;
620
+ background: rgba(10, 14, 26, 0.9);
621
+ backdrop-filter: blur(20px);
622
+ }
623
+
624
+ .footer .container {
625
+ display: flex;
626
+ flex-direction: column;
627
+ align-items: center;
628
+ gap: var(--space-lg);
629
+ }
630
+
631
+ .live-counter {
632
+ text-align: center;
633
+ }
634
+
635
+ .live-counter .counter-label {
636
+ font-size: 0.75rem;
637
+ color: var(--text-muted);
638
+ text-transform: uppercase;
639
+ letter-spacing: 0.15em;
640
+ margin-bottom: var(--space-sm);
641
+ display: flex;
642
+ align-items: center;
643
+ justify-content: center;
644
+ gap: var(--space-sm);
645
+ }
646
+
647
+ .live-counter .counter-label .live-dot {
648
+ width: 8px;
649
+ height: 8px;
650
+ background: #22c55e;
651
+ border-radius: 50%;
652
+ animation: pulse 2s ease-in-out infinite;
653
+ display: inline-block;
654
+ }
655
+
656
+ .live-counter .counter-value {
657
+ font-family: var(--font-heading);
658
+ font-weight: 900;
659
+ font-size: clamp(2rem, 5vw, 3rem);
660
+ background: var(--gradient-primary);
661
+ -webkit-background-clip: text;
662
+ -webkit-text-fill-color: transparent;
663
+ background-clip: text;
664
+ line-height: 1.2;
665
+ transition: all var(--transition-base);
666
+ }
667
+
668
+ .live-counter .counter-subtext {
669
+ font-size: 0.8rem;
670
+ color: var(--text-secondary);
671
+ margin-top: var(--space-xs);
672
+ }
673
+
674
+ .footer-links {
675
+ display: flex;
676
+ align-items: center;
677
+ gap: var(--space-lg);
678
+ font-size: 0.8rem;
679
+ }
680
+
681
+ .footer-links a {
682
+ color: var(--text-muted);
683
+ text-decoration: none;
684
+ transition: color var(--transition-fast);
685
+ }
686
+
687
+ .footer-links a:hover {
688
+ color: var(--text-accent);
689
+ }
690
+
691
+ .footer-links .divider {
692
+ width: 3px;
693
+ height: 3px;
694
+ background: var(--text-muted);
695
+ border-radius: 50%;
696
+ opacity: 0.5;
697
+ }
698
+
699
+ /* ─── Toast Notifications ──────────────────────────────────── */
700
+ .toast-container {
701
+ position: fixed;
702
+ top: var(--space-lg);
703
+ right: var(--space-lg);
704
+ z-index: 1000;
705
+ display: flex;
706
+ flex-direction: column;
707
+ gap: var(--space-sm);
708
+ }
709
+
710
+ .toast {
711
+ padding: var(--space-md) var(--space-lg);
712
+ background: var(--bg-card);
713
+ border: 1px solid var(--border-subtle);
714
+ border-radius: var(--radius-md);
715
+ backdrop-filter: blur(20px);
716
+ animation: toastIn 0.3s ease-out;
717
+ font-size: 0.9rem;
718
+ max-width: 350px;
719
+ box-shadow: var(--shadow-lg);
720
+ }
721
+
722
+ .toast.toast-exiting {
723
+ animation: toastOut 0.3s ease-in forwards;
724
+ }
725
+
726
+ @keyframes toastIn {
727
+ from { opacity: 0; transform: translateX(100px); }
728
+ to { opacity: 1; transform: translateX(0); }
729
+ }
730
+
731
+ @keyframes toastOut {
732
+ from { opacity: 1; transform: translateX(0); }
733
+ to { opacity: 0; transform: translateX(100px); }
734
+ }
735
+
736
+ /* ─── Responsive ───────────────────────────────────────────── */
737
+ @media (max-width: 640px) {
738
+ .container {
739
+ padding: 0 var(--space-md);
740
+ }
741
+
742
+ .hero {
743
+ padding: var(--space-3xl) 0 var(--space-2xl);
744
+ }
745
+
746
+ .submit-form {
747
+ flex-direction: column;
748
+ }
749
+
750
+ .header .container {
751
+ flex-direction: column;
752
+ gap: var(--space-sm);
753
+ }
754
+
755
+ .footer-links {
756
+ flex-wrap: wrap;
757
+ justify-content: center;
758
+ }
759
+ }
760
+
761
+ /* ─── Utility Animations ───────────────────────────────────── */
762
+ @keyframes fadeIn {
763
+ from { opacity: 0; }
764
+ to { opacity: 1; }
765
+ }
766
+
767
+ @keyframes slideUp {
768
+ from { opacity: 0; transform: translateY(20px); }
769
+ to { opacity: 1; transform: translateY(0); }
770
+ }
771
+
772
+ .fade-in {
773
+ animation: fadeIn 0.6s ease-out;
774
+ }
775
+
776
+ .slide-up {
777
+ animation: slideUp 0.6s ease-out;
778
+ }
779
+
780
+ .slide-up-delay-1 { animation-delay: 0.1s; animation-fill-mode: backwards; }
781
+ .slide-up-delay-2 { animation-delay: 0.2s; animation-fill-mode: backwards; }
782
+ .slide-up-delay-3 { animation-delay: 0.3s; animation-fill-mode: backwards; }
783
+ .slide-up-delay-4 { animation-delay: 0.4s; animation-fill-mode: backwards; }
784
+
785
+ /* ─── Loading Skeleton ─────────────────────────────────────── */
786
+ .skeleton {
787
+ background: linear-gradient(
788
+ 90deg,
789
+ var(--bg-glass) 25%,
790
+ rgba(255, 255, 255, 0.06) 50%,
791
+ var(--bg-glass) 75%
792
+ );
793
+ background-size: 200% 100%;
794
+ animation: shimmer 1.5s infinite;
795
+ border-radius: var(--radius-sm);
796
+ }
797
+
798
+ @keyframes shimmer {
799
+ 0% { background-position: 200% 50%; }
800
+ 100% { background-position: -200% 50%; }
801
+ }
nginx.conf ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
worker_processes auto;
# PID under /tmp so the process can run as the non-root HF Spaces user.
pid /tmp/nginx.pid;

events {
    worker_connections 1024;
}

http {
    include /etc/nginx/mime.types;
    default_type application/octet-stream;

    # Temp paths relocated for non-root operation.
    client_body_temp_path /tmp/nginx-client-body;
    proxy_temp_path /tmp/nginx-proxy;
    fastcgi_temp_path /tmp/nginx-fastcgi;
    uwsgi_temp_path /tmp/nginx-uwsgi;
    scgi_temp_path /tmp/nginx-scgi;

    sendfile on;
    tcp_nopush on;
    keepalive_timeout 65;
    gzip on;
    gzip_types text/plain text/css application/json application/javascript text/xml;

    # Logging (non-root writable paths)
    access_log /tmp/nginx-access.log;
    error_log /tmp/nginx-error.log;

    server {
        # 7860 is the port HF Spaces expects the app to listen on.
        listen 7860;
        server_name _;

        # Frontend static files
        root /app/frontend;
        index index.html;

        # API proxy β†’ FastAPI. No trailing slash on proxy_pass, so the
        # full /api/... path is forwarded to the backend unchanged.
        location /api/ {
            proxy_pass http://127.0.0.1:8000;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;

            # Timeouts
            proxy_connect_timeout 10s;
            proxy_send_timeout 30s;
            proxy_read_timeout 30s;
        }

        # Health check.
        # NOTE(review): this forwards "/health" verbatim to the backend,
        # but run.sh probes the backend at "/api/health" β€” confirm the
        # FastAPI app actually serves a bare /health route, otherwise
        # change this to: proxy_pass http://127.0.0.1:8000/api/health;
        location /health {
            proxy_pass http://127.0.0.1:8000;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
        }

        # Frontend SPA fallback: unknown paths resolve to index.html.
        location / {
            try_files $uri $uri/ /index.html;
        }

        # Security headers (server-level: inherited by locations that do
        # not declare their own add_header directives).
        add_header X-Frame-Options "SAMEORIGIN" always;
        add_header X-Content-Type-Options "nosniff" always;
        add_header Referrer-Policy "strict-origin-when-cross-origin" always;
    }
}
run.sh ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
# Entrypoint: start the FastAPI backend, wait until it answers its health
# probe, then run nginx in the foreground as PID 1.
set -e

echo "=========================================="
echo "  RandomWeb β€” Starting services"
echo "=========================================="

# Start FastAPI backend in background
echo "[1/2] Starting FastAPI backend on :8000..."
cd /app
python -m uvicorn backend.main:app --host 0.0.0.0 --port 8000 --log-level info &
BACKEND_PID=$!

# Wait for the backend to become ready. Unlike a bare loop, we track
# readiness explicitly so the container fails fast instead of starting
# nginx in front of a dead backend.
echo "      Waiting for backend..."
ready=0
for i in $(seq 1 30); do
    if curl -s http://127.0.0.1:8000/api/health > /dev/null 2>&1; then
        echo "      Backend ready!"
        ready=1
        break
    fi
    # If uvicorn already exited (import error, bad config), don't keep polling.
    if ! kill -0 "$BACKEND_PID" 2>/dev/null; then
        echo "      ERROR: backend process exited during startup." >&2
        exit 1
    fi
    sleep 1
done

if [ "$ready" -ne 1 ]; then
    echo "      ERROR: backend did not become ready within 30s." >&2
    exit 1
fi

# Start Nginx in foreground (exec replaces the shell, so nginx receives
# container signals directly).
echo "[2/2] Starting Nginx on :7860..."
exec nginx -g 'daemon off;'
supabase_schema.sql ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
-- ============================================================
-- RandomWeb β€” Supabase Schema
-- Run this in the Supabase SQL Editor (Dashboard β†’ SQL Editor)
-- ============================================================

-- Enable required extensions
CREATE EXTENSION IF NOT EXISTS pg_trgm;

-- ============================================================
-- 1. WEBSITES TABLE
-- ============================================================
CREATE TABLE IF NOT EXISTS websites (
    id           BIGSERIAL PRIMARY KEY,
    url          TEXT NOT NULL UNIQUE,
    domain       TEXT NOT NULL,
    source       TEXT NOT NULL DEFAULT 'unknown',
    status       INTEGER,                         -- last HTTP status from the validator
    is_active    BOOLEAN NOT NULL DEFAULT false,
    first_seen   TIMESTAMPTZ NOT NULL DEFAULT now(),
    last_checked TIMESTAMPTZ,
    next_check   TIMESTAMPTZ,
    created_at   TIMESTAMPTZ NOT NULL DEFAULT now()
);

-- Indexes for performance
CREATE INDEX IF NOT EXISTS idx_websites_is_active ON websites (is_active) WHERE is_active = true;
CREATE INDEX IF NOT EXISTS idx_websites_domain ON websites (domain);
CREATE INDEX IF NOT EXISTS idx_websites_next_check ON websites (next_check) WHERE next_check IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_websites_random ON websites (id) WHERE is_active = true;

-- Trigram index for fuzzy search
CREATE INDEX IF NOT EXISTS idx_websites_url_trgm ON websites USING gin (url gin_trgm_ops);
CREATE INDEX IF NOT EXISTS idx_websites_domain_trgm ON websites USING gin (domain gin_trgm_ops);

-- ============================================================
-- 2. STATS TABLE (single-row, live counter)
-- ============================================================
CREATE TABLE IF NOT EXISTS stats (
    id           INTEGER PRIMARY KEY DEFAULT 1 CHECK (id = 1),  -- enforce single row
    active_count BIGINT NOT NULL DEFAULT 0,
    total_count  BIGINT NOT NULL DEFAULT 0,
    updated_at   TIMESTAMPTZ NOT NULL DEFAULT now()
);

INSERT INTO stats (active_count, total_count) VALUES (0, 0)
ON CONFLICT (id) DO NOTHING;

-- ============================================================
-- 3. TRIGGER β€” Auto-update stats on website changes
-- ============================================================
-- Statement-level trigger: recounts once per statement, not per row.
-- search_path is pinned so the function cannot be hijacked by objects
-- in a caller-controlled schema (Supabase security lint).
CREATE OR REPLACE FUNCTION update_stats_count()
RETURNS TRIGGER AS $$
BEGIN
    UPDATE stats SET
        active_count = (SELECT count(*) FROM websites WHERE is_active = true),
        total_count  = (SELECT count(*) FROM websites),
        updated_at   = now()
    WHERE id = 1;
    RETURN NULL;  -- return value is ignored for AFTER statement triggers
END;
$$ LANGUAGE plpgsql SET search_path = public;

DROP TRIGGER IF EXISTS trg_update_stats ON websites;
CREATE TRIGGER trg_update_stats
AFTER INSERT OR UPDATE OF is_active OR DELETE ON websites
FOR EACH STATEMENT EXECUTE FUNCTION update_stats_count();

-- ============================================================
-- 4. FUNCTION β€” Random active website
-- ============================================================
-- NOTE: ORDER BY random() scans all active rows (O(n)). Fine for modest
-- tables; if the active set grows large, switch to a random-id/OFFSET
-- sampling strategy.
CREATE OR REPLACE FUNCTION get_random_active_website()
RETURNS TABLE(id BIGINT, url TEXT, domain TEXT) AS $$
BEGIN
    RETURN QUERY
    SELECT w.id, w.url, w.domain
    FROM websites w
    WHERE w.is_active = true
    ORDER BY random()
    LIMIT 1;
END;
$$ LANGUAGE plpgsql SET search_path = public;

-- ============================================================
-- 5. ROW LEVEL SECURITY
-- ============================================================
ALTER TABLE websites ENABLE ROW LEVEL SECURITY;
ALTER TABLE stats ENABLE ROW LEVEL SECURITY;

-- Public read access for frontend (publishable key)
CREATE POLICY "Allow public read on websites"
    ON websites FOR SELECT
    USING (true);

CREATE POLICY "Allow public read on stats"
    ON stats FOR SELECT
    USING (true);

-- Write access is restricted to the service role. (The service role
-- bypasses RLS anyway; the previous unrestricted "FOR ALL USING (true)"
-- policies effectively granted PUBLIC write access through the
-- anon/publishable key, which defeats the point of enabling RLS.)
CREATE POLICY "Allow service write on websites"
    ON websites FOR ALL
    TO service_role
    USING (true)
    WITH CHECK (true);

CREATE POLICY "Allow service write on stats"
    ON stats FOR ALL
    TO service_role
    USING (true)
    WITH CHECK (true);

-- ============================================================
-- 6. ENABLE REALTIME on stats table
-- ============================================================
ALTER PUBLICATION supabase_realtime ADD TABLE stats;