Spaces:
Paused
Paused
Guest1 committed on
Commit Β·
d22875e
0
Parent(s):
π Initial Clean Deployment
Browse files- .env.example +10 -0
- .github/workflows/deploy.yml +22 -0
- .gitignore +10 -0
- Dockerfile +43 -0
- README.md +36 -0
- backend/__init__.py +1 -0
- backend/api/__init__.py +1 -0
- backend/api/routes.py +130 -0
- backend/config.py +150 -0
- backend/db.py +189 -0
- backend/main.py +105 -0
- backend/requirements.txt +9 -0
- backend/workers/__init__.py +1 -0
- backend/workers/common_crawl.py +187 -0
- backend/workers/crawler.py +305 -0
- backend/workers/ct_log.py +147 -0
- backend/workers/scheduler.py +47 -0
- backend/workers/validator.py +259 -0
- deploy_hf.sh +72 -0
- frontend/app.js +319 -0
- frontend/index.html +153 -0
- frontend/styles.css +801 -0
- nginx.conf +66 -0
- run.sh +25 -0
- supabase_schema.sql +112 -0
.env.example
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Supabase Configuration
|
| 2 |
+
# Required for the database and validation workers
|
| 3 |
+
SUPABASE_URL="https://your-project.supabase.co"
|
| 4 |
+
SUPABASE_PUBLISHABLE_KEY="your-public-key"
|
| 5 |
+
SUPABASE_SECRET_KEY="your-private-secret-key"
|
| 6 |
+
|
| 7 |
+
# IMPORTANT:
|
| 8 |
+
# 1. On Hugging Face, go to Settings > Variables and Secrets
|
| 9 |
+
# 2. Add these as 'Secrets' there.
|
| 10 |
+
# 3. NEVER hardcode your real keys in your code!
|
.github/workflows/deploy.yml
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: Sync to Hugging Face Hub
|
| 2 |
+
on:
|
| 3 |
+
push:
|
| 4 |
+
branches: [main]
|
| 5 |
+
workflow_dispatch:
|
| 6 |
+
|
| 7 |
+
jobs:
|
| 8 |
+
deploy:
|
| 9 |
+
runs-on: ubuntu-latest
|
| 10 |
+
steps:
|
| 11 |
+
- name: Checkout Repository
|
| 12 |
+
uses: actions/checkout@v3
|
| 13 |
+
with:
|
| 14 |
+
fetch-depth: 0
|
| 15 |
+
lfs: true
|
| 16 |
+
|
| 17 |
+
- name: Push to Hugging Face
|
| 18 |
+
env:
|
| 19 |
+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
| 20 |
+
run: |
|
| 21 |
+
git remote add hf https://PinkAlpaca:$HF_TOKEN@huggingface.co/spaces/PinkAlpaca/RandomWeb
|
| 22 |
+
git push -f hf main
|
.gitignore
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
__pycache__/
|
| 2 |
+
*.pyc
|
| 3 |
+
*.pyo
|
| 4 |
+
.env
|
| 5 |
+
.env.local
|
| 6 |
+
.DS_Store
|
| 7 |
+
*.log
|
| 8 |
+
node_modules/
|
| 9 |
+
.vscode/
|
| 10 |
+
.idea/
|
Dockerfile
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.12-slim
|
| 2 |
+
|
| 3 |
+
# Install system dependencies
|
| 4 |
+
RUN apt-get update && \
|
| 5 |
+
apt-get install -y --no-install-recommends nginx curl && \
|
| 6 |
+
rm -rf /var/lib/apt/lists/*
|
| 7 |
+
|
| 8 |
+
# Create non-root user (required by HF Spaces)
|
| 9 |
+
RUN useradd -m -u 1000 appuser
|
| 10 |
+
|
| 11 |
+
# Set working directory
|
| 12 |
+
WORKDIR /app
|
| 13 |
+
|
| 14 |
+
# Install Python dependencies
|
| 15 |
+
COPY backend/requirements.txt ./requirements.txt
|
| 16 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 17 |
+
|
| 18 |
+
# Copy application code
|
| 19 |
+
COPY backend/ ./backend/
|
| 20 |
+
COPY frontend/ ./frontend/
|
| 21 |
+
|
| 22 |
+
# Copy Nginx config
|
| 23 |
+
COPY nginx.conf /etc/nginx/nginx.conf
|
| 24 |
+
|
| 25 |
+
# Copy startup script
|
| 26 |
+
COPY run.sh ./run.sh
|
| 27 |
+
RUN chmod +x ./run.sh
|
| 28 |
+
|
| 29 |
+
# Create Nginx temp directories writable by appuser
|
| 30 |
+
RUN mkdir -p /tmp/nginx-client-body /tmp/nginx-proxy /tmp/nginx-fastcgi /tmp/nginx-uwsgi /tmp/nginx-scgi && \
|
| 31 |
+
chown -R appuser:appuser /tmp/nginx-* && \
|
| 32 |
+
chown -R appuser:appuser /var/lib/nginx && \
|
| 33 |
+
chown -R appuser:appuser /var/log/nginx && \
|
| 34 |
+
chown -R appuser:appuser /app && \
|
| 35 |
+
touch /tmp/nginx.pid && \
|
| 36 |
+
chown appuser:appuser /tmp/nginx.pid
|
| 37 |
+
|
| 38 |
+
# Switch to non-root user
|
| 39 |
+
USER appuser
|
| 40 |
+
|
| 41 |
+
EXPOSE 7860
|
| 42 |
+
|
| 43 |
+
CMD ["./run.sh"]
|
README.md
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: RandomWeb
|
| 3 |
+
emoji: π
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: purple
|
| 6 |
+
sdk: docker
|
| 7 |
+
app_port: 7860
|
| 8 |
+
pinned: true
|
| 9 |
+
---
|
| 10 |
+
|
| 11 |
+
# π RandomWeb β Discover Random Websites
|
| 12 |
+
|
| 13 |
+
A random website discovery platform that indexes the entire web using:
|
| 14 |
+
|
| 15 |
+
- **Certificate Transparency Logs** β Real-time domain discovery via CertStream
|
| 16 |
+
- **Common Crawl** β Batch import from the largest public web archive
|
| 17 |
+
- **BFS Recursive Crawler** β Breadth-first link extraction and traversal
|
| 18 |
+
- **Polite Validation** β Rate-limited, robots.txt-compliant URL verification
|
| 19 |
+
|
| 20 |
+
## Features
|
| 21 |
+
|
| 22 |
+
- π² **Random Button** β Instant redirect to a random live website
|
| 23 |
+
- π **Search** β Find specific indexed websites
|
| 24 |
+
- β **Submit URLs** β Add websites to the index
|
| 25 |
+
- π **Live Counter** β Real-time count of active indexed sites (via Supabase Realtime)
|
| 26 |
+
|
| 27 |
+
## Architecture
|
| 28 |
+
|
| 29 |
+
- **Backend**: Python / FastAPI with async workers
|
| 30 |
+
- **Frontend**: Vanilla HTML/CSS/JS with Supabase JS client
|
| 31 |
+
- **Database**: Supabase (PostgreSQL) with RLS and Realtime
|
| 32 |
+
- **Deployment**: Docker on Hugging Face Spaces
|
| 33 |
+
|
| 34 |
+
## Links
|
| 35 |
+
|
| 36 |
+
- [GitHub Repository](https://github.com/guestcoder0906/RandomWeb)
|
backend/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# RandomWeb Backend
|
backend/api/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# API Routes
|
backend/api/routes.py
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
RandomWeb β REST API Routes
|
| 3 |
+
Endpoints for random redirect, search, URL submission, and stats.
|
| 4 |
+
"""
|
| 5 |
+
import logging
|
| 6 |
+
import re
|
| 7 |
+
from urllib.parse import urlparse
|
| 8 |
+
|
| 9 |
+
from fastapi import APIRouter, HTTPException, Query
|
| 10 |
+
from pydantic import BaseModel, field_validator
|
| 11 |
+
|
| 12 |
+
from backend.db import (
|
| 13 |
+
get_random_active_url,
|
| 14 |
+
search_websites,
|
| 15 |
+
get_active_count,
|
| 16 |
+
get_total_count,
|
| 17 |
+
url_exists,
|
| 18 |
+
)
|
| 19 |
+
from backend.workers.validator import enqueue_url
|
| 20 |
+
|
| 21 |
+
logger = logging.getLogger("randomweb.api")
|
| 22 |
+
router = APIRouter(prefix="/api")
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
# βββ Models ββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 26 |
+
class SubmitRequest(BaseModel):
    # Raw URL as typed by the user; normalized by the validator below.
    url: str

    @field_validator("url")
    @classmethod
    def validate_url(cls, v: str) -> str:
        """Normalize and sanity-check a submitted URL.

        Strips whitespace, defaults the scheme to https, and rejects
        empty values, hosts without a dot, and over-long URLs.
        """
        candidate = v.strip()
        if not candidate:
            raise ValueError("URL cannot be empty")

        # Default to https when the user omitted the scheme.
        has_scheme = candidate.startswith("http://") or candidate.startswith("https://")
        if not has_scheme:
            candidate = f"https://{candidate}"

        host = urlparse(candidate).netloc
        if not host or "." not in host:
            raise ValueError("Invalid URL format")

        if len(candidate) > 2000:
            raise ValueError("URL too long")

        return candidate
| 48 |
+
|
| 49 |
+
|
| 50 |
+
class SubmitResponse(BaseModel):
|
| 51 |
+
success: bool
|
| 52 |
+
message: str
|
| 53 |
+
url: str
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
class RandomResponse(BaseModel):
|
| 57 |
+
url: str
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
class StatsResponse(BaseModel):
|
| 61 |
+
active_count: int
|
| 62 |
+
total_count: int
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
class SearchResult(BaseModel):
|
| 66 |
+
url: str
|
| 67 |
+
domain: str
|
| 68 |
+
is_active: bool
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
# βββ Endpoints βββββββββββββββββββββββββββββββββββββββββββββββ
|
| 72 |
+
@router.get("/random", response_model=RandomResponse)
async def get_random():
    """Get a random active website URL for redirect."""
    url = get_random_active_url()
    if url:
        return {"url": url}
    # Nothing indexed yet -- surface that as a 404 rather than an empty body.
    raise HTTPException(
        status_code=404,
        detail="No active websites found yet. The system is still indexing.",
    )
| 82 |
+
|
| 83 |
+
|
| 84 |
+
@router.get("/search", response_model=list[SearchResult])
async def search(
    q: str = Query(..., min_length=1, max_length=200, description="Search query"),
    limit: int = Query(20, ge=1, le=100, description="Max results"),
):
    """Search for indexed websites by URL or domain."""
    # Delegate entirely to the db layer; it returns [] on any failure.
    return search_websites(q, limit=limit)
| 92 |
+
|
| 93 |
+
|
| 94 |
+
@router.post("/submit", response_model=SubmitResponse)
async def submit_url(request: SubmitRequest):
    """Accept a user-submitted URL and queue it for validation and indexing."""
    url = request.url
    logger.info("User submitted URL: %s", url)

    # Already known -- acknowledge without re-queuing.
    if url_exists(url):
        return SubmitResponse(
            success=True,
            message="This URL is already in our index.",
            url=url,
        )

    # Hand off to the validation worker; indexing happens asynchronously.
    await enqueue_url(url, source="user_submit")
    return SubmitResponse(
        success=True,
        message="URL submitted successfully! It will be validated and added if accessible.",
        url=url,
    )
| 116 |
+
|
| 117 |
+
|
| 118 |
+
@router.get("/stats", response_model=StatsResponse)
async def get_stats():
    """Report the current active and total indexed-site counters."""
    active = get_active_count()
    total = get_total_count()
    return StatsResponse(active_count=active, total_count=total)
| 125 |
+
|
| 126 |
+
|
| 127 |
+
@router.get("/health")
async def health():
    """Liveness probe: always answers {"status": "ok"} when the app is up."""
    return {"status": "ok"}
|
backend/config.py
ADDED
|
@@ -0,0 +1,150 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
RandomWeb β Configuration
|
| 3 |
+
Loads environment variables and defines constants for all workers.
|
| 4 |
+
"""
|
| 5 |
+
import os
|
| 6 |
+
|
| 7 |
+
# βββ Supabase ββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 8 |
+
SUPABASE_URL = os.getenv("SUPABASE_URL", "")
|
| 9 |
+
SUPABASE_SECRET_KEY = os.getenv("SUPABASE_SECRET_KEY", "")
|
| 10 |
+
SUPABASE_PUBLISHABLE_KEY = os.getenv("SUPABASE_PUBLISHABLE_KEY", "")
|
| 11 |
+
|
| 12 |
+
# βββ Crawler Settings βββββββββββββββββββββββββββββββββββββββ
|
| 13 |
+
USER_AGENT = "RandomWeb/1.0 (+https://github.com/guestcoder0906/RandomWeb; polite-bot)"
|
| 14 |
+
REQUEST_TIMEOUT = 10 # seconds
|
| 15 |
+
MAX_GLOBAL_CONCURRENCY = 20 # max simultaneous outbound connections
|
| 16 |
+
PER_DOMAIN_RATE_LIMIT = 1.0 # requests per second per domain
|
| 17 |
+
CRAWL_DELAY_DEFAULT = 1.0 # fallback crawl delay if robots.txt doesn't specify
|
| 18 |
+
MAX_CRAWL_DEPTH = 3 # BFS depth limit per seed
|
| 19 |
+
MAX_LINKS_PER_PAGE = 50 # max links to extract per page
|
| 20 |
+
MAX_QUEUE_SIZE = 100_000 # max URLs in crawler queue
|
| 21 |
+
|
| 22 |
+
# βββ Validator Settings ββββββββββββββββββββββββββββββββββββββ
|
| 23 |
+
VALIDATION_BATCH_SIZE = 50 # URLs per validation batch
|
| 24 |
+
VALIDATION_CONCURRENCY = 10 # concurrent validation requests
|
| 25 |
+
RECHECK_INTERVAL_DAYS = 365 # re-verify every year
|
| 26 |
+
|
| 27 |
+
# βββ CertStream ββββββββββββββββββββββββββββββββββββββββββββββ
|
| 28 |
+
CERTSTREAM_URL = "wss://certstream.calidog.io/"
|
| 29 |
+
CT_LOG_BATCH_SIZE = 100 # queue batch size before flushing to validation
|
| 30 |
+
CT_LOG_RECONNECT_DELAY = 5 # initial reconnect delay in seconds
|
| 31 |
+
CT_LOG_MAX_RECONNECT_DELAY = 300 # max reconnect delay
|
| 32 |
+
|
| 33 |
+
# βββ Common Crawl ββββββββββββββββββββββββββββββββββββββββββββ
|
| 34 |
+
COMMON_CRAWL_INDEX_URL = "https://index.commoncrawl.org/collinfo.json"
|
| 35 |
+
COMMON_CRAWL_SAMPLE_SIZE = 10_000 # URLs per crawl import batch
|
| 36 |
+
COMMON_CRAWL_RESCAN_HOURS = 168 # re-import weekly (7 * 24)
|
| 37 |
+
|
| 38 |
+
# βββ Scheduler βββββββββββββββββββββββββββββββββββββββββββββββ
|
| 39 |
+
SCHEDULER_INTERVAL_SECONDS = 3600 # run re-verification check every hour
|
| 40 |
+
SCHEDULER_BATCH_SIZE = 100 # URLs per re-verification batch
|
| 41 |
+
|
| 42 |
+
# βββ Blocked TLDs / Patterns ββββββββββββββββββββββββββββββββ
|
| 43 |
+
BLOCKED_TLDS = {
|
| 44 |
+
".local", ".internal", ".test", ".example",
|
| 45 |
+
".invalid", ".localhost", ".onion",
|
| 46 |
+
}
|
| 47 |
+
|
| 48 |
+
# βββ Top 100 Seed Websites ββββββββββββββββββββββββββββββββββ
|
| 49 |
+
SEED_WEBSITES = [
|
| 50 |
+
"https://google.com",
|
| 51 |
+
"https://youtube.com",
|
| 52 |
+
"https://facebook.com",
|
| 53 |
+
"https://instagram.com",
|
| 54 |
+
"https://chatgpt.com",
|
| 55 |
+
"https://x.com",
|
| 56 |
+
"https://reddit.com",
|
| 57 |
+
"https://wikipedia.org",
|
| 58 |
+
"https://whatsapp.com",
|
| 59 |
+
"https://bing.com",
|
| 60 |
+
"https://tiktok.com",
|
| 61 |
+
"https://yahoo.co.jp",
|
| 62 |
+
"https://yandex.ru",
|
| 63 |
+
"https://yahoo.com",
|
| 64 |
+
"https://amazon.com",
|
| 65 |
+
"https://gemini.google.com",
|
| 66 |
+
"https://linkedin.com",
|
| 67 |
+
"https://bet.br",
|
| 68 |
+
"https://baidu.com",
|
| 69 |
+
"https://naver.com",
|
| 70 |
+
"https://netflix.com",
|
| 71 |
+
"https://pinterest.com",
|
| 72 |
+
"https://live.com",
|
| 73 |
+
"https://bilibili.com",
|
| 74 |
+
"https://pornhub.com",
|
| 75 |
+
"https://temu.com",
|
| 76 |
+
"https://dzen.ru",
|
| 77 |
+
"https://office.com",
|
| 78 |
+
"https://microsoft.com",
|
| 79 |
+
"https://xhamster.com",
|
| 80 |
+
"https://twitch.tv",
|
| 81 |
+
"https://xvideos.com",
|
| 82 |
+
"https://canva.com",
|
| 83 |
+
"https://weather.com",
|
| 84 |
+
"https://vk.com",
|
| 85 |
+
"https://globo.com",
|
| 86 |
+
"https://fandom.com",
|
| 87 |
+
"https://news.yahoo.co.jp",
|
| 88 |
+
"https://t.me",
|
| 89 |
+
"https://samsung.com",
|
| 90 |
+
"https://mail.ru",
|
| 91 |
+
"https://duckduckgo.com",
|
| 92 |
+
"https://nytimes.com",
|
| 93 |
+
"https://stripchat.com",
|
| 94 |
+
"https://xnxx.com",
|
| 95 |
+
"https://ebay.com",
|
| 96 |
+
"https://zoom.us",
|
| 97 |
+
"https://xhamster44.desi",
|
| 98 |
+
"https://discord.com",
|
| 99 |
+
"https://eporner.com",
|
| 100 |
+
"https://github.com",
|
| 101 |
+
"https://booking.com",
|
| 102 |
+
"https://spotify.com",
|
| 103 |
+
"https://cricbuzz.com",
|
| 104 |
+
"https://instructure.com",
|
| 105 |
+
"https://docomo.ne.jp",
|
| 106 |
+
"https://roblox.com",
|
| 107 |
+
"https://aliexpress.com",
|
| 108 |
+
"https://bbc.com",
|
| 109 |
+
"https://bbc.co.uk",
|
| 110 |
+
"https://ozon.ru",
|
| 111 |
+
"https://apple.com",
|
| 112 |
+
"https://imdb.com",
|
| 113 |
+
"https://telegram.org",
|
| 114 |
+
"https://brave.com",
|
| 115 |
+
"https://amazon.in",
|
| 116 |
+
"https://chaturbate.com",
|
| 117 |
+
"https://msn.com",
|
| 118 |
+
"https://walmart.com",
|
| 119 |
+
"https://amazon.co.jp",
|
| 120 |
+
"https://paypal.com",
|
| 121 |
+
"https://cnn.com",
|
| 122 |
+
"https://ya.ru",
|
| 123 |
+
"https://indeed.com",
|
| 124 |
+
"https://etsy.com",
|
| 125 |
+
"https://rakuten.co.jp",
|
| 126 |
+
"https://amazon.de",
|
| 127 |
+
"https://espn.com",
|
| 128 |
+
"https://hbomax.com",
|
| 129 |
+
"https://usps.com",
|
| 130 |
+
"https://music.youtube.com",
|
| 131 |
+
"https://ok.ru",
|
| 132 |
+
"https://wildberries.ru",
|
| 133 |
+
"https://office365.com",
|
| 134 |
+
"https://disneyplus.com",
|
| 135 |
+
"https://douyin.com",
|
| 136 |
+
"https://namu.wiki",
|
| 137 |
+
"https://adobe.com",
|
| 138 |
+
"https://shein.com",
|
| 139 |
+
"https://qq.com",
|
| 140 |
+
"https://amazon.co.uk",
|
| 141 |
+
"https://quora.com",
|
| 142 |
+
"https://faphouse.com",
|
| 143 |
+
"https://rutube.ru",
|
| 144 |
+
"https://theguardian.com",
|
| 145 |
+
"https://scribd.com",
|
| 146 |
+
"https://grok.com",
|
| 147 |
+
"https://zillow.com",
|
| 148 |
+
"https://dcinside.com",
|
| 149 |
+
"https://onlyfans.com",
|
| 150 |
+
]
|
backend/db.py
ADDED
|
@@ -0,0 +1,189 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
RandomWeb β Database Helpers
|
| 3 |
+
Supabase client initialization and common query functions.
|
| 4 |
+
"""
|
| 5 |
+
import logging
|
| 6 |
+
from datetime import datetime, timedelta, timezone
|
| 7 |
+
from urllib.parse import urlparse
|
| 8 |
+
from typing import Optional
|
| 9 |
+
|
| 10 |
+
from supabase import create_client, Client
|
| 11 |
+
|
| 12 |
+
from backend.config import (
|
| 13 |
+
SUPABASE_URL,
|
| 14 |
+
SUPABASE_SECRET_KEY,
|
| 15 |
+
SUPABASE_PUBLISHABLE_KEY,
|
| 16 |
+
RECHECK_INTERVAL_DAYS,
|
| 17 |
+
)
|
| 18 |
+
|
| 19 |
+
logger = logging.getLogger("randomweb.db")
|
| 20 |
+
|
| 21 |
+
# βββ Client Initialization ββββββββββββββββββββββββββββββββββ
|
| 22 |
+
_client: Optional[Client] = None
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def get_client() -> Client:
    """Return the module-wide Supabase client, creating it on first use.

    Prefers the secret key (full read/write access) and falls back to the
    publishable key.

    Raises:
        ValueError: when neither key is configured.
    """
    global _client
    if _client is None:
        # Priority: Secret Key (for writes) -> Publishable Key (fallback)
        key = SUPABASE_SECRET_KEY or SUPABASE_PUBLISHABLE_KEY

        if not key:
            # NOTE(review): the original log/emoji characters were mojibake
            # ("β...") from a broken encoding; replaced with plain ASCII.
            logger.critical("No Supabase API key found!")
            raise ValueError("SUPABASE_SECRET_KEY and SUPABASE_PUBLISHABLE_KEY are both empty.")

        _client = create_client(SUPABASE_URL, key)

        # "sb_"-prefixed keys are the new managed-key format; anything else
        # is treated as a legacy JWT. Logged only for debugging.
        key_type = "Managed (New)" if key.startswith("sb_") else "Legacy (JWT)"
        logger.info("Supabase client initialized (Type: %s) for %s", key_type, SUPABASE_URL)

    return _client
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def extract_domain(url: str) -> str:
    """Return the network location of *url*.

    For scheme-less inputs (where urlparse leaves netloc empty) the first
    path segment is used instead, e.g. "example.com/x" -> "example.com".
    """
    parts = urlparse(url)
    if parts.netloc:
        return parts.netloc
    return parts.path.split("/")[0]
| 49 |
+
|
| 50 |
+
|
| 51 |
+
# βββ Insert / Upsert ββββββββββββββββββββββββββββββββββββββββ
|
| 52 |
+
def upsert_website(
    url: str,
    source: str = "unknown",
    status: Optional[int] = None,
    is_active: bool = False,
) -> bool:
    """Insert or update a website record, keyed on url.

    Args:
        url: Full URL of the site.
        source: Discovery source label (e.g. "seed", "user_submit").
        status: HTTP status from the last validation, if any.
        is_active: Whether the site passed validation.

    Returns:
        True on success, False on any database error (logged, not raised).
    """
    try:
        # Fix: capture a single instant so last_checked and next_check are
        # consistent (the original called datetime.now() twice).
        now = datetime.now(timezone.utc)
        next_check = (
            (now + timedelta(days=RECHECK_INTERVAL_DAYS)).isoformat()
            if is_active
            else None  # inactive sites are not scheduled for re-verification
        )

        data = {
            "url": url,
            "domain": extract_domain(url),
            "source": source,
            "status": status,
            "is_active": is_active,
            "last_checked": now.isoformat(),
            "next_check": next_check,
        }

        get_client().table("websites").upsert(data, on_conflict="url").execute()
        return True
    except Exception as e:
        logger.error("Failed to upsert %s: %s", url, e)
        return False
| 85 |
+
|
| 86 |
+
|
| 87 |
+
def bulk_upsert_websites(records: list[dict]) -> int:
    """Bulk upsert website records, keyed on url.

    Returns len(records) when the batch succeeds, 0 for an empty input or
    on any database error (logged, not raised).
    """
    if not records:
        return 0
    try:
        client = get_client()
        client.table("websites").upsert(records, on_conflict="url").execute()
        return len(records)
    except Exception as e:
        logger.error("Bulk upsert failed (%d records): %s", len(records), e)
        return 0
| 99 |
+
|
| 100 |
+
|
| 101 |
+
# βββ Queries βββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 102 |
+
def get_random_active_url() -> Optional[str]:
    """Pick one random active website via the get_random_active_website RPC.

    Returns None when no rows come back or the call fails (logged).
    """
    try:
        response = get_client().rpc("get_random_active_website").execute()
        rows = response.data
        return rows[0]["url"] if rows else None
    except Exception as e:
        logger.error("Failed to get random URL: %s", e)
        return None
| 112 |
+
|
| 113 |
+
|
| 114 |
+
def search_websites(query: str, limit: int = 20) -> list[dict]:
    """Search active websites whose URL or domain contains *query* (case-insensitive).

    Args:
        query: Raw user search text.
        limit: Maximum number of rows to return.

    Returns:
        List of dicts with keys url/domain/is_active; [] on error or when the
        query is empty after sanitization.
    """
    try:
        # Fix (filter injection): the or_() string is parsed by PostgREST, so
        # characters meaningful to its grammar -- commas, parentheses -- and
        # ilike wildcards in a raw user query could break or alter the filter.
        # Strip them before interpolating.
        safe = "".join(ch for ch in query if ch not in ",()%*").strip()
        if not safe:
            return []
        result = (
            get_client()
            .table("websites")
            .select("url, domain, is_active")
            .or_(f"url.ilike.%{safe}%,domain.ilike.%{safe}%")
            .eq("is_active", True)
            .limit(limit)
            .execute()
        )
        return result.data or []
    except Exception as e:
        logger.error("Search failed for '%s': %s", query, e)
        return []
| 130 |
+
|
| 131 |
+
|
| 132 |
+
def get_active_count() -> int:
    """Read the active-website counter from the singleton stats row (id=1).

    Returns 0 when the row is missing or the query fails (logged).
    """
    try:
        response = get_client().table("stats").select("active_count").eq("id", 1).execute()
        rows = response.data
        return rows[0]["active_count"] if rows else 0
    except Exception as e:
        logger.error("Failed to get active count: %s", e)
        return 0
| 142 |
+
|
| 143 |
+
|
| 144 |
+
def get_total_count() -> int:
    """Read the total indexed-website counter from the singleton stats row (id=1).

    Returns 0 when the row is missing or the query fails (logged).
    """
    try:
        response = get_client().table("stats").select("total_count").eq("id", 1).execute()
        rows = response.data
        return rows[0]["total_count"] if rows else 0
    except Exception as e:
        logger.error("Failed to get total count: %s", e)
        return 0
| 154 |
+
|
| 155 |
+
|
| 156 |
+
def url_exists(url: str) -> bool:
    """Return True when *url* already has a row in websites (exact match).

    Returns False on any database error (logged), so callers may re-queue
    a URL that actually exists -- the upsert path makes that harmless.
    """
    try:
        response = (
            get_client()
            .table("websites")
            .select("id")
            .eq("url", url)
            .limit(1)
            .execute()
        )
        return len(response.data or []) > 0
    except Exception as e:
        logger.error("Failed to check URL existence: %s", e)
        return False
| 171 |
+
|
| 172 |
+
|
| 173 |
+
def get_urls_needing_recheck(limit: int = 100) -> list[dict]:
    """Fetch up to *limit* active websites whose next_check is due (<= now, UTC).

    Returns a list of {id, url, domain} dicts; [] on error (logged).
    """
    try:
        cutoff = datetime.now(timezone.utc).isoformat()
        response = (
            get_client()
            .table("websites")
            .select("id, url, domain")
            .eq("is_active", True)
            .lte("next_check", cutoff)
            .limit(limit)
            .execute()
        )
        return response.data or []
    except Exception as e:
        logger.error("Failed to get recheck URLs: %s", e)
        return []
backend/main.py
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
RandomWeb β Main Application
|
| 3 |
+
FastAPI app with background workers for URL discovery, validation, and re-verification.
|
| 4 |
+
"""
|
| 5 |
+
import asyncio
|
| 6 |
+
import logging
|
| 7 |
+
from contextlib import asynccontextmanager
|
| 8 |
+
|
| 9 |
+
from fastapi import FastAPI
|
| 10 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 11 |
+
|
| 12 |
+
from backend.api.routes import router
|
| 13 |
+
from backend.config import SEED_WEBSITES, SUPABASE_URL, SUPABASE_SECRET_KEY
|
| 14 |
+
from backend.db import get_client, extract_domain
|
| 15 |
+
from backend.workers.validator import run_validator, enqueue_url
|
| 16 |
+
from backend.workers.ct_log import run_ct_log_worker
|
| 17 |
+
from backend.workers.common_crawl import run_common_crawl_importer
|
| 18 |
+
from backend.workers.crawler import run_crawler
|
| 19 |
+
from backend.workers.scheduler import run_scheduler
|
| 20 |
+
|
| 21 |
+
# βββ Logging βββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 22 |
+
logging.basicConfig(
|
| 23 |
+
level=logging.INFO,
|
| 24 |
+
format="%(asctime)s [%(name)s] %(levelname)s: %(message)s",
|
| 25 |
+
datefmt="%Y-%m-%d %H:%M:%S",
|
| 26 |
+
)
|
| 27 |
+
logger = logging.getLogger("randomweb")
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
async def seed_top_websites():
    """Push every configured seed URL into the validation queue."""
    total = len(SEED_WEBSITES)
    logger.info("Seeding %d top websites...", total)
    for seed in SEED_WEBSITES:
        await enqueue_url(seed, source="seed")
    logger.info("All seed websites queued for validation")
| 36 |
+
|
| 37 |
+
|
| 38 |
+
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Start the background workers on startup and cancel them on shutdown."""
    logger.info("=" * 60)
    logger.info("RandomWeb starting up")
    logger.info("Supabase URL: %s", SUPABASE_URL)
    logger.info("Secret key configured: %s", "Yes" if SUPABASE_SECRET_KEY else "No")
    logger.info("=" * 60)

    # Initialize the Supabase client up front; failures are logged but do
    # not abort startup (workers will log their own errors).
    try:
        get_client()
        logger.info("Supabase client connected")
    except Exception as e:
        logger.error("Failed to connect to Supabase: %s", e)

    # Launch the background workers. Order matters: the validator consumes
    # the queue that every other worker feeds.
    worker_coros = [
        ("validator", run_validator()),            # 1. validation worker (must start first)
        ("seeder", seed_top_websites()),           # 2. seed top websites
        ("ct_log", run_ct_log_worker()),           # 3. CT Log worker
        ("common_crawl", run_common_crawl_importer()),  # 4. Common Crawl importer
        ("crawler", run_crawler()),                # 5. BFS crawler
        ("scheduler", run_scheduler()),            # 6. re-verification scheduler
    ]
    tasks = [asyncio.create_task(coro, name=name) for name, coro in worker_coros]
    logger.info("All %d background workers launched", len(tasks))

    yield

    # Shutdown: cancel every worker and wait for them to unwind.
    logger.info("Shutting down background workers...")
    for task in tasks:
        task.cancel()
    await asyncio.gather(*tasks, return_exceptions=True)
    logger.info("All workers stopped")
| 85 |
+
|
| 86 |
+
|
| 87 |
+
# βββ FastAPI App βββββββββββββββββββββββββββββββββββββββββββββ
|
| 88 |
+
app = FastAPI(
|
| 89 |
+
title="RandomWeb",
|
| 90 |
+
description="Discover random websites from across the internet",
|
| 91 |
+
version="1.0.0",
|
| 92 |
+
lifespan=lifespan,
|
| 93 |
+
)
|
| 94 |
+
|
| 95 |
+
# CORS β allow frontend
|
| 96 |
+
app.add_middleware(
|
| 97 |
+
CORSMiddleware,
|
| 98 |
+
allow_origins=["*"],
|
| 99 |
+
allow_credentials=True,
|
| 100 |
+
allow_methods=["*"],
|
| 101 |
+
allow_headers=["*"],
|
| 102 |
+
)
|
| 103 |
+
|
| 104 |
+
# Mount API routes
|
| 105 |
+
app.include_router(router)
|
backend/requirements.txt
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi==0.115.6
|
| 2 |
+
uvicorn[standard]==0.34.0
|
| 3 |
+
supabase==2.11.0
|
| 4 |
+
aiohttp==3.11.11
|
| 5 |
+
aiolimiter==1.2.1
|
| 6 |
+
protego==0.3.1
|
| 7 |
+
beautifulsoup4==4.12.3
|
| 8 |
+
websockets==14.1
|
| 9 |
+
pydantic==2.10.4
|
backend/workers/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# Background Workers
|
backend/workers/common_crawl.py
ADDED
|
@@ -0,0 +1,187 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
RandomWeb β Common Crawl CDX Importer
|
| 3 |
+
Fetches URLs from the Common Crawl CDX Index API to seed the database
|
| 4 |
+
with a broad sample of the internet.
|
| 5 |
+
"""
|
| 6 |
+
import asyncio
|
| 7 |
+
import logging
|
| 8 |
+
import random
|
| 9 |
+
from typing import Optional
|
| 10 |
+
from urllib.parse import urlparse
|
| 11 |
+
|
| 12 |
+
import aiohttp
|
| 13 |
+
|
| 14 |
+
from backend.config import (
|
| 15 |
+
COMMON_CRAWL_INDEX_URL,
|
| 16 |
+
COMMON_CRAWL_SAMPLE_SIZE,
|
| 17 |
+
COMMON_CRAWL_RESCAN_HOURS,
|
| 18 |
+
USER_AGENT,
|
| 19 |
+
REQUEST_TIMEOUT,
|
| 20 |
+
)
|
| 21 |
+
from backend.workers.validator import enqueue_url
|
| 22 |
+
|
| 23 |
+
logger = logging.getLogger("randomweb.common_crawl")
|
| 24 |
+
|
| 25 |
+
# Sample TLDs to query for broad coverage
|
| 26 |
+
SAMPLE_QUERIES = [
|
| 27 |
+
"*.com", "*.org", "*.net", "*.io", "*.co",
|
| 28 |
+
"*.edu", "*.gov", "*.dev", "*.app", "*.info",
|
| 29 |
+
"*.me", "*.tv", "*.co.uk", "*.de", "*.fr",
|
| 30 |
+
"*.jp", "*.ru", "*.br", "*.in", "*.ca",
|
| 31 |
+
"*.au", "*.nl", "*.it", "*.es", "*.ch",
|
| 32 |
+
"*.se", "*.no", "*.fi", "*.dk", "*.pl",
|
| 33 |
+
]
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
async def _get_latest_crawl_index(
    session: aiohttp.ClientSession,
) -> Optional[str]:
    """Return the CDX API endpoint of the most recent Common Crawl snapshot.

    Queries the collection-info index and picks the first entry (the index
    lists crawls newest-first). Returns None on any HTTP or network failure.
    """
    try:
        request = session.get(
            COMMON_CRAWL_INDEX_URL,
            timeout=aiohttp.ClientTimeout(total=30),
            headers={"User-Agent": USER_AGENT},
        )
        async with request as resp:
            if resp.status != 200:
                logger.error("Failed to fetch crawl index: HTTP %d", resp.status)
                return None

            listing = await resp.json()
            if listing:
                # Newest crawl is the first element of the listing.
                latest = listing[0]
                logger.info("Latest Common Crawl: %s", latest.get("id", "unknown"))
                return latest.get("cdx-api")

    except Exception as exc:
        logger.error("Failed to get crawl index: %s", exc)

    return None
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
async def _query_cdx_for_domains(
    session: aiohttp.ClientSession,
    cdx_api: str,
    query: str,
    limit: int = 500,
) -> list[str]:
    """Query the CDX API for URLs matching a pattern.

    Args:
        session: shared aiohttp session.
        cdx_api: CDX endpoint URL (from _get_latest_crawl_index).
        query: URL pattern, e.g. "*.com".
        limit: maximum number of CDX records to request.

    Returns:
        A list of homepage URLs (``https://<host>``) derived from matching
        records; empty on error or timeout. May contain duplicates — the
        caller deduplicates by domain.
    """
    # Hoisted out of the per-line loop: the original re-imported json on
    # every JSON-shaped line.
    import json

    urls: list[str] = []
    try:
        params = {
            "url": query,
            "output": "json",
            "fl": "url",
            "limit": str(limit),
            # Only records that were served successfully.
            "filter": "status:200",
        }

        async with session.get(
            cdx_api,
            params=params,
            timeout=aiohttp.ClientTimeout(total=60),
            headers={"User-Agent": USER_AGENT},
        ) as resp:
            if resp.status != 200:
                logger.debug("CDX query failed for %s: HTTP %d", query, resp.status)
                return urls

            text = await resp.text()

            # The response is newline-delimited; each line is either a JSON
            # object, a quoted URL, or a bare URL. Lines starting with "["
            # are wrapper/metadata and are skipped.
            for line in text.strip().split("\n"):
                line = line.strip()
                if not line or line.startswith("["):
                    continue
                try:
                    if line.startswith("{"):
                        url = json.loads(line).get("url", "")
                    elif line.startswith('"'):
                        url = line.strip('"')
                    else:
                        url = line

                    if url and url.startswith("http"):
                        # Normalize every hit down to the site's homepage.
                        parsed = urlparse(url)
                        urls.append(f"https://{parsed.netloc}")
                except Exception:
                    continue

    except asyncio.TimeoutError:
        logger.debug("CDX query timed out for %s", query)
    except Exception as e:
        logger.debug("CDX query error for %s: %s", query, e)

    return urls
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
async def run_common_crawl_importer():
    """
    Main Common Crawl import loop.
    Fetches a broad sample of URLs from the CDX API and queues them.
    Runs once on startup, then rescans weekly.

    Flow per cycle: resolve the latest CDX endpoint, shuffle the TLD sample
    queries, pull up to COMMON_CRAWL_SAMPLE_SIZE unique domains across them,
    and push each homepage into the validation queue via enqueue_url().
    """
    logger.info("Common Crawl importer starting")

    while True:
        try:
            async with aiohttp.ClientSession() as session:
                cdx_api = await _get_latest_crawl_index(session)
                if not cdx_api:
                    # Index listing unavailable — back off before retrying.
                    logger.warning("No CDX API available, retrying in 1 hour")
                    await asyncio.sleep(3600)
                    continue

                logger.info("Importing from CDX API: %s", cdx_api)
                total_queued = 0
                # Per-cycle dedupe only; cross-run dedupe presumably happens
                # downstream (validator/db) — TODO confirm.
                seen_domains = set()

                # Shuffle queries for variety
                queries = SAMPLE_QUERIES.copy()
                random.shuffle(queries)

                # Spread the sample budget evenly across queries, but request
                # at least 50 per query so small budgets still return data.
                per_query_limit = max(
                    50, COMMON_CRAWL_SAMPLE_SIZE // len(queries)
                )

                for query in queries:
                    if total_queued >= COMMON_CRAWL_SAMPLE_SIZE:
                        break

                    urls = await _query_cdx_for_domains(
                        session, cdx_api, query, limit=per_query_limit
                    )

                    for url in urls:
                        domain = urlparse(url).netloc
                        if domain and domain not in seen_domains:
                            seen_domains.add(domain)
                            await enqueue_url(url, source="common_crawl")
                            total_queued += 1

                            if total_queued >= COMMON_CRAWL_SAMPLE_SIZE:
                                break

                    # Be polite to the CDX API
                    await asyncio.sleep(2)

                logger.info(
                    "Common Crawl import complete: %d URLs queued", total_queued
                )

        except Exception as e:
            # Keep the worker alive on any failure; next rescan retries.
            logger.error("Common Crawl importer error: %s", e)

        # Wait before next rescan
        logger.info(
            "Next Common Crawl rescan in %d hours",
            COMMON_CRAWL_RESCAN_HOURS,
        )
        await asyncio.sleep(COMMON_CRAWL_RESCAN_HOURS * 3600)
|
backend/workers/crawler.py
ADDED
|
@@ -0,0 +1,305 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
RandomWeb β BFS Recursive Crawler
|
| 3 |
+
Breadth-first crawler that extracts and queues all hyperlinks from indexed pages
|
| 4 |
+
to continuously expand the known network graph.
|
| 5 |
+
"""
|
| 6 |
+
import asyncio
|
| 7 |
+
import logging
|
| 8 |
+
import re
|
| 9 |
+
from collections import deque
|
| 10 |
+
from typing import Optional
|
| 11 |
+
from urllib.parse import urljoin, urlparse
|
| 12 |
+
|
| 13 |
+
import aiohttp
|
| 14 |
+
from aiolimiter import AsyncLimiter
|
| 15 |
+
from bs4 import BeautifulSoup
|
| 16 |
+
from protego import Protego
|
| 17 |
+
|
| 18 |
+
from backend.config import (
|
| 19 |
+
USER_AGENT,
|
| 20 |
+
REQUEST_TIMEOUT,
|
| 21 |
+
MAX_GLOBAL_CONCURRENCY,
|
| 22 |
+
PER_DOMAIN_RATE_LIMIT,
|
| 23 |
+
CRAWL_DELAY_DEFAULT,
|
| 24 |
+
MAX_CRAWL_DEPTH,
|
| 25 |
+
MAX_LINKS_PER_PAGE,
|
| 26 |
+
MAX_QUEUE_SIZE,
|
| 27 |
+
BLOCKED_TLDS,
|
| 28 |
+
)
|
| 29 |
+
from backend.workers.validator import enqueue_url
|
| 30 |
+
from backend.db import get_client
|
| 31 |
+
|
| 32 |
+
logger = logging.getLogger("randomweb.crawler")
|
| 33 |
+
|
| 34 |
+
# βββ State βββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 35 |
+
_crawl_queue: deque = deque(maxlen=MAX_QUEUE_SIZE)
|
| 36 |
+
_visited: set = set()
|
| 37 |
+
_MAX_VISITED_CACHE = 1_000_000
|
| 38 |
+
_robots_cache: dict[str, Optional[Protego]] = {}
|
| 39 |
+
_domain_limiters: dict[str, AsyncLimiter] = {}
|
| 40 |
+
|
| 41 |
+
# File extensions to skip
|
| 42 |
+
SKIP_EXTENSIONS = {
|
| 43 |
+
".jpg", ".jpeg", ".png", ".gif", ".svg", ".webp", ".ico",
|
| 44 |
+
".css", ".js", ".woff", ".woff2", ".ttf", ".eot",
|
| 45 |
+
".pdf", ".doc", ".docx", ".xls", ".xlsx", ".ppt", ".pptx",
|
| 46 |
+
".zip", ".rar", ".7z", ".tar", ".gz",
|
| 47 |
+
".mp3", ".mp4", ".avi", ".mkv", ".mov", ".flv",
|
| 48 |
+
".exe", ".msi", ".dmg", ".apk",
|
| 49 |
+
}
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def _get_domain_limiter(domain: str) -> AsyncLimiter:
    """Return the shared rate limiter for a domain, creating it on first use."""
    limiter = _domain_limiters.get(domain)
    if limiter is None:
        limiter = AsyncLimiter(PER_DOMAIN_RATE_LIMIT, 1.0)
        _domain_limiters[domain] = limiter
    return limiter
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
async def _fetch_robots(
    session: aiohttp.ClientSession, domain: str
) -> Optional[Protego]:
    """Fetch and parse a domain's robots.txt, caching the result.

    Returns a parsed Protego ruleset, or None when robots.txt is missing or
    unreachable. Failures are cached as None too, so each domain is fetched
    at most once per process lifetime (callers treat None as "no rules").
    """
    if domain in _robots_cache:
        return _robots_cache[domain]

    try:
        async with session.get(
            f"https://{domain}/robots.txt",
            timeout=aiohttp.ClientTimeout(total=REQUEST_TIMEOUT),
            headers={"User-Agent": USER_AGENT},
            allow_redirects=True,
            # NOTE(review): ssl=False disables certificate verification —
            # presumably deliberate to tolerate misconfigured sites; confirm.
            ssl=False,
        ) as resp:
            if resp.status == 200:
                text = await resp.text()
                parser = Protego.parse(text)
                _robots_cache[domain] = parser
                return parser
    except Exception:
        # Any network/TLS/parse failure falls through to the None cache entry.
        pass

    _robots_cache[domain] = None
    return None
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
def _normalize_url(base_url: str, href: str) -> Optional[str]:
    """Normalize and validate a discovered URL.

    Resolves ``href`` relative to ``base_url``, then filters out non-HTTP
    schemes, blocked TLDs, and unwanted file extensions.

    Returns:
        The cleaned absolute URL (fragment dropped, trailing slashes on the
        path removed, length-capped at 2000 chars), or None when the link
        should be discarded.
    """
    try:
        # Resolve relative links against the page they were found on.
        full_url = urljoin(base_url, href)
        parsed = urlparse(full_url)

        # Only HTTP/HTTPS links are crawlable.
        if parsed.scheme not in ("http", "https"):
            return None

        # Skip blocked TLDs.
        domain = parsed.netloc.lower()
        if any(domain.endswith(tld) for tld in BLOCKED_TLDS):
            return None

        # Skip file extensions we don't want (images, archives, media, ...).
        path_lower = parsed.path.lower()
        if any(path_lower.endswith(ext) for ext in SKIP_EXTENSIONS):
            return None

        # Rebuild without the fragment. Strip trailing slashes from the PATH
        # only — the previous rstrip on the final string could also eat
        # meaningful slashes at the end of the query string.
        clean = f"{parsed.scheme}://{parsed.netloc}{parsed.path.rstrip('/')}"
        if parsed.query:
            clean += f"?{parsed.query}"

        # Guard against absurdly long URLs.
        return clean if len(clean) < 2000 else None

    except Exception:
        return None
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
async def _crawl_page(
    session: aiohttp.ClientSession,
    url: str,
    depth: int,
    semaphore: asyncio.Semaphore,
) -> list[str]:
    """
    Fetch a page and extract all hyperlinks.
    Returns list of discovered URLs.

    Politeness: honors robots.txt disallow rules and crawl-delay, and runs
    under both a global concurrency semaphore and a per-domain rate limiter.
    Note: `depth` is accepted but not read anywhere in this body — callers
    track BFS depth themselves.
    """
    domain = urlparse(url).netloc
    limiter = _get_domain_limiter(domain)

    async with semaphore:
        async with limiter:
            # Check robots.txt
            robots = await _fetch_robots(session, domain)
            if robots and not robots.can_fetch(url, USER_AGENT):
                return []

            # Respect crawl delay (robots.txt value wins over the default).
            delay = CRAWL_DELAY_DEFAULT
            if robots:
                d = robots.crawl_delay(USER_AGENT)
                if d is not None:
                    delay = d
            if delay > 0:
                await asyncio.sleep(delay)

            discovered = []
            try:
                async with session.get(
                    url,
                    timeout=aiohttp.ClientTimeout(total=REQUEST_TIMEOUT),
                    headers={
                        "User-Agent": USER_AGENT,
                        "Accept": "text/html",
                    },
                    allow_redirects=True,
                    # NOTE(review): ssl=False skips cert verification; confirm
                    # this is intentional for tolerating broken TLS setups.
                    ssl=False,
                ) as resp:
                    if resp.status != 200:
                        return []

                    # Only parse HTML responses.
                    content_type = resp.headers.get("Content-Type", "")
                    if "text/html" not in content_type:
                        return []

                    # Limit response body to avoid memory issues
                    body = await resp.text(errors="ignore")
                    if len(body) > 5_000_000:  # 5MB limit
                        body = body[:5_000_000]

                    soup = BeautifulSoup(body, "html.parser")
                    links = soup.find_all("a", href=True)

                    # Collect up to MAX_LINKS_PER_PAGE normalized, unseen links.
                    count = 0
                    for link in links:
                        if count >= MAX_LINKS_PER_PAGE:
                            break

                        href = link.get("href", "").strip()
                        if not href:
                            continue

                        normalized = _normalize_url(url, href)
                        if normalized and normalized not in _visited:
                            discovered.append(normalized)
                            count += 1

            except asyncio.TimeoutError:
                logger.debug("Timeout crawling %s", url)
            except Exception as e:
                logger.debug("Error crawling %s: %s", url, e)

            return discovered
|
| 197 |
+
|
| 198 |
+
|
| 199 |
+
async def seed_from_database():
    """Seed the crawl frontier with up to 1000 active URLs from the database."""
    try:
        query = (
            get_client()
            .table("websites")
            .select("url")
            .eq("is_active", True)
            .limit(1000)
        )
        result = query.execute()

        rows = result.data
        if rows:
            # Enqueue at depth 0; skip anything this process already visited.
            for row in rows:
                candidate = row["url"]
                if candidate not in _visited:
                    _crawl_queue.append({"url": candidate, "depth": 0})
            logger.info("Seeded crawler with %d URLs from database", len(rows))
    except Exception as e:
        logger.error("Failed to seed from database: %s", e)
|
| 218 |
+
|
| 219 |
+
|
| 220 |
+
async def run_crawler():
    """
    Main BFS crawler loop.
    Continuously crawls pages, extracts links, and queues discoveries
    for validation.

    Each queue item carries its BFS depth; discovered pages are re-queued at
    parent depth + 1 and expansion stops at MAX_CRAWL_DEPTH. (The previous
    version hardcoded the child depth to a constant, so the depth limit was
    never actually enforced.)
    """
    logger.info("BFS Crawler starting")

    # Wait for initial seeds to be validated
    await asyncio.sleep(30)

    # Seed from database
    await seed_from_database()

    semaphore = asyncio.Semaphore(MAX_GLOBAL_CONCURRENCY)
    connector = aiohttp.TCPConnector(
        limit=MAX_GLOBAL_CONCURRENCY,
        ttl_dns_cache=300,
        force_close=False,
    )

    async with aiohttp.ClientSession(connector=connector) as session:
        while True:
            try:
                if not _crawl_queue:
                    # Re-seed periodically when the frontier runs dry.
                    await seed_from_database()
                    if not _crawl_queue:
                        logger.debug("Crawler queue empty, waiting...")
                        await asyncio.sleep(60)
                        continue

                # Pop a small batch and crawl it concurrently.
                batch_size = min(10, len(_crawl_queue))
                tasks = []
                task_depths = []  # parallel to `tasks`: depth of each page

                for _ in range(batch_size):
                    if not _crawl_queue:
                        break

                    item = _crawl_queue.popleft()
                    url = item["url"]
                    depth = item["depth"]

                    if url in _visited:
                        continue
                    _visited.add(url)

                    # Evict old entries from visited cache (approx. FIFO).
                    if len(_visited) > _MAX_VISITED_CACHE:
                        for v in list(_visited)[:_MAX_VISITED_CACHE // 2]:
                            _visited.discard(v)

                    if depth <= MAX_CRAWL_DEPTH:
                        tasks.append(_crawl_page(session, url, depth, semaphore))
                        task_depths.append(depth)

                if tasks:
                    results = await asyncio.gather(*tasks, return_exceptions=True)

                    for parent_depth, result in zip(task_depths, results):
                        if not isinstance(result, list):
                            continue  # exception from _crawl_page; skip
                        for discovered_url in result:
                            # Extract homepage for validation
                            parsed = urlparse(discovered_url)
                            homepage = f"https://{parsed.netloc}"
                            await enqueue_url(homepage, source="crawler")

                            # Add to crawl queue for further BFS, inheriting
                            # the parent's depth + 1.
                            child_depth = parent_depth + 1
                            if (
                                child_depth <= MAX_CRAWL_DEPTH
                                and len(_crawl_queue) < MAX_QUEUE_SIZE
                                and discovered_url not in _visited
                            ):
                                _crawl_queue.append({
                                    "url": discovered_url,
                                    "depth": child_depth,
                                })

                # Small delay between batches
                await asyncio.sleep(0.5)

            except Exception as e:
                logger.error("Crawler loop error: %s", e)
                await asyncio.sleep(10)
|
backend/workers/ct_log.py
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
RandomWeb β Certificate Transparency Log Worker
|
| 3 |
+
Connects to CertStream WebSocket to discover newly registered domains in real-time.
|
| 4 |
+
"""
|
| 5 |
+
import asyncio
|
| 6 |
+
import json
|
| 7 |
+
import logging
|
| 8 |
+
from urllib.parse import urlparse
|
| 9 |
+
|
| 10 |
+
import websockets
|
| 11 |
+
|
| 12 |
+
from backend.config import (
|
| 13 |
+
CERTSTREAM_URL,
|
| 14 |
+
CT_LOG_BATCH_SIZE,
|
| 15 |
+
CT_LOG_RECONNECT_DELAY,
|
| 16 |
+
CT_LOG_MAX_RECONNECT_DELAY,
|
| 17 |
+
BLOCKED_TLDS,
|
| 18 |
+
)
|
| 19 |
+
from backend.workers.validator import enqueue_url
|
| 20 |
+
from backend.db import url_exists
|
| 21 |
+
|
| 22 |
+
logger = logging.getLogger("randomweb.ct_log")
|
| 23 |
+
|
| 24 |
+
# βββ Domain Filtering βββββββββββββββββββββββββββββββββββββββ
|
| 25 |
+
_seen_domains: set = set()
|
| 26 |
+
_MAX_SEEN_CACHE = 500_000
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def _is_valid_domain(domain: str) -> bool:
    """Filter out invalid, wildcard, IP, and blocked TLD domains."""
    if not domain or len(domain) < 4:
        return False

    # Drop a single leading wildcard label; any remaining '*' is invalid.
    if domain.startswith("*."):
        domain = domain[2:]
    if "*" in domain:
        return False

    # Reject bare IPv4 addresses (every dotted label is numeric).
    if all(part.isdigit() for part in domain.split(".")):
        return False

    # Reject anything under a blocked TLD.
    if any(domain.endswith(tld) for tld in BLOCKED_TLDS):
        return False

    # A registrable name needs at least one dot and must fit the 253-char
    # DNS limit; anything longer is likely garbage.
    return "." in domain and len(domain) <= 253
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
def _deduplicate(domain: str) -> bool:
    """Record a domain in the seen-cache; return True only on first sighting."""
    if domain in _seen_domains:
        return False

    # Cap memory: when the cache fills, drop roughly half of it in one go
    # (FIFO approximation — set iteration order is not insertion order).
    if len(_seen_domains) >= _MAX_SEEN_CACHE:
        stale = list(_seen_domains)[: _MAX_SEEN_CACHE // 2]
        _seen_domains.difference_update(stale)

    _seen_domains.add(domain)
    return True
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
async def _process_message(message: dict):
    """Extract candidate domains from one CertStream message and queue them.

    Only "certificate_update" messages are processed; each domain on the
    leaf certificate is normalized, validated, deduplicated, and queued as
    an https:// URL for validation.
    """
    try:
        if message.get("message_type") != "certificate_update":
            return

        leaf_cert = message.get("data", {}).get("leaf_cert", {})
        for raw_domain in leaf_cert.get("all_domains", []):
            # Strip a leading wildcard label before normalizing.
            candidate = raw_domain[2:] if raw_domain.startswith("*.") else raw_domain
            candidate = candidate.lower().strip()

            if not _is_valid_domain(candidate):
                continue
            if not _deduplicate(candidate):
                continue

            await enqueue_url(f"https://{candidate}", source="ct_log")

    except Exception as e:
        logger.debug("Error processing CT message: %s", e)
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
async def run_ct_log_worker():
    """
    Main CT log worker loop. Connects to CertStream WebSocket,
    parses certificate updates, and queues new domains for validation.
    Auto-reconnects with exponential backoff.

    Backoff starts at CT_LOG_RECONNECT_DELAY, doubles on each failed
    connection, caps at CT_LOG_MAX_RECONNECT_DELAY, and resets as soon as a
    connection succeeds.
    """
    logger.info("CT Log worker starting β connecting to %s", CERTSTREAM_URL)
    reconnect_delay = CT_LOG_RECONNECT_DELAY

    while True:
        try:
            async with websockets.connect(
                CERTSTREAM_URL,
                ping_interval=30,
                ping_timeout=10,
                close_timeout=5,
                max_size=2**20,  # 1MB max message size
            ) as ws:
                logger.info("Connected to CertStream")
                reconnect_delay = CT_LOG_RECONNECT_DELAY  # Reset on success

                # Stream messages until the connection drops.
                async for raw_message in ws:
                    try:
                        message = json.loads(raw_message)
                        await _process_message(message)
                    except json.JSONDecodeError:
                        # Malformed frame — skip it, keep the stream alive.
                        continue
                    except Exception as e:
                        logger.debug("Message processing error: %s", e)

        except websockets.exceptions.ConnectionClosed as e:
            logger.warning("CertStream connection closed: %s", e)
        except Exception as e:
            logger.warning("CertStream connection error: %s", e)

        # Exponential backoff reconnect
        logger.info("Reconnecting to CertStream in %ds...", reconnect_delay)
        await asyncio.sleep(reconnect_delay)
        reconnect_delay = min(reconnect_delay * 2, CT_LOG_MAX_RECONNECT_DELAY)
|
backend/workers/scheduler.py
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
RandomWeb β Re-verification Scheduler
|
| 3 |
+
Rolling yearly re-verification of indexed websites.
|
| 4 |
+
Politely re-checks active URLs and toggles visibility on failure.
|
| 5 |
+
"""
|
| 6 |
+
import asyncio
|
| 7 |
+
import logging
|
| 8 |
+
from datetime import datetime, timezone
|
| 9 |
+
|
| 10 |
+
from backend.config import SCHEDULER_INTERVAL_SECONDS, SCHEDULER_BATCH_SIZE
|
| 11 |
+
from backend.db import get_urls_needing_recheck
|
| 12 |
+
from backend.workers.validator import enqueue_url
|
| 13 |
+
|
| 14 |
+
logger = logging.getLogger("randomweb.scheduler")
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
async def run_scheduler():
    """
    Background scheduler that continuously checks for URLs due re-verification.

    Every SCHEDULER_INTERVAL_SECONDS, pulls up to SCHEDULER_BATCH_SIZE
    records whose recheck is due and routes each through the validation
    queue via enqueue_url().
    """
    logger.info("Re-verification scheduler started (interval: %ds)", SCHEDULER_INTERVAL_SECONDS)

    # Let the rest of the system warm up before the first pass.
    await asyncio.sleep(120)

    while True:
        try:
            due = get_urls_needing_recheck(limit=SCHEDULER_BATCH_SIZE)

            if not due:
                logger.debug("No URLs due for re-verification")
            else:
                logger.info("Re-verifying %d URLs", len(due))
                for record in due:
                    await enqueue_url(record["url"], source="recheck")
                    # Small delay between queuing to avoid flooding.
                    await asyncio.sleep(0.1)
                logger.info("Queued %d URLs for re-verification", len(due))

        except Exception as e:
            logger.error("Scheduler error: %s", e)

        # Sleep until the next scheduled pass.
        await asyncio.sleep(SCHEDULER_INTERVAL_SECONDS)
|
backend/workers/validator.py
ADDED
|
@@ -0,0 +1,259 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
RandomWeb β Polite Async HTTP Validator
|
| 3 |
+
Validates discovered URLs with rate limiting, robots.txt compliance,
|
| 4 |
+
clear user-agent identification, and timeout rules.
|
| 5 |
+
"""
|
| 6 |
+
import asyncio
|
| 7 |
+
import logging
|
| 8 |
+
from datetime import datetime, timedelta, timezone
|
| 9 |
+
from typing import Optional
|
| 10 |
+
from urllib.parse import urlparse
|
| 11 |
+
|
| 12 |
+
import aiohttp
|
| 13 |
+
from aiolimiter import AsyncLimiter
|
| 14 |
+
from protego import Protego
|
| 15 |
+
|
| 16 |
+
from backend.config import (
|
| 17 |
+
USER_AGENT,
|
| 18 |
+
REQUEST_TIMEOUT,
|
| 19 |
+
VALIDATION_CONCURRENCY,
|
| 20 |
+
PER_DOMAIN_RATE_LIMIT,
|
| 21 |
+
CRAWL_DELAY_DEFAULT,
|
| 22 |
+
RECHECK_INTERVAL_DAYS,
|
| 23 |
+
)
|
| 24 |
+
from backend.db import get_client, extract_domain
|
| 25 |
+
|
| 26 |
+
logger = logging.getLogger("randomweb.validator")
|
| 27 |
+
|
| 28 |
+
# βββ Shared State ββββββββββββββββββββββββββββββββββββββββββββ
|
| 29 |
+
_validation_queue: asyncio.Queue = asyncio.Queue(maxsize=50_000)
|
| 30 |
+
_robots_cache: dict[str, Optional[Protego]] = {}
|
| 31 |
+
_domain_limiters: dict[str, AsyncLimiter] = {}
|
| 32 |
+
_semaphore: Optional[asyncio.Semaphore] = None
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def get_validation_queue() -> asyncio.Queue:
    """Expose the module-level validation queue to other workers."""
    return _validation_queue
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
async def enqueue_url(url: str, source: str = "unknown"):
    """Add a URL to the validation queue; drop it (with a warning) when full."""
    item = {"url": url, "source": source}
    try:
        _validation_queue.put_nowait(item)
    except asyncio.QueueFull:
        logger.warning("Validation queue full, dropping: %s", url)
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
def _get_domain_limiter(domain: str) -> AsyncLimiter:
    """Get or create a per-domain rate limiter."""
    try:
        return _domain_limiters[domain]
    except KeyError:
        # First request for this domain: create its limiter.
        limiter = AsyncLimiter(PER_DOMAIN_RATE_LIMIT, 1.0)
        _domain_limiters[domain] = limiter
        return limiter
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
async def _fetch_robots_txt(
    session: aiohttp.ClientSession, domain: str
) -> Optional[Protego]:
    """Fetch and parse robots.txt for a domain. Cached.

    Returns a parsed Protego ruleset, or None when robots.txt is missing or
    unreachable. Failures are cached as None so each domain is fetched at
    most once per process lifetime.
    """
    if domain in _robots_cache:
        return _robots_cache[domain]

    robots_url = f"https://{domain}/robots.txt"
    try:
        async with session.get(
            robots_url,
            timeout=aiohttp.ClientTimeout(total=REQUEST_TIMEOUT),
            headers={"User-Agent": USER_AGENT},
            allow_redirects=True,
            # NOTE(review): ssl=False disables certificate verification —
            # presumably deliberate to tolerate broken TLS setups; confirm.
            ssl=False,
        ) as resp:
            if resp.status == 200:
                text = await resp.text()
                parser = Protego.parse(text)
                _robots_cache[domain] = parser
                return parser
    except Exception:
        # Any network/TLS/parse failure falls through to the None cache entry.
        pass

    _robots_cache[domain] = None
    return None
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
async def _can_fetch(
    session: aiohttp.ClientSession, url: str
) -> tuple[bool, float]:
    """Return ``(allowed, crawl_delay)`` for ``url`` per its robots.txt.

    A missing/unreadable robots.txt means the URL is allowed with the
    default crawl delay.
    """
    robots = await _fetch_robots_txt(session, extract_domain(url))
    if robots is None:
        return True, CRAWL_DELAY_DEFAULT

    delay = robots.crawl_delay(USER_AGENT)
    return (
        robots.can_fetch(url, USER_AGENT),
        CRAWL_DELAY_DEFAULT if delay is None else delay,
    )
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
async def validate_url(
    session: aiohttp.ClientSession,
    url: str,
    source: str = "unknown",
) -> Optional[dict]:
    """
    Validate a single URL. Returns a record dict if successful, else None.

    Steps:
    1. Acquire the per-domain rate limiter, then check robots.txt
       (returns None if disallowed).
    2. Honor the advertised crawl delay.
    3. Probe with HEAD; on any failure, retry once with GET.
    4. Build the upsert record. Only HTTP 200 counts as active; active
       sites get a recheck timestamp, everything else gets next_check=None.
    """
    domain = extract_domain(url)
    limiter = _get_domain_limiter(domain)

    # Rate limit per domain
    async with limiter:
        # Check robots.txt — a disallowed URL produces no record at all.
        allowed, delay = await _can_fetch(session, url)
        if not allowed:
            logger.debug("Blocked by robots.txt: %s", url)
            return None

        # Respect crawl delay (sleep happens while holding the limiter).
        if delay > 0:
            await asyncio.sleep(delay)

        # Timestamp captured before the probe; stored as last_checked.
        now = datetime.now(timezone.utc).isoformat()
        status_code = None

        try:
            # Try HEAD first (lighter — no response body).
            async with session.head(
                url,
                timeout=aiohttp.ClientTimeout(total=REQUEST_TIMEOUT),
                headers={"User-Agent": USER_AGENT},
                allow_redirects=True,
                # Deliberately skips certificate verification (best-effort probe).
                ssl=False,
            ) as resp:
                status_code = resp.status
        except Exception:
            try:
                # Fallback to GET — some servers reject or mishandle HEAD.
                async with session.get(
                    url,
                    timeout=aiohttp.ClientTimeout(total=REQUEST_TIMEOUT),
                    headers={"User-Agent": USER_AGENT},
                    allow_redirects=True,
                    ssl=False,
                ) as resp:
                    status_code = resp.status
            except Exception as e:
                # Both probes failed: record is still produced, with status None.
                logger.debug("Validation failed for %s: %s", url, e)
                status_code = None

        # NOTE(review): only an exact 200 (after redirects) is "active";
        # other 2xx codes are treated as inactive — confirm this is intended.
        is_active = status_code == 200
        next_check = (
            (datetime.now(timezone.utc) + timedelta(days=RECHECK_INTERVAL_DAYS)).isoformat()
            if is_active
            else None
        )

        # Shape matches the "websites" table upserted by run_validator().
        record = {
            "url": url,
            "domain": domain,
            "source": source,
            "status": status_code,
            "is_active": is_active,
            "last_checked": now,
            "next_check": next_check,
        }

        return record
|
| 178 |
+
|
| 179 |
+
|
| 180 |
+
async def _process_batch(
    session: aiohttp.ClientSession,
    batch: list[dict],
) -> list[dict]:
    """Validate every URL in ``batch`` concurrently and collect the records.

    Exceptions from individual validations are logged and skipped; None
    results (robots-blocked URLs) are silently dropped.
    """
    results = await asyncio.gather(
        *(
            validate_url(session, item["url"], item.get("source", "unknown"))
            for item in batch
        ),
        return_exceptions=True,
    )

    records = []
    for outcome in results:
        if isinstance(outcome, Exception):
            logger.error("Validation task error: %s", outcome)
        elif isinstance(outcome, dict):
            records.append(outcome)
    return records
|
| 199 |
+
|
| 200 |
+
|
| 201 |
+
async def run_validator():
    """
    Main validation loop. Continuously drains the validation queue,
    validates URLs in batches, and upserts results to Supabase.

    Runs forever; errors in one iteration are logged and followed by a
    short back-off rather than crashing the worker.
    """
    global _semaphore
    # NOTE(review): _semaphore is initialized here but not awaited anywhere
    # in this module's visible code — confirm it is used elsewhere.
    _semaphore = asyncio.Semaphore(VALIDATION_CONCURRENCY)

    logger.info("Validation worker started")

    # Connection pool sized to the validation concurrency; DNS answers
    # are cached for 5 minutes and connections are kept alive.
    connector = aiohttp.TCPConnector(
        limit=VALIDATION_CONCURRENCY,
        ttl_dns_cache=300,
        force_close=False,
    )

    async with aiohttp.ClientSession(connector=connector) as session:
        while True:
            try:
                # Collect a batch
                batch = []
                try:
                    # Block up to 5s for the first item; idle loop otherwise.
                    item = await asyncio.wait_for(
                        _validation_queue.get(), timeout=5.0
                    )
                    batch.append(item)
                except asyncio.TimeoutError:
                    await asyncio.sleep(1)
                    continue

                # Drain whatever is already queued, up to 50 items per batch.
                while len(batch) < 50 and not _validation_queue.empty():
                    try:
                        batch.append(_validation_queue.get_nowait())
                    except asyncio.QueueEmpty:
                        break

                if batch:
                    logger.info("Validating batch of %d URLs", len(batch))
                    records = await _process_batch(session, batch)

                    if records:
                        # Bulk upsert to Supabase keyed on the url column.
                        try:
                            get_client().table("websites").upsert(
                                records, on_conflict="url"
                            ).execute()
                            active = sum(1 for r in records if r["is_active"])
                            logger.info(
                                "Upserted %d records (%d active)",
                                len(records), active,
                            )
                        except Exception as e:
                            # Failed batches are dropped, not re-queued.
                            logger.error("Bulk upsert failed: %s", e)

            except Exception as e:
                # Catch-all keeps the worker alive; back off before retrying.
                logger.error("Validator loop error: %s", e)
                await asyncio.sleep(5)
|
deploy_hf.sh
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
# deploy_hf.sh — push the current working tree to a Hugging Face Space.
# Initializes git if needed, points a "huggingface" remote at the Space,
# commits everything, and force-pushes main.

# Configuration: Update these values
# ==========================================
HF_USERNAME="PinkAlpaca"
SPACE_NAME="RandomWeb"
# ==========================================

# Colors for output
GREEN='\033[0;32m'
BLUE='\033[0;34m'
RED='\033[0;31m'
NC='\033[0m' # No Color

echo -e "${BLUE}==========================================${NC}"
echo -e "${BLUE} Starting Hugging Face Deployment${NC}"
echo -e "${BLUE}==========================================${NC}"

# Check for git
if ! command -v git &> /dev/null; then
    echo -e "${RED}Error: git is not installed.${NC}"
    exit 1
fi

# Ensure local git repo is initialized (first run only)
if [ ! -d ".git" ]; then
    echo "Initializing local git repository..."
    git init
    git add .
    git commit -m "Initial commit for HF deployment"
fi

# Confirm username is updated
# NOTE(review): HF_USERNAME is already hardcoded above, so this guard is
# currently dead; it only fires if the placeholder is restored.
if [ "$HF_USERNAME" == "UPDATE_WITH_YOUR_HF_USERNAME" ]; then
    echo -e "${RED}Error: Please edit this script and set your HF_USERNAME.${NC}"
    exit 1
fi

# Set remote URL
REMOTE_URL="https://huggingface.co/spaces/${HF_USERNAME}/${SPACE_NAME}"
echo -e "Target Space: ${REMOTE_URL}"

# Check if 'huggingface' remote exists, add if not
if ! git remote | grep -q "huggingface"; then
    echo "Adding Hugging Face remote..."
    git remote add huggingface "${REMOTE_URL}"
else
    echo "Hugging Face remote already exists. Updating URL..."
    git remote set-url huggingface "${REMOTE_URL}"
fi

# Stage all files
git add .

# Commit changes (--allow-empty so deployment works even with no diff)
COMMIT_MSG="Deploy: $(date '+%Y-%m-%d %H:%M:%S')"
git commit -m "$COMMIT_MSG" --allow-empty

# Push to Hugging Face
echo -e "${GREEN}Pushing to Hugging Face...${NC}"
echo "--------------------------------------------------------"
echo "TIP: Use your Hugging Face Access Token as the password."
echo "--------------------------------------------------------"

# Force-push: the Space mirrors the local tree exactly.
git push huggingface main --force

# $? still holds the exit status of git push (only blank lines in between).
if [ $? -eq 0 ]; then
    echo -e "${GREEN}SUCCESS! Your Space is building at: ${REMOTE_URL}${NC}"
    echo "View progress here: ${REMOTE_URL}?logs=build"
else
    echo -e "${RED}Deployment failed. Please check your credentials or network status.${NC}"
fi
|
frontend/app.js
ADDED
|
@@ -0,0 +1,319 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/**
|
| 2 |
+
* RandomWeb β Frontend Application Logic
|
| 3 |
+
* Handles random redirect, search, submission, and real-time counter.
|
| 4 |
+
*/
|
| 5 |
+
|
| 6 |
+
// βββ Configuration ββββββββββββββββββββββββββββββββββββββββββ
|
| 7 |
+
const SUPABASE_URL = 'https://oyxgydfmaocqxictnmou.supabase.co';
|
| 8 |
+
const SUPABASE_KEY = 'sb_publishable_9l3BSqU-mIdYLEgZB2Pv2Q_UUZXU385';
|
| 9 |
+
const API_BASE = '/api';
|
| 10 |
+
|
| 11 |
+
// βββ Supabase Client ββββββββββββββββββββββββββββββββββββββββ
|
| 12 |
+
const supabase = window.supabase.createClient(SUPABASE_URL, SUPABASE_KEY);
|
| 13 |
+
|
| 14 |
+
// βββ DOM Elements βββββββββββββββββββββββββββββββββββββββββββ
|
| 15 |
+
const randomBtn = document.getElementById('random-btn');
|
| 16 |
+
const btnText = randomBtn.querySelector('.btn-text');
|
| 17 |
+
const searchInput = document.getElementById('search-input');
|
| 18 |
+
const searchResults = document.getElementById('search-results');
|
| 19 |
+
const submitForm = document.getElementById('submit-form');
|
| 20 |
+
const submitInput = document.getElementById('submit-input');
|
| 21 |
+
const submitBtn = document.getElementById('submit-btn');
|
| 22 |
+
const submitFeedback = document.getElementById('submit-feedback');
|
| 23 |
+
const counterValue = document.getElementById('counter-value');
|
| 24 |
+
const headerActiveCount = document.getElementById('header-active-count');
|
| 25 |
+
const toastContainer = document.getElementById('toast-container');
|
| 26 |
+
|
| 27 |
+
// βββ State ββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 28 |
+
let currentCount = 0;
|
| 29 |
+
let targetCount = 0;
|
| 30 |
+
let animationFrame = null;
|
| 31 |
+
let searchDebounceTimer = null;
|
| 32 |
+
|
| 33 |
+
// βββ Utility Functions ββββββββββββββββββββββββββββββββββββββ
|
| 34 |
+
function formatNumber(num) {
    // Compact display for large counts: 2,340,000 -> "2.34M", 1,500 -> "1.5K".
    if (num >= 1_000_000) return (num / 1_000_000).toFixed(2) + 'M';
    if (num >= 1_000) return (num / 1_000).toFixed(1) + 'K';
    return num.toLocaleString();
}
|
| 43 |
+
|
| 44 |
+
function formatNumberFull(num) {
    // Full locale-formatted rendering, no K/M abbreviation.
    return num.toLocaleString();
}
|
| 47 |
+
|
| 48 |
+
function showToast(message, type = 'info') {
    // Transient notification: shown for 4s, then removed after a 300ms
    // exit animation driven by the "toast-exiting" class.
    const el = document.createElement('div');
    el.className = `toast toast-${type}`;
    el.textContent = message;
    toastContainer.appendChild(el);

    setTimeout(() => {
        el.classList.add('toast-exiting');
        setTimeout(() => el.remove(), 300);
    }, 4000);
}
|
| 59 |
+
|
| 60 |
+
// βββ Animated Counter βββββββββββββββββββββββββββββββββββββββ
|
| 61 |
+
/**
 * Animate the counters (footer + header) from the current value to `target`
 * using requestAnimationFrame with an ease-out cubic curve. Cancels any
 * in-flight animation so overlapping updates don't fight each other.
 */
function animateCounter(target) {
  targetCount = target;

  if (animationFrame) {
    cancelAnimationFrame(animationFrame);
  }

  const startCount = currentCount;
  const diff = target - startCount;
  // Duration scales with the size of the jump, clamped to 300–1500ms.
  const duration = Math.min(1500, Math.max(300, Math.abs(diff) * 10));
  const startTime = performance.now();

  function step(timestamp) {
    const elapsed = timestamp - startTime;
    const progress = Math.min(elapsed / duration, 1);

    // Ease-out cubic
    const eased = 1 - Math.pow(1 - progress, 3);
    currentCount = Math.round(startCount + diff * eased);

    counterValue.textContent = formatNumberFull(currentCount);
    headerActiveCount.textContent = formatNumber(currentCount);

    if (progress < 1) {
      animationFrame = requestAnimationFrame(step);
    } else {
      // Snap exactly to the target on the final frame.
      currentCount = target;
      counterValue.textContent = formatNumberFull(target);
      headerActiveCount.textContent = formatNumber(target);
    }
  }

  animationFrame = requestAnimationFrame(step);
}
|
| 95 |
+
|
| 96 |
+
// βββ Fetch Stats (Initial) βββββββββββββββββββββββββββββββββ
|
| 97 |
+
/**
 * Load the active-site count: prefer the backend /api/stats endpoint,
 * falling back to a direct Supabase query only when the fetch THROWS
 * (network error). A non-ok HTTP response is silently ignored — no
 * fallback runs in that case.
 */
async function fetchStats() {
  try {
    const response = await fetch(`${API_BASE}/stats`);
    if (response.ok) {
      const data = await response.json();
      animateCounter(data.active_count);
    }
  } catch (err) {
    console.warn('Failed to fetch stats:', err);

    // Fallback: query Supabase directly
    try {
      const { data, error } = await supabase
        .from('stats')
        .select('active_count')
        .eq('id', 1)
        .single();

      if (!error && data) {
        animateCounter(data.active_count);
      }
    } catch (e) {
      console.warn('Supabase fallback also failed:', e);
    }
  }
}
|
| 123 |
+
|
| 124 |
+
// βββ Realtime Subscription ββββββββββββββββββββββββββββββββββ
|
| 125 |
+
/**
 * Subscribe to Supabase realtime UPDATEs on stats row id=1 and animate
 * the counter whenever active_count changes. The 30s polling interval
 * below remains as a fallback if realtime delivery fails.
 */
function setupRealtimeSubscription() {
  // NOTE(review): `channel` is created but never referenced again, so the
  // subscription cannot be torn down from here.
  const channel = supabase
    .channel('stats-realtime')
    .on(
      'postgres_changes',
      {
        event: 'UPDATE',
        schema: 'public',
        table: 'stats',
        filter: 'id=eq.1',
      },
      (payload) => {
        const newCount = payload.new.active_count;
        // Skip redundant animations when the value hasn't moved.
        if (newCount !== undefined && newCount !== targetCount) {
          animateCounter(newCount);
        }
      }
    )
    .subscribe((status) => {
      if (status === 'SUBSCRIBED') {
        console.log('Realtime subscription active');
      }
    });
}
|
| 149 |
+
|
| 150 |
+
// Also poll every 30 seconds as a fallback
|
| 151 |
+
setInterval(fetchStats, 30000);
|
| 152 |
+
|
| 153 |
+
// βββ Random Button ββββββββββββββββββββββββββββββββββββββββββ
|
| 154 |
+
// "Take Me Somewhere Random": fetch a random URL from the API, falling back
// to a direct Supabase RPC; open the result in a new tab. The `loading`
// class doubles as a re-entrancy guard.
randomBtn.addEventListener('click', async () => {
  if (randomBtn.classList.contains('loading')) return;

  randomBtn.classList.add('loading');
  btnText.textContent = 'Finding a website...';

  try {
    const response = await fetch(`${API_BASE}/random`);

    if (response.ok) {
      const data = await response.json();
      if (data.url) {
        btnText.textContent = 'Redirecting...';

        // Small delay for visual feedback
        setTimeout(() => {
          window.open(data.url, '_blank', 'noopener,noreferrer');
          randomBtn.classList.remove('loading');
          btnText.textContent = 'Take Me Somewhere Random';
        }, 500);
        return;
      }
    }

    // API failed, try direct Supabase query
    const { data: websites, error } = await supabase
      .rpc('get_random_active_website');

    if (!error && websites && websites.length > 0) {
      btnText.textContent = 'Redirecting...';
      setTimeout(() => {
        window.open(websites[0].url, '_blank', 'noopener,noreferrer');
        randomBtn.classList.remove('loading');
        btnText.textContent = 'Take Me Somewhere Random';
      }, 500);
      return;
    }

    showToast('No active websites found yet. The system is still indexing.', 'info');
  } catch (err) {
    console.error('Random fetch error:', err);
    showToast('Failed to get a random website. Please try again.', 'error');
  }

  // Reached only on failure paths (success paths return above).
  randomBtn.classList.remove('loading');
  btnText.textContent = 'Take Me Somewhere Random';
});
|
| 201 |
+
|
| 202 |
+
// βββ Search βββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 203 |
+
// Debounced search: fire performSearch 300ms after the last keystroke.
searchInput.addEventListener('input', (event) => {
    clearTimeout(searchDebounceTimer);

    const query = event.target.value.trim();
    if (query.length < 2) {
        // Query too short — clear any stale results instead of searching.
        searchResults.innerHTML = '';
        return;
    }

    searchDebounceTimer = setTimeout(() => performSearch(query), 300);
});
|
| 215 |
+
|
| 216 |
+
/**
 * Search the index for `query`: try the backend /api/search endpoint
 * first; on a non-ok response fall back to querying Supabase directly.
 * Results (or the fallback data) are handed to renderSearchResults.
 */
async function performSearch(query) {
  try {
    const response = await fetch(
      `${API_BASE}/search?q=${encodeURIComponent(query)}&limit=15`
    );

    if (response.ok) {
      const results = await response.json();
      renderSearchResults(results);
      return;
    }

    // Fallback to direct Supabase
    // NOTE(review): `query` is interpolated raw into the PostgREST `.or()`
    // filter string — characters like ',' or ')' in user input can break
    // or alter the filter. Consider sanitizing before interpolation.
    const { data, error } = await supabase
      .from('websites')
      .select('url, domain, is_active')
      .or(`url.ilike.%${query}%,domain.ilike.%${query}%`)
      .eq('is_active', true)
      .limit(15);

    if (!error && data) {
      renderSearchResults(data);
    }
  } catch (err) {
    console.error('Search error:', err);
  }
}
|
| 243 |
+
|
| 244 |
+
/**
 * Render search results as anchor rows inside #search-results.
 * Shows an empty-state message when there are no results.
 * Relies on escapeHtml() for safety in BOTH text content and the
 * href="" attribute position — escapeHtml must escape quotes for the
 * attribute usage to be safe.
 */
function renderSearchResults(results) {
  if (!results || results.length === 0) {
    searchResults.innerHTML = `
      <div class="search-empty">
        No matching websites found. Try a different search term.
      </div>
    `;
    return;
  }

  searchResults.innerHTML = results
    .map(
      (r) => `
      <a href="${escapeHtml(r.url)}" target="_blank" rel="noopener noreferrer"
         class="search-result-item">
        <div>
          <div class="result-url">${escapeHtml(r.url)}</div>
          <div class="result-domain">${escapeHtml(r.domain)}</div>
        </div>
        <span class="result-arrow">β</span>
      </a>
    `
    )
    .join('');
}
|
| 269 |
+
|
| 270 |
+
/**
 * Escape a string for safe interpolation into HTML — including attribute
 * values. The previous DOM-based approach (div.textContent -> innerHTML)
 * escaped only & < >, NOT quotes, so values placed inside href="..." in
 * renderSearchResults could break out of the attribute (XSS vector).
 * This version escapes all five significant characters.
 *
 * @param {*} text - value to escape (coerced to string)
 * @returns {string} HTML-safe text, usable in element and attribute context
 */
function escapeHtml(text) {
    const replacements = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;',
    };
    return String(text).replace(/[&<>"']/g, (ch) => replacements[ch]);
}
|
| 275 |
+
|
| 276 |
+
// βββ Submit Form ββββββββββββββββββββββββββββββββββββββββββββ
|
| 277 |
+
// URL submission: POST the entered URL to /api/submit as JSON and surface
// the outcome in the inline feedback element. The button is disabled for
// the duration of the request to prevent double submits.
submitForm.addEventListener('submit', async (e) => {
  e.preventDefault();

  const url = submitInput.value.trim();
  if (!url) return;

  submitBtn.disabled = true;
  submitBtn.textContent = 'Submitting...';
  // Reset any previous success/error styling before the new attempt.
  submitFeedback.className = 'submit-feedback';
  submitFeedback.style.display = 'none';

  try {
    const response = await fetch(`${API_BASE}/submit`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ url }),
    });

    const data = await response.json();

    if (response.ok) {
      submitFeedback.className = 'submit-feedback success';
      submitFeedback.textContent = data.message || 'URL submitted successfully!';
      submitInput.value = '';
    } else {
      // FastAPI-style error payloads put the message under `detail`.
      submitFeedback.className = 'submit-feedback error';
      submitFeedback.textContent =
        data.detail || 'Failed to submit URL. Please check the format.';
    }
  } catch (err) {
    submitFeedback.className = 'submit-feedback error';
    submitFeedback.textContent = 'Network error. Please try again.';
  }

  submitBtn.disabled = false;
  submitBtn.textContent = 'Submit URL';
});
|
| 314 |
+
|
| 315 |
+
// βββ Initialize βββββββββββββββββββββββββββββββββββββββββββββ
|
| 316 |
+
// Bootstrap: load the initial count, then attach the realtime listener.
document.addEventListener('DOMContentLoaded', () => {
  fetchStats();
  setupRealtimeSubscription();
});
|
frontend/index.html
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8">
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>RandomWeb β Discover Random Websites from Across the Internet</title>
|
| 7 |
+
<meta name="description" content="Explore the web randomly. RandomWeb discovers, indexes, and validates websites from across the entire internet. Click and go anywhere.">
|
| 8 |
+
<meta name="theme-color" content="#0a0e1a">
|
| 9 |
+
<meta property="og:title" content="RandomWeb β Discover Random Websites">
|
| 10 |
+
<meta property="og:description" content="One click. One random website. Explore the entire internet.">
|
| 11 |
+
<meta property="og:type" content="website">
|
| 12 |
+
<link rel="icon" type="image/svg+xml" href="data:image/svg+xml,<svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 100 100'><text y='.9em' font-size='90'>π</text></svg>">
|
| 13 |
+
<link rel="stylesheet" href="styles.css">
|
| 14 |
+
</head>
|
| 15 |
+
<body>
|
| 16 |
+
|
| 17 |
+
<!-- Animated background -->
|
| 18 |
+
<div class="bg-grid"></div>
|
| 19 |
+
<div class="orb orb-1"></div>
|
| 20 |
+
<div class="orb orb-2"></div>
|
| 21 |
+
<div class="orb orb-3"></div>
|
| 22 |
+
|
| 23 |
+
<div class="app">
|
| 24 |
+
|
| 25 |
+
<!-- βββ Header ββββββββββββββββββββββββββββββββ -->
|
| 26 |
+
<header class="header fade-in">
|
| 27 |
+
<div class="container">
|
| 28 |
+
<a href="/" class="logo" id="logo-link">
|
| 29 |
+
<div class="logo-icon">π</div>
|
| 30 |
+
<span class="logo-text">RandomWeb</span>
|
| 31 |
+
</a>
|
| 32 |
+
<div class="header-stats">
|
| 33 |
+
<span class="pulse-dot"></span>
|
| 34 |
+
<span id="header-active-count">β</span> sites indexed
|
| 35 |
+
</div>
|
| 36 |
+
</div>
|
| 37 |
+
</header>
|
| 38 |
+
|
| 39 |
+
<!-- βββ Main Content ββββββββββββββββββββββββββ -->
|
| 40 |
+
<main>
|
| 41 |
+
|
| 42 |
+
<!-- Hero Section -->
|
| 43 |
+
<section class="hero">
|
| 44 |
+
<div class="container">
|
| 45 |
+
<div class="hero-badge slide-up">
|
| 46 |
+
<span class="dot"></span>
|
| 47 |
+
<span>Live β Indexing the web in real-time</span>
|
| 48 |
+
</div>
|
| 49 |
+
|
| 50 |
+
<h1 class="slide-up slide-up-delay-1">
|
| 51 |
+
Discover the<br>
|
| 52 |
+
<span class="gradient-text">Entire Web</span>
|
| 53 |
+
</h1>
|
| 54 |
+
|
| 55 |
+
<p class="slide-up slide-up-delay-2">
|
| 56 |
+
One click takes you to a random website from our ever-growing index.
|
| 57 |
+
Powered by Certificate Transparency logs, Common Crawl, and recursive crawling.
|
| 58 |
+
</p>
|
| 59 |
+
|
| 60 |
+
<div class="random-btn-wrapper slide-up slide-up-delay-3">
|
| 61 |
+
<button class="random-btn" id="random-btn" type="button">
|
| 62 |
+
<span class="btn-icon">π²</span>
|
| 63 |
+
<span class="btn-text">Take Me Somewhere Random</span>
|
| 64 |
+
</button>
|
| 65 |
+
</div>
|
| 66 |
+
</div>
|
| 67 |
+
</section>
|
| 68 |
+
|
| 69 |
+
<!-- Search Section -->
|
| 70 |
+
<section class="search-section slide-up slide-up-delay-3">
|
| 71 |
+
<div class="container">
|
| 72 |
+
<div class="glass-card" style="padding: var(--space-xl);">
|
| 73 |
+
<h2>π Search the Index</h2>
|
| 74 |
+
<div class="search-box">
|
| 75 |
+
<span class="search-icon">β</span>
|
| 76 |
+
<input
|
| 77 |
+
type="text"
|
| 78 |
+
id="search-input"
|
| 79 |
+
placeholder="Search for websites... (e.g., github.com, news)"
|
| 80 |
+
autocomplete="off"
|
| 81 |
+
spellcheck="false"
|
| 82 |
+
>
|
| 83 |
+
</div>
|
| 84 |
+
<div class="search-results" id="search-results"></div>
|
| 85 |
+
</div>
|
| 86 |
+
</div>
|
| 87 |
+
</section>
|
| 88 |
+
|
| 89 |
+
<!-- Submit Section -->
|
| 90 |
+
<section class="submit-section slide-up slide-up-delay-4">
|
| 91 |
+
<div class="container">
|
| 92 |
+
<div class="glass-card" style="padding: var(--space-xl);">
|
| 93 |
+
<h2>β Submit a Website</h2>
|
| 94 |
+
<p class="subtitle">
|
| 95 |
+
Know a website that's not in our index? Submit it and we'll validate and add it.
|
| 96 |
+
</p>
|
| 97 |
+
<form class="submit-form" id="submit-form">
|
| 98 |
+
<input
|
| 99 |
+
type="text"
|
| 100 |
+
id="submit-input"
|
| 101 |
+
placeholder="Enter a URL (e.g., https://example.com)"
|
| 102 |
+
autocomplete="off"
|
| 103 |
+
spellcheck="false"
|
| 104 |
+
required
|
| 105 |
+
>
|
| 106 |
+
<button type="submit" class="submit-btn" id="submit-btn">
|
| 107 |
+
Submit URL
|
| 108 |
+
</button>
|
| 109 |
+
</form>
|
| 110 |
+
<div class="submit-feedback" id="submit-feedback"></div>
|
| 111 |
+
</div>
|
| 112 |
+
</div>
|
| 113 |
+
</section>
|
| 114 |
+
|
| 115 |
+
</main>
|
| 116 |
+
|
| 117 |
+
<!-- βββ Footer ββββββββββββββββββββββββββββββββ -->
|
| 118 |
+
<footer class="footer">
|
| 119 |
+
<div class="container">
|
| 120 |
+
|
| 121 |
+
<div class="live-counter">
|
| 122 |
+
<div class="counter-label">
|
| 123 |
+
<span class="live-dot"></span>
|
| 124 |
+
Active Websites Indexed
|
| 125 |
+
</div>
|
| 126 |
+
<div class="counter-value" id="counter-value">0</div>
|
| 127 |
+
<div class="counter-subtext">
|
| 128 |
+
and growing every second
|
| 129 |
+
</div>
|
| 130 |
+
</div>
|
| 131 |
+
|
| 132 |
+
<div class="footer-links">
|
| 133 |
+
<a href="https://github.com/guestcoder0906/RandomWeb" target="_blank" rel="noopener">GitHub</a>
|
| 134 |
+
<span class="divider"></span>
|
| 135 |
+
<span style="color: var(--text-muted);">Built with π by RandomWeb</span>
|
| 136 |
+
</div>
|
| 137 |
+
|
| 138 |
+
</div>
|
| 139 |
+
</footer>
|
| 140 |
+
|
| 141 |
+
</div>
|
| 142 |
+
|
| 143 |
+
<!-- Toast container -->
|
| 144 |
+
<div class="toast-container" id="toast-container"></div>
|
| 145 |
+
|
| 146 |
+
<!-- Supabase Client (CDN) -->
|
| 147 |
+
<script src="https://cdn.jsdelivr.net/npm/@supabase/supabase-js@2/dist/umd/supabase.min.js"></script>
|
| 148 |
+
|
| 149 |
+
<!-- App Logic -->
|
| 150 |
+
<script src="app.js"></script>
|
| 151 |
+
|
| 152 |
+
</body>
|
| 153 |
+
</html>
|
frontend/styles.css
ADDED
|
@@ -0,0 +1,801 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/* ============================================================
|
| 2 |
+
RandomWeb β Premium Dark Theme
|
| 3 |
+
Design system: Midnight navy base, cyanβviolet gradients,
|
| 4 |
+
glassmorphism panels, Inter + Outfit fonts, micro-animations
|
| 5 |
+
============================================================ */
|
| 6 |
+
|
| 7 |
+
/* βββ Google Fonts βββββββββββββββββββββββββββββββββββββββββββ */
|
| 8 |
+
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&family=Outfit:wght@400;500;600;700;800;900&family=JetBrains+Mono:wght@400;500&display=swap');
|
| 9 |
+
|
| 10 |
+
/* βββ CSS Custom Properties ββββββββββββββββββββββββββββββββββ */
|
| 11 |
+
:root {
|
| 12 |
+
/* Core palette */
|
| 13 |
+
--bg-primary: #0a0e1a;
|
| 14 |
+
--bg-secondary: #111827;
|
| 15 |
+
--bg-card: rgba(17, 24, 39, 0.7);
|
| 16 |
+
--bg-glass: rgba(255, 255, 255, 0.03);
|
| 17 |
+
--bg-glass-hover: rgba(255, 255, 255, 0.06);
|
| 18 |
+
|
| 19 |
+
/* Accent gradients */
|
| 20 |
+
--gradient-primary: linear-gradient(135deg, #06b6d4, #8b5cf6);
|
| 21 |
+
--gradient-secondary: linear-gradient(135deg, #8b5cf6, #ec4899);
|
| 22 |
+
--gradient-glow: linear-gradient(135deg, rgba(6, 182, 212, 0.15), rgba(139, 92, 246, 0.15));
|
| 23 |
+
--gradient-hero: linear-gradient(180deg, #0a0e1a 0%, #111827 50%, #0a0e1a 100%);
|
| 24 |
+
|
| 25 |
+
/* Text */
|
| 26 |
+
--text-primary: #f1f5f9;
|
| 27 |
+
--text-secondary: #94a3b8;
|
| 28 |
+
--text-muted: #64748b;
|
| 29 |
+
--text-accent: #06b6d4;
|
| 30 |
+
|
| 31 |
+
/* Borders */
|
| 32 |
+
--border-subtle: rgba(255, 255, 255, 0.06);
|
| 33 |
+
--border-accent: rgba(6, 182, 212, 0.3);
|
| 34 |
+
|
| 35 |
+
/* Shadows */
|
| 36 |
+
--shadow-lg: 0 25px 50px -12px rgba(0, 0, 0, 0.5);
|
| 37 |
+
--shadow-glow-cyan: 0 0 40px rgba(6, 182, 212, 0.15);
|
| 38 |
+
--shadow-glow-violet: 0 0 40px rgba(139, 92, 246, 0.15);
|
| 39 |
+
--shadow-button: 0 0 30px rgba(6, 182, 212, 0.3), 0 0 60px rgba(139, 92, 246, 0.1);
|
| 40 |
+
|
| 41 |
+
/* Typography */
|
| 42 |
+
--font-body: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif;
|
| 43 |
+
--font-heading: 'Outfit', -apple-system, BlinkMacSystemFont, sans-serif;
|
| 44 |
+
--font-mono: 'JetBrains Mono', 'Fira Code', monospace;
|
| 45 |
+
|
| 46 |
+
/* Spacing */
|
| 47 |
+
--space-xs: 0.25rem;
|
| 48 |
+
--space-sm: 0.5rem;
|
| 49 |
+
--space-md: 1rem;
|
| 50 |
+
--space-lg: 1.5rem;
|
| 51 |
+
--space-xl: 2rem;
|
| 52 |
+
--space-2xl: 3rem;
|
| 53 |
+
--space-3xl: 4rem;
|
| 54 |
+
--space-4xl: 6rem;
|
| 55 |
+
|
| 56 |
+
/* Radius */
|
| 57 |
+
--radius-sm: 0.5rem;
|
| 58 |
+
--radius-md: 0.75rem;
|
| 59 |
+
--radius-lg: 1rem;
|
| 60 |
+
--radius-xl: 1.5rem;
|
| 61 |
+
--radius-full: 9999px;
|
| 62 |
+
|
| 63 |
+
/* Transitions */
|
| 64 |
+
--transition-fast: 150ms cubic-bezier(0.4, 0, 0.2, 1);
|
| 65 |
+
--transition-base: 250ms cubic-bezier(0.4, 0, 0.2, 1);
|
| 66 |
+
--transition-slow: 400ms cubic-bezier(0.4, 0, 0.2, 1);
|
| 67 |
+
--transition-spring: 500ms cubic-bezier(0.34, 1.56, 0.64, 1);
|
| 68 |
+
}
|
| 69 |
+
|
| 70 |
+
/* βββ Reset & Base βββββββββββββββββββββββββββββββββββββββββββ */
|
| 71 |
+
*,
|
| 72 |
+
*::before,
|
| 73 |
+
*::after {
|
| 74 |
+
margin: 0;
|
| 75 |
+
padding: 0;
|
| 76 |
+
box-sizing: border-box;
|
| 77 |
+
}
|
| 78 |
+
|
| 79 |
+
html {
|
| 80 |
+
scroll-behavior: smooth;
|
| 81 |
+
-webkit-font-smoothing: antialiased;
|
| 82 |
+
-moz-osx-font-smoothing: grayscale;
|
| 83 |
+
}
|
| 84 |
+
|
| 85 |
+
body {
|
| 86 |
+
font-family: var(--font-body);
|
| 87 |
+
background: var(--bg-primary);
|
| 88 |
+
color: var(--text-primary);
|
| 89 |
+
min-height: 100vh;
|
| 90 |
+
overflow-x: hidden;
|
| 91 |
+
line-height: 1.6;
|
| 92 |
+
}
|
| 93 |
+
|
| 94 |
+
/* βββ Animated Background ββββββββββββββββββββββββββββββββββββ */
|
| 95 |
+
.bg-grid {
|
| 96 |
+
position: fixed;
|
| 97 |
+
inset: 0;
|
| 98 |
+
z-index: 0;
|
| 99 |
+
background-image:
|
| 100 |
+
radial-gradient(ellipse at 20% 50%, rgba(6, 182, 212, 0.08) 0%, transparent 50%),
|
| 101 |
+
radial-gradient(ellipse at 80% 20%, rgba(139, 92, 246, 0.08) 0%, transparent 50%),
|
| 102 |
+
radial-gradient(ellipse at 50% 80%, rgba(236, 72, 153, 0.05) 0%, transparent 50%);
|
| 103 |
+
pointer-events: none;
|
| 104 |
+
}
|
| 105 |
+
|
| 106 |
+
.bg-grid::before {
|
| 107 |
+
content: '';
|
| 108 |
+
position: absolute;
|
| 109 |
+
inset: 0;
|
| 110 |
+
background-image:
|
| 111 |
+
linear-gradient(rgba(255, 255, 255, 0.015) 1px, transparent 1px),
|
| 112 |
+
linear-gradient(90deg, rgba(255, 255, 255, 0.015) 1px, transparent 1px);
|
| 113 |
+
background-size: 60px 60px;
|
| 114 |
+
mask-image: radial-gradient(ellipse at center, black 30%, transparent 70%);
|
| 115 |
+
}
|
| 116 |
+
|
| 117 |
+
/* Floating orbs */
|
| 118 |
+
.orb {
|
| 119 |
+
position: fixed;
|
| 120 |
+
border-radius: 50%;
|
| 121 |
+
filter: blur(80px);
|
| 122 |
+
opacity: 0.4;
|
| 123 |
+
pointer-events: none;
|
| 124 |
+
z-index: 0;
|
| 125 |
+
animation: orbFloat 20s ease-in-out infinite;
|
| 126 |
+
}
|
| 127 |
+
|
| 128 |
+
.orb-1 {
|
| 129 |
+
width: 400px;
|
| 130 |
+
height: 400px;
|
| 131 |
+
background: rgba(6, 182, 212, 0.12);
|
| 132 |
+
top: -100px;
|
| 133 |
+
left: -100px;
|
| 134 |
+
animation-delay: 0s;
|
| 135 |
+
}
|
| 136 |
+
|
| 137 |
+
.orb-2 {
|
| 138 |
+
width: 350px;
|
| 139 |
+
height: 350px;
|
| 140 |
+
background: rgba(139, 92, 246, 0.12);
|
| 141 |
+
bottom: -100px;
|
| 142 |
+
right: -100px;
|
| 143 |
+
animation-delay: -7s;
|
| 144 |
+
}
|
| 145 |
+
|
| 146 |
+
.orb-3 {
|
| 147 |
+
width: 300px;
|
| 148 |
+
height: 300px;
|
| 149 |
+
background: rgba(236, 72, 153, 0.08);
|
| 150 |
+
top: 50%;
|
| 151 |
+
left: 50%;
|
| 152 |
+
transform: translate(-50%, -50%);
|
| 153 |
+
animation-delay: -14s;
|
| 154 |
+
}
|
| 155 |
+
|
| 156 |
+
@keyframes orbFloat {
|
| 157 |
+
0%, 100% { transform: translate(0, 0) scale(1); }
|
| 158 |
+
25% { transform: translate(30px, -40px) scale(1.05); }
|
| 159 |
+
50% { transform: translate(-20px, 20px) scale(0.95); }
|
| 160 |
+
75% { transform: translate(40px, 30px) scale(1.02); }
|
| 161 |
+
}
|
| 162 |
+
|
| 163 |
+
/* βββ Layout ββββββββββοΏ½οΏ½οΏ½ββββββββββββββββββββββββββββββββββββββ */
|
| 164 |
+
.app {
|
| 165 |
+
position: relative;
|
| 166 |
+
z-index: 1;
|
| 167 |
+
min-height: 100vh;
|
| 168 |
+
display: flex;
|
| 169 |
+
flex-direction: column;
|
| 170 |
+
}
|
| 171 |
+
|
| 172 |
+
.container {
|
| 173 |
+
width: 100%;
|
| 174 |
+
max-width: 800px;
|
| 175 |
+
margin: 0 auto;
|
| 176 |
+
padding: 0 var(--space-lg);
|
| 177 |
+
}
|
| 178 |
+
|
| 179 |
+
/* βββ Header βββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 180 |
+
.header {
|
| 181 |
+
padding: var(--space-lg) 0;
|
| 182 |
+
border-bottom: 1px solid var(--border-subtle);
|
| 183 |
+
backdrop-filter: blur(20px);
|
| 184 |
+
-webkit-backdrop-filter: blur(20px);
|
| 185 |
+
background: rgba(10, 14, 26, 0.8);
|
| 186 |
+
position: sticky;
|
| 187 |
+
top: 0;
|
| 188 |
+
z-index: 100;
|
| 189 |
+
}
|
| 190 |
+
|
| 191 |
+
.header .container {
|
| 192 |
+
display: flex;
|
| 193 |
+
align-items: center;
|
| 194 |
+
justify-content: space-between;
|
| 195 |
+
}
|
| 196 |
+
|
| 197 |
+
.logo {
|
| 198 |
+
display: flex;
|
| 199 |
+
align-items: center;
|
| 200 |
+
gap: var(--space-sm);
|
| 201 |
+
text-decoration: none;
|
| 202 |
+
}
|
| 203 |
+
|
| 204 |
+
.logo-icon {
|
| 205 |
+
width: 36px;
|
| 206 |
+
height: 36px;
|
| 207 |
+
border-radius: var(--radius-md);
|
| 208 |
+
background: var(--gradient-primary);
|
| 209 |
+
display: flex;
|
| 210 |
+
align-items: center;
|
| 211 |
+
justify-content: center;
|
| 212 |
+
font-size: 1.1rem;
|
| 213 |
+
box-shadow: var(--shadow-glow-cyan);
|
| 214 |
+
}
|
| 215 |
+
|
| 216 |
+
.logo-text {
|
| 217 |
+
font-family: var(--font-heading);
|
| 218 |
+
font-weight: 700;
|
| 219 |
+
font-size: 1.25rem;
|
| 220 |
+
background: var(--gradient-primary);
|
| 221 |
+
-webkit-background-clip: text;
|
| 222 |
+
-webkit-text-fill-color: transparent;
|
| 223 |
+
background-clip: text;
|
| 224 |
+
}
|
| 225 |
+
|
| 226 |
+
.header-stats {
|
| 227 |
+
display: flex;
|
| 228 |
+
align-items: center;
|
| 229 |
+
gap: var(--space-sm);
|
| 230 |
+
font-size: 0.8rem;
|
| 231 |
+
color: var(--text-muted);
|
| 232 |
+
font-family: var(--font-mono);
|
| 233 |
+
}
|
| 234 |
+
|
| 235 |
+
.header-stats .pulse-dot {
|
| 236 |
+
width: 8px;
|
| 237 |
+
height: 8px;
|
| 238 |
+
background: #22c55e;
|
| 239 |
+
border-radius: 50%;
|
| 240 |
+
animation: pulse 2s ease-in-out infinite;
|
| 241 |
+
}
|
| 242 |
+
|
| 243 |
+
@keyframes pulse {
|
| 244 |
+
0%, 100% { opacity: 1; box-shadow: 0 0 0 0 rgba(34, 197, 94, 0.4); }
|
| 245 |
+
50% { opacity: 0.7; box-shadow: 0 0 0 6px rgba(34, 197, 94, 0); }
|
| 246 |
+
}
|
| 247 |
+
|
| 248 |
+
/* βββ Hero Section βββββββββββββββββββββββββββββββββββββββββββ */
|
| 249 |
+
.hero {
|
| 250 |
+
padding: var(--space-4xl) 0 var(--space-3xl);
|
| 251 |
+
text-align: center;
|
| 252 |
+
}
|
| 253 |
+
|
| 254 |
+
.hero-badge {
|
| 255 |
+
display: inline-flex;
|
| 256 |
+
align-items: center;
|
| 257 |
+
gap: var(--space-sm);
|
| 258 |
+
padding: var(--space-xs) var(--space-md);
|
| 259 |
+
background: var(--bg-glass);
|
| 260 |
+
border: 1px solid var(--border-subtle);
|
| 261 |
+
border-radius: var(--radius-full);
|
| 262 |
+
font-size: 0.8rem;
|
| 263 |
+
color: var(--text-secondary);
|
| 264 |
+
margin-bottom: var(--space-xl);
|
| 265 |
+
backdrop-filter: blur(10px);
|
| 266 |
+
}
|
| 267 |
+
|
| 268 |
+
.hero-badge .dot {
|
| 269 |
+
width: 6px;
|
| 270 |
+
height: 6px;
|
| 271 |
+
background: #22c55e;
|
| 272 |
+
border-radius: 50%;
|
| 273 |
+
animation: pulse 2s ease-in-out infinite;
|
| 274 |
+
}
|
| 275 |
+
|
| 276 |
+
.hero h1 {
|
| 277 |
+
font-family: var(--font-heading);
|
| 278 |
+
font-weight: 900;
|
| 279 |
+
font-size: clamp(2.5rem, 6vw, 4rem);
|
| 280 |
+
line-height: 1.1;
|
| 281 |
+
margin-bottom: var(--space-lg);
|
| 282 |
+
letter-spacing: -0.03em;
|
| 283 |
+
}
|
| 284 |
+
|
| 285 |
+
.hero h1 .gradient-text {
|
| 286 |
+
background: var(--gradient-primary);
|
| 287 |
+
-webkit-background-clip: text;
|
| 288 |
+
-webkit-text-fill-color: transparent;
|
| 289 |
+
background-clip: text;
|
| 290 |
+
}
|
| 291 |
+
|
| 292 |
+
.hero p {
|
| 293 |
+
font-size: 1.1rem;
|
| 294 |
+
color: var(--text-secondary);
|
| 295 |
+
max-width: 500px;
|
| 296 |
+
margin: 0 auto var(--space-2xl);
|
| 297 |
+
line-height: 1.7;
|
| 298 |
+
}
|
| 299 |
+
|
| 300 |
+
/* βββ Random Button ββββββββββββββββββββββββββββββββββββββββββ */
|
| 301 |
+
.random-btn-wrapper {
|
| 302 |
+
display: flex;
|
| 303 |
+
justify-content: center;
|
| 304 |
+
margin-bottom: var(--space-3xl);
|
| 305 |
+
}
|
| 306 |
+
|
| 307 |
+
.random-btn {
|
| 308 |
+
position: relative;
|
| 309 |
+
display: inline-flex;
|
| 310 |
+
align-items: center;
|
| 311 |
+
gap: var(--space-md);
|
| 312 |
+
padding: 1.15rem 2.5rem;
|
| 313 |
+
background: var(--gradient-primary);
|
| 314 |
+
color: white;
|
| 315 |
+
font-family: var(--font-heading);
|
| 316 |
+
font-weight: 700;
|
| 317 |
+
font-size: 1.15rem;
|
| 318 |
+
border: none;
|
| 319 |
+
border-radius: var(--radius-xl);
|
| 320 |
+
cursor: pointer;
|
| 321 |
+
transition: all var(--transition-base);
|
| 322 |
+
box-shadow: var(--shadow-button);
|
| 323 |
+
text-decoration: none;
|
| 324 |
+
letter-spacing: 0.01em;
|
| 325 |
+
overflow: hidden;
|
| 326 |
+
}
|
| 327 |
+
|
| 328 |
+
.random-btn::before {
|
| 329 |
+
content: '';
|
| 330 |
+
position: absolute;
|
| 331 |
+
inset: 0;
|
| 332 |
+
background: linear-gradient(135deg, rgba(255,255,255,0.15), transparent);
|
| 333 |
+
opacity: 0;
|
| 334 |
+
transition: opacity var(--transition-base);
|
| 335 |
+
}
|
| 336 |
+
|
| 337 |
+
.random-btn:hover {
|
| 338 |
+
transform: translateY(-3px) scale(1.03);
|
| 339 |
+
box-shadow: 0 0 50px rgba(6, 182, 212, 0.4), 0 0 80px rgba(139, 92, 246, 0.2);
|
| 340 |
+
}
|
| 341 |
+
|
| 342 |
+
.random-btn:hover::before {
|
| 343 |
+
opacity: 1;
|
| 344 |
+
}
|
| 345 |
+
|
| 346 |
+
.random-btn:active {
|
| 347 |
+
transform: translateY(-1px) scale(0.98);
|
| 348 |
+
}
|
| 349 |
+
|
| 350 |
+
.random-btn .btn-icon {
|
| 351 |
+
font-size: 1.4rem;
|
| 352 |
+
animation: spinSlow 8s linear infinite;
|
| 353 |
+
}
|
| 354 |
+
|
| 355 |
+
@keyframes spinSlow {
|
| 356 |
+
from { transform: rotate(0deg); }
|
| 357 |
+
to { transform: rotate(360deg); }
|
| 358 |
+
}
|
| 359 |
+
|
| 360 |
+
.random-btn:hover .btn-icon {
|
| 361 |
+
animation-duration: 1s;
|
| 362 |
+
}
|
| 363 |
+
|
| 364 |
+
.random-btn.loading .btn-icon {
|
| 365 |
+
animation: spinSlow 0.6s linear infinite;
|
| 366 |
+
}
|
| 367 |
+
|
| 368 |
+
/* βββ Glass Card βββββββββββββββββββββββββββββββββββββββββββββ */
|
| 369 |
+
.glass-card {
|
| 370 |
+
background: var(--bg-card);
|
| 371 |
+
border: 1px solid var(--border-subtle);
|
| 372 |
+
border-radius: var(--radius-lg);
|
| 373 |
+
backdrop-filter: blur(20px);
|
| 374 |
+
-webkit-backdrop-filter: blur(20px);
|
| 375 |
+
transition: all var(--transition-base);
|
| 376 |
+
}
|
| 377 |
+
|
| 378 |
+
.glass-card:hover {
|
| 379 |
+
border-color: var(--border-accent);
|
| 380 |
+
background: var(--bg-glass-hover);
|
| 381 |
+
}
|
| 382 |
+
|
| 383 |
+
/* βββ Search Section βββββββββββββββββββββββββββββββββββββββββ */
|
| 384 |
+
.search-section {
|
| 385 |
+
margin-bottom: var(--space-2xl);
|
| 386 |
+
}
|
| 387 |
+
|
| 388 |
+
.search-section h2 {
|
| 389 |
+
font-family: var(--font-heading);
|
| 390 |
+
font-weight: 700;
|
| 391 |
+
font-size: 1.3rem;
|
| 392 |
+
margin-bottom: var(--space-md);
|
| 393 |
+
display: flex;
|
| 394 |
+
align-items: center;
|
| 395 |
+
gap: var(--space-sm);
|
| 396 |
+
}
|
| 397 |
+
|
| 398 |
+
.search-box {
|
| 399 |
+
position: relative;
|
| 400 |
+
}
|
| 401 |
+
|
| 402 |
+
.search-box input {
|
| 403 |
+
width: 100%;
|
| 404 |
+
padding: 1rem 1rem 1rem 3rem;
|
| 405 |
+
background: var(--bg-glass);
|
| 406 |
+
border: 1px solid var(--border-subtle);
|
| 407 |
+
border-radius: var(--radius-lg);
|
| 408 |
+
color: var(--text-primary);
|
| 409 |
+
font-family: var(--font-body);
|
| 410 |
+
font-size: 1rem;
|
| 411 |
+
outline: none;
|
| 412 |
+
transition: all var(--transition-base);
|
| 413 |
+
}
|
| 414 |
+
|
| 415 |
+
.search-box input:focus {
|
| 416 |
+
border-color: var(--border-accent);
|
| 417 |
+
box-shadow: 0 0 0 3px rgba(6, 182, 212, 0.1);
|
| 418 |
+
background: var(--bg-glass-hover);
|
| 419 |
+
}
|
| 420 |
+
|
| 421 |
+
.search-box input::placeholder {
|
| 422 |
+
color: var(--text-muted);
|
| 423 |
+
}
|
| 424 |
+
|
| 425 |
+
.search-box .search-icon {
|
| 426 |
+
position: absolute;
|
| 427 |
+
left: 1rem;
|
| 428 |
+
top: 50%;
|
| 429 |
+
transform: translateY(-50%);
|
| 430 |
+
font-size: 1.1rem;
|
| 431 |
+
color: var(--text-muted);
|
| 432 |
+
pointer-events: none;
|
| 433 |
+
}
|
| 434 |
+
|
| 435 |
+
.search-results {
|
| 436 |
+
margin-top: var(--space-md);
|
| 437 |
+
display: flex;
|
| 438 |
+
flex-direction: column;
|
| 439 |
+
gap: var(--space-sm);
|
| 440 |
+
max-height: 400px;
|
| 441 |
+
overflow-y: auto;
|
| 442 |
+
}
|
| 443 |
+
|
| 444 |
+
.search-results::-webkit-scrollbar {
|
| 445 |
+
width: 6px;
|
| 446 |
+
}
|
| 447 |
+
|
| 448 |
+
.search-results::-webkit-scrollbar-track {
|
| 449 |
+
background: transparent;
|
| 450 |
+
}
|
| 451 |
+
|
| 452 |
+
.search-results::-webkit-scrollbar-thumb {
|
| 453 |
+
background: var(--border-subtle);
|
| 454 |
+
border-radius: 3px;
|
| 455 |
+
}
|
| 456 |
+
|
| 457 |
+
.search-result-item {
|
| 458 |
+
display: flex;
|
| 459 |
+
align-items: center;
|
| 460 |
+
justify-content: space-between;
|
| 461 |
+
padding: var(--space-md) var(--space-lg);
|
| 462 |
+
background: var(--bg-glass);
|
| 463 |
+
border: 1px solid var(--border-subtle);
|
| 464 |
+
border-radius: var(--radius-md);
|
| 465 |
+
transition: all var(--transition-fast);
|
| 466 |
+
cursor: pointer;
|
| 467 |
+
text-decoration: none;
|
| 468 |
+
color: var(--text-primary);
|
| 469 |
+
}
|
| 470 |
+
|
| 471 |
+
.search-result-item:hover {
|
| 472 |
+
border-color: var(--border-accent);
|
| 473 |
+
background: var(--bg-glass-hover);
|
| 474 |
+
transform: translateX(4px);
|
| 475 |
+
}
|
| 476 |
+
|
| 477 |
+
.search-result-item .result-url {
|
| 478 |
+
font-family: var(--font-mono);
|
| 479 |
+
font-size: 0.9rem;
|
| 480 |
+
color: var(--text-accent);
|
| 481 |
+
word-break: break-all;
|
| 482 |
+
}
|
| 483 |
+
|
| 484 |
+
.search-result-item .result-domain {
|
| 485 |
+
font-size: 0.8rem;
|
| 486 |
+
color: var(--text-muted);
|
| 487 |
+
margin-top: 2px;
|
| 488 |
+
}
|
| 489 |
+
|
| 490 |
+
.search-result-item .result-arrow {
|
| 491 |
+
font-size: 1.1rem;
|
| 492 |
+
color: var(--text-muted);
|
| 493 |
+
transition: all var(--transition-fast);
|
| 494 |
+
flex-shrink: 0;
|
| 495 |
+
margin-left: var(--space-md);
|
| 496 |
+
}
|
| 497 |
+
|
| 498 |
+
.search-result-item:hover .result-arrow {
|
| 499 |
+
color: var(--text-accent);
|
| 500 |
+
transform: translateX(4px);
|
| 501 |
+
}
|
| 502 |
+
|
| 503 |
+
.search-empty {
|
| 504 |
+
text-align: center;
|
| 505 |
+
padding: var(--space-xl);
|
| 506 |
+
color: var(--text-muted);
|
| 507 |
+
font-size: 0.9rem;
|
| 508 |
+
}
|
| 509 |
+
|
| 510 |
+
/* βββ Submit Section βββββββββββββββββββββββββββββββββββββββββ */
|
| 511 |
+
.submit-section {
|
| 512 |
+
margin-bottom: var(--space-3xl);
|
| 513 |
+
}
|
| 514 |
+
|
| 515 |
+
.submit-section h2 {
|
| 516 |
+
font-family: var(--font-heading);
|
| 517 |
+
font-weight: 700;
|
| 518 |
+
font-size: 1.3rem;
|
| 519 |
+
margin-bottom: var(--space-sm);
|
| 520 |
+
display: flex;
|
| 521 |
+
align-items: center;
|
| 522 |
+
gap: var(--space-sm);
|
| 523 |
+
}
|
| 524 |
+
|
| 525 |
+
.submit-section .subtitle {
|
| 526 |
+
color: var(--text-secondary);
|
| 527 |
+
font-size: 0.9rem;
|
| 528 |
+
margin-bottom: var(--space-lg);
|
| 529 |
+
}
|
| 530 |
+
|
| 531 |
+
.submit-form {
|
| 532 |
+
display: flex;
|
| 533 |
+
gap: var(--space-sm);
|
| 534 |
+
}
|
| 535 |
+
|
| 536 |
+
.submit-form input {
|
| 537 |
+
flex: 1;
|
| 538 |
+
padding: 0.85rem 1rem;
|
| 539 |
+
background: var(--bg-glass);
|
| 540 |
+
border: 1px solid var(--border-subtle);
|
| 541 |
+
border-radius: var(--radius-md);
|
| 542 |
+
color: var(--text-primary);
|
| 543 |
+
font-family: var(--font-body);
|
| 544 |
+
font-size: 0.95rem;
|
| 545 |
+
outline: none;
|
| 546 |
+
transition: all var(--transition-base);
|
| 547 |
+
}
|
| 548 |
+
|
| 549 |
+
.submit-form input:focus {
|
| 550 |
+
border-color: var(--border-accent);
|
| 551 |
+
box-shadow: 0 0 0 3px rgba(6, 182, 212, 0.1);
|
| 552 |
+
}
|
| 553 |
+
|
| 554 |
+
.submit-form input::placeholder {
|
| 555 |
+
color: var(--text-muted);
|
| 556 |
+
}
|
| 557 |
+
|
| 558 |
+
.submit-btn {
|
| 559 |
+
padding: 0.85rem 1.5rem;
|
| 560 |
+
background: var(--gradient-primary);
|
| 561 |
+
color: white;
|
| 562 |
+
font-family: var(--font-heading);
|
| 563 |
+
font-weight: 600;
|
| 564 |
+
font-size: 0.9rem;
|
| 565 |
+
border: none;
|
| 566 |
+
border-radius: var(--radius-md);
|
| 567 |
+
cursor: pointer;
|
| 568 |
+
transition: all var(--transition-base);
|
| 569 |
+
white-space: nowrap;
|
| 570 |
+
}
|
| 571 |
+
|
| 572 |
+
.submit-btn:hover {
|
| 573 |
+
transform: translateY(-2px);
|
| 574 |
+
box-shadow: 0 0 20px rgba(6, 182, 212, 0.3);
|
| 575 |
+
}
|
| 576 |
+
|
| 577 |
+
.submit-btn:active {
|
| 578 |
+
transform: translateY(0);
|
| 579 |
+
}
|
| 580 |
+
|
| 581 |
+
.submit-btn:disabled {
|
| 582 |
+
opacity: 0.5;
|
| 583 |
+
cursor: not-allowed;
|
| 584 |
+
transform: none;
|
| 585 |
+
}
|
| 586 |
+
|
| 587 |
+
.submit-feedback {
|
| 588 |
+
margin-top: var(--space-md);
|
| 589 |
+
padding: var(--space-md) var(--space-lg);
|
| 590 |
+
border-radius: var(--radius-md);
|
| 591 |
+
font-size: 0.9rem;
|
| 592 |
+
display: none;
|
| 593 |
+
animation: fadeSlideUp 0.3s ease-out;
|
| 594 |
+
}
|
| 595 |
+
|
| 596 |
+
.submit-feedback.success {
|
| 597 |
+
display: block;
|
| 598 |
+
background: rgba(34, 197, 94, 0.1);
|
| 599 |
+
border: 1px solid rgba(34, 197, 94, 0.2);
|
| 600 |
+
color: #86efac;
|
| 601 |
+
}
|
| 602 |
+
|
| 603 |
+
.submit-feedback.error {
|
| 604 |
+
display: block;
|
| 605 |
+
background: rgba(239, 68, 68, 0.1);
|
| 606 |
+
border: 1px solid rgba(239, 68, 68, 0.2);
|
| 607 |
+
color: #fca5a5;
|
| 608 |
+
}
|
| 609 |
+
|
| 610 |
+
@keyframes fadeSlideUp {
|
| 611 |
+
from { opacity: 0; transform: translateY(8px); }
|
| 612 |
+
to { opacity: 1; transform: translateY(0); }
|
| 613 |
+
}
|
| 614 |
+
|
| 615 |
+
/* βββ Footer βββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 616 |
+
.footer {
|
| 617 |
+
margin-top: auto;
|
| 618 |
+
border-top: 1px solid var(--border-subtle);
|
| 619 |
+
padding: var(--space-xl) 0;
|
| 620 |
+
background: rgba(10, 14, 26, 0.9);
|
| 621 |
+
backdrop-filter: blur(20px);
|
| 622 |
+
}
|
| 623 |
+
|
| 624 |
+
.footer .container {
|
| 625 |
+
display: flex;
|
| 626 |
+
flex-direction: column;
|
| 627 |
+
align-items: center;
|
| 628 |
+
gap: var(--space-lg);
|
| 629 |
+
}
|
| 630 |
+
|
| 631 |
+
.live-counter {
|
| 632 |
+
text-align: center;
|
| 633 |
+
}
|
| 634 |
+
|
| 635 |
+
.live-counter .counter-label {
|
| 636 |
+
font-size: 0.75rem;
|
| 637 |
+
color: var(--text-muted);
|
| 638 |
+
text-transform: uppercase;
|
| 639 |
+
letter-spacing: 0.15em;
|
| 640 |
+
margin-bottom: var(--space-sm);
|
| 641 |
+
display: flex;
|
| 642 |
+
align-items: center;
|
| 643 |
+
justify-content: center;
|
| 644 |
+
gap: var(--space-sm);
|
| 645 |
+
}
|
| 646 |
+
|
| 647 |
+
.live-counter .counter-label .live-dot {
|
| 648 |
+
width: 8px;
|
| 649 |
+
height: 8px;
|
| 650 |
+
background: #22c55e;
|
| 651 |
+
border-radius: 50%;
|
| 652 |
+
animation: pulse 2s ease-in-out infinite;
|
| 653 |
+
display: inline-block;
|
| 654 |
+
}
|
| 655 |
+
|
| 656 |
+
.live-counter .counter-value {
|
| 657 |
+
font-family: var(--font-heading);
|
| 658 |
+
font-weight: 900;
|
| 659 |
+
font-size: clamp(2rem, 5vw, 3rem);
|
| 660 |
+
background: var(--gradient-primary);
|
| 661 |
+
-webkit-background-clip: text;
|
| 662 |
+
-webkit-text-fill-color: transparent;
|
| 663 |
+
background-clip: text;
|
| 664 |
+
line-height: 1.2;
|
| 665 |
+
transition: all var(--transition-base);
|
| 666 |
+
}
|
| 667 |
+
|
| 668 |
+
.live-counter .counter-subtext {
|
| 669 |
+
font-size: 0.8rem;
|
| 670 |
+
color: var(--text-secondary);
|
| 671 |
+
margin-top: var(--space-xs);
|
| 672 |
+
}
|
| 673 |
+
|
| 674 |
+
.footer-links {
|
| 675 |
+
display: flex;
|
| 676 |
+
align-items: center;
|
| 677 |
+
gap: var(--space-lg);
|
| 678 |
+
font-size: 0.8rem;
|
| 679 |
+
}
|
| 680 |
+
|
| 681 |
+
.footer-links a {
|
| 682 |
+
color: var(--text-muted);
|
| 683 |
+
text-decoration: none;
|
| 684 |
+
transition: color var(--transition-fast);
|
| 685 |
+
}
|
| 686 |
+
|
| 687 |
+
.footer-links a:hover {
|
| 688 |
+
color: var(--text-accent);
|
| 689 |
+
}
|
| 690 |
+
|
| 691 |
+
.footer-links .divider {
|
| 692 |
+
width: 3px;
|
| 693 |
+
height: 3px;
|
| 694 |
+
background: var(--text-muted);
|
| 695 |
+
border-radius: 50%;
|
| 696 |
+
opacity: 0.5;
|
| 697 |
+
}
|
| 698 |
+
|
| 699 |
+
/* βββ Toast Notifications ββββββββββββββββββββββββββββββββββββ */
|
| 700 |
+
.toast-container {
|
| 701 |
+
position: fixed;
|
| 702 |
+
top: var(--space-lg);
|
| 703 |
+
right: var(--space-lg);
|
| 704 |
+
z-index: 1000;
|
| 705 |
+
display: flex;
|
| 706 |
+
flex-direction: column;
|
| 707 |
+
gap: var(--space-sm);
|
| 708 |
+
}
|
| 709 |
+
|
| 710 |
+
.toast {
|
| 711 |
+
padding: var(--space-md) var(--space-lg);
|
| 712 |
+
background: var(--bg-card);
|
| 713 |
+
border: 1px solid var(--border-subtle);
|
| 714 |
+
border-radius: var(--radius-md);
|
| 715 |
+
backdrop-filter: blur(20px);
|
| 716 |
+
animation: toastIn 0.3s ease-out;
|
| 717 |
+
font-size: 0.9rem;
|
| 718 |
+
max-width: 350px;
|
| 719 |
+
box-shadow: var(--shadow-lg);
|
| 720 |
+
}
|
| 721 |
+
|
| 722 |
+
.toast.toast-exiting {
|
| 723 |
+
animation: toastOut 0.3s ease-in forwards;
|
| 724 |
+
}
|
| 725 |
+
|
| 726 |
+
@keyframes toastIn {
|
| 727 |
+
from { opacity: 0; transform: translateX(100px); }
|
| 728 |
+
to { opacity: 1; transform: translateX(0); }
|
| 729 |
+
}
|
| 730 |
+
|
| 731 |
+
@keyframes toastOut {
|
| 732 |
+
from { opacity: 1; transform: translateX(0); }
|
| 733 |
+
to { opacity: 0; transform: translateX(100px); }
|
| 734 |
+
}
|
| 735 |
+
|
| 736 |
+
/* βββ Responsive βββββββββββββββββββββββββββββββββββββββββββββ */
|
| 737 |
+
@media (max-width: 640px) {
|
| 738 |
+
.container {
|
| 739 |
+
padding: 0 var(--space-md);
|
| 740 |
+
}
|
| 741 |
+
|
| 742 |
+
.hero {
|
| 743 |
+
padding: var(--space-3xl) 0 var(--space-2xl);
|
| 744 |
+
}
|
| 745 |
+
|
| 746 |
+
.submit-form {
|
| 747 |
+
flex-direction: column;
|
| 748 |
+
}
|
| 749 |
+
|
| 750 |
+
.header .container {
|
| 751 |
+
flex-direction: column;
|
| 752 |
+
gap: var(--space-sm);
|
| 753 |
+
}
|
| 754 |
+
|
| 755 |
+
.footer-links {
|
| 756 |
+
flex-wrap: wrap;
|
| 757 |
+
justify-content: center;
|
| 758 |
+
}
|
| 759 |
+
}
|
| 760 |
+
|
| 761 |
+
/* βββ Utility Animations βββββββββββββββββββββββββββββββββββββ */
|
| 762 |
+
@keyframes fadeIn {
|
| 763 |
+
from { opacity: 0; }
|
| 764 |
+
to { opacity: 1; }
|
| 765 |
+
}
|
| 766 |
+
|
| 767 |
+
@keyframes slideUp {
|
| 768 |
+
from { opacity: 0; transform: translateY(20px); }
|
| 769 |
+
to { opacity: 1; transform: translateY(0); }
|
| 770 |
+
}
|
| 771 |
+
|
| 772 |
+
.fade-in {
|
| 773 |
+
animation: fadeIn 0.6s ease-out;
|
| 774 |
+
}
|
| 775 |
+
|
| 776 |
+
.slide-up {
|
| 777 |
+
animation: slideUp 0.6s ease-out;
|
| 778 |
+
}
|
| 779 |
+
|
| 780 |
+
.slide-up-delay-1 { animation-delay: 0.1s; animation-fill-mode: backwards; }
|
| 781 |
+
.slide-up-delay-2 { animation-delay: 0.2s; animation-fill-mode: backwards; }
|
| 782 |
+
.slide-up-delay-3 { animation-delay: 0.3s; animation-fill-mode: backwards; }
|
| 783 |
+
.slide-up-delay-4 { animation-delay: 0.4s; animation-fill-mode: backwards; }
|
| 784 |
+
|
| 785 |
+
/* βββ Loading Skeleton βββββββββββββββββββββββββββββββββββββββ */
|
| 786 |
+
.skeleton {
|
| 787 |
+
background: linear-gradient(
|
| 788 |
+
90deg,
|
| 789 |
+
var(--bg-glass) 25%,
|
| 790 |
+
rgba(255, 255, 255, 0.06) 50%,
|
| 791 |
+
var(--bg-glass) 75%
|
| 792 |
+
);
|
| 793 |
+
background-size: 200% 100%;
|
| 794 |
+
animation: shimmer 1.5s infinite;
|
| 795 |
+
border-radius: var(--radius-sm);
|
| 796 |
+
}
|
| 797 |
+
|
| 798 |
+
@keyframes shimmer {
|
| 799 |
+
0% { background-position: 200% 50%; }
|
| 800 |
+
100% { background-position: -200% 50%; }
|
| 801 |
+
}
|
nginx.conf
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
worker_processes auto;
|
| 2 |
+
pid /tmp/nginx.pid;
|
| 3 |
+
|
| 4 |
+
events {
|
| 5 |
+
worker_connections 1024;
|
| 6 |
+
}
|
| 7 |
+
|
| 8 |
+
http {
|
| 9 |
+
include /etc/nginx/mime.types;
|
| 10 |
+
default_type application/octet-stream;
|
| 11 |
+
|
| 12 |
+
# Temp paths for non-root
|
| 13 |
+
client_body_temp_path /tmp/nginx-client-body;
|
| 14 |
+
proxy_temp_path /tmp/nginx-proxy;
|
| 15 |
+
fastcgi_temp_path /tmp/nginx-fastcgi;
|
| 16 |
+
uwsgi_temp_path /tmp/nginx-uwsgi;
|
| 17 |
+
scgi_temp_path /tmp/nginx-scgi;
|
| 18 |
+
|
| 19 |
+
sendfile on;
|
| 20 |
+
tcp_nopush on;
|
| 21 |
+
keepalive_timeout 65;
|
| 22 |
+
gzip on;
|
| 23 |
+
gzip_types text/plain text/css application/json application/javascript text/xml;
|
| 24 |
+
|
| 25 |
+
# Logging
|
| 26 |
+
access_log /tmp/nginx-access.log;
|
| 27 |
+
error_log /tmp/nginx-error.log;
|
| 28 |
+
|
| 29 |
+
server {
|
| 30 |
+
listen 7860;
|
| 31 |
+
server_name _;
|
| 32 |
+
|
| 33 |
+
# Frontend static files
|
| 34 |
+
root /app/frontend;
|
| 35 |
+
index index.html;
|
| 36 |
+
|
| 37 |
+
# API proxy β FastAPI
|
| 38 |
+
location /api/ {
|
| 39 |
+
proxy_pass http://127.0.0.1:8000;
|
| 40 |
+
proxy_set_header Host $host;
|
| 41 |
+
proxy_set_header X-Real-IP $remote_addr;
|
| 42 |
+
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
| 43 |
+
proxy_set_header X-Forwarded-Proto $scheme;
|
| 44 |
+
|
| 45 |
+
# Timeouts
|
| 46 |
+
proxy_connect_timeout 10s;
|
| 47 |
+
proxy_send_timeout 30s;
|
| 48 |
+
proxy_read_timeout 30s;
|
| 49 |
+
}
|
| 50 |
+
|
| 51 |
+
# Health check
|
| 52 |
+
location /health {
|
| 53 |
+
proxy_pass http://127.0.0.1:8000;
|
| 54 |
+
}
|
| 55 |
+
|
| 56 |
+
# Frontend SPA fallback
|
| 57 |
+
location / {
|
| 58 |
+
try_files $uri $uri/ /index.html;
|
| 59 |
+
}
|
| 60 |
+
|
| 61 |
+
# Security headers
|
| 62 |
+
add_header X-Frame-Options "SAMEORIGIN" always;
|
| 63 |
+
add_header X-Content-Type-Options "nosniff" always;
|
| 64 |
+
add_header Referrer-Policy "strict-origin-when-cross-origin" always;
|
| 65 |
+
}
|
| 66 |
+
}
|
run.sh
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
set -e
|
| 3 |
+
|
| 4 |
+
echo "=========================================="
|
| 5 |
+
echo " RandomWeb β Starting services"
|
| 6 |
+
echo "=========================================="
|
| 7 |
+
|
| 8 |
+
# Start FastAPI backend in background
|
| 9 |
+
echo "[1/2] Starting FastAPI backend on :8000..."
|
| 10 |
+
cd /app
|
| 11 |
+
python -m uvicorn backend.main:app --host 0.0.0.0 --port 8000 --log-level info &
|
| 12 |
+
|
| 13 |
+
# Wait for backend to be ready
|
| 14 |
+
echo " Waiting for backend..."
|
| 15 |
+
for i in $(seq 1 30); do
|
| 16 |
+
if curl -s http://127.0.0.1:8000/api/health > /dev/null 2>&1; then
|
| 17 |
+
echo " Backend ready!"
|
| 18 |
+
break
|
| 19 |
+
fi
|
| 20 |
+
sleep 1
|
| 21 |
+
done
|
| 22 |
+
|
| 23 |
+
# Start Nginx in foreground
|
| 24 |
+
echo "[2/2] Starting Nginx on :7860..."
|
| 25 |
+
exec nginx -g 'daemon off;'
|
supabase_schema.sql
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
-- ============================================================
|
| 2 |
+
-- RandomWeb β Supabase Schema
|
| 3 |
+
-- Run this in the Supabase SQL Editor (Dashboard β SQL Editor)
|
| 4 |
+
-- ============================================================
|
| 5 |
+
|
| 6 |
+
-- Enable required extensions
|
| 7 |
+
CREATE EXTENSION IF NOT EXISTS pg_trgm;
|
| 8 |
+
|
| 9 |
+
-- ============================================================
|
| 10 |
+
-- 1. WEBSITES TABLE
|
| 11 |
+
-- ============================================================
|
| 12 |
+
CREATE TABLE IF NOT EXISTS websites (
|
| 13 |
+
id BIGSERIAL PRIMARY KEY,
|
| 14 |
+
url TEXT NOT NULL UNIQUE,
|
| 15 |
+
domain TEXT NOT NULL,
|
| 16 |
+
source TEXT NOT NULL DEFAULT 'unknown',
|
| 17 |
+
status INTEGER,
|
| 18 |
+
is_active BOOLEAN NOT NULL DEFAULT false,
|
| 19 |
+
first_seen TIMESTAMPTZ NOT NULL DEFAULT now(),
|
| 20 |
+
last_checked TIMESTAMPTZ,
|
| 21 |
+
next_check TIMESTAMPTZ,
|
| 22 |
+
created_at TIMESTAMPTZ NOT NULL DEFAULT now()
|
| 23 |
+
);
|
| 24 |
+
|
| 25 |
+
-- Indexes for performance
|
| 26 |
+
CREATE INDEX IF NOT EXISTS idx_websites_is_active ON websites (is_active) WHERE is_active = true;
|
| 27 |
+
CREATE INDEX IF NOT EXISTS idx_websites_domain ON websites (domain);
|
| 28 |
+
CREATE INDEX IF NOT EXISTS idx_websites_next_check ON websites (next_check) WHERE next_check IS NOT NULL;
|
| 29 |
+
CREATE INDEX IF NOT EXISTS idx_websites_random ON websites (id) WHERE is_active = true;
|
| 30 |
+
|
| 31 |
+
-- Trigram index for fuzzy search
|
| 32 |
+
CREATE INDEX IF NOT EXISTS idx_websites_url_trgm ON websites USING gin (url gin_trgm_ops);
|
| 33 |
+
CREATE INDEX IF NOT EXISTS idx_websites_domain_trgm ON websites USING gin (domain gin_trgm_ops);
|
| 34 |
+
|
| 35 |
+
-- ============================================================
|
| 36 |
+
-- 2. STATS TABLE (single-row, live counter)
|
| 37 |
+
-- ============================================================
|
| 38 |
+
CREATE TABLE IF NOT EXISTS stats (
|
| 39 |
+
id INTEGER PRIMARY KEY DEFAULT 1 CHECK (id = 1),
|
| 40 |
+
active_count BIGINT NOT NULL DEFAULT 0,
|
| 41 |
+
total_count BIGINT NOT NULL DEFAULT 0,
|
| 42 |
+
updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
|
| 43 |
+
);
|
| 44 |
+
|
| 45 |
+
INSERT INTO stats (active_count, total_count) VALUES (0, 0)
|
| 46 |
+
ON CONFLICT (id) DO NOTHING;
|
| 47 |
+
|
| 48 |
+
-- ============================================================
|
| 49 |
+
-- 3. TRIGGER β Auto-update stats on website changes
|
| 50 |
+
-- ============================================================
|
| 51 |
+
CREATE OR REPLACE FUNCTION update_stats_count()
|
| 52 |
+
RETURNS TRIGGER AS $$
|
| 53 |
+
BEGIN
|
| 54 |
+
UPDATE stats SET
|
| 55 |
+
active_count = (SELECT count(*) FROM websites WHERE is_active = true),
|
| 56 |
+
total_count = (SELECT count(*) FROM websites),
|
| 57 |
+
updated_at = now()
|
| 58 |
+
WHERE id = 1;
|
| 59 |
+
RETURN NULL;
|
| 60 |
+
END;
|
| 61 |
+
$$ LANGUAGE plpgsql;
|
| 62 |
+
|
| 63 |
+
DROP TRIGGER IF EXISTS trg_update_stats ON websites;
|
| 64 |
+
CREATE TRIGGER trg_update_stats
|
| 65 |
+
AFTER INSERT OR UPDATE OF is_active OR DELETE ON websites
|
| 66 |
+
FOR EACH STATEMENT EXECUTE FUNCTION update_stats_count();
|
| 67 |
+
|
| 68 |
+
-- ============================================================
|
| 69 |
+
-- 4. FUNCTION β Optimized random active website
|
| 70 |
+
-- ============================================================
|
| 71 |
+
CREATE OR REPLACE FUNCTION get_random_active_website()
|
| 72 |
+
RETURNS TABLE(id BIGINT, url TEXT, domain TEXT) AS $$
|
| 73 |
+
BEGIN
|
| 74 |
+
RETURN QUERY
|
| 75 |
+
SELECT w.id, w.url, w.domain
|
| 76 |
+
FROM websites w
|
| 77 |
+
WHERE w.is_active = true
|
| 78 |
+
ORDER BY random()
|
| 79 |
+
LIMIT 1;
|
| 80 |
+
END;
|
| 81 |
+
$$ LANGUAGE plpgsql;
|
| 82 |
+
|
| 83 |
+
-- ============================================================
|
| 84 |
+
-- 5. ROW LEVEL SECURITY
|
| 85 |
+
-- ============================================================
|
| 86 |
+
ALTER TABLE websites ENABLE ROW LEVEL SECURITY;
|
| 87 |
+
ALTER TABLE stats ENABLE ROW LEVEL SECURITY;
|
| 88 |
+
|
| 89 |
+
-- Public read access for frontend (publishable key)
|
| 90 |
+
CREATE POLICY "Allow public read on websites"
|
| 91 |
+
ON websites FOR SELECT
|
| 92 |
+
USING (true);
|
| 93 |
+
|
| 94 |
+
CREATE POLICY "Allow public read on stats"
|
| 95 |
+
ON stats FOR SELECT
|
| 96 |
+
USING (true);
|
| 97 |
+
|
| 98 |
+
-- Allow inserts/updates from authenticated or service role
|
| 99 |
+
CREATE POLICY "Allow service write on websites"
|
| 100 |
+
ON websites FOR ALL
|
| 101 |
+
USING (true)
|
| 102 |
+
WITH CHECK (true);
|
| 103 |
+
|
| 104 |
+
CREATE POLICY "Allow service write on stats"
|
| 105 |
+
ON stats FOR ALL
|
| 106 |
+
USING (true)
|
| 107 |
+
WITH CHECK (true);
|
| 108 |
+
|
| 109 |
+
-- ============================================================
|
| 110 |
+
-- 6. ENABLE REALTIME on stats table
|
| 111 |
+
-- ============================================================
|
| 112 |
+
ALTER PUBLICATION supabase_realtime ADD TABLE stats;
|