Guest1 committed on
Commit
d22875e
Β·
0 Parent(s):

πŸš€ Initial Clean Deployment

Browse files
.env.example ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ # Supabase Configuration
2
+ # Required for the database and validation workers
3
+ SUPABASE_URL="https://your-project.supabase.co"
4
+ SUPABASE_PUBLISHABLE_KEY="your-public-key"
5
+ SUPABASE_SECRET_KEY="your-private-secret-key"
6
+
7
+ # IMPORTANT:
8
+ # 1. On Hugging Face, go to Settings > Variables and Secrets
9
+ # 2. Add these as 'Secrets' there.
10
+ # 3. NEVER hardcode your real keys in your code!
.github/workflows/deploy.yml ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Sync to Hugging Face Hub
2
+ on:
3
+ push:
4
+ branches: [main]
5
+ workflow_dispatch:
6
+
7
+ jobs:
8
+ deploy:
9
+ runs-on: ubuntu-latest
10
+ steps:
11
+ - name: Checkout Repository
12
+ uses: actions/checkout@v3
13
+ with:
14
+ fetch-depth: 0
15
+ lfs: true
16
+
17
+ - name: Push to Hugging Face
18
+ env:
19
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
20
+ run: |
21
+ git remote add hf https://PinkAlpaca:$HF_TOKEN@huggingface.co/spaces/PinkAlpaca/RandomWeb
22
+ git push -f hf main
.gitignore ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ *.pyc
3
+ *.pyo
4
+ .env
5
+ .env.local
6
+ .DS_Store
7
+ *.log
8
+ node_modules/
9
+ .vscode/
10
+ .idea/
Dockerfile ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.12-slim
2
+
3
+ # Install system dependencies
4
+ RUN apt-get update && \
5
+ apt-get install -y --no-install-recommends nginx curl && \
6
+ rm -rf /var/lib/apt/lists/*
7
+
8
+ # Create non-root user (required by HF Spaces)
9
+ RUN useradd -m -u 1000 appuser
10
+
11
+ # Set working directory
12
+ WORKDIR /app
13
+
14
+ # Install Python dependencies
15
+ COPY backend/requirements.txt ./requirements.txt
16
+ RUN pip install --no-cache-dir -r requirements.txt
17
+
18
+ # Copy application code
19
+ COPY backend/ ./backend/
20
+ COPY frontend/ ./frontend/
21
+
22
+ # Copy Nginx config
23
+ COPY nginx.conf /etc/nginx/nginx.conf
24
+
25
+ # Copy startup script
26
+ COPY run.sh ./run.sh
27
+ RUN chmod +x ./run.sh
28
+
29
+ # Create Nginx temp directories writable by appuser
30
+ RUN mkdir -p /tmp/nginx-client-body /tmp/nginx-proxy /tmp/nginx-fastcgi /tmp/nginx-uwsgi /tmp/nginx-scgi && \
31
+ chown -R appuser:appuser /tmp/nginx-* && \
32
+ chown -R appuser:appuser /var/lib/nginx && \
33
+ chown -R appuser:appuser /var/log/nginx && \
34
+ chown -R appuser:appuser /app && \
35
+ touch /tmp/nginx.pid && \
36
+ chown appuser:appuser /tmp/nginx.pid
37
+
38
+ # Switch to non-root user
39
+ USER appuser
40
+
41
+ EXPOSE 7860
42
+
43
+ CMD ["./run.sh"]
README.md ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: RandomWeb
3
+ emoji: 🌐
4
+ colorFrom: blue
5
+ colorTo: purple
6
+ sdk: docker
7
+ app_port: 7860
8
+ pinned: true
9
+ ---
10
+
11
+ # 🌐 RandomWeb β€” Discover Random Websites
12
+
13
+ A random website discovery platform that indexes the entire web using:
14
+
15
+ - **Certificate Transparency Logs** β€” Real-time domain discovery via CertStream
16
+ - **Common Crawl** β€” Batch import from the largest public web archive
17
+ - **BFS Recursive Crawler** β€” Breadth-first link extraction and traversal
18
+ - **Polite Validation** β€” Rate-limited, robots.txt-compliant URL verification
19
+
20
+ ## Features
21
+
22
+ - 🎲 **Random Button** β€” Instant redirect to a random live website
23
+ - πŸ” **Search** β€” Find specific indexed websites
24
+ - βž• **Submit URLs** β€” Add websites to the index
25
+ - πŸ“Š **Live Counter** β€” Real-time count of active indexed sites (via Supabase Realtime)
26
+
27
+ ## Architecture
28
+
29
+ - **Backend**: Python / FastAPI with async workers
30
+ - **Frontend**: Vanilla HTML/CSS/JS with Supabase JS client
31
+ - **Database**: Supabase (PostgreSQL) with RLS and Realtime
32
+ - **Deployment**: Docker on Hugging Face Spaces
33
+
34
+ ## Links
35
+
36
+ - [GitHub Repository](https://github.com/guestcoder0906/RandomWeb)
backend/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # RandomWeb Backend
backend/api/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # API Routes
backend/api/routes.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ RandomWeb β€” REST API Routes
3
+ Endpoints for random redirect, search, URL submission, and stats.
4
+ """
5
+ import logging
6
+ import re
7
+ from urllib.parse import urlparse
8
+
9
+ from fastapi import APIRouter, HTTPException, Query
10
+ from pydantic import BaseModel, field_validator
11
+
12
+ from backend.db import (
13
+ get_random_active_url,
14
+ search_websites,
15
+ get_active_count,
16
+ get_total_count,
17
+ url_exists,
18
+ )
19
+ from backend.workers.validator import enqueue_url
20
+
21
+ logger = logging.getLogger("randomweb.api")
22
+ router = APIRouter(prefix="/api")
23
+
24
+
25
+ # ─── Models ──────────────────────────────────────────────────
26
class SubmitRequest(BaseModel):
    """Request payload for POST /api/submit."""

    url: str  # normalized by the validator to always carry a scheme

    @field_validator("url")
    @classmethod
    def validate_url(cls, v: str) -> str:
        """Normalize and validate a submitted URL.

        - Trims surrounding whitespace; rejects empty input.
        - Prepends "https://" when no scheme is present. The scheme test
          is case-insensitive (URL schemes are case-insensitive per
          RFC 3986); previously "HTTP://x" was double-prefixed into
          "https://HTTP://x".
        - Requires a dotted hostname and caps total length at 2000 chars.

        Raises:
            ValueError: on empty input, malformed host, or overlong URL.
        """
        v = v.strip()
        if not v:
            raise ValueError("URL cannot be empty")

        # Case-insensitive scheme detection; add https:// only when absent.
        if not v.lower().startswith(("http://", "https://")):
            v = f"https://{v}"

        parsed = urlparse(v)
        if not parsed.netloc or "." not in parsed.netloc:
            raise ValueError("Invalid URL format")

        if len(v) > 2000:
            raise ValueError("URL too long")

        return v
48
+
49
+
50
class SubmitResponse(BaseModel):
    """Response body for POST /api/submit."""

    success: bool  # True for new submissions and for duplicates alike
    message: str   # human-readable outcome description
    url: str       # the (validator-normalized) URL that was processed


class RandomResponse(BaseModel):
    """Response body for GET /api/random."""

    url: str  # a randomly selected active website URL


class StatsResponse(BaseModel):
    """Response body for GET /api/stats."""

    active_count: int  # sites currently marked active in the index
    total_count: int   # all sites ever indexed


class SearchResult(BaseModel):
    """One row in the GET /api/search response list."""

    url: str
    domain: str
    is_active: bool
69
+
70
+
71
+ # ─── Endpoints ───────────────────────────────────────────────
72
@router.get("/random", response_model=RandomResponse)
async def get_random():
    """Return a single random active website URL for the redirect button."""
    random_url = get_random_active_url()
    if random_url:
        return {"url": random_url}
    # Nothing indexed yet — surface a 404 so the frontend can show a notice.
    raise HTTPException(
        status_code=404,
        detail="No active websites found yet. The system is still indexing.",
    )
82
+
83
+
84
@router.get("/search", response_model=list[SearchResult])
async def search(
    q: str = Query(..., min_length=1, max_length=200, description="Search query"),
    limit: int = Query(20, ge=1, le=100, description="Max results"),
):
    """Search the index for websites whose URL or domain matches *q*."""
    return search_websites(q, limit=limit)
92
+
93
+
94
@router.post("/submit", response_model=SubmitResponse)
async def submit_url(request: SubmitRequest):
    """Accept a user-submitted URL and queue it for validation.

    Duplicate submissions are reported as success without re-queuing.
    """
    candidate = request.url
    logger.info("User submitted URL: %s", candidate)

    # Already indexed? Treat it as a successful, idempotent submission.
    if url_exists(candidate):
        return SubmitResponse(
            success=True,
            message="This URL is already in our index.",
            url=candidate,
        )

    # Hand off to the async validation worker.
    await enqueue_url(candidate, source="user_submit")
    return SubmitResponse(
        success=True,
        message="URL submitted successfully! It will be validated and added if accessible.",
        url=candidate,
    )
116
+
117
+
118
@router.get("/stats", response_model=StatsResponse)
async def get_stats():
    """Report index statistics: active and total site counts."""
    active = get_active_count()
    total = get_total_count()
    return StatsResponse(active_count=active, total_count=total)
125
+
126
+
127
@router.get("/health")
async def health():
    """Simple liveness endpoint; always reports ok when the app is up."""
    return {"status": "ok"}
backend/config.py ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ RandomWeb β€” Configuration
3
+ Loads environment variables and defines constants for all workers.
4
+ """
5
+ import os
6
+
7
+ # ─── Supabase ────────────────────────────────────────────────
8
+ SUPABASE_URL = os.getenv("SUPABASE_URL", "")
9
+ SUPABASE_SECRET_KEY = os.getenv("SUPABASE_SECRET_KEY", "")
10
+ SUPABASE_PUBLISHABLE_KEY = os.getenv("SUPABASE_PUBLISHABLE_KEY", "")
11
+
12
+ # ─── Crawler Settings ───────────────────────────────────────
13
+ USER_AGENT = "RandomWeb/1.0 (+https://github.com/guestcoder0906/RandomWeb; polite-bot)"
14
+ REQUEST_TIMEOUT = 10 # seconds
15
+ MAX_GLOBAL_CONCURRENCY = 20 # max simultaneous outbound connections
16
+ PER_DOMAIN_RATE_LIMIT = 1.0 # requests per second per domain
17
+ CRAWL_DELAY_DEFAULT = 1.0 # fallback crawl delay if robots.txt doesn't specify
18
+ MAX_CRAWL_DEPTH = 3 # BFS depth limit per seed
19
+ MAX_LINKS_PER_PAGE = 50 # max links to extract per page
20
+ MAX_QUEUE_SIZE = 100_000 # max URLs in crawler queue
21
+
22
+ # ─── Validator Settings ──────────────────────────────────────
23
+ VALIDATION_BATCH_SIZE = 50 # URLs per validation batch
24
+ VALIDATION_CONCURRENCY = 10 # concurrent validation requests
25
+ RECHECK_INTERVAL_DAYS = 365 # re-verify every year
26
+
27
+ # ─── CertStream ──────────────────────────────────────────────
28
+ CERTSTREAM_URL = "wss://certstream.calidog.io/"
29
+ CT_LOG_BATCH_SIZE = 100 # queue batch size before flushing to validation
30
+ CT_LOG_RECONNECT_DELAY = 5 # initial reconnect delay in seconds
31
+ CT_LOG_MAX_RECONNECT_DELAY = 300 # max reconnect delay
32
+
33
+ # ─── Common Crawl ────────────────────────────────────────────
34
+ COMMON_CRAWL_INDEX_URL = "https://index.commoncrawl.org/collinfo.json"
35
+ COMMON_CRAWL_SAMPLE_SIZE = 10_000 # URLs per crawl import batch
36
+ COMMON_CRAWL_RESCAN_HOURS = 168 # re-import weekly (7 * 24)
37
+
38
+ # ─── Scheduler ───────────────────────────────────────────────
39
+ SCHEDULER_INTERVAL_SECONDS = 3600 # run re-verification check every hour
40
+ SCHEDULER_BATCH_SIZE = 100 # URLs per re-verification batch
41
+
42
+ # ─── Blocked TLDs / Patterns ────────────────────────────────
43
+ BLOCKED_TLDS = {
44
+ ".local", ".internal", ".test", ".example",
45
+ ".invalid", ".localhost", ".onion",
46
+ }
47
+
48
+ # ─── Top 100 Seed Websites ──────────────────────────────────
49
+ SEED_WEBSITES = [
50
+ "https://google.com",
51
+ "https://youtube.com",
52
+ "https://facebook.com",
53
+ "https://instagram.com",
54
+ "https://chatgpt.com",
55
+ "https://x.com",
56
+ "https://reddit.com",
57
+ "https://wikipedia.org",
58
+ "https://whatsapp.com",
59
+ "https://bing.com",
60
+ "https://tiktok.com",
61
+ "https://yahoo.co.jp",
62
+ "https://yandex.ru",
63
+ "https://yahoo.com",
64
+ "https://amazon.com",
65
+ "https://gemini.google.com",
66
+ "https://linkedin.com",
67
+ "https://bet.br",
68
+ "https://baidu.com",
69
+ "https://naver.com",
70
+ "https://netflix.com",
71
+ "https://pinterest.com",
72
+ "https://live.com",
73
+ "https://bilibili.com",
74
+ "https://pornhub.com",
75
+ "https://temu.com",
76
+ "https://dzen.ru",
77
+ "https://office.com",
78
+ "https://microsoft.com",
79
+ "https://xhamster.com",
80
+ "https://twitch.tv",
81
+ "https://xvideos.com",
82
+ "https://canva.com",
83
+ "https://weather.com",
84
+ "https://vk.com",
85
+ "https://globo.com",
86
+ "https://fandom.com",
87
+ "https://news.yahoo.co.jp",
88
+ "https://t.me",
89
+ "https://samsung.com",
90
+ "https://mail.ru",
91
+ "https://duckduckgo.com",
92
+ "https://nytimes.com",
93
+ "https://stripchat.com",
94
+ "https://xnxx.com",
95
+ "https://ebay.com",
96
+ "https://zoom.us",
97
+ "https://xhamster44.desi",
98
+ "https://discord.com",
99
+ "https://eporner.com",
100
+ "https://github.com",
101
+ "https://booking.com",
102
+ "https://spotify.com",
103
+ "https://cricbuzz.com",
104
+ "https://instructure.com",
105
+ "https://docomo.ne.jp",
106
+ "https://roblox.com",
107
+ "https://aliexpress.com",
108
+ "https://bbc.com",
109
+ "https://bbc.co.uk",
110
+ "https://ozon.ru",
111
+ "https://apple.com",
112
+ "https://imdb.com",
113
+ "https://telegram.org",
114
+ "https://brave.com",
115
+ "https://amazon.in",
116
+ "https://chaturbate.com",
117
+ "https://msn.com",
118
+ "https://walmart.com",
119
+ "https://amazon.co.jp",
120
+ "https://paypal.com",
121
+ "https://cnn.com",
122
+ "https://ya.ru",
123
+ "https://indeed.com",
124
+ "https://etsy.com",
125
+ "https://rakuten.co.jp",
126
+ "https://amazon.de",
127
+ "https://espn.com",
128
+ "https://hbomax.com",
129
+ "https://usps.com",
130
+ "https://music.youtube.com",
131
+ "https://ok.ru",
132
+ "https://wildberries.ru",
133
+ "https://office365.com",
134
+ "https://disneyplus.com",
135
+ "https://douyin.com",
136
+ "https://namu.wiki",
137
+ "https://adobe.com",
138
+ "https://shein.com",
139
+ "https://qq.com",
140
+ "https://amazon.co.uk",
141
+ "https://quora.com",
142
+ "https://faphouse.com",
143
+ "https://rutube.ru",
144
+ "https://theguardian.com",
145
+ "https://scribd.com",
146
+ "https://grok.com",
147
+ "https://zillow.com",
148
+ "https://dcinside.com",
149
+ "https://onlyfans.com",
150
+ ]
backend/db.py ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ RandomWeb β€” Database Helpers
3
+ Supabase client initialization and common query functions.
4
+ """
5
+ import logging
6
+ from datetime import datetime, timedelta, timezone
7
+ from urllib.parse import urlparse
8
+ from typing import Optional
9
+
10
+ from supabase import create_client, Client
11
+
12
+ from backend.config import (
13
+ SUPABASE_URL,
14
+ SUPABASE_SECRET_KEY,
15
+ SUPABASE_PUBLISHABLE_KEY,
16
+ RECHECK_INTERVAL_DAYS,
17
+ )
18
+
19
+ logger = logging.getLogger("randomweb.db")
20
+
21
+ # ─── Client Initialization ──────────────────────────────────
22
+ _client: Optional[Client] = None
23
+
24
+
25
def get_client() -> Client:
    """Lazily build and cache the module-wide Supabase client.

    Prefers the secret key (write access) and falls back to the
    publishable key.

    Raises:
        ValueError: when neither key is configured.
    """
    global _client
    if _client is not None:
        return _client

    # Priority: Secret Key (for writes) -> Publishable Key (fallback)
    key = SUPABASE_SECRET_KEY or SUPABASE_PUBLISHABLE_KEY
    if not key:
        logger.critical("❌ No Supabase API key found!")
        raise ValueError("SUPABASE_SECRET_KEY and SUPABASE_PUBLISHABLE_KEY are both empty.")

    _client = create_client(SUPABASE_URL, key)

    # "sb_"-prefixed keys are the newer managed format; anything else is a legacy JWT.
    key_type = "Managed (New)" if key.startswith("sb_") else "Legacy (JWT)"
    logger.info("βœ… Supabase client initialized (Type: %s) for %s", key_type, SUPABASE_URL)
    return _client
43
+
44
+
45
def extract_domain(url: str) -> str:
    """Return the host portion of *url*.

    For scheme-less inputs like "example.com/page", urlparse places
    everything in .path, so fall back to the first path segment.
    """
    pieces = urlparse(url)
    if pieces.netloc:
        return pieces.netloc
    return pieces.path.split("/")[0]
49
+
50
+
51
+ # ─── Insert / Upsert ────────────────────────────────────────
52
def upsert_website(
    url: str,
    source: str = "unknown",
    status: Optional[int] = None,
    is_active: bool = False,
) -> bool:
    """Insert or refresh a single website row (conflict key: url).

    Active sites get a next_check timestamp RECHECK_INTERVAL_DAYS in the
    future; inactive sites have next_check cleared.

    Returns:
        True on success; False on any error (logged, never raised).
    """
    try:
        checked_at = datetime.now(timezone.utc)
        if is_active:
            next_check = (checked_at + timedelta(days=RECHECK_INTERVAL_DAYS)).isoformat()
        else:
            next_check = None

        row = {
            "url": url,
            "domain": extract_domain(url),
            "source": source,
            "status": status,
            "is_active": is_active,
            "last_checked": checked_at.isoformat(),
            "next_check": next_check,
        }
        get_client().table("websites").upsert(row, on_conflict="url").execute()
        return True
    except Exception as e:
        logger.error("Failed to upsert %s: %s", url, e)
        return False
85
+
86
+
87
def bulk_upsert_websites(records: list[dict]) -> int:
    """Upsert many website rows at once (conflict key: url).

    Returns:
        The number of records sent on success; 0 for empty input or on
        any database error (logged, never raised).
    """
    if not records:
        return 0
    try:
        get_client().table("websites").upsert(records, on_conflict="url").execute()
    except Exception as e:
        logger.error("Bulk upsert failed (%d records): %s", len(records), e)
        return 0
    return len(records)
99
+
100
+
101
+ # ─── Queries ─────────────────────────────────────────────────
102
def get_random_active_url() -> Optional[str]:
    """Fetch one random active URL via the get_random_active_website RPC.

    Returns None when no rows are available or on any error (logged).
    """
    try:
        rows = get_client().rpc("get_random_active_website").execute().data
        if rows:
            return rows[0]["url"]
    except Exception as e:
        logger.error("Failed to get random URL: %s", e)
    return None
112
+
113
+
114
def search_websites(query: str, limit: int = 20) -> list[dict]:
    """Search active websites by URL or domain substring.

    Args:
        query: Raw user search text. Characters that are structurally
            significant in PostgREST filter syntax (commas and
            parentheses) are stripped before interpolation — previously
            a query like "a,b" corrupted the .or_() expression and
            failed the whole request.
        limit: Maximum number of rows to return.

    Returns:
        Row dicts with url/domain/is_active; [] on any error (logged).
    """
    # Commas separate conditions and parentheses group them inside a
    # PostgREST "or" filter, so they must not pass through verbatim.
    sanitized = query.replace(",", "").replace("(", "").replace(")", "")
    try:
        result = (
            get_client()
            .table("websites")
            .select("url, domain, is_active")
            .or_(f"url.ilike.%{sanitized}%,domain.ilike.%{sanitized}%")
            .eq("is_active", True)
            .limit(limit)
            .execute()
        )
        return result.data or []
    except Exception as e:
        logger.error("Search failed for '%s': %s", query, e)
        return []
130
+
131
+
132
def get_active_count() -> int:
    """Read the active website count from the stats singleton row (id=1).

    Returns 0 when the row is missing or on any error (logged).
    """
    try:
        rows = (
            get_client().table("stats").select("active_count").eq("id", 1).execute().data
        )
        return rows[0]["active_count"] if rows else 0
    except Exception as e:
        logger.error("Failed to get active count: %s", e)
        return 0
142
+
143
+
144
def get_total_count() -> int:
    """Read the total indexed-site count from the stats singleton row (id=1).

    Returns 0 when the row is missing or on any error (logged).
    """
    try:
        rows = (
            get_client().table("stats").select("total_count").eq("id", 1).execute().data
        )
        return rows[0]["total_count"] if rows else 0
    except Exception as e:
        logger.error("Failed to get total count: %s", e)
        return 0
154
+
155
+
156
def url_exists(url: str) -> bool:
    """Return True when *url* already has a row in the websites table.

    Errors are logged and reported as "not present" (False).
    """
    try:
        lookup = (
            get_client()
            .table("websites")
            .select("id")
            .eq("url", url)
            .limit(1)
        )
        return bool(lookup.execute().data)
    except Exception as e:
        logger.error("Failed to check URL existence: %s", e)
        return False
171
+
172
+
173
def get_urls_needing_recheck(limit: int = 100) -> list[dict]:
    """List active sites whose next_check timestamp has already passed.

    Args:
        limit: Maximum number of rows to return.

    Returns:
        Row dicts (id, url, domain) due for re-verification; [] on error.
    """
    cutoff = datetime.now(timezone.utc).isoformat()
    try:
        due = (
            get_client()
            .table("websites")
            .select("id, url, domain")
            .eq("is_active", True)
            .lte("next_check", cutoff)
            .limit(limit)
            .execute()
        )
        return due.data or []
    except Exception as e:
        logger.error("Failed to get recheck URLs: %s", e)
        return []
backend/main.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ RandomWeb β€” Main Application
3
+ FastAPI app with background workers for URL discovery, validation, and re-verification.
4
+ """
5
+ import asyncio
6
+ import logging
7
+ from contextlib import asynccontextmanager
8
+
9
+ from fastapi import FastAPI
10
+ from fastapi.middleware.cors import CORSMiddleware
11
+
12
+ from backend.api.routes import router
13
+ from backend.config import SEED_WEBSITES, SUPABASE_URL, SUPABASE_SECRET_KEY
14
+ from backend.db import get_client, extract_domain
15
+ from backend.workers.validator import run_validator, enqueue_url
16
+ from backend.workers.ct_log import run_ct_log_worker
17
+ from backend.workers.common_crawl import run_common_crawl_importer
18
+ from backend.workers.crawler import run_crawler
19
+ from backend.workers.scheduler import run_scheduler
20
+
21
+ # ─── Logging ─────────────────────────────────────────────────
22
+ logging.basicConfig(
23
+ level=logging.INFO,
24
+ format="%(asctime)s [%(name)s] %(levelname)s: %(message)s",
25
+ datefmt="%Y-%m-%d %H:%M:%S",
26
+ )
27
+ logger = logging.getLogger("randomweb")
28
+
29
+
30
async def seed_top_websites():
    """Queue every configured seed website for validation."""
    logger.info("Seeding %d top websites...", len(SEED_WEBSITES))
    for seed in SEED_WEBSITES:
        await enqueue_url(seed, source="seed")
    logger.info("All seed websites queued for validation")
36
+
37
+
38
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Manage background workers lifecycle.

    Runs at app startup (before `yield`): verifies the Supabase
    connection, then launches all discovery/validation workers as
    asyncio tasks. After `yield` (app shutdown): cancels every task and
    awaits them so cancellation completes before the process exits.
    """
    logger.info("=" * 60)
    logger.info("RandomWeb starting up")
    logger.info("Supabase URL: %s", SUPABASE_URL)
    logger.info("Secret key configured: %s", "Yes" if SUPABASE_SECRET_KEY else "No")
    logger.info("=" * 60)

    # Initialize Supabase client; a failure is logged but does not abort
    # startup — workers will surface their own errors.
    try:
        get_client()
        logger.info("Supabase client connected")
    except Exception as e:
        logger.error("Failed to connect to Supabase: %s", e)

    # Launch background workers
    tasks = []

    # 1. Validation worker (must start first — other workers enqueue to it)
    tasks.append(asyncio.create_task(run_validator(), name="validator"))

    # 2. Seed top websites
    tasks.append(asyncio.create_task(seed_top_websites(), name="seeder"))

    # 3. CT Log worker
    tasks.append(asyncio.create_task(run_ct_log_worker(), name="ct_log"))

    # 4. Common Crawl importer
    tasks.append(asyncio.create_task(run_common_crawl_importer(), name="common_crawl"))

    # 5. BFS Crawler
    tasks.append(asyncio.create_task(run_crawler(), name="crawler"))

    # 6. Re-verification scheduler
    tasks.append(asyncio.create_task(run_scheduler(), name="scheduler"))

    logger.info("All %d background workers launched", len(tasks))

    yield

    # Shutdown: cancel all tasks, then gather with return_exceptions so
    # CancelledError from each worker is absorbed rather than raised here.
    logger.info("Shutting down background workers...")
    for task in tasks:
        task.cancel()
    await asyncio.gather(*tasks, return_exceptions=True)
    logger.info("All workers stopped")
85
+
86
+
87
+ # ─── FastAPI App ─────────────────────────────────────────────
88
+ app = FastAPI(
89
+ title="RandomWeb",
90
+ description="Discover random websites from across the internet",
91
+ version="1.0.0",
92
+ lifespan=lifespan,
93
+ )
94
+
95
+ # CORS β€” allow frontend
96
+ app.add_middleware(
97
+ CORSMiddleware,
98
+ allow_origins=["*"],
99
+ allow_credentials=True,
100
+ allow_methods=["*"],
101
+ allow_headers=["*"],
102
+ )
103
+
104
+ # Mount API routes
105
+ app.include_router(router)
backend/requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ fastapi==0.115.6
2
+ uvicorn[standard]==0.34.0
3
+ supabase==2.11.0
4
+ aiohttp==3.11.11
5
+ aiolimiter==1.2.1
6
+ protego==0.3.1
7
+ beautifulsoup4==4.12.3
8
+ websockets==14.1
9
+ pydantic==2.10.4
backend/workers/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Background Workers
backend/workers/common_crawl.py ADDED
@@ -0,0 +1,187 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ RandomWeb β€” Common Crawl CDX Importer
3
+ Fetches URLs from the Common Crawl CDX Index API to seed the database
4
+ with a broad sample of the internet.
5
+ """
6
+ import asyncio
7
+ import logging
8
+ import random
9
+ from typing import Optional
10
+ from urllib.parse import urlparse
11
+
12
+ import aiohttp
13
+
14
+ from backend.config import (
15
+ COMMON_CRAWL_INDEX_URL,
16
+ COMMON_CRAWL_SAMPLE_SIZE,
17
+ COMMON_CRAWL_RESCAN_HOURS,
18
+ USER_AGENT,
19
+ REQUEST_TIMEOUT,
20
+ )
21
+ from backend.workers.validator import enqueue_url
22
+
23
+ logger = logging.getLogger("randomweb.common_crawl")
24
+
25
+ # Sample TLDs to query for broad coverage
26
+ SAMPLE_QUERIES = [
27
+ "*.com", "*.org", "*.net", "*.io", "*.co",
28
+ "*.edu", "*.gov", "*.dev", "*.app", "*.info",
29
+ "*.me", "*.tv", "*.co.uk", "*.de", "*.fr",
30
+ "*.jp", "*.ru", "*.br", "*.in", "*.ca",
31
+ "*.au", "*.nl", "*.it", "*.es", "*.ch",
32
+ "*.se", "*.no", "*.fi", "*.dk", "*.pl",
33
+ ]
34
+
35
+
36
+ async def _get_latest_crawl_index(
37
+ session: aiohttp.ClientSession,
38
+ ) -> Optional[str]:
39
+ """Fetch the latest Common Crawl index URL."""
40
+ try:
41
+ async with session.get(
42
+ COMMON_CRAWL_INDEX_URL,
43
+ timeout=aiohttp.ClientTimeout(total=30),
44
+ headers={"User-Agent": USER_AGENT},
45
+ ) as resp:
46
+ if resp.status != 200:
47
+ logger.error("Failed to fetch crawl index: HTTP %d", resp.status)
48
+ return None
49
+
50
+ data = await resp.json()
51
+ if data and len(data) > 0:
52
+ # Latest crawl is first in the list
53
+ cdx_api = data[0].get("cdx-api")
54
+ crawl_id = data[0].get("id", "unknown")
55
+ logger.info("Latest Common Crawl: %s", crawl_id)
56
+ return cdx_api
57
+
58
+ except Exception as e:
59
+ logger.error("Failed to get crawl index: %s", e)
60
+
61
+ return None
62
+
63
+
64
+ async def _query_cdx_for_domains(
65
+ session: aiohttp.ClientSession,
66
+ cdx_api: str,
67
+ query: str,
68
+ limit: int = 500,
69
+ ) -> list[str]:
70
+ """Query the CDX API for URLs matching a pattern."""
71
+ urls = []
72
+ try:
73
+ params = {
74
+ "url": query,
75
+ "output": "json",
76
+ "fl": "url",
77
+ "limit": str(limit),
78
+ "filter": "status:200",
79
+ }
80
+
81
+ async with session.get(
82
+ cdx_api,
83
+ params=params,
84
+ timeout=aiohttp.ClientTimeout(total=60),
85
+ headers={"User-Agent": USER_AGENT},
86
+ ) as resp:
87
+ if resp.status != 200:
88
+ logger.debug("CDX query failed for %s: HTTP %d", query, resp.status)
89
+ return urls
90
+
91
+ text = await resp.text()
92
+ lines = text.strip().split("\n")
93
+
94
+ for line in lines:
95
+ line = line.strip()
96
+ if not line or line.startswith("["):
97
+ continue
98
+ try:
99
+ # Lines can be JSON or plain URL
100
+ if line.startswith("{"):
101
+ import json
102
+ data = json.loads(line)
103
+ url = data.get("url", "")
104
+ elif line.startswith('"'):
105
+ url = line.strip('"')
106
+ else:
107
+ url = line
108
+
109
+ if url and url.startswith("http"):
110
+ # Normalize to homepage
111
+ parsed = urlparse(url)
112
+ normalized = f"https://{parsed.netloc}"
113
+ urls.append(normalized)
114
+ except Exception:
115
+ continue
116
+
117
+ except asyncio.TimeoutError:
118
+ logger.debug("CDX query timed out for %s", query)
119
+ except Exception as e:
120
+ logger.debug("CDX query error for %s: %s", query, e)
121
+
122
+ return urls
123
+
124
+
125
async def run_common_crawl_importer():
    """
    Main Common Crawl import loop.
    Fetches a broad sample of URLs from the CDX API and queues them.
    Runs once on startup, then rescans weekly
    (COMMON_CRAWL_RESCAN_HOURS). Never returns; intended to run as a
    long-lived asyncio task.
    """
    logger.info("Common Crawl importer starting")

    while True:
        try:
            async with aiohttp.ClientSession() as session:
                cdx_api = await _get_latest_crawl_index(session)
                if not cdx_api:
                    logger.warning("No CDX API available, retrying in 1 hour")
                    await asyncio.sleep(3600)
                    continue

                logger.info("Importing from CDX API: %s", cdx_api)
                total_queued = 0
                seen_domains = set()  # de-duplicate by host within this run

                # Shuffle queries for variety
                queries = SAMPLE_QUERIES.copy()
                random.shuffle(queries)

                # Spread the sample budget across all TLD queries, but
                # never request fewer than 50 results per query.
                per_query_limit = max(
                    50, COMMON_CRAWL_SAMPLE_SIZE // len(queries)
                )

                for query in queries:
                    if total_queued >= COMMON_CRAWL_SAMPLE_SIZE:
                        break

                    urls = await _query_cdx_for_domains(
                        session, cdx_api, query, limit=per_query_limit
                    )

                    for url in urls:
                        domain = urlparse(url).netloc
                        if domain and domain not in seen_domains:
                            seen_domains.add(domain)
                            await enqueue_url(url, source="common_crawl")
                            total_queued += 1

                            if total_queued >= COMMON_CRAWL_SAMPLE_SIZE:
                                break

                    # Be polite to the CDX API
                    await asyncio.sleep(2)

                logger.info(
                    "Common Crawl import complete: %d URLs queued", total_queued
                )

        except Exception as e:
            logger.error("Common Crawl importer error: %s", e)

        # Wait before next rescan
        logger.info(
            "Next Common Crawl rescan in %d hours",
            COMMON_CRAWL_RESCAN_HOURS,
        )
        await asyncio.sleep(COMMON_CRAWL_RESCAN_HOURS * 3600)
backend/workers/crawler.py ADDED
@@ -0,0 +1,305 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ RandomWeb β€” BFS Recursive Crawler
3
+ Breadth-first crawler that extracts and queues all hyperlinks from indexed pages
4
+ to continuously expand the known network graph.
5
+ """
6
+ import asyncio
7
+ import logging
8
+ import re
9
+ from collections import deque
10
+ from typing import Optional
11
+ from urllib.parse import urljoin, urlparse
12
+
13
+ import aiohttp
14
+ from aiolimiter import AsyncLimiter
15
+ from bs4 import BeautifulSoup
16
+ from protego import Protego
17
+
18
+ from backend.config import (
19
+ USER_AGENT,
20
+ REQUEST_TIMEOUT,
21
+ MAX_GLOBAL_CONCURRENCY,
22
+ PER_DOMAIN_RATE_LIMIT,
23
+ CRAWL_DELAY_DEFAULT,
24
+ MAX_CRAWL_DEPTH,
25
+ MAX_LINKS_PER_PAGE,
26
+ MAX_QUEUE_SIZE,
27
+ BLOCKED_TLDS,
28
+ )
29
+ from backend.workers.validator import enqueue_url
30
+ from backend.db import get_client
31
+
32
+ logger = logging.getLogger("randomweb.crawler")
33
+
34
# ─── State ───────────────────────────────────────────────────
# BFS frontier; bounded so memory stays capped (deque drops the oldest
# entry once maxlen is reached).
_crawl_queue: deque = deque(maxlen=MAX_QUEUE_SIZE)
# URLs already taken off the queue this process lifetime.
_visited: set = set()
# Soft cap for _visited; run_crawler evicts half when exceeded.
_MAX_VISITED_CACHE = 1_000_000
# Per-domain robots.txt parsers; None marks a fetch miss/failure.
_robots_cache: dict[str, Optional[Protego]] = {}
# Per-domain rate limiters (lazily created in _get_domain_limiter).
_domain_limiters: dict[str, AsyncLimiter] = {}

# File extensions to skip — static/binary assets that are never HTML pages.
SKIP_EXTENSIONS = {
    ".jpg", ".jpeg", ".png", ".gif", ".svg", ".webp", ".ico",
    ".css", ".js", ".woff", ".woff2", ".ttf", ".eot",
    ".pdf", ".doc", ".docx", ".xls", ".xlsx", ".ppt", ".pptx",
    ".zip", ".rar", ".7z", ".tar", ".gz",
    ".mp3", ".mp4", ".avi", ".mkv", ".mov", ".flv",
    ".exe", ".msi", ".dmg", ".apk",
}
50
+
51
+
52
def _get_domain_limiter(domain: str) -> AsyncLimiter:
    """Return the rate limiter for *domain*, creating it on first use."""
    limiter = _domain_limiters.get(domain)
    if limiter is None:
        limiter = AsyncLimiter(PER_DOMAIN_RATE_LIMIT, 1.0)
        _domain_limiters[domain] = limiter
    return limiter
56
+
57
+
58
async def _fetch_robots(
    session: aiohttp.ClientSession, domain: str
) -> Optional[Protego]:
    """Fetch and parse https://<domain>/robots.txt, with caching.

    Returns a Protego parser on a 200 response; returns None when the
    file is missing, non-200, or unreachable.  Both outcomes are cached
    for the process lifetime, so each domain is fetched at most once.
    NOTE(review): ssl=False disables certificate verification —
    presumably a deliberate reach-over-strictness choice; confirm.
    """
    if domain in _robots_cache:
        return _robots_cache[domain]

    try:
        async with session.get(
            f"https://{domain}/robots.txt",
            timeout=aiohttp.ClientTimeout(total=REQUEST_TIMEOUT),
            headers={"User-Agent": USER_AGENT},
            allow_redirects=True,
            ssl=False,
        ) as resp:
            if resp.status == 200:
                text = await resp.text()
                parser = Protego.parse(text)
                _robots_cache[domain] = parser
                return parser
    except Exception:
        # Network failure — fall through to the "no robots" path below.
        pass

    # Cache the miss so every URL on this domain doesn't refetch.
    _robots_cache[domain] = None
    return None
82
+
83
+
84
def _normalize_url(base_url: str, href: str) -> Optional[str]:
    """Normalize and validate a discovered URL.

    Resolves *href* against *base_url*, keeps only http(s) URLs, drops
    blocked TLDs and non-HTML file extensions, strips the fragment, and
    removes a trailing slash when there is no query string.

    Returns:
        The cleaned absolute URL (< 2000 chars), or None when the link
        should be discarded.
    """
    try:
        # Resolve relative URLs
        full_url = urljoin(base_url, href)
        parsed = urlparse(full_url)

        # Only HTTP/HTTPS
        if parsed.scheme not in ("http", "https"):
            return None

        # Skip blocked TLDs
        domain = parsed.netloc.lower()
        for tld in BLOCKED_TLDS:
            if domain.endswith(tld):
                return None

        # Skip file extensions we don't want
        path_lower = parsed.path.lower()
        for ext in SKIP_EXTENSIONS:
            if path_lower.endswith(ext):
                return None

        # Rebuild without the fragment.
        clean = f"{parsed.scheme}://{parsed.netloc}{parsed.path}"
        if parsed.query:
            clean += f"?{parsed.query}"
        else:
            # Remove trailing slash for consistency — but only when there
            # is no query string.  BUGFIX: the old rstrip ran on the full
            # URL and corrupted query values ending in "/".
            clean = clean.rstrip("/")

        return clean if len(clean) < 2000 else None

    except Exception:
        return None
119
+
120
+
121
async def _crawl_page(
    session: aiohttp.ClientSession,
    url: str,
    depth: int,
    semaphore: asyncio.Semaphore,
) -> list[str]:
    """
    Fetch a page and extract all hyperlinks.
    Returns list of discovered URLs (empty on any failure, robots
    disallow, non-200 status, or non-HTML content).

    Concurrency is bounded by the shared *semaphore* plus a per-domain
    rate limiter; robots.txt disallow rules and crawl-delay are honoured.
    NOTE(review): the *depth* parameter is accepted but never read in
    this body — depth limiting happens in the caller; confirm intended.
    """
    domain = urlparse(url).netloc
    limiter = _get_domain_limiter(domain)

    async with semaphore:
        async with limiter:
            # Check robots.txt
            robots = await _fetch_robots(session, domain)
            if robots and not robots.can_fetch(url, USER_AGENT):
                return []

            # Respect crawl delay (robots override wins when declared).
            delay = CRAWL_DELAY_DEFAULT
            if robots:
                d = robots.crawl_delay(USER_AGENT)
                if d is not None:
                    delay = d
            if delay > 0:
                await asyncio.sleep(delay)

            discovered = []
            try:
                async with session.get(
                    url,
                    timeout=aiohttp.ClientTimeout(total=REQUEST_TIMEOUT),
                    headers={
                        "User-Agent": USER_AGENT,
                        "Accept": "text/html",
                    },
                    allow_redirects=True,
                    ssl=False,
                ) as resp:
                    if resp.status != 200:
                        return []

                    # Only parse HTML responses.
                    content_type = resp.headers.get("Content-Type", "")
                    if "text/html" not in content_type:
                        return []

                    # Limit response body to avoid memory issues
                    body = await resp.text(errors="ignore")
                    if len(body) > 5_000_000:  # 5MB limit
                        body = body[:5_000_000]

                    soup = BeautifulSoup(body, "html.parser")
                    links = soup.find_all("a", href=True)

                    count = 0
                    for link in links:
                        # Cap extracted links per page.
                        if count >= MAX_LINKS_PER_PAGE:
                            break

                        href = link.get("href", "").strip()
                        if not href:
                            continue

                        normalized = _normalize_url(url, href)
                        if normalized and normalized not in _visited:
                            discovered.append(normalized)
                            count += 1

            except asyncio.TimeoutError:
                logger.debug("Timeout crawling %s", url)
            except Exception as e:
                logger.debug("Error crawling %s: %s", url, e)

            return discovered
197
+
198
+
199
async def seed_from_database():
    """Prime the crawl queue with up to 1000 active URLs from Supabase."""
    try:
        query = (
            get_client()
            .table("websites")
            .select("url")
            .eq("is_active", True)
            .limit(1000)
        )
        result = query.execute()
        rows = result.data or []

        # Queue every not-yet-visited URL at BFS depth 0.
        for row in rows:
            seed_url = row["url"]
            if seed_url not in _visited:
                _crawl_queue.append({"url": seed_url, "depth": 0})

        if rows:
            logger.info("Seeded crawler with %d URLs from database", len(rows))
    except Exception as e:
        logger.error("Failed to seed from database: %s", e)
218
+
219
+
220
async def run_crawler():
    """
    Main BFS crawler loop.

    Continuously pops batches of URLs off the crawl queue, fetches each
    page politely, queues the homepage of every discovered link for
    validation, and pushes the discovered links back onto the queue with
    the parent's depth + 1 so MAX_CRAWL_DEPTH bounds the traversal.
    """
    logger.info("BFS Crawler starting")

    # Wait for initial seeds to be validated
    await asyncio.sleep(30)

    # Seed from database
    await seed_from_database()

    semaphore = asyncio.Semaphore(MAX_GLOBAL_CONCURRENCY)
    connector = aiohttp.TCPConnector(
        limit=MAX_GLOBAL_CONCURRENCY,
        ttl_dns_cache=300,
        force_close=False,
    )

    async with aiohttp.ClientSession(connector=connector) as session:
        while True:
            try:
                if not _crawl_queue:
                    # Re-seed periodically
                    await seed_from_database()
                    if not _crawl_queue:
                        logger.debug("Crawler queue empty, waiting...")
                        await asyncio.sleep(60)
                        continue

                # Process a batch
                batch_size = min(10, len(_crawl_queue))
                tasks = []
                task_depths = []  # parallel to `tasks`: depth of each fetched page

                for _ in range(batch_size):
                    if not _crawl_queue:
                        break

                    item = _crawl_queue.popleft()
                    url = item["url"]
                    depth = item["depth"]

                    if url in _visited:
                        continue
                    _visited.add(url)

                    # Evict old entries from visited cache
                    if len(_visited) > _MAX_VISITED_CACHE:
                        to_remove = list(_visited)[:_MAX_VISITED_CACHE // 2]
                        for v in to_remove:
                            _visited.discard(v)

                    if depth <= MAX_CRAWL_DEPTH:
                        tasks.append(_crawl_page(session, url, depth, semaphore))
                        task_depths.append(depth)

                if tasks:
                    results = await asyncio.gather(*tasks, return_exceptions=True)

                    # BUGFIX: pair each result with its page's depth.  The
                    # original hard-coded `current_depth = 1` for every
                    # discovery, so MAX_CRAWL_DEPTH never actually bounded
                    # the BFS.
                    for depth, result in zip(task_depths, results):
                        if not isinstance(result, list):
                            continue
                        child_depth = depth + 1
                        for discovered_url in result:
                            # Extract homepage for validation
                            parsed = urlparse(discovered_url)
                            homepage = f"https://{parsed.netloc}"
                            await enqueue_url(homepage, source="crawler")

                            # Add to crawl queue for further BFS
                            if (
                                len(_crawl_queue) < MAX_QUEUE_SIZE
                                and discovered_url not in _visited
                                and child_depth <= MAX_CRAWL_DEPTH
                            ):
                                _crawl_queue.append({
                                    "url": discovered_url,
                                    "depth": child_depth,
                                })

                # Small delay between batches
                await asyncio.sleep(0.5)

            except Exception as e:
                logger.error("Crawler loop error: %s", e)
                await asyncio.sleep(10)
backend/workers/ct_log.py ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ RandomWeb β€” Certificate Transparency Log Worker
3
+ Connects to CertStream WebSocket to discover newly registered domains in real-time.
4
+ """
5
+ import asyncio
6
+ import json
7
+ import logging
8
+ from urllib.parse import urlparse
9
+
10
+ import websockets
11
+
12
+ from backend.config import (
13
+ CERTSTREAM_URL,
14
+ CT_LOG_BATCH_SIZE,
15
+ CT_LOG_RECONNECT_DELAY,
16
+ CT_LOG_MAX_RECONNECT_DELAY,
17
+ BLOCKED_TLDS,
18
+ )
19
+ from backend.workers.validator import enqueue_url
20
+ from backend.db import url_exists
21
+
22
+ logger = logging.getLogger("randomweb.ct_log")
23
+
24
+ # ─── Domain Filtering ───────────────────────────────────────
25
+ _seen_domains: set = set()
26
+ _MAX_SEEN_CACHE = 500_000
27
+
28
+
29
def _is_valid_domain(domain: str) -> bool:
    """Return True for domains worth queueing.

    Rejects empty/too-short names, wildcard patterns, bare IPv4
    addresses, blocked TLDs, dot-less names, and names longer than the
    253-character DNS limit.
    """
    if not domain or len(domain) < 4:
        return False

    # Drop a single leading wildcard label; any remaining "*" is invalid.
    domain = domain[2:] if domain.startswith("*.") else domain
    if "*" in domain:
        return False

    # Bare IPv4 address: every dot-separated label is numeric.
    if all(label.isdigit() for label in domain.split(".")):
        return False

    # Blocked TLDs.
    if any(domain.endswith(tld) for tld in BLOCKED_TLDS):
        return False

    # Needs at least one dot and must fit within DNS length limits.
    return "." in domain and len(domain) <= 253
59
+
60
+
61
def _deduplicate(domain: str) -> bool:
    """Record *domain* in the seen-cache; True only the first time seen."""
    if domain in _seen_domains:
        return False

    # When the cache hits its cap, drop roughly half of it.  Set iteration
    # order is arbitrary, so this is only an approximation of FIFO eviction.
    if len(_seen_domains) >= _MAX_SEEN_CACHE:
        for stale in list(_seen_domains)[:_MAX_SEEN_CACHE // 2]:
            _seen_domains.discard(stale)

    _seen_domains.add(domain)
    return True
76
+
77
+
78
async def _process_message(message: dict):
    """Extract domains from one CertStream message and queue the new ones."""
    try:
        if message.get("message_type") != "certificate_update":
            return

        leaf = message.get("data", {}).get("leaf_cert", {})
        for raw in leaf.get("all_domains", []):
            # Drop a leading wildcard label, then canonicalize.
            candidate = raw[2:] if raw.startswith("*.") else raw
            candidate = candidate.lower().strip()

            if not _is_valid_domain(candidate):
                continue
            if not _deduplicate(candidate):
                continue

            await enqueue_url(f"https://{candidate}", source="ct_log")

    except Exception as e:
        logger.debug("Error processing CT message: %s", e)
107
+
108
+
109
async def run_ct_log_worker():
    """
    Main CT log worker loop.

    Connects to the CertStream WebSocket, parses certificate-update
    messages, and queues newly seen domains for validation.
    Auto-reconnects with exponential backoff: the delay doubles on each
    failure up to CT_LOG_MAX_RECONNECT_DELAY, and resets after a
    successful connection.
    """
    logger.info("CT Log worker starting — connecting to %s", CERTSTREAM_URL)
    reconnect_delay = CT_LOG_RECONNECT_DELAY

    while True:
        try:
            async with websockets.connect(
                CERTSTREAM_URL,
                ping_interval=30,
                ping_timeout=10,
                close_timeout=5,
                max_size=2**20,  # 1MB max message size
            ) as ws:
                logger.info("Connected to CertStream")
                reconnect_delay = CT_LOG_RECONNECT_DELAY  # Reset on success

                async for raw_message in ws:
                    try:
                        message = json.loads(raw_message)
                        await _process_message(message)
                    except json.JSONDecodeError:
                        # Malformed frame — skip it, keep the stream alive.
                        continue
                    except Exception as e:
                        logger.debug("Message processing error: %s", e)

        except websockets.exceptions.ConnectionClosed as e:
            logger.warning("CertStream connection closed: %s", e)
        except Exception as e:
            logger.warning("CertStream connection error: %s", e)

        # Exponential backoff reconnect
        logger.info("Reconnecting to CertStream in %ds...", reconnect_delay)
        await asyncio.sleep(reconnect_delay)
        reconnect_delay = min(reconnect_delay * 2, CT_LOG_MAX_RECONNECT_DELAY)
backend/workers/scheduler.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ RandomWeb β€” Re-verification Scheduler
3
+ Rolling yearly re-verification of indexed websites.
4
+ Politely re-checks active URLs and toggles visibility on failure.
5
+ """
6
+ import asyncio
7
+ import logging
8
+ from datetime import datetime, timezone
9
+
10
+ from backend.config import SCHEDULER_INTERVAL_SECONDS, SCHEDULER_BATCH_SIZE
11
+ from backend.db import get_urls_needing_recheck
12
+ from backend.workers.validator import enqueue_url
13
+
14
+ logger = logging.getLogger("randomweb.scheduler")
15
+
16
+
17
async def run_scheduler():
    """
    Background scheduler that continuously checks for URLs due re-verification.

    Every SCHEDULER_INTERVAL_SECONDS it asks the database for up to
    SCHEDULER_BATCH_SIZE URLs whose re-check is due and routes them
    through the validation queue with source="recheck".
    """
    logger.info("Re-verification scheduler started (interval: %ds)", SCHEDULER_INTERVAL_SECONDS)

    # Initial delay to let the system warm up
    await asyncio.sleep(120)

    while True:
        try:
            # NOTE(review): called synchronously on the event loop —
            # presumably a blocking Supabase query; confirm it is fast
            # enough not to stall other workers.
            urls = get_urls_needing_recheck(limit=SCHEDULER_BATCH_SIZE)

            if urls:
                logger.info("Re-verifying %d URLs", len(urls))
                for record in urls:
                    await enqueue_url(record["url"], source="recheck")
                    # Small delay between queuing to avoid flooding
                    await asyncio.sleep(0.1)

                logger.info("Queued %d URLs for re-verification", len(urls))
            else:
                logger.debug("No URLs due for re-verification")

        except Exception as e:
            logger.error("Scheduler error: %s", e)

        # Wait until next check
        await asyncio.sleep(SCHEDULER_INTERVAL_SECONDS)
backend/workers/validator.py ADDED
@@ -0,0 +1,259 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ RandomWeb β€” Polite Async HTTP Validator
3
+ Validates discovered URLs with rate limiting, robots.txt compliance,
4
+ clear user-agent identification, and timeout rules.
5
+ """
6
+ import asyncio
7
+ import logging
8
+ from datetime import datetime, timedelta, timezone
9
+ from typing import Optional
10
+ from urllib.parse import urlparse
11
+
12
+ import aiohttp
13
+ from aiolimiter import AsyncLimiter
14
+ from protego import Protego
15
+
16
+ from backend.config import (
17
+ USER_AGENT,
18
+ REQUEST_TIMEOUT,
19
+ VALIDATION_CONCURRENCY,
20
+ PER_DOMAIN_RATE_LIMIT,
21
+ CRAWL_DELAY_DEFAULT,
22
+ RECHECK_INTERVAL_DAYS,
23
+ )
24
+ from backend.db import get_client, extract_domain
25
+
26
+ logger = logging.getLogger("randomweb.validator")
27
+
28
+ # ─── Shared State ────────────────────────────────────────────
29
+ _validation_queue: asyncio.Queue = asyncio.Queue(maxsize=50_000)
30
+ _robots_cache: dict[str, Optional[Protego]] = {}
31
+ _domain_limiters: dict[str, AsyncLimiter] = {}
32
+ _semaphore: Optional[asyncio.Semaphore] = None
33
+
34
+
35
def get_validation_queue() -> asyncio.Queue:
    """Return the shared module-level validation queue."""
    return _validation_queue
37
+
38
+
39
async def enqueue_url(url: str, source: str = "unknown"):
    """Queue *url* for validation; drop it (with a warning) when full."""
    item = {"url": url, "source": source}
    try:
        _validation_queue.put_nowait(item)
    except asyncio.QueueFull:
        logger.warning("Validation queue full, dropping: %s", url)
45
+
46
+
47
def _get_domain_limiter(domain: str) -> AsyncLimiter:
    """Get or create a per-domain rate limiter."""
    try:
        return _domain_limiters[domain]
    except KeyError:
        limiter = AsyncLimiter(PER_DOMAIN_RATE_LIMIT, 1.0)
        _domain_limiters[domain] = limiter
        return limiter
54
+
55
+
56
async def _fetch_robots_txt(
    session: aiohttp.ClientSession, domain: str
) -> Optional[Protego]:
    """Fetch and parse robots.txt for a domain. Cached.

    Returns a Protego parser on a 200 response; None for missing,
    non-200, or unreachable robots.txt.  Both outcomes are cached for
    the process lifetime, so each domain is fetched at most once.
    NOTE(review): ssl=False disables certificate verification —
    presumably deliberate; confirm.
    """
    if domain in _robots_cache:
        return _robots_cache[domain]

    robots_url = f"https://{domain}/robots.txt"
    try:
        async with session.get(
            robots_url,
            timeout=aiohttp.ClientTimeout(total=REQUEST_TIMEOUT),
            headers={"User-Agent": USER_AGENT},
            allow_redirects=True,
            ssl=False,
        ) as resp:
            if resp.status == 200:
                text = await resp.text()
                parser = Protego.parse(text)
                _robots_cache[domain] = parser
                return parser
    except Exception:
        # Network failure — treat like a missing robots.txt below.
        pass

    # Cache the miss so we don't refetch on every URL of this domain.
    _robots_cache[domain] = None
    return None
82
+
83
+
84
async def _can_fetch(
    session: aiohttp.ClientSession, url: str
) -> tuple[bool, float]:
    """
    Consult robots.txt for *url*.

    Returns (allowed, crawl_delay).  When no robots.txt is available the
    URL is treated as allowed with the default crawl delay; a declared
    crawl-delay overrides the default.
    """
    robots = await _fetch_robots_txt(session, extract_domain(url))

    if robots is None:
        return True, CRAWL_DELAY_DEFAULT

    declared_delay = robots.crawl_delay(USER_AGENT)
    effective_delay = (
        CRAWL_DELAY_DEFAULT if declared_delay is None else declared_delay
    )
    return robots.can_fetch(url, USER_AGENT), effective_delay
103
+
104
+
105
async def validate_url(
    session: aiohttp.ClientSession,
    url: str,
    source: str = "unknown",
) -> Optional[dict]:
    """
    Validate a single URL. Returns a record dict if successful, else None.

    Steps:
      1. Rate-limit per domain and check robots.txt (honouring its
         crawl-delay when declared).
      2. Probe with HEAD; fall back to GET when HEAD raises OR when the
         server rejects the HEAD method (405/501).
      3. Return the website record (None only when robots.txt forbids
         the fetch).

    A final status of exactly 200 marks the site active; active sites
    get a next_check RECHECK_INTERVAL_DAYS in the future.
    """
    domain = extract_domain(url)
    limiter = _get_domain_limiter(domain)

    # Rate limit per domain
    async with limiter:
        # Check robots.txt
        allowed, delay = await _can_fetch(session, url)
        if not allowed:
            logger.debug("Blocked by robots.txt: %s", url)
            return None

        # Respect crawl delay
        if delay > 0:
            await asyncio.sleep(delay)

        now = datetime.now(timezone.utc).isoformat()
        status_code = None

        try:
            # Try HEAD first (lighter)
            async with session.head(
                url,
                timeout=aiohttp.ClientTimeout(total=REQUEST_TIMEOUT),
                headers={"User-Agent": USER_AGENT},
                allow_redirects=True,
                ssl=False,
            ) as resp:
                status_code = resp.status
        except Exception:
            status_code = None

        # BUGFIX: the original fell back to GET only when HEAD *raised*.
        # Many servers answer HEAD with 405 Method Not Allowed (or 501)
        # while serving GET fine, which wrongly marked live sites
        # inactive.  Retry with GET in those cases too.
        if status_code is None or status_code in (405, 501):
            try:
                async with session.get(
                    url,
                    timeout=aiohttp.ClientTimeout(total=REQUEST_TIMEOUT),
                    headers={"User-Agent": USER_AGENT},
                    allow_redirects=True,
                    ssl=False,
                ) as resp:
                    status_code = resp.status
            except Exception as e:
                logger.debug("Validation failed for %s: %s", url, e)
                status_code = None

        is_active = status_code == 200
        # Only active sites are scheduled for a future re-check.
        next_check = (
            (datetime.now(timezone.utc) + timedelta(days=RECHECK_INTERVAL_DAYS)).isoformat()
            if is_active
            else None
        )

        record = {
            "url": url,
            "domain": domain,
            "source": source,
            "status": status_code,
            "is_active": is_active,
            "last_checked": now,
            "next_check": next_check,
        }

        return record
178
+
179
+
180
async def _process_batch(
    session: aiohttp.ClientSession,
    batch: list[dict],
) -> list[dict]:
    """Validate a batch of URLs concurrently.

    Returns the successful result records.  None results (URLs skipped
    by robots.txt) are dropped; task exceptions are logged.
    """
    tasks = [
        validate_url(session, item["url"], item.get("source", "unknown"))
        for item in batch
    ]
    results = await asyncio.gather(*tasks, return_exceptions=True)

    records = []
    for result in results:
        if isinstance(result, Exception):
            logger.error("Validation task error: %s", result)
        elif isinstance(result, dict):
            # isinstance already excludes None (the old extra
            # `result is not None` check was redundant).
            records.append(result)

    return records
199
+
200
+
201
async def run_validator():
    """
    Main validation loop.

    Continuously drains the validation queue, validates URLs in batches
    of up to 50, and bulk-upserts the resulting records into the
    Supabase "websites" table (conflict key: "url").
    """
    # NOTE(review): _semaphore is initialized here but never read in this
    # function — presumably other code consumes the module global; confirm.
    global _semaphore
    _semaphore = asyncio.Semaphore(VALIDATION_CONCURRENCY)

    logger.info("Validation worker started")

    connector = aiohttp.TCPConnector(
        limit=VALIDATION_CONCURRENCY,  # cap total open connections
        ttl_dns_cache=300,             # reuse DNS lookups for 5 minutes
        force_close=False,             # allow keep-alive connection reuse
    )

    async with aiohttp.ClientSession(connector=connector) as session:
        while True:
            try:
                # Collect a batch
                batch = []
                try:
                    # Wait (up to 5s) for at least one item.
                    item = await asyncio.wait_for(
                        _validation_queue.get(), timeout=5.0
                    )
                    batch.append(item)
                except asyncio.TimeoutError:
                    # Queue idle — back off briefly and poll again.
                    await asyncio.sleep(1)
                    continue

                # Drain up to batch size (non-blocking).
                while len(batch) < 50 and not _validation_queue.empty():
                    try:
                        batch.append(_validation_queue.get_nowait())
                    except asyncio.QueueEmpty:
                        break

                if batch:
                    logger.info("Validating batch of %d URLs", len(batch))
                    records = await _process_batch(session, batch)

                    if records:
                        # Bulk upsert to Supabase
                        try:
                            get_client().table("websites").upsert(
                                records, on_conflict="url"
                            ).execute()
                            active = sum(1 for r in records if r["is_active"])
                            logger.info(
                                "Upserted %d records (%d active)",
                                len(records), active,
                            )
                        except Exception as e:
                            logger.error("Bulk upsert failed: %s", e)

            except Exception as e:
                # Never let the worker die; log, pause, and continue.
                logger.error("Validator loop error: %s", e)
                await asyncio.sleep(5)
deploy_hf.sh ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
# Deploy the current repository to a Hugging Face Space via git push.

# Configuration: Update these values
# ==========================================
HF_USERNAME="PinkAlpaca"
SPACE_NAME="RandomWeb"
# ==========================================

# Colors for output
GREEN='\033[0;32m'
BLUE='\033[0;34m'
RED='\033[0;31m'
NC='\033[0m' # No Color

echo -e "${BLUE}==========================================${NC}"
echo -e "${BLUE} Starting Hugging Face Deployment${NC}"
echo -e "${BLUE}==========================================${NC}"

# Check for git
if ! command -v git &> /dev/null; then
    echo -e "${RED}Error: git is not installed.${NC}"
    exit 1
fi

# Ensure local git repo is initialized
if [ ! -d ".git" ]; then
    echo "Initializing local git repository..."
    git init
    git add .
    git commit -m "Initial commit for HF deployment"
fi

# Confirm username is updated
if [ "$HF_USERNAME" == "UPDATE_WITH_YOUR_HF_USERNAME" ]; then
    echo -e "${RED}Error: Please edit this script and set your HF_USERNAME.${NC}"
    exit 1
fi

# Set remote URL
REMOTE_URL="https://huggingface.co/spaces/${HF_USERNAME}/${SPACE_NAME}"
echo -e "Target Space: ${REMOTE_URL}"

# Check if 'huggingface' remote exists, add if not
if ! git remote | grep -q "huggingface"; then
    echo "Adding Hugging Face remote..."
    git remote add huggingface "${REMOTE_URL}"
else
    echo "Hugging Face remote already exists. Updating URL..."
    git remote set-url huggingface "${REMOTE_URL}"
fi

# Stage all files
git add .

# Commit changes (--allow-empty so re-deploys without changes still work)
COMMIT_MSG="Deploy: $(date '+%Y-%m-%d %H:%M:%S')"
git commit -m "$COMMIT_MSG" --allow-empty

# Push to Hugging Face
echo -e "${GREEN}Pushing to Hugging Face...${NC}"
echo "--------------------------------------------------------"
echo "TIP: Use your Hugging Face Access Token as the password."
echo "--------------------------------------------------------"

# BUGFIX: test the push's exit status directly instead of inspecting $?
# afterwards — any command between the push and the check would clobber $?.
if git push huggingface main --force; then
    echo -e "${GREEN}SUCCESS! Your Space is building at: ${REMOTE_URL}${NC}"
    echo "View progress here: ${REMOTE_URL}?logs=build"
else
    echo -e "${RED}Deployment failed. Please check your credentials or network status.${NC}"
fi
frontend/app.js ADDED
@@ -0,0 +1,319 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * RandomWeb β€” Frontend Application Logic
3
+ * Handles random redirect, search, submission, and real-time counter.
4
+ */
5
+
6
+ // ─── Configuration ──────────────────────────────────────────
7
+ const SUPABASE_URL = 'https://oyxgydfmaocqxictnmou.supabase.co';
8
+ const SUPABASE_KEY = 'sb_publishable_9l3BSqU-mIdYLEgZB2Pv2Q_UUZXU385';
9
+ const API_BASE = '/api';
10
+
11
+ // ─── Supabase Client ────────────────────────────────────────
12
+ const supabase = window.supabase.createClient(SUPABASE_URL, SUPABASE_KEY);
13
+
14
+ // ─── DOM Elements ───────────────────────────────────────────
15
+ const randomBtn = document.getElementById('random-btn');
16
+ const btnText = randomBtn.querySelector('.btn-text');
17
+ const searchInput = document.getElementById('search-input');
18
+ const searchResults = document.getElementById('search-results');
19
+ const submitForm = document.getElementById('submit-form');
20
+ const submitInput = document.getElementById('submit-input');
21
+ const submitBtn = document.getElementById('submit-btn');
22
+ const submitFeedback = document.getElementById('submit-feedback');
23
+ const counterValue = document.getElementById('counter-value');
24
+ const headerActiveCount = document.getElementById('header-active-count');
25
+ const toastContainer = document.getElementById('toast-container');
26
+
27
+ // ─── State ──────────────────────────────────────────────────
28
+ let currentCount = 0;
29
+ let targetCount = 0;
30
+ let animationFrame = null;
31
+ let searchDebounceTimer = null;
32
+
33
+ // ─── Utility Functions ──────────────────────────────────────
34
/**
 * Compact display formatting: millions -> "1.23M", thousands -> "4.5K",
 * smaller values -> locale-formatted digits.
 */
function formatNumber(num) {
    const MILLION = 1_000_000;
    const THOUSAND = 1_000;

    if (num >= MILLION) return (num / MILLION).toFixed(2) + 'M';
    if (num >= THOUSAND) return (num / THOUSAND).toFixed(1) + 'K';
    return num.toLocaleString();
}
43
+
44
/** Full (non-abbreviated) locale formatting, e.g. "1,234,567" in en-US. */
function formatNumberFull(num) {
    return num.toLocaleString();
}
47
+
48
/**
 * Show a transient toast message of the given type ('info' by default).
 * The toast stays visible for 4s, then plays a 300ms exit animation
 * before being removed from the DOM.
 */
function showToast(message, type = 'info') {
    const VISIBLE_MS = 4000;
    const EXIT_MS = 300;

    const toast = document.createElement('div');
    toast.className = `toast toast-${type}`;
    toast.textContent = message;
    toastContainer.appendChild(toast);

    setTimeout(() => {
        toast.classList.add('toast-exiting');
        setTimeout(() => toast.remove(), EXIT_MS);
    }, VISIBLE_MS);
}
59
+
60
// ─── Animated Counter ───────────────────────────────────────
/**
 * Smoothly animate both counters (hero value + header badge) from the
 * currently displayed value to `target` with an ease-out cubic curve.
 * Cancels any in-flight animation first; duration scales with the size
 * of the jump, clamped to 300–1500 ms.
 */
function animateCounter(target) {
    targetCount = target;

    if (animationFrame) {
        cancelAnimationFrame(animationFrame);
    }

    const startCount = currentCount;
    const diff = target - startCount;
    const duration = Math.min(1500, Math.max(300, Math.abs(diff) * 10));
    const startTime = performance.now();

    function step(timestamp) {
        const elapsed = timestamp - startTime;
        const progress = Math.min(elapsed / duration, 1);

        // Ease-out cubic
        const eased = 1 - Math.pow(1 - progress, 3);
        currentCount = Math.round(startCount + diff * eased);

        counterValue.textContent = formatNumberFull(currentCount);
        headerActiveCount.textContent = formatNumber(currentCount);

        if (progress < 1) {
            animationFrame = requestAnimationFrame(step);
        } else {
            // Snap exactly to the target on the final frame.
            currentCount = target;
            counterValue.textContent = formatNumberFull(target);
            headerActiveCount.textContent = formatNumber(target);
        }
    }

    animationFrame = requestAnimationFrame(step);
}
95
+
96
// ─── Fetch Stats (Initial) ─────────────────────────────────
/**
 * Fetch the active-website count from the backend /api/stats endpoint
 * and animate the counters toward it.  When the API call fails, fall
 * back to reading the Supabase `stats` table (row id=1) directly.
 */
async function fetchStats() {
    try {
        const response = await fetch(`${API_BASE}/stats`);
        if (response.ok) {
            const data = await response.json();
            animateCounter(data.active_count);
        }
    } catch (err) {
        console.warn('Failed to fetch stats:', err);

        // Fallback: query Supabase directly
        try {
            const { data, error } = await supabase
                .from('stats')
                .select('active_count')
                .eq('id', 1)
                .single();

            if (!error && data) {
                animateCounter(data.active_count);
            }
        } catch (e) {
            console.warn('Supabase fallback also failed:', e);
        }
    }
}
123
+
124
// ─── Realtime Subscription ──────────────────────────────────
/**
 * Subscribe to Supabase realtime UPDATE events on stats row id=1 so the
 * counters animate whenever the backend bumps the active count.
 * NOTE(review): the channel handle is created but never stored or
 * returned, so there is no way to unsubscribe later — confirm intended.
 */
function setupRealtimeSubscription() {
    const channel = supabase
        .channel('stats-realtime')
        .on(
            'postgres_changes',
            {
                event: 'UPDATE',
                schema: 'public',
                table: 'stats',
                filter: 'id=eq.1',
            },
            (payload) => {
                const newCount = payload.new.active_count;
                // Only re-animate when the value actually changed.
                if (newCount !== undefined && newCount !== targetCount) {
                    animateCounter(newCount);
                }
            }
        )
        .subscribe((status) => {
            if (status === 'SUBSCRIBED') {
                console.log('Realtime subscription active');
            }
        });
}
149
+
150
+ // Also poll every 30 seconds as a fallback
151
+ setInterval(fetchStats, 30000);
152
+
153
// ─── Random Button ──────────────────────────────────────────
// Click handler: ask the backend for a random active site and open it
// in a new tab.  Falls back to a direct Supabase RPC when the API call
// fails; shows a toast when nothing is available or everything errors.
randomBtn.addEventListener('click', async () => {
    // Ignore clicks while a request is already in flight.
    if (randomBtn.classList.contains('loading')) return;

    randomBtn.classList.add('loading');
    btnText.textContent = 'Finding a website...';

    try {
        const response = await fetch(`${API_BASE}/random`);

        if (response.ok) {
            const data = await response.json();
            if (data.url) {
                btnText.textContent = 'Redirecting...';

                // Small delay for visual feedback
                setTimeout(() => {
                    window.open(data.url, '_blank', 'noopener,noreferrer');
                    randomBtn.classList.remove('loading');
                    btnText.textContent = 'Take Me Somewhere Random';
                }, 500);
                return;
            }
        }

        // API failed, try direct Supabase query
        const { data: websites, error } = await supabase
            .rpc('get_random_active_website');

        if (!error && websites && websites.length > 0) {
            btnText.textContent = 'Redirecting...';
            setTimeout(() => {
                window.open(websites[0].url, '_blank', 'noopener,noreferrer');
                randomBtn.classList.remove('loading');
                btnText.textContent = 'Take Me Somewhere Random';
            }, 500);
            return;
        }

        showToast('No active websites found yet. The system is still indexing.', 'info');
    } catch (err) {
        console.error('Random fetch error:', err);
        showToast('Failed to get a random website. Please try again.', 'error');
    }

    // Reset button state for the non-redirect paths.
    randomBtn.classList.remove('loading');
    btnText.textContent = 'Take Me Somewhere Random';
});
201
+
202
+ // ─── Search ─────────────────────────────────────────────────
203
+ searchInput.addEventListener('input', (e) => {
204
+ const query = e.target.value.trim();
205
+
206
+ clearTimeout(searchDebounceTimer);
207
+
208
+ if (query.length < 2) {
209
+ searchResults.innerHTML = '';
210
+ return;
211
+ }
212
+
213
+ searchDebounceTimer = setTimeout(() => performSearch(query), 300);
214
+ });
215
+
216
+ async function performSearch(query) {
217
+ try {
218
+ const response = await fetch(
219
+ `${API_BASE}/search?q=${encodeURIComponent(query)}&limit=15`
220
+ );
221
+
222
+ if (response.ok) {
223
+ const results = await response.json();
224
+ renderSearchResults(results);
225
+ return;
226
+ }
227
+
228
+ // Fallback to direct Supabase
229
+ const { data, error } = await supabase
230
+ .from('websites')
231
+ .select('url, domain, is_active')
232
+ .or(`url.ilike.%${query}%,domain.ilike.%${query}%`)
233
+ .eq('is_active', true)
234
+ .limit(15);
235
+
236
+ if (!error && data) {
237
+ renderSearchResults(data);
238
+ }
239
+ } catch (err) {
240
+ console.error('Search error:', err);
241
+ }
242
+ }
243
+
244
+ function renderSearchResults(results) {
245
+ if (!results || results.length === 0) {
246
+ searchResults.innerHTML = `
247
+ <div class="search-empty">
248
+ No matching websites found. Try a different search term.
249
+ </div>
250
+ `;
251
+ return;
252
+ }
253
+
254
+ searchResults.innerHTML = results
255
+ .map(
256
+ (r) => `
257
+ <a href="${escapeHtml(r.url)}" target="_blank" rel="noopener noreferrer"
258
+ class="search-result-item">
259
+ <div>
260
+ <div class="result-url">${escapeHtml(r.url)}</div>
261
+ <div class="result-domain">${escapeHtml(r.domain)}</div>
262
+ </div>
263
+ <span class="result-arrow">β†’</span>
264
+ </a>
265
+ `
266
+ )
267
+ .join('');
268
+ }
269
+
270
+ function escapeHtml(text) {
271
+ const div = document.createElement('div');
272
+ div.textContent = text;
273
+ return div.innerHTML;
274
+ }
275
+
276
+ // ─── Submit Form ────────────────────────────────────────────
277
+ submitForm.addEventListener('submit', async (e) => {
278
+ e.preventDefault();
279
+
280
+ const url = submitInput.value.trim();
281
+ if (!url) return;
282
+
283
+ submitBtn.disabled = true;
284
+ submitBtn.textContent = 'Submitting...';
285
+ submitFeedback.className = 'submit-feedback';
286
+ submitFeedback.style.display = 'none';
287
+
288
+ try {
289
+ const response = await fetch(`${API_BASE}/submit`, {
290
+ method: 'POST',
291
+ headers: { 'Content-Type': 'application/json' },
292
+ body: JSON.stringify({ url }),
293
+ });
294
+
295
+ const data = await response.json();
296
+
297
+ if (response.ok) {
298
+ submitFeedback.className = 'submit-feedback success';
299
+ submitFeedback.textContent = data.message || 'URL submitted successfully!';
300
+ submitInput.value = '';
301
+ } else {
302
+ submitFeedback.className = 'submit-feedback error';
303
+ submitFeedback.textContent =
304
+ data.detail || 'Failed to submit URL. Please check the format.';
305
+ }
306
+ } catch (err) {
307
+ submitFeedback.className = 'submit-feedback error';
308
+ submitFeedback.textContent = 'Network error. Please try again.';
309
+ }
310
+
311
+ submitBtn.disabled = false;
312
+ submitBtn.textContent = 'Submit URL';
313
+ });
314
+
315
+ // ─── Initialize ─────────────────────────────────────────────
316
+ document.addEventListener('DOMContentLoaded', () => {
317
+ fetchStats();
318
+ setupRealtimeSubscription();
319
+ });
frontend/index.html ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>RandomWeb β€” Discover Random Websites from Across the Internet</title>
7
+ <meta name="description" content="Explore the web randomly. RandomWeb discovers, indexes, and validates websites from across the entire internet. Click and go anywhere.">
8
+ <meta name="theme-color" content="#0a0e1a">
9
+ <meta property="og:title" content="RandomWeb β€” Discover Random Websites">
10
+ <meta property="og:description" content="One click. One random website. Explore the entire internet.">
11
+ <meta property="og:type" content="website">
12
+ <link rel="icon" type="image/svg+xml" href="data:image/svg+xml,<svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 100 100'><text y='.9em' font-size='90'>🌐</text></svg>">
13
+ <link rel="stylesheet" href="styles.css">
14
+ </head>
15
+ <body>
16
+
17
+ <!-- Animated background -->
18
+ <div class="bg-grid"></div>
19
+ <div class="orb orb-1"></div>
20
+ <div class="orb orb-2"></div>
21
+ <div class="orb orb-3"></div>
22
+
23
+ <div class="app">
24
+
25
+ <!-- ─── Header ──────────────────────────────── -->
26
+ <header class="header fade-in">
27
+ <div class="container">
28
+ <a href="/" class="logo" id="logo-link">
29
+ <div class="logo-icon">🌐</div>
30
+ <span class="logo-text">RandomWeb</span>
31
+ </a>
32
+ <div class="header-stats">
33
+ <span class="pulse-dot"></span>
34
+ <span id="header-active-count">β€”</span> sites indexed
35
+ </div>
36
+ </div>
37
+ </header>
38
+
39
+ <!-- ─── Main Content ────────────────────────── -->
40
+ <main>
41
+
42
+ <!-- Hero Section -->
43
+ <section class="hero">
44
+ <div class="container">
45
+ <div class="hero-badge slide-up">
46
+ <span class="dot"></span>
47
+ <span>Live β€” Indexing the web in real-time</span>
48
+ </div>
49
+
50
+ <h1 class="slide-up slide-up-delay-1">
51
+ Discover the<br>
52
+ <span class="gradient-text">Entire Web</span>
53
+ </h1>
54
+
55
+ <p class="slide-up slide-up-delay-2">
56
+ One click takes you to a random website from our ever-growing index.
57
+ Powered by Certificate Transparency logs, Common Crawl, and recursive crawling.
58
+ </p>
59
+
60
+ <div class="random-btn-wrapper slide-up slide-up-delay-3">
61
+ <button class="random-btn" id="random-btn" type="button">
62
+ <span class="btn-icon">🎲</span>
63
+ <span class="btn-text">Take Me Somewhere Random</span>
64
+ </button>
65
+ </div>
66
+ </div>
67
+ </section>
68
+
69
+ <!-- Search Section -->
70
+ <section class="search-section slide-up slide-up-delay-3">
71
+ <div class="container">
72
+ <div class="glass-card" style="padding: var(--space-xl);">
73
+ <h2>πŸ” Search the Index</h2>
74
+ <div class="search-box">
75
+ <span class="search-icon">βŒ•</span>
76
+ <input
77
+ type="text"
78
+ id="search-input"
79
+ placeholder="Search for websites... (e.g., github.com, news)"
80
+ autocomplete="off"
81
+ spellcheck="false"
82
+ >
83
+ </div>
84
+ <div class="search-results" id="search-results"></div>
85
+ </div>
86
+ </div>
87
+ </section>
88
+
89
+ <!-- Submit Section -->
90
+ <section class="submit-section slide-up slide-up-delay-4">
91
+ <div class="container">
92
+ <div class="glass-card" style="padding: var(--space-xl);">
93
+ <h2>βž• Submit a Website</h2>
94
+ <p class="subtitle">
95
+ Know a website that's not in our index? Submit it and we'll validate and add it.
96
+ </p>
97
+ <form class="submit-form" id="submit-form">
98
+ <input
99
+ type="text"
100
+ id="submit-input"
101
+ placeholder="Enter a URL (e.g., https://example.com)"
102
+ autocomplete="off"
103
+ spellcheck="false"
104
+ required
105
+ >
106
+ <button type="submit" class="submit-btn" id="submit-btn">
107
+ Submit URL
108
+ </button>
109
+ </form>
110
+ <div class="submit-feedback" id="submit-feedback"></div>
111
+ </div>
112
+ </div>
113
+ </section>
114
+
115
+ </main>
116
+
117
+ <!-- ─── Footer ──────────────────────────────── -->
118
+ <footer class="footer">
119
+ <div class="container">
120
+
121
+ <div class="live-counter">
122
+ <div class="counter-label">
123
+ <span class="live-dot"></span>
124
+ Active Websites Indexed
125
+ </div>
126
+ <div class="counter-value" id="counter-value">0</div>
127
+ <div class="counter-subtext">
128
+ and growing every second
129
+ </div>
130
+ </div>
131
+
132
+ <div class="footer-links">
133
+ <a href="https://github.com/guestcoder0906/RandomWeb" target="_blank" rel="noopener">GitHub</a>
134
+ <span class="divider"></span>
135
+ <span style="color: var(--text-muted);">Built with 🌐 by RandomWeb</span>
136
+ </div>
137
+
138
+ </div>
139
+ </footer>
140
+
141
+ </div>
142
+
143
+ <!-- Toast container -->
144
+ <div class="toast-container" id="toast-container"></div>
145
+
146
+ <!-- Supabase Client (CDN) -->
147
+ <script src="https://cdn.jsdelivr.net/npm/@supabase/supabase-js@2/dist/umd/supabase.min.js"></script>
148
+
149
+ <!-- App Logic -->
150
+ <script src="app.js"></script>
151
+
152
+ </body>
153
+ </html>
frontend/styles.css ADDED
@@ -0,0 +1,801 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* ============================================================
2
+ RandomWeb β€” Premium Dark Theme
3
+ Design system: Midnight navy base, cyan↔violet gradients,
4
+ glassmorphism panels, Inter + Outfit fonts, micro-animations
5
+ ============================================================ */
6
+
7
+ /* ─── Google Fonts ─────────────────────────────────────────── */
8
+ @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&family=Outfit:wght@400;500;600;700;800;900&family=JetBrains+Mono:wght@400;500&display=swap');
9
+
10
+ /* ─── CSS Custom Properties ────────────────────────────────── */
11
+ :root {
12
+ /* Core palette */
13
+ --bg-primary: #0a0e1a;
14
+ --bg-secondary: #111827;
15
+ --bg-card: rgba(17, 24, 39, 0.7);
16
+ --bg-glass: rgba(255, 255, 255, 0.03);
17
+ --bg-glass-hover: rgba(255, 255, 255, 0.06);
18
+
19
+ /* Accent gradients */
20
+ --gradient-primary: linear-gradient(135deg, #06b6d4, #8b5cf6);
21
+ --gradient-secondary: linear-gradient(135deg, #8b5cf6, #ec4899);
22
+ --gradient-glow: linear-gradient(135deg, rgba(6, 182, 212, 0.15), rgba(139, 92, 246, 0.15));
23
+ --gradient-hero: linear-gradient(180deg, #0a0e1a 0%, #111827 50%, #0a0e1a 100%);
24
+
25
+ /* Text */
26
+ --text-primary: #f1f5f9;
27
+ --text-secondary: #94a3b8;
28
+ --text-muted: #64748b;
29
+ --text-accent: #06b6d4;
30
+
31
+ /* Borders */
32
+ --border-subtle: rgba(255, 255, 255, 0.06);
33
+ --border-accent: rgba(6, 182, 212, 0.3);
34
+
35
+ /* Shadows */
36
+ --shadow-lg: 0 25px 50px -12px rgba(0, 0, 0, 0.5);
37
+ --shadow-glow-cyan: 0 0 40px rgba(6, 182, 212, 0.15);
38
+ --shadow-glow-violet: 0 0 40px rgba(139, 92, 246, 0.15);
39
+ --shadow-button: 0 0 30px rgba(6, 182, 212, 0.3), 0 0 60px rgba(139, 92, 246, 0.1);
40
+
41
+ /* Typography */
42
+ --font-body: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif;
43
+ --font-heading: 'Outfit', -apple-system, BlinkMacSystemFont, sans-serif;
44
+ --font-mono: 'JetBrains Mono', 'Fira Code', monospace;
45
+
46
+ /* Spacing */
47
+ --space-xs: 0.25rem;
48
+ --space-sm: 0.5rem;
49
+ --space-md: 1rem;
50
+ --space-lg: 1.5rem;
51
+ --space-xl: 2rem;
52
+ --space-2xl: 3rem;
53
+ --space-3xl: 4rem;
54
+ --space-4xl: 6rem;
55
+
56
+ /* Radius */
57
+ --radius-sm: 0.5rem;
58
+ --radius-md: 0.75rem;
59
+ --radius-lg: 1rem;
60
+ --radius-xl: 1.5rem;
61
+ --radius-full: 9999px;
62
+
63
+ /* Transitions */
64
+ --transition-fast: 150ms cubic-bezier(0.4, 0, 0.2, 1);
65
+ --transition-base: 250ms cubic-bezier(0.4, 0, 0.2, 1);
66
+ --transition-slow: 400ms cubic-bezier(0.4, 0, 0.2, 1);
67
+ --transition-spring: 500ms cubic-bezier(0.34, 1.56, 0.64, 1);
68
+ }
69
+
70
+ /* ─── Reset & Base ─────────────────────────────────────────── */
71
+ *,
72
+ *::before,
73
+ *::after {
74
+ margin: 0;
75
+ padding: 0;
76
+ box-sizing: border-box;
77
+ }
78
+
79
+ html {
80
+ scroll-behavior: smooth;
81
+ -webkit-font-smoothing: antialiased;
82
+ -moz-osx-font-smoothing: grayscale;
83
+ }
84
+
85
+ body {
86
+ font-family: var(--font-body);
87
+ background: var(--bg-primary);
88
+ color: var(--text-primary);
89
+ min-height: 100vh;
90
+ overflow-x: hidden;
91
+ line-height: 1.6;
92
+ }
93
+
94
+ /* ─── Animated Background ──────────────────────────────────── */
95
+ .bg-grid {
96
+ position: fixed;
97
+ inset: 0;
98
+ z-index: 0;
99
+ background-image:
100
+ radial-gradient(ellipse at 20% 50%, rgba(6, 182, 212, 0.08) 0%, transparent 50%),
101
+ radial-gradient(ellipse at 80% 20%, rgba(139, 92, 246, 0.08) 0%, transparent 50%),
102
+ radial-gradient(ellipse at 50% 80%, rgba(236, 72, 153, 0.05) 0%, transparent 50%);
103
+ pointer-events: none;
104
+ }
105
+
106
+ .bg-grid::before {
107
+ content: '';
108
+ position: absolute;
109
+ inset: 0;
110
+ background-image:
111
+ linear-gradient(rgba(255, 255, 255, 0.015) 1px, transparent 1px),
112
+ linear-gradient(90deg, rgba(255, 255, 255, 0.015) 1px, transparent 1px);
113
+ background-size: 60px 60px;
114
+ mask-image: radial-gradient(ellipse at center, black 30%, transparent 70%);
115
+ }
116
+
117
+ /* Floating orbs */
118
+ .orb {
119
+ position: fixed;
120
+ border-radius: 50%;
121
+ filter: blur(80px);
122
+ opacity: 0.4;
123
+ pointer-events: none;
124
+ z-index: 0;
125
+ animation: orbFloat 20s ease-in-out infinite;
126
+ }
127
+
128
+ .orb-1 {
129
+ width: 400px;
130
+ height: 400px;
131
+ background: rgba(6, 182, 212, 0.12);
132
+ top: -100px;
133
+ left: -100px;
134
+ animation-delay: 0s;
135
+ }
136
+
137
+ .orb-2 {
138
+ width: 350px;
139
+ height: 350px;
140
+ background: rgba(139, 92, 246, 0.12);
141
+ bottom: -100px;
142
+ right: -100px;
143
+ animation-delay: -7s;
144
+ }
145
+
146
+ .orb-3 {
147
+ width: 300px;
148
+ height: 300px;
149
+ background: rgba(236, 72, 153, 0.08);
150
+ top: 50%;
151
+ left: 50%;
152
+ transform: translate(-50%, -50%);
153
+ animation-delay: -14s;
154
+ }
155
+
156
+ @keyframes orbFloat {
157
+ 0%, 100% { transform: translate(0, 0) scale(1); }
158
+ 25% { transform: translate(30px, -40px) scale(1.05); }
159
+ 50% { transform: translate(-20px, 20px) scale(0.95); }
160
+ 75% { transform: translate(40px, 30px) scale(1.02); }
161
+ }
162
+
163
+ /* ─── Layout ──────────���────────────────────────────────────── */
164
+ .app {
165
+ position: relative;
166
+ z-index: 1;
167
+ min-height: 100vh;
168
+ display: flex;
169
+ flex-direction: column;
170
+ }
171
+
172
+ .container {
173
+ width: 100%;
174
+ max-width: 800px;
175
+ margin: 0 auto;
176
+ padding: 0 var(--space-lg);
177
+ }
178
+
179
+ /* ─── Header ───────────────────────────────────────────────── */
180
+ .header {
181
+ padding: var(--space-lg) 0;
182
+ border-bottom: 1px solid var(--border-subtle);
183
+ backdrop-filter: blur(20px);
184
+ -webkit-backdrop-filter: blur(20px);
185
+ background: rgba(10, 14, 26, 0.8);
186
+ position: sticky;
187
+ top: 0;
188
+ z-index: 100;
189
+ }
190
+
191
+ .header .container {
192
+ display: flex;
193
+ align-items: center;
194
+ justify-content: space-between;
195
+ }
196
+
197
+ .logo {
198
+ display: flex;
199
+ align-items: center;
200
+ gap: var(--space-sm);
201
+ text-decoration: none;
202
+ }
203
+
204
+ .logo-icon {
205
+ width: 36px;
206
+ height: 36px;
207
+ border-radius: var(--radius-md);
208
+ background: var(--gradient-primary);
209
+ display: flex;
210
+ align-items: center;
211
+ justify-content: center;
212
+ font-size: 1.1rem;
213
+ box-shadow: var(--shadow-glow-cyan);
214
+ }
215
+
216
+ .logo-text {
217
+ font-family: var(--font-heading);
218
+ font-weight: 700;
219
+ font-size: 1.25rem;
220
+ background: var(--gradient-primary);
221
+ -webkit-background-clip: text;
222
+ -webkit-text-fill-color: transparent;
223
+ background-clip: text;
224
+ }
225
+
226
+ .header-stats {
227
+ display: flex;
228
+ align-items: center;
229
+ gap: var(--space-sm);
230
+ font-size: 0.8rem;
231
+ color: var(--text-muted);
232
+ font-family: var(--font-mono);
233
+ }
234
+
235
+ .header-stats .pulse-dot {
236
+ width: 8px;
237
+ height: 8px;
238
+ background: #22c55e;
239
+ border-radius: 50%;
240
+ animation: pulse 2s ease-in-out infinite;
241
+ }
242
+
243
+ @keyframes pulse {
244
+ 0%, 100% { opacity: 1; box-shadow: 0 0 0 0 rgba(34, 197, 94, 0.4); }
245
+ 50% { opacity: 0.7; box-shadow: 0 0 0 6px rgba(34, 197, 94, 0); }
246
+ }
247
+
248
+ /* ─── Hero Section ─────────────────────────────────────────── */
249
+ .hero {
250
+ padding: var(--space-4xl) 0 var(--space-3xl);
251
+ text-align: center;
252
+ }
253
+
254
+ .hero-badge {
255
+ display: inline-flex;
256
+ align-items: center;
257
+ gap: var(--space-sm);
258
+ padding: var(--space-xs) var(--space-md);
259
+ background: var(--bg-glass);
260
+ border: 1px solid var(--border-subtle);
261
+ border-radius: var(--radius-full);
262
+ font-size: 0.8rem;
263
+ color: var(--text-secondary);
264
+ margin-bottom: var(--space-xl);
265
+ backdrop-filter: blur(10px);
266
+ }
267
+
268
+ .hero-badge .dot {
269
+ width: 6px;
270
+ height: 6px;
271
+ background: #22c55e;
272
+ border-radius: 50%;
273
+ animation: pulse 2s ease-in-out infinite;
274
+ }
275
+
276
+ .hero h1 {
277
+ font-family: var(--font-heading);
278
+ font-weight: 900;
279
+ font-size: clamp(2.5rem, 6vw, 4rem);
280
+ line-height: 1.1;
281
+ margin-bottom: var(--space-lg);
282
+ letter-spacing: -0.03em;
283
+ }
284
+
285
+ .hero h1 .gradient-text {
286
+ background: var(--gradient-primary);
287
+ -webkit-background-clip: text;
288
+ -webkit-text-fill-color: transparent;
289
+ background-clip: text;
290
+ }
291
+
292
+ .hero p {
293
+ font-size: 1.1rem;
294
+ color: var(--text-secondary);
295
+ max-width: 500px;
296
+ margin: 0 auto var(--space-2xl);
297
+ line-height: 1.7;
298
+ }
299
+
300
+ /* ─── Random Button ────────────────────────────────────────── */
301
+ .random-btn-wrapper {
302
+ display: flex;
303
+ justify-content: center;
304
+ margin-bottom: var(--space-3xl);
305
+ }
306
+
307
+ .random-btn {
308
+ position: relative;
309
+ display: inline-flex;
310
+ align-items: center;
311
+ gap: var(--space-md);
312
+ padding: 1.15rem 2.5rem;
313
+ background: var(--gradient-primary);
314
+ color: white;
315
+ font-family: var(--font-heading);
316
+ font-weight: 700;
317
+ font-size: 1.15rem;
318
+ border: none;
319
+ border-radius: var(--radius-xl);
320
+ cursor: pointer;
321
+ transition: all var(--transition-base);
322
+ box-shadow: var(--shadow-button);
323
+ text-decoration: none;
324
+ letter-spacing: 0.01em;
325
+ overflow: hidden;
326
+ }
327
+
328
+ .random-btn::before {
329
+ content: '';
330
+ position: absolute;
331
+ inset: 0;
332
+ background: linear-gradient(135deg, rgba(255,255,255,0.15), transparent);
333
+ opacity: 0;
334
+ transition: opacity var(--transition-base);
335
+ }
336
+
337
+ .random-btn:hover {
338
+ transform: translateY(-3px) scale(1.03);
339
+ box-shadow: 0 0 50px rgba(6, 182, 212, 0.4), 0 0 80px rgba(139, 92, 246, 0.2);
340
+ }
341
+
342
+ .random-btn:hover::before {
343
+ opacity: 1;
344
+ }
345
+
346
+ .random-btn:active {
347
+ transform: translateY(-1px) scale(0.98);
348
+ }
349
+
350
+ .random-btn .btn-icon {
351
+ font-size: 1.4rem;
352
+ animation: spinSlow 8s linear infinite;
353
+ }
354
+
355
+ @keyframes spinSlow {
356
+ from { transform: rotate(0deg); }
357
+ to { transform: rotate(360deg); }
358
+ }
359
+
360
+ .random-btn:hover .btn-icon {
361
+ animation-duration: 1s;
362
+ }
363
+
364
+ .random-btn.loading .btn-icon {
365
+ animation: spinSlow 0.6s linear infinite;
366
+ }
367
+
368
+ /* ─── Glass Card ───────────────────────────────────────────── */
369
+ .glass-card {
370
+ background: var(--bg-card);
371
+ border: 1px solid var(--border-subtle);
372
+ border-radius: var(--radius-lg);
373
+ backdrop-filter: blur(20px);
374
+ -webkit-backdrop-filter: blur(20px);
375
+ transition: all var(--transition-base);
376
+ }
377
+
378
+ .glass-card:hover {
379
+ border-color: var(--border-accent);
380
+ background: var(--bg-glass-hover);
381
+ }
382
+
383
+ /* ─── Search Section ───────────────────────────────────────── */
384
+ .search-section {
385
+ margin-bottom: var(--space-2xl);
386
+ }
387
+
388
+ .search-section h2 {
389
+ font-family: var(--font-heading);
390
+ font-weight: 700;
391
+ font-size: 1.3rem;
392
+ margin-bottom: var(--space-md);
393
+ display: flex;
394
+ align-items: center;
395
+ gap: var(--space-sm);
396
+ }
397
+
398
+ .search-box {
399
+ position: relative;
400
+ }
401
+
402
+ .search-box input {
403
+ width: 100%;
404
+ padding: 1rem 1rem 1rem 3rem;
405
+ background: var(--bg-glass);
406
+ border: 1px solid var(--border-subtle);
407
+ border-radius: var(--radius-lg);
408
+ color: var(--text-primary);
409
+ font-family: var(--font-body);
410
+ font-size: 1rem;
411
+ outline: none;
412
+ transition: all var(--transition-base);
413
+ }
414
+
415
+ .search-box input:focus {
416
+ border-color: var(--border-accent);
417
+ box-shadow: 0 0 0 3px rgba(6, 182, 212, 0.1);
418
+ background: var(--bg-glass-hover);
419
+ }
420
+
421
+ .search-box input::placeholder {
422
+ color: var(--text-muted);
423
+ }
424
+
425
+ .search-box .search-icon {
426
+ position: absolute;
427
+ left: 1rem;
428
+ top: 50%;
429
+ transform: translateY(-50%);
430
+ font-size: 1.1rem;
431
+ color: var(--text-muted);
432
+ pointer-events: none;
433
+ }
434
+
435
+ .search-results {
436
+ margin-top: var(--space-md);
437
+ display: flex;
438
+ flex-direction: column;
439
+ gap: var(--space-sm);
440
+ max-height: 400px;
441
+ overflow-y: auto;
442
+ }
443
+
444
+ .search-results::-webkit-scrollbar {
445
+ width: 6px;
446
+ }
447
+
448
+ .search-results::-webkit-scrollbar-track {
449
+ background: transparent;
450
+ }
451
+
452
+ .search-results::-webkit-scrollbar-thumb {
453
+ background: var(--border-subtle);
454
+ border-radius: 3px;
455
+ }
456
+
457
+ .search-result-item {
458
+ display: flex;
459
+ align-items: center;
460
+ justify-content: space-between;
461
+ padding: var(--space-md) var(--space-lg);
462
+ background: var(--bg-glass);
463
+ border: 1px solid var(--border-subtle);
464
+ border-radius: var(--radius-md);
465
+ transition: all var(--transition-fast);
466
+ cursor: pointer;
467
+ text-decoration: none;
468
+ color: var(--text-primary);
469
+ }
470
+
471
+ .search-result-item:hover {
472
+ border-color: var(--border-accent);
473
+ background: var(--bg-glass-hover);
474
+ transform: translateX(4px);
475
+ }
476
+
477
+ .search-result-item .result-url {
478
+ font-family: var(--font-mono);
479
+ font-size: 0.9rem;
480
+ color: var(--text-accent);
481
+ word-break: break-all;
482
+ }
483
+
484
+ .search-result-item .result-domain {
485
+ font-size: 0.8rem;
486
+ color: var(--text-muted);
487
+ margin-top: 2px;
488
+ }
489
+
490
+ .search-result-item .result-arrow {
491
+ font-size: 1.1rem;
492
+ color: var(--text-muted);
493
+ transition: all var(--transition-fast);
494
+ flex-shrink: 0;
495
+ margin-left: var(--space-md);
496
+ }
497
+
498
+ .search-result-item:hover .result-arrow {
499
+ color: var(--text-accent);
500
+ transform: translateX(4px);
501
+ }
502
+
503
+ .search-empty {
504
+ text-align: center;
505
+ padding: var(--space-xl);
506
+ color: var(--text-muted);
507
+ font-size: 0.9rem;
508
+ }
509
+
510
+ /* ─── Submit Section ───────────────────────────────────────── */
511
+ .submit-section {
512
+ margin-bottom: var(--space-3xl);
513
+ }
514
+
515
+ .submit-section h2 {
516
+ font-family: var(--font-heading);
517
+ font-weight: 700;
518
+ font-size: 1.3rem;
519
+ margin-bottom: var(--space-sm);
520
+ display: flex;
521
+ align-items: center;
522
+ gap: var(--space-sm);
523
+ }
524
+
525
+ .submit-section .subtitle {
526
+ color: var(--text-secondary);
527
+ font-size: 0.9rem;
528
+ margin-bottom: var(--space-lg);
529
+ }
530
+
531
+ .submit-form {
532
+ display: flex;
533
+ gap: var(--space-sm);
534
+ }
535
+
536
+ .submit-form input {
537
+ flex: 1;
538
+ padding: 0.85rem 1rem;
539
+ background: var(--bg-glass);
540
+ border: 1px solid var(--border-subtle);
541
+ border-radius: var(--radius-md);
542
+ color: var(--text-primary);
543
+ font-family: var(--font-body);
544
+ font-size: 0.95rem;
545
+ outline: none;
546
+ transition: all var(--transition-base);
547
+ }
548
+
549
+ .submit-form input:focus {
550
+ border-color: var(--border-accent);
551
+ box-shadow: 0 0 0 3px rgba(6, 182, 212, 0.1);
552
+ }
553
+
554
+ .submit-form input::placeholder {
555
+ color: var(--text-muted);
556
+ }
557
+
558
+ .submit-btn {
559
+ padding: 0.85rem 1.5rem;
560
+ background: var(--gradient-primary);
561
+ color: white;
562
+ font-family: var(--font-heading);
563
+ font-weight: 600;
564
+ font-size: 0.9rem;
565
+ border: none;
566
+ border-radius: var(--radius-md);
567
+ cursor: pointer;
568
+ transition: all var(--transition-base);
569
+ white-space: nowrap;
570
+ }
571
+
572
+ .submit-btn:hover {
573
+ transform: translateY(-2px);
574
+ box-shadow: 0 0 20px rgba(6, 182, 212, 0.3);
575
+ }
576
+
577
+ .submit-btn:active {
578
+ transform: translateY(0);
579
+ }
580
+
581
+ .submit-btn:disabled {
582
+ opacity: 0.5;
583
+ cursor: not-allowed;
584
+ transform: none;
585
+ }
586
+
587
+ .submit-feedback {
588
+ margin-top: var(--space-md);
589
+ padding: var(--space-md) var(--space-lg);
590
+ border-radius: var(--radius-md);
591
+ font-size: 0.9rem;
592
+ display: none;
593
+ animation: fadeSlideUp 0.3s ease-out;
594
+ }
595
+
596
+ .submit-feedback.success {
597
+ display: block;
598
+ background: rgba(34, 197, 94, 0.1);
599
+ border: 1px solid rgba(34, 197, 94, 0.2);
600
+ color: #86efac;
601
+ }
602
+
603
+ .submit-feedback.error {
604
+ display: block;
605
+ background: rgba(239, 68, 68, 0.1);
606
+ border: 1px solid rgba(239, 68, 68, 0.2);
607
+ color: #fca5a5;
608
+ }
609
+
610
+ @keyframes fadeSlideUp {
611
+ from { opacity: 0; transform: translateY(8px); }
612
+ to { opacity: 1; transform: translateY(0); }
613
+ }
614
+
615
+ /* ─── Footer ───────────────────────────────────────────────── */
616
+ .footer {
617
+ margin-top: auto;
618
+ border-top: 1px solid var(--border-subtle);
619
+ padding: var(--space-xl) 0;
620
+ background: rgba(10, 14, 26, 0.9);
621
+ backdrop-filter: blur(20px);
622
+ }
623
+
624
+ .footer .container {
625
+ display: flex;
626
+ flex-direction: column;
627
+ align-items: center;
628
+ gap: var(--space-lg);
629
+ }
630
+
631
+ .live-counter {
632
+ text-align: center;
633
+ }
634
+
635
+ .live-counter .counter-label {
636
+ font-size: 0.75rem;
637
+ color: var(--text-muted);
638
+ text-transform: uppercase;
639
+ letter-spacing: 0.15em;
640
+ margin-bottom: var(--space-sm);
641
+ display: flex;
642
+ align-items: center;
643
+ justify-content: center;
644
+ gap: var(--space-sm);
645
+ }
646
+
647
+ .live-counter .counter-label .live-dot {
648
+ width: 8px;
649
+ height: 8px;
650
+ background: #22c55e;
651
+ border-radius: 50%;
652
+ animation: pulse 2s ease-in-out infinite;
653
+ display: inline-block;
654
+ }
655
+
656
+ .live-counter .counter-value {
657
+ font-family: var(--font-heading);
658
+ font-weight: 900;
659
+ font-size: clamp(2rem, 5vw, 3rem);
660
+ background: var(--gradient-primary);
661
+ -webkit-background-clip: text;
662
+ -webkit-text-fill-color: transparent;
663
+ background-clip: text;
664
+ line-height: 1.2;
665
+ transition: all var(--transition-base);
666
+ }
667
+
668
+ .live-counter .counter-subtext {
669
+ font-size: 0.8rem;
670
+ color: var(--text-secondary);
671
+ margin-top: var(--space-xs);
672
+ }
673
+
674
+ .footer-links {
675
+ display: flex;
676
+ align-items: center;
677
+ gap: var(--space-lg);
678
+ font-size: 0.8rem;
679
+ }
680
+
681
+ .footer-links a {
682
+ color: var(--text-muted);
683
+ text-decoration: none;
684
+ transition: color var(--transition-fast);
685
+ }
686
+
687
+ .footer-links a:hover {
688
+ color: var(--text-accent);
689
+ }
690
+
691
+ .footer-links .divider {
692
+ width: 3px;
693
+ height: 3px;
694
+ background: var(--text-muted);
695
+ border-radius: 50%;
696
+ opacity: 0.5;
697
+ }
698
+
699
+ /* ─── Toast Notifications ──────────────────────────────────── */
700
+ .toast-container {
701
+ position: fixed;
702
+ top: var(--space-lg);
703
+ right: var(--space-lg);
704
+ z-index: 1000;
705
+ display: flex;
706
+ flex-direction: column;
707
+ gap: var(--space-sm);
708
+ }
709
+
710
+ .toast {
711
+ padding: var(--space-md) var(--space-lg);
712
+ background: var(--bg-card);
713
+ border: 1px solid var(--border-subtle);
714
+ border-radius: var(--radius-md);
715
+ backdrop-filter: blur(20px);
716
+ animation: toastIn 0.3s ease-out;
717
+ font-size: 0.9rem;
718
+ max-width: 350px;
719
+ box-shadow: var(--shadow-lg);
720
+ }
721
+
722
+ .toast.toast-exiting {
723
+ animation: toastOut 0.3s ease-in forwards;
724
+ }
725
+
726
+ @keyframes toastIn {
727
+ from { opacity: 0; transform: translateX(100px); }
728
+ to { opacity: 1; transform: translateX(0); }
729
+ }
730
+
731
+ @keyframes toastOut {
732
+ from { opacity: 1; transform: translateX(0); }
733
+ to { opacity: 0; transform: translateX(100px); }
734
+ }
735
+
736
+ /* ─── Responsive ───────────────────────────────────────────── */
737
+ @media (max-width: 640px) {
738
+ .container {
739
+ padding: 0 var(--space-md);
740
+ }
741
+
742
+ .hero {
743
+ padding: var(--space-3xl) 0 var(--space-2xl);
744
+ }
745
+
746
+ .submit-form {
747
+ flex-direction: column;
748
+ }
749
+
750
+ .header .container {
751
+ flex-direction: column;
752
+ gap: var(--space-sm);
753
+ }
754
+
755
+ .footer-links {
756
+ flex-wrap: wrap;
757
+ justify-content: center;
758
+ }
759
+ }
760
+
761
+ /* ─── Utility Animations ───────────────────────────────────── */
762
+ @keyframes fadeIn {
763
+ from { opacity: 0; }
764
+ to { opacity: 1; }
765
+ }
766
+
767
+ @keyframes slideUp {
768
+ from { opacity: 0; transform: translateY(20px); }
769
+ to { opacity: 1; transform: translateY(0); }
770
+ }
771
+
772
+ .fade-in {
773
+ animation: fadeIn 0.6s ease-out;
774
+ }
775
+
776
+ .slide-up {
777
+ animation: slideUp 0.6s ease-out;
778
+ }
779
+
780
+ .slide-up-delay-1 { animation-delay: 0.1s; animation-fill-mode: backwards; }
781
+ .slide-up-delay-2 { animation-delay: 0.2s; animation-fill-mode: backwards; }
782
+ .slide-up-delay-3 { animation-delay: 0.3s; animation-fill-mode: backwards; }
783
+ .slide-up-delay-4 { animation-delay: 0.4s; animation-fill-mode: backwards; }
784
+
785
+ /* ─── Loading Skeleton ─────────────────────────────────────── */
786
+ .skeleton {
787
+ background: linear-gradient(
788
+ 90deg,
789
+ var(--bg-glass) 25%,
790
+ rgba(255, 255, 255, 0.06) 50%,
791
+ var(--bg-glass) 75%
792
+ );
793
+ background-size: 200% 100%;
794
+ animation: shimmer 1.5s infinite;
795
+ border-radius: var(--radius-sm);
796
+ }
797
+
798
+ @keyframes shimmer {
799
+ 0% { background-position: 200% 50%; }
800
+ 100% { background-position: -200% 50%; }
801
+ }
nginx.conf ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
worker_processes auto;
# PID under /tmp so the process can run as the non-root HF Spaces user.
pid /tmp/nginx.pid;

events {
    worker_connections 1024;
}

http {
    include /etc/nginx/mime.types;
    default_type application/octet-stream;

    # Temp paths relocated for non-root operation.
    client_body_temp_path /tmp/nginx-client-body;
    proxy_temp_path /tmp/nginx-proxy;
    fastcgi_temp_path /tmp/nginx-fastcgi;
    uwsgi_temp_path /tmp/nginx-uwsgi;
    scgi_temp_path /tmp/nginx-scgi;

    sendfile on;
    tcp_nopush on;
    keepalive_timeout 65;
    gzip on;
    gzip_types text/plain text/css application/json application/javascript text/xml;

    # Logging (non-root writable paths)
    access_log /tmp/nginx-access.log;
    error_log /tmp/nginx-error.log;

    server {
        # 7860 is the port HF Spaces expects the app to listen on.
        listen 7860;
        server_name _;

        # Frontend static files
        root /app/frontend;
        index index.html;

        # API proxy β†’ FastAPI. No trailing slash on proxy_pass, so the
        # full /api/... path is forwarded to the backend unchanged.
        location /api/ {
            proxy_pass http://127.0.0.1:8000;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;

            # Timeouts
            proxy_connect_timeout 10s;
            proxy_send_timeout 30s;
            proxy_read_timeout 30s;
        }

        # Health check.
        # NOTE(review): this forwards "/health" verbatim to the backend,
        # but run.sh probes the backend at "/api/health" β€” confirm the
        # FastAPI app actually serves a bare /health route, otherwise
        # change this to: proxy_pass http://127.0.0.1:8000/api/health;
        location /health {
            proxy_pass http://127.0.0.1:8000;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
        }

        # Frontend SPA fallback: unknown paths resolve to index.html.
        location / {
            try_files $uri $uri/ /index.html;
        }

        # Security headers (server-level: inherited by locations that do
        # not declare their own add_header directives).
        add_header X-Frame-Options "SAMEORIGIN" always;
        add_header X-Content-Type-Options "nosniff" always;
        add_header Referrer-Policy "strict-origin-when-cross-origin" always;
    }
}
run.sh ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
# Entrypoint: start the FastAPI backend, wait until it answers its health
# probe, then run nginx in the foreground as PID 1.
set -e

echo "=========================================="
echo "  RandomWeb β€” Starting services"
echo "=========================================="

# Start FastAPI backend in background
echo "[1/2] Starting FastAPI backend on :8000..."
cd /app
python -m uvicorn backend.main:app --host 0.0.0.0 --port 8000 --log-level info &
BACKEND_PID=$!

# Wait for the backend to become ready. Unlike a bare loop, we track
# readiness explicitly so the container fails fast instead of starting
# nginx in front of a dead backend.
echo "      Waiting for backend..."
ready=0
for i in $(seq 1 30); do
    if curl -s http://127.0.0.1:8000/api/health > /dev/null 2>&1; then
        echo "      Backend ready!"
        ready=1
        break
    fi
    # If uvicorn already exited (import error, bad config), don't keep polling.
    if ! kill -0 "$BACKEND_PID" 2>/dev/null; then
        echo "      ERROR: backend process exited during startup." >&2
        exit 1
    fi
    sleep 1
done

if [ "$ready" -ne 1 ]; then
    echo "      ERROR: backend did not become ready within 30s." >&2
    exit 1
fi

# Start Nginx in foreground (exec replaces the shell, so nginx receives
# container signals directly).
echo "[2/2] Starting Nginx on :7860..."
exec nginx -g 'daemon off;'
supabase_schema.sql ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
-- ============================================================
-- RandomWeb β€” Supabase Schema
-- Run this in the Supabase SQL Editor (Dashboard β†’ SQL Editor)
-- ============================================================

-- Enable required extensions
CREATE EXTENSION IF NOT EXISTS pg_trgm;

-- ============================================================
-- 1. WEBSITES TABLE
-- ============================================================
CREATE TABLE IF NOT EXISTS websites (
    id           BIGSERIAL PRIMARY KEY,
    url          TEXT NOT NULL UNIQUE,
    domain       TEXT NOT NULL,
    source       TEXT NOT NULL DEFAULT 'unknown',
    status       INTEGER,                         -- last HTTP status from the validator
    is_active    BOOLEAN NOT NULL DEFAULT false,
    first_seen   TIMESTAMPTZ NOT NULL DEFAULT now(),
    last_checked TIMESTAMPTZ,
    next_check   TIMESTAMPTZ,
    created_at   TIMESTAMPTZ NOT NULL DEFAULT now()
);

-- Indexes for performance
CREATE INDEX IF NOT EXISTS idx_websites_is_active ON websites (is_active) WHERE is_active = true;
CREATE INDEX IF NOT EXISTS idx_websites_domain ON websites (domain);
CREATE INDEX IF NOT EXISTS idx_websites_next_check ON websites (next_check) WHERE next_check IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_websites_random ON websites (id) WHERE is_active = true;

-- Trigram index for fuzzy search
CREATE INDEX IF NOT EXISTS idx_websites_url_trgm ON websites USING gin (url gin_trgm_ops);
CREATE INDEX IF NOT EXISTS idx_websites_domain_trgm ON websites USING gin (domain gin_trgm_ops);

-- ============================================================
-- 2. STATS TABLE (single-row, live counter)
-- ============================================================
CREATE TABLE IF NOT EXISTS stats (
    id           INTEGER PRIMARY KEY DEFAULT 1 CHECK (id = 1),  -- enforce single row
    active_count BIGINT NOT NULL DEFAULT 0,
    total_count  BIGINT NOT NULL DEFAULT 0,
    updated_at   TIMESTAMPTZ NOT NULL DEFAULT now()
);

INSERT INTO stats (active_count, total_count) VALUES (0, 0)
ON CONFLICT (id) DO NOTHING;

-- ============================================================
-- 3. TRIGGER β€” Auto-update stats on website changes
-- ============================================================
-- Statement-level trigger: recounts once per statement, not per row.
-- search_path is pinned so the function cannot be hijacked by objects
-- in a caller-controlled schema (Supabase security lint).
CREATE OR REPLACE FUNCTION update_stats_count()
RETURNS TRIGGER AS $$
BEGIN
    UPDATE stats SET
        active_count = (SELECT count(*) FROM websites WHERE is_active = true),
        total_count  = (SELECT count(*) FROM websites),
        updated_at   = now()
    WHERE id = 1;
    RETURN NULL;  -- return value is ignored for AFTER statement triggers
END;
$$ LANGUAGE plpgsql SET search_path = public;

DROP TRIGGER IF EXISTS trg_update_stats ON websites;
CREATE TRIGGER trg_update_stats
AFTER INSERT OR UPDATE OF is_active OR DELETE ON websites
FOR EACH STATEMENT EXECUTE FUNCTION update_stats_count();

-- ============================================================
-- 4. FUNCTION β€” Random active website
-- ============================================================
-- NOTE: ORDER BY random() scans all active rows (O(n)). Fine for modest
-- tables; if the active set grows large, switch to a random-id/OFFSET
-- sampling strategy.
CREATE OR REPLACE FUNCTION get_random_active_website()
RETURNS TABLE(id BIGINT, url TEXT, domain TEXT) AS $$
BEGIN
    RETURN QUERY
    SELECT w.id, w.url, w.domain
    FROM websites w
    WHERE w.is_active = true
    ORDER BY random()
    LIMIT 1;
END;
$$ LANGUAGE plpgsql SET search_path = public;

-- ============================================================
-- 5. ROW LEVEL SECURITY
-- ============================================================
ALTER TABLE websites ENABLE ROW LEVEL SECURITY;
ALTER TABLE stats ENABLE ROW LEVEL SECURITY;

-- Public read access for frontend (publishable key)
CREATE POLICY "Allow public read on websites"
    ON websites FOR SELECT
    USING (true);

CREATE POLICY "Allow public read on stats"
    ON stats FOR SELECT
    USING (true);

-- Write access is restricted to the service role. (The service role
-- bypasses RLS anyway; the previous unrestricted "FOR ALL USING (true)"
-- policies effectively granted PUBLIC write access through the
-- anon/publishable key, which defeats the point of enabling RLS.)
CREATE POLICY "Allow service write on websites"
    ON websites FOR ALL
    TO service_role
    USING (true)
    WITH CHECK (true);

CREATE POLICY "Allow service write on stats"
    ON stats FOR ALL
    TO service_role
    USING (true)
    WITH CHECK (true);

-- ============================================================
-- 6. ENABLE REALTIME on stats table
-- ============================================================
ALTER PUBLICATION supabase_realtime ADD TABLE stats;