Spaces:
Configuration error
Configuration error
import requests | |
def hygiene_score(entity: str) -> int:
    """Return a 0-100 technical-hygiene score for *entity*.

    When *entity* is an ``http://`` or ``https://`` URL, points are awarded
    for each check that passes:

    * the site's robots.txt is served (HTTP 200) and its rules allow the
      ``GPTBot`` user agent to fetch *entity*: +30
    * the page response time is under 0.3 s: +20
    * the page contains ``rel="canonical"``: +20
    * the page contains both ``<img`` and ``alt=``: +20
    * neither "subscribe" nor "login" appears in the page
      (case-insensitive), taken as "no paywall": +10

    The total is capped at 100.

    Args:
        entity: A URL to probe, or any other string (e.g. a brand name).

    Returns:
        50 when *entity* is not an http(s) URL, 40 when a request fails or
        the URL is malformed, otherwise the sum of the points above.
    """
    # Imported locally so the block does not depend on edits to the
    # file-level import section.
    from urllib.parse import urlsplit, urlunsplit
    from urllib.robotparser import RobotFileParser

    # Require a real scheme: a bare "http" prefix also matches ordinary
    # names such as "httpd-server", which are not URLs.
    if not entity.startswith(("http://", "https://")):
        return 50

    score = 0
    try:
        # 1) robots.txt lives at the site root, not beneath the page path.
        parts = urlsplit(entity)
        robots_url = urlunsplit(
            (parts.scheme, parts.netloc, "/robots.txt", "", "")
        )
        robots_resp = requests.get(robots_url, timeout=5)
        if robots_resp.status_code == 200:
            # Evaluate the actual rules; merely mentioning GPTBot (for
            # example in a Disallow group) must not earn the points.
            rules = RobotFileParser()
            rules.parse(robots_resp.text.splitlines())
            if rules.can_fetch("GPTBot", entity):
                score += 30

        # 2) Page response time
        page_resp = requests.get(entity, timeout=5)
        if page_resp.elapsed.total_seconds() < 0.3:
            score += 20

        # Decode the body once and reuse it for the remaining checks.
        html = page_resp.text

        # 3) Canonical tag
        if 'rel="canonical"' in html:
            score += 20

        # 4) Alt-text on images
        if "<img" in html and "alt=" in html:
            score += 20

        # 5) Paywall check
        lowered = html.lower()
        if "subscribe" not in lowered and "login" not in lowered:
            score += 10
    except (requests.RequestException, ValueError):
        # Best-effort: network failures and malformed URLs yield a fixed
        # fallback score instead of propagating to the caller.
        return 40

    return min(score, 100)
def hygiene_recommendation(entity: str, score: int) -> str:
    """Return advice text matching a hygiene *score*.

    Args:
        entity: The scored URL or name; accepted but not used when
            choosing the message.
        score: The hygiene score to interpret.

    Returns:
        A list of remediation steps when *score* is below 50, otherwise a
        short "keep monitoring" message.
    """
    remediation = (
        "Ensure robots.txt allows GPTBot, add a canonical tag, "
        "include alt-text for all images, and remove any paywalls."
    )
    all_good = (
        "Technical hygiene is solid; continue monitoring robots.txt "
        "and server speed."
    )
    if score >= 50:
        return all_good
    return remediation