aryn25's picture
Create scoring/hygiene.py
88bff1a verified
from urllib.parse import urlsplit
from urllib.robotparser import RobotFileParser

import requests
def _robots_allows_gptbot(page_url: str) -> bool:
    """Return True if the site's robots.txt is served and permits GPTBot to fetch page_url."""
    parts = urlsplit(page_url)
    # robots.txt is looked up at the site root, so drop any path/query
    # component of the page URL before building its address.
    robots_url = f"{parts.scheme}://{parts.netloc}/robots.txt"
    robots_resp = requests.get(robots_url, timeout=5)
    if robots_resp.status_code != 200:
        return False
    # Evaluate the actual rules: merely mentioning "GPTBot" in the file is
    # not evidence of permission (it is commonly named in a Disallow group).
    rules = RobotFileParser()
    rules.parse(robots_resp.text.splitlines())
    return rules.can_fetch("GPTBot", page_url)


def hygiene_score(entity: str) -> int:
    """Score the technical hygiene of a URL on a 0-100 scale.

    Anything that does not start with ``http://`` or ``https://`` is treated
    as a non-URL entity and gets a neutral default of 50. For URLs, points
    are awarded as follows:

    * robots.txt at the site root is served and permits GPTBot: +30
    * page response time below 0.3 seconds: +20
    * a ``rel="canonical"`` attribute is present: +20
    * an ``<img`` tag and an ``alt=`` attribute are present: +20
    * neither "subscribe" nor "login" appears in the page text: +10

    The page checks are plain substring heuristics on the lower-cased
    response body; the HTML is not parsed.

    Args:
        entity: A URL, or a free-text entity name.

    Returns:
        The accumulated score capped at 100, 50 for non-URL input, or 40
        when any request or check raises.
    """
    # Require a full scheme prefix so names such as "httpbin" are not
    # mistaken for URLs.
    if not entity.lower().startswith(("http://", "https://")):
        return 50

    score = 0
    try:
        # 1) robots.txt permits GPTBot
        if _robots_allows_gptbot(entity):
            score += 30

        # 2) Page response time
        page_resp = requests.get(entity, timeout=5)
        if page_resp.elapsed.total_seconds() < 0.3:
            score += 20

        # HTML tag and attribute names are case-insensitive, so run every
        # textual check against a lower-cased copy of the body.
        html = page_resp.text.lower()

        # 3) Canonical tag (either quote style)
        if 'rel="canonical"' in html or "rel='canonical'" in html:
            score += 20

        # 4) Alt-text on images
        if "<img" in html and "alt=" in html:
            score += 20

        # 5) Paywall check
        if "subscribe" not in html and "login" not in html:
            score += 10
    except Exception:
        # Deliberate best-effort: any network or parsing failure yields a
        # fixed fallback score rather than propagating to the caller.
        return 40
    return min(score, 100)
def hygiene_recommendation(entity: str, score: int) -> str:
    """Return a short recommendation matching a hygiene score.

    Args:
        entity: The scored entity; not used when choosing the message.
        score: The hygiene score to interpret.

    Returns:
        A list of remediation steps when ``score`` is below 50, otherwise
        a message saying that hygiene is solid.
    """
    needs_work = score < 50
    remediation = (
        "Ensure robots.txt allows GPTBot, add a canonical tag, "
        "include alt-text for all images, and remove any paywalls."
    )
    all_good = (
        "Technical hygiene is solid; "
        "continue monitoring robots.txt and server speed."
    )
    return remediation if needs_work else all_good