"""Fetch top headlines from the GNews API and cache them in Redis.

Requires the redis, requests, and python-dotenv packages, plus a .env file
providing GNEWS_API_KEY and UPSTASH_REDIS_URL.
"""

import hashlib
import os

import redis
import requests
from dotenv import load_dotenv

# Load GNEWS_API_KEY and UPSTASH_REDIS_URL from the local .env file.
load_dotenv()

# GNews top-headline topics fetched on each run.
CATEGORIES = [
    "technology", "business", "science", "health", "world", "entertainment"
]

GNEWS_API_KEY = os.getenv("GNEWS_API_KEY")
REDIS_URL = os.getenv("UPSTASH_REDIS_URL") |
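
# Minimal fail-fast guard (a sketch; assumes both variables are required):
# without it, a missing .env surfaces later as an opaque connection error.
if not GNEWS_API_KEY or not REDIS_URL:
    raise RuntimeError("GNEWS_API_KEY and UPSTASH_REDIS_URL must be set")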

# decode_responses=True returns hash fields and set members as str, not bytes.
r = redis.Redis.from_url(REDIS_URL, decode_responses=True)


def generate_id(url: str) -> str:
    """Derive a stable article ID from the SHA-1 hash of its URL."""
    return hashlib.sha1(url.encode()).hexdigest()


def fetch_and_cache_articles():
    """Fetch top headlines per category and cache any unseen articles."""
    print("[INIT] Fetching and caching articles...")
    for category in CATEGORIES:
        base_url = "https://gnews.io/api/v4/top-headlines"
        params = {
            "topic": category,
            "lang": "en",
            "max": 20,
            "expand": "content",  # request full article content (paid GNews plans)
            "token": GNEWS_API_KEY,
        }
        try:
            response = requests.get(base_url, params=params, timeout=10)
            response.raise_for_status()
            articles = response.json().get("articles", [])

            for article in articles:
                article_id = generate_id(article["url"])
                if not r.exists(f"article:{article_id}"):
                    # redis-py rejects None values in a hash mapping, so
                    # optional fields fall back to empty strings.
                    article_data = {
                        "id": article_id,
                        "title": article["title"],
                        "url": article["url"],
                        "description": article.get("description") or "",
                        "content": article.get("content") or "",
                        "image": article.get("image") or "",
                        "publishedAt": article["publishedAt"],
                        "category": category,
                        "source": article["source"]["name"],
                    }
                    r.hset(f"article:{article_id}", mapping=article_data)
                    # Index the article ID under its category set.
                    r.sadd(f"category:{category}", article_id)

        # Keep going with the remaining categories if one request fails.
        except Exception as e:
            print(f"[ERROR] Failed for category {category}: {e}")

    print("[INIT] Article caching complete.")
|
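
# Example reader for the cache layout (a sketch; get_cached_articles is a
# hypothetical helper, not part of the fetch flow): each category set
# indexes article hashes, so listing a category is one set lookup plus a
# hash fetch per ID.
def get_cached_articles(category: str) -> list[dict]:
    ids = r.smembers(f"category:{category}")
    return [r.hgetall(f"article:{article_id}") for article_id in ids]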


if __name__ == "__main__":
    fetch_and_cache_articles()