|
|
|
from urllib.parse import urlsplit

import requests
import streamlit as st
|
|
|
# Origin of The Guardian's Content API; single-item lookups append the
# article path to it.
CONTENT_API = "https://content.guardianapis.com"

# Search endpoint used for headline listings, derived from the origin so the
# host is spelled out in exactly one place.
BASE_URL = f"{CONTENT_API}/search"
|
|
|
def fetch_headlines(section="world", limit=5, *, timeout=10):
    """Return the newest headlines of a Guardian section.

    Sends one GET request to ``BASE_URL`` ordered newest-first, authenticated
    with the ``GUARDIAN_API_KEY`` Streamlit secret.

    Args:
        section: Value sent as the ``section`` query parameter.
        limit: Value sent as the ``page-size`` query parameter.
        timeout: Seconds to wait for the API before giving up. Without a
            timeout ``requests`` can block forever on a stalled connection,
            which would hang the Streamlit script run.

    Returns:
        A list of ``(headline, url)`` tuples in the order the API returned
        them.

    Raises:
        KeyError: If the secret is not configured or the response JSON lacks
            the expected keys.
        requests.RequestException: On connection errors, timeouts, or a
            non-success HTTP status (via ``raise_for_status``).
    """
    key = st.secrets["GUARDIAN_API_KEY"]
    params = {
        "api-key": key,
        "section": section,
        "page-size": limit,
        "order-by": "newest",
        "show-fields": "headline",
    }

    resp = requests.get(BASE_URL, params=params, timeout=timeout)
    resp.raise_for_status()
    results = resp.json()["response"]["results"]

    headlines = []
    for item in results:
        # A single result without the optional "fields" payload should not
        # discard the whole listing; fall back to the item's web title.
        # NOTE(review): "webTitle" is taken from the Content API's documented
        # result shape -- confirm against a live response.
        fields = item.get("fields") or {}
        title = fields.get("headline") or item["webTitle"]
        headlines.append((title, item["webUrl"]))
    return headlines
|
|
|
def fetch_full_article(url: str, *, timeout: float = 10) -> str:
    """Fetch the body text of a Guardian article via the Content API.

    The article's path on ``https://www.theguardian.com`` is appended to
    ``CONTENT_API`` and requested with ``show-fields=bodyText``.

    Args:
        url: Article URL on ``https://www.theguardian.com``. Any query string
            or fragment is ignored.
        timeout: Seconds to wait for the API before giving up, so a stalled
            connection cannot hang the caller indefinitely.

    Returns:
        The ``bodyText`` field of the article.

    Raises:
        ValueError: If ``url`` is not an ``https`` URL on exactly
            ``www.theguardian.com`` with a non-empty path.
        KeyError: If the ``GUARDIAN_API_KEY`` secret is missing or the
            response JSON lacks the expected keys.
        requests.RequestException: On connection errors, timeouts, or a
            non-success HTTP status (via ``raise_for_status``).
    """
    guardian_host = "www.theguardian.com"

    # Compare the parsed scheme and host exactly. A plain string-prefix check
    # also accepts "https://www.theguardian.com.evil.tld/..." or
    # "https://www.theguardian.com@evil.tld/...", and gluing the remainder onto
    # CONTENT_API would then send the request -- and the API key -- elsewhere.
    parts = urlsplit(url)
    if (
        parts.scheme != "https"
        or parts.netloc != guardian_host
        or not parts.path.strip("/")
    ):
        raise ValueError(f"Unexpected URL format: {url}")

    # Validate first, read the secret second: bad input fails fast without
    # touching configuration.
    key = st.secrets["GUARDIAN_API_KEY"]

    # Only the path is forwarded; tracking parameters or fragments on the web
    # URL are not part of the API item path.
    resp = requests.get(
        f"{CONTENT_API}{parts.path}",
        params={
            "api-key": key,
            "show-fields": "bodyText",
        },
        timeout=timeout,
    )
    resp.raise_for_status()
    return resp.json()["response"]["content"]["fields"]["bodyText"]
|
|