# guardian.py
import requests
import streamlit as st

# Guardian endpoints: the search API lists articles, while the Content
# API serves a single item (including its body text) by URL path.
BASE_URL    = "https://content.guardianapis.com/search"
CONTENT_API = "https://content.guardianapis.com"

def fetch_headlines(section="world", limit=5):
    """
    Returns a list of (headline, url) tuples from The Guardian.
    """
    key = st.secrets["GUARDIAN_API_KEY"]
    params = {
        "api-key":     key,
        "section":     section,
        "page-size":   limit,
        "order-by":    "newest",
        "show-fields": "headline"
    }
    resp = requests.get(BASE_URL, params=params, timeout=10)
    resp.raise_for_status()  # surface HTTP errors (bad key, rate limit) early
    results = resp.json()["response"]["results"]
    return [(item["fields"]["headline"], item["webUrl"]) for item in results]
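
# For reference, fetch_headlines() assumes the search response shape
# sketched below (trimmed; the values are illustrative placeholders,
# not real data):
#
#   {"response": {"results": [
#       {"webUrl": "https://www.theguardian.com/...",
#        "fields": {"headline": "..."}}
#   ]}}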

def fetch_full_article(url: str) -> str:
    """
    Given a Guardian article URL, fetch its body text via the Content API.
    """
    key = st.secrets["GUARDIAN_API_KEY"]
    # strip off the public site domain to get the API path
    prefix = "https://www.theguardian.com"
    if url.startswith(prefix):
        path = url[len(prefix):]
    else:
        raise ValueError(f"Unexpected URL format: {url}")

    resp = requests.get(
        f"{CONTENT_API}{path}",
        params={
            "api-key":     key,
            "show-fields": "bodyText"
        },
        timeout=10
    )
    resp.raise_for_status()
    return resp.json()["response"]["content"]["fields"]["bodyText"]
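
# Usage sketch: a minimal Streamlit page built on the two helpers above,
# run with `streamlit run guardian.py`. The section name and widget
# labels are illustrative assumptions, not part of the Guardian API;
# st.secrets must provide GUARDIAN_API_KEY (e.g. in .streamlit/secrets.toml).
if __name__ == "__main__":
    st.title("Guardian headlines")
    for headline, url in fetch_headlines(section="world", limit=5):
        # Each headline becomes a button; clicking it fetches and
        # renders that article's body text below the list.
        if st.button(headline):
            st.write(fetch_full_article(url))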