# NOTE(review): removed paste artifacts ("Spaces:" / "Runtime error" lines)
# that preceded the module and were not valid Python.
# Standard library
import os
import re
from typing import Dict, List

# Third party
import httpx
from bs4 import BeautifulSoup
from fastapi import APIRouter, HTTPException

router = APIRouter()
# 🎯 IMDb GraphQL endpoint plus the single query used to pull the latest
# MOVIE and TV news batches in one round trip.
GRAPHQL_URL = "https://api.graphql.imdb.com"
HEADERS = {"Content-Type": "application/json"}

# NOTE: "plaidHtml" is the field name exposed by the IMDb GraphQL schema
# (it is not a typo for "plainHtml").
QUERY = """
query GetNews($first: Int!) {
  movieNews: news(first: $first, category: MOVIE) {
    edges {
      node {
        id
        articleTitle { plainText }
        externalUrl
        date
        text { plaidHtml }
        image { url }
      }
    }
  }
  tvNews: news(first: $first, category: TV) {
    edges {
      node {
        id
        articleTitle { plainText }
        externalUrl
        date
        text { plaidHtml }
        image { url }
      }
    }
  }
}
"""
# 🔧 Supabase configuration.
# Two key levels: the anon key (reads) and the service-role key (inserts).
SUPABASE_URL = "https://iiwbixdrrhejkthxygak.supabase.co"
SUPABASE_KEY = os.getenv("SUPA_KEY")
SUPABASE_ROLE_KEY = os.getenv("SUPA_SERVICE_KEY")

# Fail fast at import time when either credential is missing.
if not SUPABASE_KEY or not SUPABASE_ROLE_KEY:
    raise ValueError("❌ SUPA_KEY or SUPA_SERVICE_KEY not set in environment!")


def _auth_headers(key: str) -> Dict[str, str]:
    """Build the PostgREST auth headers for the given API key."""
    return {
        "apikey": key,
        "Authorization": f"Bearer {key}",
        "Content-Type": "application/json",
    }


SUPABASE_HEADERS = _auth_headers(SUPABASE_KEY)
SUPABASE_ROLE_HEADERS = _auth_headers(SUPABASE_ROLE_KEY)
# 🧼 HTML Cleanup
def clean_html(raw_html: str) -> str:
    """Strip HTML tags from *raw_html* and tidy whitespace.

    Collapses whitespace runs to single spaces, removes stray spaces
    before punctuation/closing brackets and just inside opening
    brackets. Accepts None (treated as an empty string).
    """
    stripped = BeautifulSoup(raw_html or "", "html.parser").get_text(
        separator=" ", strip=True
    )
    # After collapsing runs to single spaces, one pass removes the space
    # before any closer/punctuation and one pass removes the space after
    # any opener — equivalent to handling each bracket pair separately.
    stripped = re.sub(r"\s+", " ", stripped)
    stripped = re.sub(r"\s+([.,;:!?)\]}])", r"\1", stripped)
    stripped = re.sub(r"([(\[{])\s+", r"\1", stripped)
    return stripped.strip()
# 🚀 Main entry point.
# NOTE(review): `router` is created above but this coroutine carries no
# @router.get(...) decorator — confirm it is registered elsewhere.
async def get_news(first: int = 20) -> List[Dict]:
    """Fetch the latest *first* MOVIE and *first* TV news items from IMDb.

    Side effect: items whose `news_id` is not yet in the Supabase
    `news_extraction` table are bulk-inserted (best effort).

    Args:
        first: How many items to request per category (movie and TV).

    Returns:
        The combined list of news dicts, sorted by date, newest first.

    Raises:
        HTTPException: 502 if the IMDb API call fails, 500 if the
            response carries no `data` payload.
    """
    payload = {"query": QUERY, "variables": {"first": first}}

    async with httpx.AsyncClient(timeout=10.0) as client:
        # Fetch news from IMDb.
        response = await client.post(GRAPHQL_URL, headers=HEADERS, json=payload)
        if response.status_code != 200:
            raise HTTPException(status_code=502, detail="Erro ao acessar a API do IMDb")

        data = response.json().get("data")
        if not data:
            raise HTTPException(status_code=500, detail="Resposta inválida da API")

        combined = []
        for category_key in ("movieNews", "tvNews"):
            for edge in data.get(category_key, {}).get("edges", []):
                node = edge.get("node", {})
                # GraphQL may return explicit nulls, so use `or {}`:
                # .get(key, {}) would return None for a present-but-null
                # field and the chained .get() would then raise.
                title = (node.get("articleTitle") or {}).get("plainText")
                text_html = (node.get("text") or {}).get("plaidHtml")
                image_data = node.get("image")
                combined.append({
                    "news_id": node.get("id"),
                    "title": title,
                    "url": node.get("externalUrl"),
                    "date": node.get("date"),
                    "text": clean_html(text_html),
                    "image": image_data.get("url") if image_data else None,
                    "category": category_key.replace("News", "").upper(),
                })

        # 📌 Check which IDs already exist in Supabase. Chunked so the
        # `news_id=in.(...)` filter keeps the URL within length limits.
        all_ids = [item["news_id"] for item in combined]
        existing_ids = set()  # set: O(1) membership in the filter below
        for i in range(0, len(all_ids), 1000):
            chunk = all_ids[i:i + 1000]
            query_ids = ",".join(f"\"{nid}\"" for nid in chunk)
            url = f"{SUPABASE_URL}/rest/v1/news_extraction?select=news_id&news_id=in.({query_ids})"
            r = await client.get(url, headers=SUPABASE_HEADERS)
            if r.status_code == 200:
                existing_ids.update(item["news_id"] for item in r.json())

        # 🔎 Keep only news not yet archived.
        new_entries = [item for item in combined if item["news_id"] not in existing_ids]

        # 🧾 Bulk-insert the new rows. Best effort: a failed insert is
        # deliberately not fatal for serving the feed.
        if new_entries:
            insert_url = f"{SUPABASE_URL}/rest/v1/news_extraction"
            await client.post(insert_url, headers=SUPABASE_ROLE_HEADERS, json=new_entries)

    # 🔃 Newest first. `or ""` sends missing dates to the end instead of
    # raising TypeError (None is not comparable to str).
    combined.sort(key=lambda x: x.get("date") or "", reverse=True)
    return combined