|
import gradio as gr |
|
import requests |
|
import json |
|
import os |
|
from datetime import datetime, timedelta |
|
from bs4 import BeautifulSoup |
|
from huggingface_hub import InferenceClient |
|
|
|
|
|
|
|
|
|
|
|
# Credentials are read from the environment; either may be None when the
# corresponding variable is unset (API calls will then fail at request time).
API_KEY = os.getenv("SERPHOUSE_API_KEY")

HF_TOKEN = os.getenv("HF_TOKEN")

# Countries offered in the UI dropdown. Each value is passed verbatim to the
# SERPHouse API as its "loc" (location) parameter.
MAJOR_COUNTRIES = [
    "United States", "United Kingdom", "Canada", "Australia", "Germany",
    "France", "Japan", "South Korea", "China", "India",
    "Brazil", "Mexico", "Russia", "Italy", "Spain",
    "Netherlands", "Sweden", "Switzerland", "Norway", "Denmark",
    "Finland", "Belgium", "Austria", "New Zealand", "Ireland",
    "Singapore", "Hong Kong", "Israel", "United Arab Emirates", "Saudi Arabia",
    "South Africa", "Turkey", "Egypt", "Poland", "Czech Republic",
    "Hungary", "Greece", "Portugal", "Argentina", "Chile",
    "Colombia", "Peru", "Venezuela", "Thailand", "Malaysia",
    "Indonesia", "Philippines", "Vietnam", "Pakistan", "Bangladesh"
]
|
|
|
def search_serphouse(query, country, page=1, num_result=10):
    """Query the SERPHouse live SERP API for Google News results.

    Args:
        query: Search term string.
        country: Location name passed as the API's "loc" parameter.
        page: 1-based result page number.
        num_result: Number of results to request.

    Returns:
        The parsed JSON response dict on success, or ``{"error": msg}``
        describing the failure.
    """
    url = "https://api.serphouse.com/serp/live"

    # Limit results to roughly the last 24 hours (UTC day window).
    now = datetime.utcnow()
    yesterday = now - timedelta(days=1)
    date_range = f"{yesterday.strftime('%Y-%m-%d')},{now.strftime('%Y-%m-%d')}"

    payload = {
        "data": {
            "q": query,
            "domain": "google.com",
            "loc": country,
            "lang": "en",
            "device": "desktop",
            "serp_type": "news",
            "page": str(page),
            "verbatim": "1",
            "num": str(num_result),
            "date_range": date_range
        }
    }

    headers = {
        "accept": "application/json",
        "content-type": "application/json",
        "authorization": f"Bearer {API_KEY}"
    }

    response = None  # kept for the error path below
    try:
        # A timeout prevents the UI from hanging forever on a stalled request.
        response = requests.post(url, json=payload, headers=headers, timeout=30)
        response.raise_for_status()
        return response.json()
    except requests.RequestException as e:
        error_msg = f"Error: {str(e)}"
        # BUG FIX: previously `response.text` was read unconditionally here,
        # raising UnboundLocalError when requests.post() itself failed
        # (connection error / timeout) before `response` was ever assigned.
        if response is not None and response.text:
            error_msg += f"\nResponse content: {response.text}"
        return {"error": error_msg}
|
|
|
def format_results_from_raw(results):
    """Normalize a raw SERPHouse response into an (error_message, articles) pair.

    On success the message is "" and articles is a list of dicts with the
    keys title/link/snippet/channel/time/image_url; on any failure the
    message is non-empty and the list is empty.
    """
    try:
        # An upstream error payload is forwarded as-is.
        if isinstance(results, dict) and "error" in results:
            return "Error: " + results["error"], []

        if not isinstance(results, dict):
            raise ValueError("๊ฒฐ๊ณผ๊ฐ ์ฌ์ ํ์์ด ์๋๋๋ค.")

        # Dig through the doubly nested "results" wrapper for the news list.
        news_items = []
        if 'results' in results:
            payload = results['results']
            if 'results' in payload:
                payload = payload['results']
                if 'news' in payload:
                    news_items = payload['news']

        if not news_items:
            return "๊ฒ์ ๊ฒฐ๊ณผ๊ฐ ์์ต๋๋ค.", []

        # Map each raw item onto the fixed schema, falling back to the
        # alternate key name where the primary one is absent.
        articles = [
            {
                "title": item.get("title", "์ ๋ชฉ ์์"),
                "link": item.get("url", item.get("link", "#")),
                "snippet": item.get("snippet", "๋ด์ฉ ์์"),
                "channel": item.get("channel", item.get("source", "์ ์ ์์")),
                "time": item.get("time", item.get("date", "์ ์ ์๋ ์๊ฐ")),
                "image_url": item.get("img", item.get("thumbnail", "")),
            }
            for item in news_items
        ]
        return "", articles

    except Exception as e:
        return "Error: " + f"๊ฒฐ๊ณผ ์ฒ๋ฆฌ ์ค ์ค๋ฅ ๋ฐ์: {str(e)}", []
|
|
|
def serphouse_search(query, country):
    """Fetch the first page of news for `query`/`country` and normalize it.

    Returns the (error_message, articles) pair produced by
    format_results_from_raw.
    """
    raw = search_serphouse(query, country, page=1, num_result=10)
    return format_results_from_raw(raw)
|
|
|
|
|
hf_client = InferenceClient("CohereForAI/c4ai-command-r-plus-08-2024", token=HF_TOKEN) |
|
|
|
def summarize_article(url):
    """Download an article page and summarize its paragraph text via the HF model.

    Args:
        url: Article URL to fetch.

    Returns:
        The generated summary string, or a notice/error message string on
        failure (never raises).
    """
    try:
        # Timeout so a dead or slow link cannot hang the UI indefinitely.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        # Crude article-body extraction: concatenate all <p> text.
        text = ' '.join(p.get_text() for p in soup.find_all('p'))
        if not text.strip():
            return "๊ธฐ์ฌ ๋ด์ฉ์ ๊ฐ์ ธ์ฌ ์ ์์ต๋๋ค."

        # Truncate very long articles so the prompt stays within the model's
        # context window instead of failing the generation call outright.
        max_chars = 8000
        if len(text) > max_chars:
            text = text[:max_chars]

        prompt = f"๋ค์ ์์ด ๊ธฐ์ฌ๋ฅผ ํ๊ตญ์ด๋ก 3๋ฌธ์ฅ์ผ๋ก ์์ฝํ์ธ์:\n{text}"
        summary = hf_client.text_generation(prompt, max_new_tokens=500)
        return summary
    except Exception as e:
        return f"์์ฝ ์ค ์ค๋ฅ ๋ฐ์: {str(e)}"
|
|
|
# Custom CSS injected into the Gradio page: hides the default footer.
css = """
footer {
    visibility: hidden;
}
"""
|
|
|
|
|
# ---------------------------------------------------------------------------
# Gradio UI: search box + country dropdown; on click, article cards are built
# dynamically inside the event handler.
# NOTE(review): the Korean UI strings below are mojibake (UTF-8 read with the
# wrong codec); left byte-for-byte as found since they are runtime values.
# ---------------------------------------------------------------------------
with gr.Blocks(css=css, title="NewsAI ์๋น์ค") as iface:
    gr.Markdown("๊ฒ์์ด๋ฅผ ์๋ ฅํ๊ณ  ์ํ๋ ๊ตญ๊ฐ๋ฅผ ์ ํํ๋ฉด, ๊ฒ์์ด์ ์ผ์นํ๋ 24์๊ฐ ์ด๋ด ๋ด์ค๋ฅผ ์ต๋ 10๊ฐ ์ถ๋ ฅํฉ๋๋ค.")

    with gr.Column():
        with gr.Row():
            query = gr.Textbox(label="๊ฒ์์ด")
            country = gr.Dropdown(MAJOR_COUNTRIES, label="๊ตญ๊ฐ", value="South Korea")
        search_button = gr.Button("๊ฒ์")

    # Component groups created by the previous search, kept so they can be
    # hidden before the next search renders new ones.
    article_outputs = []

    def search_and_display(query, country):
        # Click handler: run the search, then build one component group per
        # article returned.
        error_message, articles = serphouse_search(query, country)
        if error_message:
            # NOTE(review): the click below is wired with outputs=[], so this
            # gr.update has no target component — the error is likely never
            # shown to the user; verify against the Gradio version in use.
            return gr.update(visible=True, value=error_message)
        else:
            # NOTE(review): mutating .visible directly outside of a returned
            # update does not re-render in Gradio — confirm this actually
            # hides the old components.
            for components in article_outputs:
                for component in components:
                    component.visible = False
            article_outputs.clear()

            for article in articles:
                with gr.Column():
                    title = gr.Markdown(f"### [{article['title']}]({article['link']})")
                    # NOTE(review): `shape` was removed from gr.Image in
                    # Gradio 4.x — confirm the pinned Gradio version.
                    image = gr.Image(value=article['image_url'], visible=bool(article['image_url']), shape=(200, 150))
                    snippet = gr.Markdown(f"**์์ฝ:** {article['snippet']}")
                    info = gr.Markdown(f"**์ถ์ฒ:** {article['channel']} | **์๊ฐ:** {article['time']}")
                    analyze_button = gr.Button("๋ถ์")
                    summary_output = gr.Markdown(visible=False)

                    def analyze_article(url):
                        # NOTE(review): two suspect patterns here —
                        # (1) component .update(...) as a side call has no
                        # effect; handlers should *return* gr.update(...) for
                        # the wired output; (2) `summary_output` is captured
                        # by late binding, so every closure created in this
                        # loop sees the *last* iteration's component.
                        summary = summarize_article(url)
                        summary_output.update(value=summary, visible=True)

                    analyze_button.click(analyze_article, inputs=gr.State(article['link']), outputs=summary_output)

                article_outputs.append([title, image, snippet, info, analyze_button, summary_output])

            return gr.update()

    # NOTE(review): components instantiated inside an event handler are not
    # added to the live page in modern Gradio, and with outputs=[] nothing
    # returned by the handler is rendered — this flow likely needs
    # pre-allocated placeholder components updated via gr.update. Confirm.
    search_button.click(
        search_and_display,
        inputs=[query, country],
        outputs=[]
    )

iface.launch(auth=("gini", "pick"))
|
|