|
import gradio as gr |
|
import requests |
|
import json |
|
import os |
|
from datetime import datetime, timedelta |
|
from huggingface_hub import InferenceClient |
|
|
|
from bs4 import BeautifulSoup |
|
import concurrent.futures |
|
import time |
|
import re |
|
|
|
MAX_COUNTRY_RESULTS = 100 |
|
MAX_GLOBAL_RESULTS = 1000 |
|
|
|
def create_article_components(max_results): |
|
article_components = [] |
|
for i in range(max_results): |
|
with gr.Group(visible=False) as article_group: |
|
title = gr.Markdown() |
|
image = gr.Image(width=200, height=150) |
|
snippet = gr.Markdown() |
|
info = gr.Markdown() |
|
|
|
article_components.append({ |
|
'group': article_group, |
|
'title': title, |
|
'image': image, |
|
'snippet': snippet, |
|
'info': info, |
|
'index': i, |
|
}) |
|
return article_components |
|
|
|
API_KEY = os.getenv("SERPHOUSE_API_KEY") |
|
hf_client = InferenceClient("CohereForAI/c4ai-command-r-plus-08-2024", token=os.getenv("HF_TOKEN")) |
|
|
|
|
|
COUNTRY_LANGUAGES = { |
|
"United States": "en", |
|
"United Kingdom": "en", |
|
"Taiwan": "zh-TW", |
|
"Canada": "en", |
|
"Australia": "en", |
|
"Germany": "de", |
|
"France": "fr", |
|
"Japan": "ja", |
|
|
|
"China": "zh", |
|
"India": "hi", |
|
"Brazil": "pt", |
|
"Mexico": "es", |
|
"Russia": "ru", |
|
"Italy": "it", |
|
"Spain": "es", |
|
"Netherlands": "nl", |
|
"Singapore": "en", |
|
"Hong Kong": "zh-HK", |
|
"Indonesia": "id", |
|
"Malaysia": "ms", |
|
"Philippines": "tl", |
|
"Thailand": "th", |
|
"Vietnam": "vi", |
|
"Belgium": "nl", |
|
"Denmark": "da", |
|
"Finland": "fi", |
|
"Ireland": "en", |
|
"Norway": "no", |
|
"Poland": "pl", |
|
"Sweden": "sv", |
|
"Switzerland": "de", |
|
"Austria": "de", |
|
"Czech Republic": "cs", |
|
"Greece": "el", |
|
"Hungary": "hu", |
|
"Portugal": "pt", |
|
"Romania": "ro", |
|
"Turkey": "tr", |
|
"Israel": "he", |
|
"Saudi Arabia": "ar", |
|
"United Arab Emirates": "ar", |
|
"South Africa": "en", |
|
"Argentina": "es", |
|
"Chile": "es", |
|
"Colombia": "es", |
|
"Peru": "es", |
|
"Venezuela": "es", |
|
"New Zealand": "en", |
|
"Bangladesh": "bn", |
|
"Pakistan": "ur", |
|
"Egypt": "ar", |
|
"Morocco": "ar", |
|
"Nigeria": "en", |
|
"Kenya": "sw", |
|
"Ukraine": "uk", |
|
"Croatia": "hr", |
|
"Slovakia": "sk", |
|
"Bulgaria": "bg", |
|
"Serbia": "sr", |
|
"Estonia": "et", |
|
"Latvia": "lv", |
|
"Lithuania": "lt", |
|
"Slovenia": "sl", |
|
"Luxembourg": "fr", |
|
"Malta": "mt", |
|
"Cyprus": "el", |
|
"Iceland": "is" |
|
} |
|
|
|
COUNTRY_LOCATIONS = { |
|
"United States": "United States", |
|
"United Kingdom": "United Kingdom", |
|
"Taiwan": "Taiwan", |
|
"Canada": "Canada", |
|
"Australia": "Australia", |
|
"Germany": "Germany", |
|
"France": "France", |
|
"Japan": "Japan", |
|
|
|
"China": "China", |
|
"India": "India", |
|
"Brazil": "Brazil", |
|
"Mexico": "Mexico", |
|
"Russia": "Russia", |
|
"Italy": "Italy", |
|
"Spain": "Spain", |
|
"Netherlands": "Netherlands", |
|
"Singapore": "Singapore", |
|
"Hong Kong": "Hong Kong", |
|
"Indonesia": "Indonesia", |
|
"Malaysia": "Malaysia", |
|
"Philippines": "Philippines", |
|
"Thailand": "Thailand", |
|
"Vietnam": "Vietnam", |
|
"Belgium": "Belgium", |
|
"Denmark": "Denmark", |
|
"Finland": "Finland", |
|
"Ireland": "Ireland", |
|
"Norway": "Norway", |
|
"Poland": "Poland", |
|
"Sweden": "Sweden", |
|
"Switzerland": "Switzerland", |
|
"Austria": "Austria", |
|
"Czech Republic": "Czech Republic", |
|
"Greece": "Greece", |
|
"Hungary": "Hungary", |
|
"Portugal": "Portugal", |
|
"Romania": "Romania", |
|
"Turkey": "Turkey", |
|
"Israel": "Israel", |
|
"Saudi Arabia": "Saudi Arabia", |
|
"United Arab Emirates": "United Arab Emirates", |
|
"South Africa": "South Africa", |
|
"Argentina": "Argentina", |
|
"Chile": "Chile", |
|
"Colombia": "Colombia", |
|
"Peru": "Peru", |
|
"Venezuela": "Venezuela", |
|
"New Zealand": "New Zealand", |
|
"Bangladesh": "Bangladesh", |
|
"Pakistan": "Pakistan", |
|
"Egypt": "Egypt", |
|
"Morocco": "Morocco", |
|
"Nigeria": "Nigeria", |
|
"Kenya": "Kenya", |
|
"Ukraine": "Ukraine", |
|
"Croatia": "Croatia", |
|
"Slovakia": "Slovakia", |
|
"Bulgaria": "Bulgaria", |
|
"Serbia": "Serbia", |
|
"Estonia": "Estonia", |
|
"Latvia": "Latvia", |
|
"Lithuania": "Lithuania", |
|
"Slovenia": "Slovenia", |
|
"Luxembourg": "Luxembourg", |
|
"Malta": "Malta", |
|
"Cyprus": "Cyprus", |
|
"Iceland": "Iceland" |
|
} |
|
|
|
MAJOR_COUNTRIES = list(COUNTRY_LOCATIONS.keys()) |
|
|
|
def translate_query(query, country): |
|
try: |
|
|
|
if is_english(query): |
|
print(f"์์ด ๊ฒ์์ด ๊ฐ์ง - ์๋ณธ ์ฌ์ฉ: {query}") |
|
return query |
|
|
|
|
|
if country in COUNTRY_LANGUAGES: |
|
|
|
if country == "South Korea": |
|
print(f"ํ๊ตญ ์ ํ - ์๋ณธ ์ฌ์ฉ: {query}") |
|
return query |
|
|
|
target_lang = COUNTRY_LANGUAGES[country] |
|
print(f"๋ฒ์ญ ์๋: {query} -> {country}({target_lang})") |
|
|
|
url = f"https://translate.googleapis.com/translate_a/single" |
|
params = { |
|
"client": "gtx", |
|
"sl": "auto", |
|
"tl": target_lang, |
|
"dt": "t", |
|
"q": query |
|
} |
|
|
|
response = requests.get(url, params=params) |
|
translated_text = response.json()[0][0][0] |
|
print(f"๋ฒ์ญ ์๋ฃ: {query} -> {translated_text} ({country})") |
|
return translated_text |
|
|
|
return query |
|
|
|
except Exception as e: |
|
print(f"๋ฒ์ญ ์ค๋ฅ: {str(e)}") |
|
return query |
|
|
|
def translate_to_korean(text): |
|
try: |
|
url = "https://translate.googleapis.com/translate_a/single" |
|
params = { |
|
"client": "gtx", |
|
"sl": "auto", |
|
"tl": "ko", |
|
"dt": "t", |
|
"q": text |
|
} |
|
|
|
response = requests.get(url, params=params) |
|
translated_text = response.json()[0][0][0] |
|
return translated_text |
|
except Exception as e: |
|
print(f"ํ๊ธ ๋ฒ์ญ ์ค๋ฅ: {str(e)}") |
|
return text |
|
|
|
def is_english(text): |
|
return all(ord(char) < 128 for char in text.replace(' ', '').replace('-', '').replace('_', '')) |
|
|
|
def is_korean(text): |
|
return any('\uAC00' <= char <= '\uD7A3' for char in text) |
|
|
|
def search_serphouse(query, country, page=1, num_result=10): |
|
url = "https://api.serphouse.com/serp/live" |
|
|
|
now = datetime.utcnow() |
|
yesterday = now - timedelta(days=1) |
|
date_range = f"{yesterday.strftime('%Y-%m-%d')},{now.strftime('%Y-%m-%d')}" |
|
|
|
translated_query = translate_query(query, country) |
|
print(f"Original query: {query}") |
|
print(f"Translated query: {translated_query}") |
|
|
|
payload = { |
|
"data": { |
|
"q": translated_query, |
|
"domain": "google.com", |
|
"loc": COUNTRY_LOCATIONS.get(country, "United States"), |
|
"lang": COUNTRY_LANGUAGES.get(country, "en"), |
|
"device": "desktop", |
|
"serp_type": "news", |
|
"page": "1", |
|
"num": "10", |
|
"date_range": date_range, |
|
"sort_by": "date" |
|
} |
|
} |
|
|
|
headers = { |
|
"accept": "application/json", |
|
"content-type": "application/json", |
|
"authorization": f"Bearer {API_KEY}" |
|
} |
|
|
|
try: |
|
response = requests.post(url, json=payload, headers=headers) |
|
print("Request payload:", json.dumps(payload, indent=2, ensure_ascii=False)) |
|
print("Response status:", response.status_code) |
|
|
|
response.raise_for_status() |
|
return {"results": response.json(), "translated_query": translated_query} |
|
except requests.RequestException as e: |
|
return {"error": f"Error: {str(e)}", "translated_query": query} |
|
|
|
def format_results_from_raw(response_data): |
|
if "error" in response_data: |
|
return "Error: " + response_data["error"], [] |
|
|
|
try: |
|
results = response_data["results"] |
|
translated_query = response_data["translated_query"] |
|
|
|
news_results = results.get('results', {}).get('results', {}).get('news', []) |
|
if not news_results: |
|
return "๊ฒ์ ๊ฒฐ๊ณผ๊ฐ ์์ต๋๋ค.", [] |
|
|
|
articles = [] |
|
for idx, result in enumerate(news_results, 1): |
|
articles.append({ |
|
"index": idx, |
|
"title": result.get("title", "์ ๋ชฉ ์์"), |
|
"link": result.get("url", result.get("link", "#")), |
|
"snippet": result.get("snippet", "๋ด์ฉ ์์"), |
|
"channel": result.get("channel", result.get("source", "์ ์ ์์")), |
|
"time": result.get("time", result.get("date", "์ ์ ์๋ ์๊ฐ")), |
|
"image_url": result.get("img", result.get("thumbnail", "")), |
|
"translated_query": translated_query |
|
}) |
|
return "", articles |
|
except Exception as e: |
|
return f"๊ฒฐ๊ณผ ์ฒ๋ฆฌ ์ค ์ค๋ฅ ๋ฐ์: {str(e)}", [] |
|
|
|
def serphouse_search(query, country): |
|
response_data = search_serphouse(query, country) |
|
return format_results_from_raw(response_data) |
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_hn_item(item_id): |
|
"""๊ฐ๋ณ ์์ดํ
์ ๋ณด ๊ฐ์ ธ์ค๊ธฐ""" |
|
try: |
|
response = requests.get(f"https://hacker-news.firebaseio.com/v0/item/{item_id}.json") |
|
return response.json() |
|
except: |
|
return None |
|
|
|
def get_recent_stories(): |
|
"""์ต์ ์คํ ๋ฆฌ ๊ฐ์ ธ์ค๊ธฐ""" |
|
try: |
|
response = requests.get("https://hacker-news.firebaseio.com/v0/newstories.json") |
|
story_ids = response.json() |
|
|
|
recent_stories = [] |
|
current_time = datetime.now().timestamp() |
|
day_ago = current_time - (24 * 60 * 60) |
|
|
|
for story_id in story_ids: |
|
story = get_hn_item(story_id) |
|
if story and 'time' in story and story['time'] > day_ago: |
|
recent_stories.append(story) |
|
|
|
if len(recent_stories) >= 100: |
|
break |
|
|
|
return recent_stories |
|
except Exception as e: |
|
print(f"Error fetching HN stories: {str(e)}") |
|
return [] |
|
|
|
def format_hn_time(timestamp): |
|
"""Unix timestamp๋ฅผ ์ฝ๊ธฐ ์ฌ์ด ํ์์ผ๋ก ๋ณํ""" |
|
try: |
|
dt = datetime.fromtimestamp(timestamp) |
|
return dt.strftime("%Y-%m-%d %H:%M:%S") |
|
except: |
|
return "Unknown time" |
|
|
|
|
|
def clean_text(text): |
|
"""HTML ํ๊ทธ ์ ๊ฑฐ ๋ฐ ํ
์คํธ ์ ๋ฆฌ""" |
|
text = re.sub(r'\s+', ' ', text) |
|
text = re.sub(r'<[^>]+>', '', text) |
|
return text.strip() |
|
|
|
def get_article_content(url): |
|
"""URL์์ ๊ธฐ์ฌ ๋ด์ฉ ์คํฌ๋ํ""" |
|
if not url or 'github.com' in url or 'twitter.com' in url: |
|
return None |
|
|
|
try: |
|
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'} |
|
response = requests.get(url, headers=headers, timeout=10) |
|
soup = BeautifulSoup(response.text, 'html.parser') |
|
|
|
|
|
for tag in soup(['script', 'style', 'nav', 'footer', 'header']): |
|
tag.decompose() |
|
|
|
paragraphs = soup.find_all('p') |
|
text = ' '.join(p.get_text() for p in paragraphs) |
|
text = clean_text(text) |
|
|
|
return text[:4000] |
|
except Exception as e: |
|
print(f"Scraping error for {url}: {str(e)}") |
|
return None |
|
|
|
def generate_summary(text): |
|
"""CohereForAI ๋ชจ๋ธ์ ์ฌ์ฉํ ์์ฝ ์์ฑ""" |
|
if not text: |
|
return None |
|
|
|
prompt = """๋ฐ๋์ ํ๊ธ(ํ๊ตญ์ด)๋ก ์์ฑํ๋ผ. 250 ํ ํฐ ์ด๋ด๋ก ์์ฝ์ ํ์ฌ์ผ ํ๋ค. Please analyze and summarize the following text in 2-3 sentences. |
|
Focus on the main points and key information: |
|
Text: {text} |
|
|
|
Summary:""" |
|
|
|
try: |
|
response = hf_client.text_generation( |
|
prompt.format(text=text), |
|
max_new_tokens=300, |
|
temperature=0.5, |
|
repetition_penalty=1.2 |
|
) |
|
return response |
|
except Exception as e: |
|
print(f"Summary generation error: {str(e)}") |
|
return None |
|
|
|
def process_hn_story(story, progress=None): |
|
"""๊ฐ๋ณ ์คํ ๋ฆฌ ์ฒ๋ฆฌ ๋ฐ ์์ฝ""" |
|
try: |
|
url = story.get('url') |
|
if not url: |
|
return story, None |
|
|
|
content = get_article_content(url) |
|
if not content: |
|
return story, None |
|
|
|
summary_en = generate_summary(content) |
|
if not summary_en: |
|
return story, None |
|
|
|
summary_ko = translate_to_korean(summary_en) |
|
return story, summary_ko |
|
|
|
except Exception as e: |
|
print(f"Story processing error: {str(e)}") |
|
return story, None |
|
|
|
def refresh_hn_stories(): |
|
"""Hacker News ์คํ ๋ฆฌ ์๋ก๊ณ ์นจ (์ค์๊ฐ ์ถ๋ ฅ ๋ฒ์ )""" |
|
status_msg = "Hacker News ํฌ์คํธ๋ฅผ ๊ฐ์ ธ์ค๋ ์ค..." |
|
outputs = [gr.update(value=status_msg, visible=True)] |
|
|
|
|
|
for comp in hn_article_components: |
|
outputs.extend([ |
|
gr.update(visible=False), |
|
gr.update(), |
|
gr.update() |
|
]) |
|
|
|
yield outputs |
|
|
|
|
|
stories = get_recent_stories() |
|
processed_count = 0 |
|
|
|
|
|
processed_stories = [] |
|
|
|
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor: |
|
future_to_story = {executor.submit(process_hn_story, story): story |
|
for story in stories[:100]} |
|
|
|
for future in concurrent.futures.as_completed(future_to_story): |
|
story, summary = future.result() |
|
processed_count += 1 |
|
|
|
if summary: |
|
|
|
processed_stories.insert(0, (story, summary)) |
|
|
|
|
|
outputs = [gr.update(value=f"์ฒ๋ฆฌ ์ค... ({processed_count}/{len(stories)})", visible=True)] |
|
|
|
|
|
for idx, comp in enumerate(hn_article_components): |
|
if idx < len(processed_stories): |
|
current_story, current_summary = processed_stories[idx] |
|
outputs.extend([ |
|
gr.update(visible=True), |
|
gr.update(value=f"### [{current_story.get('title', 'Untitled')}]({current_story.get('url', '#')})"), |
|
gr.update(value=f""" |
|
**์์ฑ์:** {current_story.get('by', 'unknown')} | |
|
**์๊ฐ:** {format_hn_time(current_story.get('time', 0))} | |
|
**์ ์:** {current_story.get('score', 0)} | |
|
**๋๊ธ:** {len(current_story.get('kids', []))}๊ฐ\n |
|
**AI ์์ฝ:** {current_summary} |
|
""") |
|
]) |
|
else: |
|
outputs.extend([ |
|
gr.update(visible=False), |
|
gr.update(), |
|
gr.update() |
|
]) |
|
|
|
yield outputs |
|
|
|
|
|
final_outputs = [gr.update(value=f"์ด {len(processed_stories)}๊ฐ์ ํฌ์คํธ๊ฐ ์ฒ๋ฆฌ๋์์ต๋๋ค.", visible=True)] |
|
|
|
for idx, comp in enumerate(hn_article_components): |
|
if idx < len(processed_stories): |
|
story, summary = processed_stories[idx] |
|
outputs.extend([ |
|
gr.update(visible=True), |
|
gr.update(value=f"### [{story.get('title', 'Untitled')}]({story.get('url', '#')})"), |
|
gr.update(value=f""" |
|
**์์ฑ์:** {story.get('by', 'unknown')} | |
|
**์๊ฐ:** {format_hn_time(story.get('time', 0))} | |
|
**์ ์:** {story.get('score', 0)} | |
|
**๋๊ธ:** {len(story.get('kids', []))}๊ฐ |
|
"""), |
|
gr.update(value=f"**AI ์์ฝ:**\n{summary}"), |
|
gr.update(visible=True), |
|
gr.update(visible=False), |
|
gr.update(value=""), |
|
gr.update(value="ํผ์ณ ๋ณด๊ธฐ") |
|
]) |
|
|
|
|
|
comp['report_button'].click( |
|
generate_report, |
|
inputs=[comp['title'], comp['summary']], |
|
outputs=[comp['report_content']], |
|
_js="() => {document.querySelector('.reporting-section').style.display='block';}" |
|
) |
|
|
|
|
|
comp['show_report'].click( |
|
toggle_report, |
|
inputs=[comp['report_section'], comp['report_content'], comp['show_report']], |
|
outputs=[comp['report_section'], comp['show_report']] |
|
) |
|
|
|
yield final_outputs |
|
|
|
|
|
|
|
|
|
def generate_report(title, summary): |
|
"""๋ฆฌํฌํ
์์ฑ""" |
|
prompt = f"""๋๋ Hacker News ํฌ์คํธ๋ฅผ ๊ธฐ๋ฐ์ผ๋ก ๋ณด๋ ๊ธฐ์ฌ ํํ์ ๋ฆฌํฌํ
์ ์์ฑํ๋ ์ญํ ์ด๋ค. |
|
๋๋ ๋ฐ๋์ ํ๊ธ๋ก ๋ฆฌํฌํ
ํ์์ ๊ฐ๊ด์ ๊ธฐ์ฌ ํํ๋ก ์์ฑํ์ฌ์ผ ํ๋ค. |
|
์์ฑ์ 6ํ์์น์ ์
๊ฐํ๊ณ ๊ธธ์ด๋ 4000ํ ํฐ์ ๋์ง ์์๊ฒ. |
|
๋์ ์ถ์ฒ๋ ๋ชจ๋ธ, ์ง์๋ฌธ ๋ฑ์ ๋
ธ์ถํ์ง ๋ง๊ฒ |
|
|
|
์ ๋ชฉ: {title} |
|
๋ด์ฉ ์์ฝ: {summary} |
|
""" |
|
|
|
try: |
|
response = hf_client.text_generation( |
|
prompt, |
|
max_new_tokens=4000, |
|
temperature=0.7, |
|
repetition_penalty=1.2 |
|
) |
|
return response |
|
except Exception as e: |
|
print(f"Report generation error: {str(e)}") |
|
return None |
|
|
|
def toggle_report(report_section, report_content, show_report): |
|
"""๋ฆฌํฌํธ ํ์/์จ๊น ํ ๊ธ""" |
|
is_visible = report_section.visible |
|
return { |
|
report_section: gr.update(visible=not is_visible), |
|
show_report: gr.update(value="์ ๊ธฐ" if not is_visible else "ํผ์ณ ๋ณด๊ธฐ") |
|
} |
|
|
|
|
|
css = """ |
|
footer {visibility: hidden;} |
|
#status_area { |
|
background: rgba(255, 255, 255, 0.9); /* ์ฝ๊ฐ ํฌ๋ช
ํ ํฐ์ ๋ฐฐ๊ฒฝ */ |
|
padding: 15px; |
|
border-bottom: 1px solid #ddd; |
|
margin-bottom: 20px; |
|
box-shadow: 0 2px 5px rgba(0,0,0,0.1); /* ๋ถ๋๋ฌ์ด ๊ทธ๋ฆผ์ ํจ๊ณผ */ |
|
} |
|
#results_area { |
|
padding: 10px; |
|
margin-top: 10px; |
|
} |
|
/* ํญ ์คํ์ผ ๊ฐ์ */ |
|
.tabs { |
|
border-bottom: 2px solid #ddd !important; |
|
margin-bottom: 20px !important; |
|
} |
|
.tab-nav { |
|
border-bottom: none !important; |
|
margin-bottom: 0 !important; |
|
} |
|
.tab-nav button { |
|
font-weight: bold !important; |
|
padding: 10px 20px !important; |
|
} |
|
.tab-nav button.selected { |
|
border-bottom: 2px solid #1f77b4 !important; /* ์ ํ๋ ํญ ๊ฐ์กฐ */ |
|
color: #1f77b4 !important; |
|
} |
|
/* ๊ฒ์ ์ํ ๋ฉ์์ง ์คํ์ผ */ |
|
#status_area .markdown-text { |
|
font-size: 1.1em; |
|
color: #2c3e50; |
|
padding: 10px 0; |
|
} |
|
/* ๊ฒ์ ๊ฒฐ๊ณผ ์ปจํ
์ด๋ ์คํ์ผ */ |
|
.group { |
|
border: 1px solid #eee; |
|
padding: 15px; |
|
margin-bottom: 15px; |
|
border-radius: 5px; |
|
background: white; |
|
} |
|
/* ๊ฒ์ ๋ฒํผ ์คํ์ผ */ |
|
.primary-btn { |
|
background: #1f77b4 !important; |
|
border: none !important; |
|
} |
|
/* ๊ฒ์์ด ์
๋ ฅ์ฐฝ ์คํ์ผ */ |
|
.textbox { |
|
border: 1px solid #ddd !important; |
|
border-radius: 4px !important; |
|
} |
|
|
|
.hn-article-group { |
|
height: 250px; /* ๊ณ ์ ๋์ด ์ค์ */ |
|
overflow: hidden; /* ๋ด์ฉ์ด ๋์น๋ฉด ์จ๊น */ |
|
margin-bottom: 20px; |
|
padding: 15px; |
|
border: 1px solid #eee; |
|
border-radius: 5px; |
|
} |
|
.hn-summary { |
|
height: 100px; /* ์์ฝ ํ
์คํธ ์์ญ ๊ณ ์ ๋์ด */ |
|
overflow: hidden; |
|
text-overflow: ellipsis; |
|
display: -webkit-box; |
|
-webkit-line-clamp: 4; /* ์ต๋ 4์ค๊น์ง ํ์ */ |
|
-webkit-box-orient: vertical; |
|
} |
|
.reporting-section { |
|
margin-top: 10px; |
|
border-top: 1px solid #eee; |
|
padding-top: 10px; |
|
} |
|
""" |
|
|
|
with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css, title="NewsAI ์๋น์ค") as iface: |
|
with gr.Tabs(): |
|
|
|
with gr.Tab("๊ตญ๊ฐ๋ณ"): |
|
gr.Markdown("๊ฒ์์ด๋ฅผ ์
๋ ฅํ๊ณ ์ํ๋ ๊ตญ๊ฐ(ํ๊ตญ ์ ์ธ)๋ฅผ๋ฅผ ์ ํํ๋ฉด, ๊ฒ์์ด์ ์ผ์นํ๋ 24์๊ฐ ์ด๋ด ๋ด์ค๋ฅผ ์ต๋ 100๊ฐ ์ถ๋ ฅํฉ๋๋ค.") |
|
gr.Markdown("๊ตญ๊ฐ ์ ํํ ๊ฒ์์ด์ 'ํ๊ธ'์ ์
๋ ฅํ๋ฉด ํ์ง ์ธ์ด๋ก ๋ฒ์ญ๋์ด ๊ฒ์ํฉ๋๋ค. ์: 'Taiwan' ๊ตญ๊ฐ ์ ํํ '์ผ์ฑ' ์
๋ ฅ์ 'ไธๆ'์ผ๋ก ์๋ ๊ฒ์") |
|
|
|
with gr.Column(): |
|
with gr.Row(): |
|
query = gr.Textbox(label="๊ฒ์์ด") |
|
country = gr.Dropdown(MAJOR_COUNTRIES, label="๊ตญ๊ฐ", value="United States") |
|
|
|
status_message = gr.Markdown("", visible=True) |
|
translated_query_display = gr.Markdown(visible=False) |
|
search_button = gr.Button("๊ฒ์", variant="primary") |
|
|
|
progress = gr.Progress() |
|
articles_state = gr.State([]) |
|
|
|
article_components = [] |
|
for i in range(100): |
|
with gr.Group(visible=False) as article_group: |
|
title = gr.Markdown() |
|
image = gr.Image(width=200, height=150) |
|
snippet = gr.Markdown() |
|
info = gr.Markdown() |
|
|
|
article_components.append({ |
|
'group': article_group, |
|
'title': title, |
|
'image': image, |
|
'snippet': snippet, |
|
'info': info, |
|
'index': i, |
|
}) |
|
|
|
|
|
with gr.Tab("์ ์ธ๊ณ"): |
|
gr.Markdown("๊ฒ์์ด๋ฅผ ์
๋ ฅํ๋ฉด 67๊ฐ๊ตญ(ํ๊ตญ ์ ์ธ) ์ ์ฒด์ ๋ํด ๊ตญ๊ฐ๋ณ๋ก ๊ตฌ๋ถํ์ฌ 24์๊ฐ ์ด๋ด ๋ด์ค๊ฐ ์ต๋ 1000๊ฐ ์์ฐจ ์ถ๋ ฅ๋ฉ๋๋ค.") |
|
gr.Markdown("๊ตญ๊ฐ ์ ํํ ๊ฒ์์ด์ 'ํ๊ธ'์ ์
๋ ฅํ๋ฉด ํ์ง ์ธ์ด๋ก ๋ฒ์ญ๋์ด ๊ฒ์ํฉ๋๋ค. ์: 'Taiwan' ๊ตญ๊ฐ ์ ํํ '์ผ์ฑ' ์
๋ ฅ์ 'ไธๆ'์ผ๋ก ์๋ ๊ฒ์") |
|
|
|
with gr.Column(): |
|
with gr.Column(elem_id="status_area"): |
|
with gr.Row(): |
|
query_global = gr.Textbox(label="๊ฒ์์ด") |
|
search_button_global = gr.Button("์ ์ธ๊ณ ๊ฒ์", variant="primary") |
|
|
|
status_message_global = gr.Markdown("") |
|
translated_query_display_global = gr.Markdown("") |
|
|
|
with gr.Column(elem_id="results_area"): |
|
articles_state_global = gr.State([]) |
|
|
|
global_article_components = [] |
|
for i in range(1000): |
|
with gr.Group(visible=False) as article_group: |
|
title = gr.Markdown() |
|
image = gr.Image(width=200, height=150) |
|
snippet = gr.Markdown() |
|
info = gr.Markdown() |
|
|
|
global_article_components.append({ |
|
'group': article_group, |
|
'title': title, |
|
'image': image, |
|
'snippet': snippet, |
|
'info': info, |
|
'index': i, |
|
}) |
|
|
|
with gr.Tab("AI ๋ฆฌํฌํฐ"): |
|
gr.Markdown("์ง๋ 24์๊ฐ ๋์์ Hacker News ํฌ์คํธ๋ฅผ AI๊ฐ ์์ฝํ์ฌ ๋ณด์ฌ์ค๋๋ค.") |
|
|
|
with gr.Column(): |
|
refresh_button = gr.Button("์๋ก๊ณ ์นจ", variant="primary") |
|
status_message_hn = gr.Markdown("") |
|
|
|
with gr.Column(elem_id="hn_results_area"): |
|
hn_articles_state = gr.State([]) |
|
|
|
hn_article_components = [] |
|
for i in range(100): |
|
with gr.Group(visible=False, elem_classes="hn-article-group") as article_group: |
|
title = gr.Markdown() |
|
info = gr.Markdown() |
|
with gr.Column(elem_classes="hn-summary"): |
|
summary = gr.Markdown() |
|
report_button = gr.Button("๋ฆฌํฌํ
์์ฑ", size="sm") |
|
with gr.Column(visible=False, elem_classes="reporting-section") as report_section: |
|
report_content = gr.Markdown() |
|
show_report = gr.Button("ํผ์ณ ๋ณด๊ธฐ") |
|
|
|
hn_article_components.append({ |
|
'group': article_group, |
|
'title': title, |
|
'info': info, |
|
'summary': summary, |
|
'report_button': report_button, |
|
'report_section': report_section, |
|
'report_content': report_content, |
|
'show_report': show_report, |
|
'index': i, |
|
}) |
|
|
|
|
|
|
|
|
|
|
|
|
|
def search_and_display(query, country, articles_state, progress=gr.Progress()): |
|
status_msg = "๊ฒ์์ ์งํ์ค์
๋๋ค. ์ ์๋ง ๊ธฐ๋ค๋ฆฌ์ธ์..." |
|
|
|
progress(0, desc="๊ฒ์์ด ๋ฒ์ญ ์ค...") |
|
translated_query = translate_query(query, country) |
|
translated_display = f"**์๋ณธ ๊ฒ์์ด:** {query}\n**๋ฒ์ญ๋ ๊ฒ์์ด:** {translated_query}" if translated_query != query else f"**๊ฒ์์ด:** {query}" |
|
|
|
progress(0.2, desc="๊ฒ์ ์์...") |
|
error_message, articles = serphouse_search(query, country) |
|
progress(0.5, desc="๊ฒฐ๊ณผ ์ฒ๋ฆฌ ์ค...") |
|
|
|
outputs = [] |
|
outputs.append(gr.update(value=status_msg, visible=True)) |
|
outputs.append(gr.update(value=translated_display, visible=True)) |
|
|
|
if error_message: |
|
outputs.append(gr.update(value=error_message, visible=True)) |
|
for comp in article_components: |
|
outputs.extend([ |
|
gr.update(visible=False), gr.update(), gr.update(), |
|
gr.update(), gr.update() |
|
]) |
|
articles_state = [] |
|
else: |
|
outputs.append(gr.update(value="", visible=False)) |
|
total_articles = len(articles) |
|
for idx, comp in enumerate(article_components): |
|
progress((idx + 1) / total_articles, desc=f"๊ฒฐ๊ณผ ํ์ ์ค... {idx + 1}/{total_articles}") |
|
if idx < len(articles): |
|
article = articles[idx] |
|
image_url = article['image_url'] |
|
image_update = gr.update(value=image_url, visible=True) if image_url and not image_url.startswith('data:image') else gr.update(value=None, visible=False) |
|
|
|
korean_summary = translate_to_korean(article['snippet']) |
|
|
|
outputs.extend([ |
|
gr.update(visible=True), |
|
gr.update(value=f"### [{article['title']}]({article['link']})"), |
|
image_update, |
|
gr.update(value=f"**์์ฝ:** {article['snippet']}\n\n**ํ๊ธ ์์ฝ:** {korean_summary}"), |
|
gr.update(value=f"**์ถ์ฒ:** {article['channel']} | **์๊ฐ:** {article['time']}") |
|
]) |
|
else: |
|
outputs.extend([ |
|
gr.update(visible=False), gr.update(), gr.update(), |
|
gr.update(), gr.update() |
|
]) |
|
articles_state = articles |
|
|
|
progress(1.0, desc="์๋ฃ!") |
|
outputs.append(articles_state) |
|
outputs[0] = gr.update(value="", visible=False) |
|
|
|
return outputs |
|
|
|
def search_global(query, articles_state_global): |
|
status_msg = "์ ์ธ๊ณ ๊ฒ์์ ์์ํฉ๋๋ค..." |
|
all_results = [] |
|
|
|
outputs = [ |
|
gr.update(value=status_msg, visible=True), |
|
gr.update(value=f"**๊ฒ์์ด:** {query}", visible=True), |
|
] |
|
|
|
for _ in global_article_components: |
|
outputs.extend([ |
|
gr.update(visible=False), gr.update(), gr.update(), |
|
gr.update(), gr.update() |
|
]) |
|
outputs.append([]) |
|
|
|
yield outputs |
|
|
|
total_countries = len(COUNTRY_LOCATIONS) |
|
for idx, (country, location) in enumerate(COUNTRY_LOCATIONS.items(), 1): |
|
try: |
|
status_msg = f"{country} ๊ฒ์ ์ค... ({idx}/{total_countries} ๊ตญ๊ฐ)" |
|
outputs[0] = gr.update(value=status_msg, visible=True) |
|
yield outputs |
|
|
|
error_message, articles = serphouse_search(query, country) |
|
if not error_message and articles: |
|
for article in articles: |
|
article['source_country'] = country |
|
|
|
all_results.extend(articles) |
|
sorted_results = sorted(all_results, key=lambda x: x.get('time', ''), reverse=True) |
|
|
|
seen_urls = set() |
|
unique_results = [] |
|
for article in sorted_results: |
|
url = article.get('link', '') |
|
if url not in seen_urls: |
|
seen_urls.add(url) |
|
unique_results.append(article) |
|
|
|
unique_results = unique_results[:1000] |
|
|
|
outputs = [ |
|
gr.update(value=f"{idx}/{total_countries} ๊ตญ๊ฐ ๊ฒ์ ์๋ฃ\nํ์ฌ๊น์ง ๋ฐ๊ฒฌ๋ ๋ด์ค: {len(unique_results)}๊ฑด", visible=True), |
|
gr.update(value=f"**๊ฒ์์ด:** {query}", visible=True), |
|
] |
|
|
|
for idx, comp in enumerate(global_article_components): |
|
if idx < len(unique_results): |
|
article = unique_results[idx] |
|
image_url = article.get('image_url', '') |
|
image_update = gr.update(value=image_url, visible=True) if image_url and not image_url.startswith('data:image') else gr.update(value=None, visible=False) |
|
|
|
korean_summary = translate_to_korean(article['snippet']) |
|
|
|
outputs.extend([ |
|
gr.update(visible=True), |
|
gr.update(value=f"### [{article['title']}]({article['link']})"), |
|
image_update, |
|
gr.update(value=f"**์์ฝ:** {article['snippet']}\n\n**ํ๊ธ ์์ฝ:** {korean_summary}"), |
|
gr.update(value=f"**์ถ์ฒ:** {article['channel']} | **๊ตญ๊ฐ:** {article['source_country']} | **์๊ฐ:** {article['time']}") |
|
]) |
|
else: |
|
outputs.extend([ |
|
gr.update(visible=False), gr.update(), gr.update(), |
|
gr.update(), gr.update() |
|
]) |
|
|
|
outputs.append(unique_results) |
|
yield outputs |
|
|
|
except Exception as e: |
|
print(f"Error searching {country}: {str(e)}") |
|
continue |
|
|
|
final_status = f"๊ฒ์ ์๋ฃ! ์ด {len(unique_results)}๊ฐ์ ๋ด์ค๊ฐ ๋ฐ๊ฒฌ๋์์ต๋๋ค." |
|
outputs[0] = gr.update(value=final_status, visible=True) |
|
yield outputs |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
search_outputs = [ |
|
status_message, |
|
translated_query_display, |
|
gr.Markdown(visible=False) |
|
] |
|
|
|
for comp in article_components: |
|
search_outputs.extend([ |
|
comp['group'], comp['title'], comp['image'], |
|
comp['snippet'], comp['info'] |
|
]) |
|
search_outputs.append(articles_state) |
|
|
|
search_button.click( |
|
search_and_display, |
|
inputs=[query, country, articles_state], |
|
outputs=search_outputs, |
|
show_progress=True |
|
) |
|
|
|
|
|
global_search_outputs = [ |
|
status_message_global, |
|
translated_query_display_global, |
|
] |
|
|
|
for comp in global_article_components: |
|
global_search_outputs.extend([ |
|
comp['group'], comp['title'], comp['image'], |
|
comp['snippet'], comp['info'] |
|
]) |
|
global_search_outputs.append(articles_state_global) |
|
|
|
search_button_global.click( |
|
search_global, |
|
inputs=[query_global, articles_state_global], |
|
outputs=global_search_outputs |
|
) |
|
|
|
|
|
hn_outputs = [status_message_hn] |
|
for comp in hn_article_components: |
|
hn_outputs.extend([ |
|
comp['group'], |
|
comp['title'], |
|
comp['info'] |
|
]) |
|
|
|
refresh_button.click( |
|
refresh_hn_stories, |
|
outputs=hn_outputs |
|
) |
|
|
|
|
|
iface.launch( |
|
server_name="0.0.0.0", |
|
server_port=7860, |
|
share=False, |
|
auth=("it1","chosun1"), |
|
ssl_verify=False, |
|
show_error=True |
|
) |