|
import gradio as gr |
|
import requests |
|
import json |
|
import os |
|
from datetime import datetime, timedelta |
|
from concurrent.futures import ThreadPoolExecutor |
|
from functools import lru_cache |
|
from requests.adapters import HTTPAdapter |
|
from requests.packages.urllib3.util.retry import Retry |
|
from openai import OpenAI |
|
from bs4 import BeautifulSoup |
|
import re |
|
import json |
|
import os |
|
from datetime import datetime |
|
import sqlite3 |
|
import pathlib |
|
|
|
|
|
def init_db():
    """Create ``search_results.db`` and its ``searches`` table if missing.

    The path is relative to the current working directory. The table is
    created with ``IF NOT EXISTS``, so repeated calls are harmless.
    """
    db_path = pathlib.Path("search_results.db")
    conn = sqlite3.connect(db_path)
    try:
        conn.execute(
            '''CREATE TABLE IF NOT EXISTS searches
               (id INTEGER PRIMARY KEY AUTOINCREMENT,
                keyword TEXT,
                country TEXT,
                results TEXT,
                timestamp DATETIME DEFAULT CURRENT_TIMESTAMP)'''
        )
        conn.commit()
    finally:
        # Close even when the DDL fails so the connection is not leaked.
        conn.close()
|
|
|
|
|
def save_to_db(keyword, country, results):
    """Insert one result set into the ``searches`` table.

    Args:
        keyword: Value stored in the ``keyword`` column.
        country: Value stored in the ``country`` column.
        results: JSON-serialisable object, stored as a JSON string.
    """
    # Serialise first so an unserialisable payload never opens a connection.
    payload = json.dumps(results)
    conn = sqlite3.connect("search_results.db")
    try:
        conn.execute(
            "INSERT INTO searches (keyword, country, results) VALUES (?, ?, ?)",
            (keyword, country, payload),
        )
        conn.commit()
    finally:
        # Release the connection even if the INSERT fails.
        conn.close()
|
|
|
|
|
def load_from_db(keyword, country):
    """Return the newest stored result set for ``keyword`` and ``country``.

    Returns:
        ``(results, timestamp)`` where ``results`` is the decoded JSON value
        and ``timestamp`` is the stored timestamp column, or ``(None, None)``
        when no matching row exists.
    """
    conn = sqlite3.connect("search_results.db")
    try:
        # The ``id DESC`` tie-break picks the latest insert when several rows
        # share a timestamp (CURRENT_TIMESTAMP has one-second resolution).
        row = conn.execute(
            "SELECT results, timestamp FROM searches "
            "WHERE keyword=? AND country=? "
            "ORDER BY timestamp DESC, id DESC LIMIT 1",
            (keyword, country),
        ).fetchone()
    finally:
        conn.close()

    if row:
        return json.loads(row[0]), row[1]
    return None, None
|
|
|
|
|
def search_samsung_us():
    """Search United States news for "samsung", store the hits, return Markdown.

    Returns:
        The rendered article list on success; the message reported by
        ``serphouse_search`` when it signals a problem; otherwise the
        "no results" notice.
    """
    error_message, articles = serphouse_search("samsung", "United States")
    if error_message:
        # Surface the real failure instead of masking it as "no results".
        return error_message
    if not articles:
        return "검색 결과가 없습니다."

    save_to_db("samsung", "United States", articles)
    return display_results(articles)
|
|
|
|
|
def load_samsung_us():
    """Return the latest stored "samsung" / "United States" results as Markdown.

    The text starts with the stored timestamp; a notice string is returned
    when nothing has been saved.
    """
    stored_articles, saved_at = load_from_db("samsung", "United States")
    if not stored_articles:
        return "저장된 결과가 없습니다."

    header = f"저장 시간: {saved_at}\n\n"
    return header + display_results(stored_articles)
|
|
|
|
|
def display_results(articles):
    """Render ``articles`` as one Markdown string, numbering entries from 1.

    Each article must provide the keys ``title``, ``channel``, ``time``,
    ``link`` and ``snippet``. An empty iterable yields an empty string.
    """
    sections = [
        f"### {position}. {entry['title']}\n"
        f"출처: {entry['channel']}\n"
        f"시간: {entry['time']}\n"
        f"링크: {entry['link']}\n"
        f"요약: {entry['snippet']}\n\n"
        for position, entry in enumerate(articles, 1)
    ]
    return "".join(sections)
|
|
|
|
|
# Token read from the HF_TOKEN environment variable; used as the API key below.
ACCESS_TOKEN = os.environ.get("HF_TOKEN")
if not ACCESS_TOKEN:
    # Abort at import time when the token is missing.
    raise ValueError("HF_TOKEN environment variable is not set")

# OpenAI-compatible client pointed at the Hugging Face inference endpoint.
client = OpenAI(
    api_key=ACCESS_TOKEN,
    base_url="https://api-inference.huggingface.co/v1/",
)

# Upper bounds on result counts; MAX_GLOBAL_RESULTS caps the merged list
# built by search_global.
MAX_COUNTRY_RESULTS = 100
MAX_GLOBAL_RESULTS = 1000
|
|
|
def create_article_components(max_results):
    """Create ``max_results`` hidden article groups and return their handles.

    Each group holds a title Markdown, a 200x150 Image, a snippet Markdown
    and an info Markdown.

    Returns:
        A list of dicts with the keys ``group``, ``title``, ``image``,
        ``snippet``, ``info`` and ``index`` (the zero-based position).
    """

    def build_slot(position):
        # One initially hidden group per potential article.
        with gr.Group(visible=False) as group:
            title_md = gr.Markdown()
            thumbnail = gr.Image(width=200, height=150)
            snippet_md = gr.Markdown()
            info_md = gr.Markdown()
        return {
            'group': group,
            'title': title_md,
            'image': thumbnail,
            'snippet': snippet_md,
            'info': info_md,
            'index': position,
        }

    return [build_slot(position) for position in range(max_results)]
|
|
|
# SERPHouse API key from the environment; None when the variable is unset.
API_KEY = os.environ.get("SERPHOUSE_API_KEY")
|
|
|
|
|
# Country display name -> language code sent as the search "lang" and used as
# the translation target.
COUNTRY_LANGUAGES = {
    "United States": "en",
    "KOREA": "ko",
    "United Kingdom": "en",
    "Taiwan": "zh-TW",
    "Canada": "en",
    "Australia": "en",
    "Germany": "de",
    "France": "fr",
    "Japan": "ja",
    "China": "zh",
    "India": "hi",
    "Brazil": "pt",
    "Mexico": "es",
    "Russia": "ru",
    "Italy": "it",
    "Spain": "es",
    "Netherlands": "nl",
    "Singapore": "en",
    "Hong Kong": "zh-HK",
    "Indonesia": "id",
    "Malaysia": "ms",
    "Philippines": "tl",
    "Thailand": "th",
    "Vietnam": "vi",
    "Belgium": "nl",
    "Denmark": "da",
    "Finland": "fi",
    "Ireland": "en",
    "Norway": "no",
    "Poland": "pl",
    "Sweden": "sv",
    "Switzerland": "de",
    "Austria": "de",
    "Czech Republic": "cs",
    "Greece": "el",
    "Hungary": "hu",
    "Portugal": "pt",
    "Romania": "ro",
    "Turkey": "tr",
    "Israel": "he",
    "Saudi Arabia": "ar",
    "United Arab Emirates": "ar",
    "South Africa": "en",
    "Argentina": "es",
    "Chile": "es",
    "Colombia": "es",
    "Peru": "es",
    "Venezuela": "es",
    "New Zealand": "en",
    "Bangladesh": "bn",
    "Pakistan": "ur",
    "Egypt": "ar",
    "Morocco": "ar",
    "Nigeria": "en",
    "Kenya": "sw",
    "Ukraine": "uk",
    "Croatia": "hr",
    "Slovakia": "sk",
    "Bulgaria": "bg",
    "Serbia": "sr",
    "Estonia": "et",
    "Latvia": "lv",
    "Lithuania": "lt",
    "Slovenia": "sl",
    "Luxembourg": "fr",
    "Malta": "mt",
    "Cyprus": "el",
    "Iceland": "is",
    # Listed in the Southeast Asia/Oceania regional table; without an entry
    # here search_serphouse falls back to "United States"/"en" for it.
    "Papua New Guinea": "en",
}

# Countries whose search location string differs from the display name.
# NOTE(review): "kr" is the only value that is not a full country name, and
# the East Asia regional table maps KOREA to "KOREA" — confirm which form the
# search API expects.
_LOCATION_OVERRIDES = {"KOREA": "kr"}

# Country display name -> location string sent as the search "loc".
COUNTRY_LOCATIONS = {
    name: _LOCATION_OVERRIDES.get(name, name) for name in COUNTRY_LANGUAGES
}
|
|
|
|
|
|
|
# East Asia: country name -> language code.
COUNTRY_LANGUAGES_EAST_ASIA = {
    "KOREA": "ko",
    "Taiwan": "zh-TW",
    "Japan": "ja",
    "China": "zh",
    "Hong Kong": "zh-HK",
}

# East Asia: every location string equals the country name.
COUNTRY_LOCATIONS_EAST_ASIA = {name: name for name in COUNTRY_LANGUAGES_EAST_ASIA}
|
|
|
|
|
# Southeast Asia / Oceania: country name -> language code.
COUNTRY_LANGUAGES_SOUTHEAST_ASIA_OCEANIA = {
    "Indonesia": "id",
    "Malaysia": "ms",
    "Philippines": "tl",
    "Thailand": "th",
    "Vietnam": "vi",
    "Singapore": "en",
    "Papua New Guinea": "en",
    "Australia": "en",
    "New Zealand": "en",
}

# Southeast Asia / Oceania: every location string equals the country name.
COUNTRY_LOCATIONS_SOUTHEAST_ASIA_OCEANIA = {
    name: name for name in COUNTRY_LANGUAGES_SOUTHEAST_ASIA_OCEANIA
}
|
|
|
|
|
# Eastern Europe: country name -> language code.
COUNTRY_LANGUAGES_EAST_EUROPE = {
    "Poland": "pl",
    "Czech Republic": "cs",
    "Greece": "el",
    "Hungary": "hu",
    "Romania": "ro",
    "Ukraine": "uk",
    "Croatia": "hr",
    "Slovakia": "sk",
    "Bulgaria": "bg",
    "Serbia": "sr",
    "Estonia": "et",
    "Latvia": "lv",
    "Lithuania": "lt",
    "Slovenia": "sl",
    "Malta": "mt",
    "Cyprus": "el",
    "Iceland": "is",
    "Russia": "ru",
}

# Eastern Europe: every location string equals the country name.
COUNTRY_LOCATIONS_EAST_EUROPE = {name: name for name in COUNTRY_LANGUAGES_EAST_EUROPE}
|
|
|
|
|
# Western Europe: country name -> language code.
COUNTRY_LANGUAGES_WEST_EUROPE = {
    "Germany": "de",
    "France": "fr",
    "Italy": "it",
    "Spain": "es",
    "Netherlands": "nl",
    "Belgium": "nl",
    "Ireland": "en",
    "Sweden": "sv",
    "Switzerland": "de",
    "Austria": "de",
    "Portugal": "pt",
    "Luxembourg": "fr",
    "United Kingdom": "en",
}

# Western Europe: every location string equals the country name.
COUNTRY_LOCATIONS_WEST_EUROPE = {name: name for name in COUNTRY_LANGUAGES_WEST_EUROPE}
|
|
|
|
|
# Middle East / Africa: country name -> language code.
COUNTRY_LANGUAGES_ARAB_AFRICA = {
    "South Africa": "en",
    "Nigeria": "en",
    "Kenya": "sw",
    "Egypt": "ar",
    "Morocco": "ar",
    "Saudi Arabia": "ar",
    "United Arab Emirates": "ar",
    "Israel": "he",
}

# Middle East / Africa: every location string equals the country name.
COUNTRY_LOCATIONS_ARAB_AFRICA = {name: name for name in COUNTRY_LANGUAGES_ARAB_AFRICA}
|
|
|
|
|
# Americas: country name -> language code.
COUNTRY_LANGUAGES_AMERICA = {
    "United States": "en",
    "Canada": "en",
    "Mexico": "es",
    "Brazil": "pt",
    "Argentina": "es",
    "Chile": "es",
    "Colombia": "es",
    "Peru": "es",
    "Venezuela": "es",
}

# Americas: every location string equals the country name.
COUNTRY_LOCATIONS_AMERICA = {name: name for name in COUNTRY_LANGUAGES_AMERICA}
|
|
|
|
|
# Region labels; each one is a key recognised by get_region_countries.
REGIONS = [
    "동아시아", "동남아시아/오세아니아",
    "동유럽", "서유럽",
    "중동/아프리카", "아메리카",
]
|
|
|
|
|
@lru_cache(maxsize=100)
def translate_query(query, country):
    """Translate a non-ASCII query into the language mapped to ``country``.

    ASCII-only queries and countries missing from ``COUNTRY_LANGUAGES`` are
    returned unchanged. Any failure is printed and the original query is
    returned, so callers never see an exception. Results (including
    fallbacks) are memoised by ``lru_cache``.

    Args:
        query: Search text entered by the user.
        country: Key of ``COUNTRY_LANGUAGES`` selecting the target language.

    Returns:
        The translated text, or ``query`` when no translation is done.
    """
    try:
        if is_english(query):
            return query

        if country not in COUNTRY_LANGUAGES:
            return query

        # NOTE(review): the Korean entry in COUNTRY_LANGUAGES is spelled
        # "KOREA", so this guard looks unreachable — confirm the intent.
        if country == "South Korea":
            return query

        params = {
            "client": "gtx",
            "sl": "auto",
            "tl": COUNTRY_LANGUAGES[country],
            "dt": "t",
            "q": query
        }

        retries = Retry(total=3, backoff_factor=0.5)
        # Context manager closes the session's pooled connections.
        with requests.Session() as session:
            session.mount('https://', HTTPAdapter(max_retries=retries))
            response = session.get(
                "https://translate.googleapis.com/translate_a/single",
                params=params,
                timeout=(5, 10),
            )
            segments = response.json()[0]

        # The first element holds one entry per translated segment; join them
        # all instead of keeping only the first one.
        translated = "".join(
            segment[0] for segment in segments if segment and segment[0]
        )
        return translated or query

    except Exception as e:
        print(f"번역 오류: {str(e)}")
        return query
|
|
|
|
|
@lru_cache(maxsize=200)
def translate_to_korean(text):
    """Translate ``text`` into Korean, returning ``text`` itself on failure.

    Failures are printed rather than raised. Results (including fallbacks)
    are memoised by ``lru_cache``, so ``text`` must be hashable.
    """
    try:
        params = {
            "client": "gtx",
            "sl": "auto",
            "tl": "ko",
            "dt": "t",
            "q": text
        }

        retries = Retry(total=3, backoff_factor=0.5)
        # Context manager closes the session's pooled connections.
        with requests.Session() as session:
            session.mount('https://', HTTPAdapter(max_retries=retries))
            response = session.get(
                "https://translate.googleapis.com/translate_a/single",
                params=params,
                timeout=(5, 10),
            )
            segments = response.json()[0]

        # Multi-sentence input comes back as several segments; keeping only
        # the first one would truncate the translation.
        translated = "".join(
            segment[0] for segment in segments if segment and segment[0]
        )
        return translated or text
    except Exception as e:
        print(f"한글 번역 오류: {str(e)}")
        return text
|
|
|
def is_english(text):
    """Return True when ``text`` contains no character with code point >= 128.

    Spaces, hyphens and underscores are ASCII themselves, so they never
    affect the result; an empty string counts as English.
    """
    return not any(ord(symbol) >= 128 for symbol in text)
|
|
|
def is_korean(text):
    """Return True when ``text`` has at least one character in U+AC00..U+D7A3."""
    for symbol in text:
        if '\uAC00' <= symbol <= '\uD7A3':
            return True
    return False
|
|
|
def search_serphouse(query, country, page=1, num_result=10):
    """Run a news search on the SERPHouse live endpoint for the last day.

    The query is first passed through ``translate_query``. The date range
    spans yesterday to today in UTC.

    Args:
        query: Search text as entered by the user.
        country: Key into ``COUNTRY_LOCATIONS`` / ``COUNTRY_LANGUAGES``;
            unknown keys fall back to "United States" / "en".
        page: Result page to request (sent as a string).
        num_result: Accepted for backward compatibility; the request always
            asks for 100 results, as before.

    Returns:
        ``{"results": <decoded JSON>, "translated_query": <str>}`` on
        success, or ``{"error": <message>, "translated_query": query}`` when
        the request fails.
    """
    # Local import: the file-level import only brings in datetime/timedelta.
    from datetime import timezone

    url = "https://api.serphouse.com/serp/live"

    # Timezone-aware replacement for the deprecated datetime.utcnow().
    now = datetime.now(timezone.utc)
    yesterday = now - timedelta(days=1)
    date_range = f"{yesterday.strftime('%Y-%m-%d')},{now.strftime('%Y-%m-%d')}"

    translated_query = translate_query(query, country)

    payload = {
        "data": {
            "q": translated_query,
            "domain": "google.com",
            "loc": COUNTRY_LOCATIONS.get(country, "United States"),
            "lang": COUNTRY_LANGUAGES.get(country, "en"),
            "device": "desktop",
            "serp_type": "news",
            "page": str(page),
            "num": "100",
            "date_range": date_range,
            "sort_by": "date"
        }
    }

    headers = {
        "accept": "application/json",
        "content-type": "application/json",
        "authorization": f"Bearer {API_KEY}"
    }

    try:
        # Retry POSTs on transient server errors and rate limiting.
        retries = Retry(
            total=5,
            backoff_factor=1,
            status_forcelist=[500, 502, 503, 504, 429],
            allowed_methods=["POST"]
        )
        adapter = HTTPAdapter(max_retries=retries)

        # Context manager closes the session's pooled connections.
        with requests.Session() as session:
            session.mount('http://', adapter)
            session.mount('https://', adapter)

            response = session.post(
                url,
                json=payload,
                headers=headers,
                timeout=(30, 30)
            )
            response.raise_for_status()
            return {"results": response.json(), "translated_query": translated_query}

    except requests.exceptions.Timeout:
        return {
            "error": "검색 시간이 초과되었습니다. 잠시 후 다시 시도해주세요.",
            "translated_query": query
        }
    except requests.exceptions.RequestException as e:
        return {
            "error": f"검색 중 오류가 발생했습니다: {str(e)}",
            "translated_query": query
        }
    except Exception as e:
        return {
            "error": f"예기치 않은 오류가 발생했습니다: {str(e)}",
            "translated_query": query
        }
|
|
|
def format_results_from_raw(response_data):
    """Convert a ``search_serphouse`` response into a list of article dicts.

    Articles whose URL or channel contains a Korean-outlet marker, or whose
    title contains a Korea-related keyword, are dropped.

    Args:
        response_data: Dict with either an ``"error"`` key, or ``"results"``
            (decoded API JSON) plus ``"translated_query"``.

    Returns:
        ``(message, articles)``. ``message`` is ``""`` on success, otherwise
        an error or "no results" text accompanied by an empty list. Each
        article has the keys ``index``, ``title``, ``link``, ``snippet``,
        ``channel``, ``time``, ``image_url`` and ``translated_query``.
    """
    if "error" in response_data:
        return "Error: " + response_data["error"], []

    try:
        results = response_data["results"]
        translated_query = response_data["translated_query"]

        news_results = results.get('results', {}).get('results', {}).get('news', [])
        if not news_results:
            return "검색 결과가 없습니다.", []

        korean_domains = ('.kr', 'korea', 'korean', 'yonhap', 'hankyung', 'chosun',
                          'donga', 'joins', 'hani', 'koreatimes', 'koreaherald')
        korean_keywords = ('korea', 'korean', 'seoul', 'busan', 'incheon', 'daegu',
                           'gwangju', 'daejeon', 'ulsan', 'sejong')

        filtered_articles = []
        for idx, result in enumerate(news_results, 1):
            # Keep the URL exactly as delivered for the link: paths and query
            # strings are case-sensitive. Lower-cased copies are used only
            # for matching. ``or ""`` also tolerates explicit None values.
            link = result.get("url") or result.get("link") or ""
            url_lc = link.lower()
            title_lc = (result.get("title") or "").lower()
            channel_lc = (result.get("channel") or result.get("source") or "").lower()

            if any(marker in url_lc or marker in channel_lc for marker in korean_domains):
                continue
            if any(keyword in title_lc for keyword in korean_keywords):
                continue

            filtered_articles.append({
                "index": idx,
                "title": result.get("title", "제목 없음"),
                "link": link,
                "snippet": result.get("snippet", "내용 없음"),
                "channel": result.get("channel", result.get("source", "알 수 없음")),
                "time": result.get("time", result.get("date", "알 수 없는 시간")),
                "image_url": result.get("img", result.get("thumbnail", "")),
                "translated_query": translated_query
            })

        return "", filtered_articles
    except Exception as e:
        return f"결과 처리 중 오류 발생: {str(e)}", []
|
|
|
def serphouse_search(query, country):
    """Search via ``search_serphouse`` and return ``(message, articles)``.

    The raw response is passed straight to ``format_results_from_raw``.
    """
    return format_results_from_raw(search_serphouse(query, country))
|
|
|
|
|
def search_and_display(query, country, articles_state, progress=gr.Progress()):
    """Search one country and build the Gradio updates for the result tab.

    Args:
        query: Search text entered by the user.
        country: Selected country name.
        articles_state: Incoming state value; replaced by the new article
            list (or ``[]`` when the search reports a message).
        progress: Gradio progress tracker.

    Returns:
        A list of updates: status message, query display, error message,
        five updates per entry of the module-level ``article_components``
        (group, title, image, snippet, info), then the new state value.
    """
    with ThreadPoolExecutor(max_workers=3) as executor:
        progress(0, desc="검색어 번역 중...")
        translated_query = executor.submit(translate_query, query, country).result()
        if translated_query != query:
            translated_display = f"**원본 검색어:** {query}\n**번역된 검색어:** {translated_query}"
        else:
            translated_display = f"**검색어:** {query}"

        progress(0.3, desc="검색 중...")
        response_data = search_serphouse(query, country)

        progress(0.6, desc="결과 처리 중...")
        error_message, articles = format_results_from_raw(response_data)

        outputs = [
            gr.update(value="검색을 진행중입니다...", visible=True),
            gr.update(value=translated_display, visible=True),
        ]

        if error_message:
            outputs.append(gr.update(value=error_message, visible=True))
            for _ in article_components:
                outputs.extend([
                    gr.update(visible=False), gr.update(), gr.update(),
                    gr.update(), gr.update()
                ])
            articles_state = []
        else:
            outputs.append(gr.update(value="", visible=False))
            if articles:
                # Translate all snippets concurrently, then collect in order.
                futures = [
                    (article, executor.submit(translate_to_korean, article['snippet']))
                    for article in articles
                ]
                progress(0.8, desc="번역 처리 중...")
                for article, future in futures:
                    article['korean_summary'] = future.result()

            total_articles = len(articles)
            for idx, comp in enumerate(article_components):
                if idx < total_articles:
                    # Progress is reported only for slots that show an
                    # article: this avoids dividing by zero when every
                    # article was filtered out and keeps the fraction <= 1.
                    progress((idx + 1) / total_articles, desc=f"결과 표시 중... {idx + 1}/{total_articles}")
                    article = articles[idx]
                    image_url = article['image_url']
                    if image_url and not image_url.startswith('data:image'):
                        image_update = gr.update(value=image_url, visible=True)
                    else:
                        image_update = gr.update(value=None, visible=False)

                    outputs.extend([
                        gr.update(visible=True),
                        gr.update(value=f"### [{article['title']}]({article['link']})"),
                        image_update,
                        gr.update(value=f"**요약:** {article['snippet']}\n\n**한글 요약:** {article['korean_summary']}"),
                        gr.update(value=f"**출처:** {article['channel']} | **시간:** {article['time']}")
                    ])
                else:
                    outputs.extend([
                        gr.update(visible=False), gr.update(), gr.update(),
                        gr.update(), gr.update()
                    ])
            articles_state = articles

        progress(1.0, desc="완료!")
        outputs.append(articles_state)
        # The search is finished, so hide the "in progress" status message.
        outputs[0] = gr.update(value="", visible=False)

        return outputs
|
|
|
def get_region_countries(region):
    """Return the ``(locations, languages)`` dict pair for the given region.

    Unknown region labels yield two empty dicts.
    """
    region_tables = (
        ("동아시아", (COUNTRY_LOCATIONS_EAST_ASIA, COUNTRY_LANGUAGES_EAST_ASIA)),
        ("동남아시아/오세아니아", (COUNTRY_LOCATIONS_SOUTHEAST_ASIA_OCEANIA, COUNTRY_LANGUAGES_SOUTHEAST_ASIA_OCEANIA)),
        ("동유럽", (COUNTRY_LOCATIONS_EAST_EUROPE, COUNTRY_LANGUAGES_EAST_EUROPE)),
        ("서유럽", (COUNTRY_LOCATIONS_WEST_EUROPE, COUNTRY_LANGUAGES_WEST_EUROPE)),
        ("중동/아프리카", (COUNTRY_LOCATIONS_ARAB_AFRICA, COUNTRY_LANGUAGES_ARAB_AFRICA)),
        ("아메리카", (COUNTRY_LOCATIONS_AMERICA, COUNTRY_LANGUAGES_AMERICA)),
    )
    # Equality scan (not a dict lookup) so unhashable inputs still fall
    # through to the empty default.
    for label, tables in region_tables:
        if region == label:
            return tables
    return {}, {}
|
|
|
def search_global(query, region, articles_state_global):
    """Search every country of ``region`` and stream merged results.

    This is a generator. Each yielded list holds: status update, query
    display update, five updates per entry of the module-level
    ``global_article_components`` (group, title, image, snippet, info), and
    finally the list of unique articles found so far.

    Args:
        query: Search text entered by the user.
        region: Region label understood by ``get_region_countries``.
        articles_state_global: Incoming state value; not read here.
    """
    status_msg = f"{region} 지역 검색을 시작합니다..."
    all_results = []
    # Defined up front so the final status message works even when no
    # country returned any article.
    unique_results = []

    outputs = [
        gr.update(value=status_msg, visible=True),
        gr.update(value=f"**검색어:** {query}", visible=True),
    ]

    for _ in global_article_components:
        outputs.extend([
            gr.update(visible=False), gr.update(), gr.update(),
            gr.update(), gr.update()
        ])
    outputs.append([])

    yield outputs

    locations, _languages = get_region_countries(region)
    total_countries = len(locations)

    for idx, country in enumerate(locations, 1):
        try:
            status_msg = f"{region} - {country} 검색 중... ({idx}/{total_countries} 국가)"
            outputs[0] = gr.update(value=status_msg, visible=True)
            yield outputs

            error_message, articles = serphouse_search(query, country)
            if error_message or not articles:
                continue

            for article in articles:
                article['source_country'] = country
                article['region'] = region

            all_results.extend(articles)
            # NOTE(review): 'time' is compared as a plain string here —
            # confirm that this ordering is what is wanted.
            sorted_results = sorted(all_results, key=lambda x: x.get('time', ''), reverse=True)

            # Drop repeated links, keeping the first occurrence.
            seen_urls = set()
            deduped = []
            for article in sorted_results:
                url = article.get('link', '')
                if url not in seen_urls:
                    seen_urls.add(url)
                    deduped.append(article)
            deduped = deduped[:MAX_GLOBAL_RESULTS]

            # Build into a separate list so a failure half-way through
            # rendering never leaves ``outputs`` truncated.
            rendered = [
                gr.update(value=f"{region} - {idx}/{total_countries} 국가 검색 완료\n현재까지 발견된 뉴스: {len(deduped)}건", visible=True),
                gr.update(value=f"**검색어:** {query} | **지역:** {region}", visible=True),
            ]

            # ``slot`` avoids shadowing the country counter ``idx``.
            for slot, _comp in enumerate(global_article_components):
                if slot < len(deduped):
                    article = deduped[slot]
                    image_url = article.get('image_url', '')
                    if image_url and not image_url.startswith('data:image'):
                        image_update = gr.update(value=image_url, visible=True)
                    else:
                        image_update = gr.update(value=None, visible=False)

                    korean_summary = translate_to_korean(article['snippet'])

                    rendered.extend([
                        gr.update(visible=True),
                        gr.update(value=f"### [{article['title']}]({article['link']})"),
                        image_update,
                        gr.update(value=f"**요약:** {article['snippet']}\n\n**한글 요약:** {korean_summary}"),
                        gr.update(value=f"**출처:** {article['channel']} | **국가:** {article['source_country']} | **지역:** {article['region']} | **시간:** {article['time']}")
                    ])
                else:
                    rendered.extend([
                        gr.update(visible=False),
                        gr.update(),
                        gr.update(),
                        gr.update(),
                        gr.update()
                    ])

            rendered.append(deduped)
            unique_results = deduped
            outputs = rendered
            yield outputs

        except Exception as e:
            print(f"Error searching {country}: {str(e)}")
            continue

    final_status = f"{region} 검색 완료! 총 {len(unique_results)}개의 뉴스가 발견되었습니다."
    outputs[0] = gr.update(value=final_status, visible=True)
    yield outputs
|
|
|
# Custom stylesheet handed to gr.Blocks(css=...).
css = """
/* 전역 스타일 */
footer {visibility: hidden;}

/* 레이아웃 컨테이너 */
#status_area {
    background: rgba(255, 255, 255, 0.9);
    padding: 15px;
    border-bottom: 1px solid #ddd;
    margin-bottom: 20px;
    box-shadow: 0 2px 5px rgba(0,0,0,0.1);
}

#results_area {
    padding: 10px;
    margin-top: 10px;
}

/* 탭 스타일 */
.tabs {
    border-bottom: 2px solid #ddd !important;
    margin-bottom: 20px !important;
}

.tab-nav {
    border-bottom: none !important;
    margin-bottom: 0 !important;
}

.tab-nav button {
    font-weight: bold !important;
    padding: 10px 20px !important;
}

.tab-nav button.selected {
    border-bottom: 2px solid #1f77b4 !important;
    color: #1f77b4 !important;
}

/* 상태 메시지 */
#status_area .markdown-text {
    font-size: 1.1em;
    color: #2c3e50;
    padding: 10px 0;
}

/* 기본 컨테이너 */
.group {
    border: 1px solid #eee;
    padding: 15px;
    margin-bottom: 15px;
    border-radius: 5px;
    background: white;
}

/* 버튼 스타일 */
.primary-btn {
    background: #1f77b4 !important;
    border: none !important;
}

/* 입력 필드 */
.textbox {
    border: 1px solid #ddd !important;
    border-radius: 4px !important;
}

/* 프로그레스바 컨테이너 */
.progress-container {
    position: fixed;
    top: 0;
    left: 0;
    width: 100%;
    height: 6px;
    background: #e0e0e0;
    z-index: 1000;
}

/* 프로그레스바 */
.progress-bar {
    height: 100%;
    background: linear-gradient(90deg, #2196F3, #00BCD4);
    box-shadow: 0 0 10px rgba(33, 150, 243, 0.5);
    transition: width 0.3s ease;
    animation: progress-glow 1.5s ease-in-out infinite;
}

/* 프로그레스 텍스트 */
.progress-text {
    position: fixed;
    top: 8px;
    left: 50%;
    transform: translateX(-50%);
    background: #333;
    color: white;
    padding: 4px 12px;
    border-radius: 15px;
    font-size: 14px;
    z-index: 1001;
    box-shadow: 0 2px 5px rgba(0,0,0,0.2);
}

/* 프로그레스바 애니메이션 */
@keyframes progress-glow {
    0% {
        box-shadow: 0 0 5px rgba(33, 150, 243, 0.5);
    }
    50% {
        box-shadow: 0 0 20px rgba(33, 150, 243, 0.8);
    }
    100% {
        box-shadow: 0 0 5px rgba(33, 150, 243, 0.5);
    }
}

/* 반응형 디자인 */
@media (max-width: 768px) {
    .group {
        padding: 10px;
        margin-bottom: 15px;
    }

    .progress-text {
        font-size: 12px;
        padding: 3px 10px;
    }
}

/* 로딩 상태 표시 개선 */
.loading {
    opacity: 0.7;
    pointer-events: none;
    transition: opacity 0.3s ease;
}

/* 결과 컨테이너 애니메이션 */
.group {
    transition: all 0.3s ease;
    opacity: 0;
    transform: translateY(20px);
}

.group.visible {
    opacity: 1;
    transform: translateY(0);
}

/* Examples 스타일링 */
.examples-table {
    margin-top: 10px !important;
    margin-bottom: 20px !important;
}

.examples-table button {
    background-color: #f0f0f0 !important;
    border: 1px solid #ddd !important;
    border-radius: 4px !important;
    padding: 5px 10px !important;
    margin: 2px !important;
    transition: all 0.3s ease !important;
}

.examples-table button:hover {
    background-color: #e0e0e0 !important;
    transform: translateY(-1px) !important;
    box-shadow: 0 2px 5px rgba(0,0,0,0.1) !important;
}

.examples-table .label {
    font-weight: bold !important;
    color: #444 !important;
    margin-bottom: 5px !important;
}
"""
|
|
|
|
|
def get_article_content(url):
    """Download ``url`` and extract its title, description and body text.

    Args:
        url: Address of the article page.

    Returns:
        A string of the form ``"Title: ... Description: ... Content: ..."``,
        or ``"Error crawling content: <reason>"`` when anything fails (the
        error is also printed).
    """
    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        retries = Retry(total=3, backoff_factor=0.5)
        # Context manager closes the session's pooled connections.
        with requests.Session() as session:
            session.mount('https://', HTTPAdapter(max_retries=retries))
            response = session.get(url, headers=headers, timeout=30)
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'html.parser')

        # Prefer og:title. A <title> element also has a ``.get`` method, so
        # it is read through its text rather than a ``content`` attribute.
        og_title = soup.find('meta', property='og:title')
        title = og_title.get('content', '') if og_title else ''
        if not title:
            title_tag = soup.find('title')
            title = title_tag.get_text().strip() if title_tag else ''

        description = soup.find('meta', property='og:description') or soup.find('meta', {'name': 'description'})
        description = description.get('content', '') if description else ''

        article_content = ''

        content_selectors = [
            'article', '.article-body', '.article-content', '#article-body',
            '.story-body', '.post-content', '.entry-content', '.content-body',
            '[itemprop="articleBody"]', '.story-content'
        ]

        # Use the first selector that matches a container with text blocks.
        for selector in content_selectors:
            content = soup.select_one(selector)
            if content:
                # Strip non-article elements before collecting text.
                for tag in content.find_all(['script', 'style', 'nav', 'header', 'footer', 'aside']):
                    tag.decompose()

                paragraphs = content.find_all(['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6'])
                if paragraphs:
                    article_content = '\n\n'.join([p.get_text().strip() for p in paragraphs if p.get_text().strip()])
                    break

        # Fallback: every reasonably long <p> on the page.
        if not article_content:
            paragraphs = soup.find_all('p')
            article_content = '\n\n'.join([p.get_text().strip() for p in paragraphs if len(p.get_text().strip()) > 50])

        full_content = f"Title: {title}\n\nDescription: {description}\n\nContent:\n{article_content}"

        # Collapse runs of horizontal whitespace but keep newlines, so the
        # blank-line normalisation below still has paragraph breaks to act on.
        full_content = re.sub(r'[^\S\n]+', ' ', full_content)
        full_content = re.sub(r'\n\s*\n', '\n\n', full_content)

        return full_content.strip()

    except Exception as e:
        print(f"Crawling error details: {str(e)}")
        return f"Error crawling content: {str(e)}"
|
|
|
def respond(url, history, system_message, max_tokens, temperature, top_p):
    """Fetch an article, then stream a Korean translation and rewrite of it.

    This is a generator: every yielded value is the updated chat history, a
    list of ``(user_text, bot_text)`` tuples.

    Args:
        url: Article address; must start with ``http``.
        history: Existing chat history list (mutated in place).
        system_message: System prompt for the chat completion.
        max_tokens: Forwarded to the completion call.
        temperature: Forwarded to the completion call.
        top_p: Forwarded to the completion call.
    """
    if history is None:
        history = []

    if not isinstance(url, str) or not url.startswith('http'):
        history.append((url, "올바른 URL을 입력해주세요."))
        # A value passed to ``return`` inside a generator is discarded, so
        # the message has to be yielded to reach the caller.
        yield history
        return

    try:
        article_content = get_article_content(url)

        translation_prompt = f"""다음 영문 기사를 한국어로 번역하고 기사를 작성해주세요.

1단계: 전문 번역
===번역 시작===
{article_content}
===번역 끝===

2단계: 기사 작성 가이드라인
다음 요구사항에 따라 한국어 기사를 작성하세요:

1. 구조
- 헤드라인: 핵심 내용을 담은 강력한 제목
- 부제목: 헤드라인 보완 설명
- 리드문: 기사의 핵심을 요약한 첫 문단
- 본문: 상세 내용 전개

2. 작성 규칙
- 객관적이고 정확한 사실 전달
- 문장은 '다.'로 종결
- 단락 간 자연스러운 흐름
- 인용구는 따옴표 처리
- 핵심 정보를 앞부분에 배치
- 전문 용어는 적절한 설명 추가

3. 형식
- 적절한 단락 구분
- 읽기 쉬운 문장 길이
- 논리적인 정보 구성

각 단계는 '===번역===', '===기사==='로 명확히 구분하여 출력하세요.
"""

        messages = [
            {
                "role": "system",
                "content": system_message
            },
            {"role": "user", "content": translation_prompt}
        ]

        history.append((url, "번역 및 기사 작성을 시작합니다..."))
        # Emit the placeholder right away so it is visible before the first
        # token arrives.
        yield history

        full_response = ""
        for message in client.chat.completions.create(
            model="CohereForAI/c4ai-command-r-plus-08-2024",
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
            messages=messages,
        ):
            # Skip chunks that carry no choices instead of indexing into an
            # empty list.
            if not message.choices:
                continue
            token = getattr(message.choices[0].delta, 'content', None)
            if token:
                full_response += token
                history[-1] = (url, full_response)
                yield history

    except Exception as e:
        error_message = f"처리 중 오류가 발생했습니다: {str(e)}"
        history.append((url, error_message))
        yield history
|
|
|
|
|
def continue_writing(history, system_message, max_tokens, temperature, top_p):
    """Stream a continuation of the last assistant reply in ``history``.

    This is a generator: every value it yields is the complete chat history
    that should be displayed at that moment. The caller's ``history`` list is
    never mutated; each yielded value is a new list.

    Args:
        history: Chat history as a list of ``(user, assistant)`` pairs.
        system_message: System prompt sent to the model.
        max_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature forwarded to the model.
        top_p: Nucleus-sampling value forwarded to the model.

    Yields:
        ``[]`` once when there is nothing to continue from; otherwise the
        original history plus a ``("계속 작성", partial_text)`` entry after each
        received token, or plus an ``("오류", message)`` entry on failure.
    """
    if not history:
        # A generator's ``return`` value is discarded by the consumer, so the
        # (empty) result is yielded explicitly.
        yield []
        return

    # A missing reply must not be interpolated into the prompt as "None".
    last_response = history[-1][1] or ""

    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": _build_continue_prompt(last_response)},
    ]

    try:
        full_response = ""
        stream = client.chat.completions.create(
            model="CohereForAI/c4ai-command-r-plus-08-2024",
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
            messages=messages,
        )
        for chunk in stream:
            # Some stream chunks carry no choices; indexing them would raise
            # IndexError and abort an otherwise healthy stream.
            if not chunk.choices:
                continue
            token = getattr(chunk.choices[0].delta, 'content', None)
            if token:
                full_response += token
                yield [*history, ("계속 작성", full_response)]

    except Exception as e:
        # Top-level UI boundary: surface the failure in the chat instead of
        # letting the event handler crash.
        error_message = f"계속 작성 중 오류가 발생했습니다: {str(e)}"
        yield [*history, ("오류", error_message)]


def _build_continue_prompt(last_response):
    """Return the user prompt asking the model to continue ``last_response``."""
    return f"""이전 내용을 이어서 계속 작성해주세요.
마지막 응답: {last_response}

추가 지침:
1. 이전 내용의 맥락을 유지하며 자연스럽게 이어서 작성
2. 새로운 정보나 상세 설명을 추가
3. 필요한 경우 보충 설명이나 분석 제공
4. 기사 형식과 스타일 유지
5. 필요한 경우 추가적인 이미지 프롬프트 생성
"""
|
|
|
# Default system prompt for the "AI 기사 생성" tab. Kept at module level so the
# multi-line text is not buried inside the nested layout code below.
DEFAULT_SYSTEM_MESSAGE = """You are a professional translator and journalist. Follow these steps strictly:
1. TRANSLATION
- Start with ===번역=== marker
- Provide accurate Korean translation
- Maintain original meaning and context
2. ARTICLE WRITING
- Start with ===기사=== marker
- Write a new Korean news article based on the translation
- Follow newspaper article format
- Use formal news writing style
- End sentences with '다.'
- Include headline and subheadline
- Organize paragraphs clearly
- Put key information first
- Use quotes appropriately

3. IMAGE PROMPT GENERATION
- Start with ===이미지 프롬프트=== marker
- Create detailed Korean prompts for image generation
- Prompts should reflect the article's main theme and content
- Include key visual elements mentioned in the article
- Specify style, mood, and composition
- Format: "이미지 설명: [상세 설명]"
- Add style keywords: "스타일: [관련 키워드들]"
- Add mood keywords: "분위기: [관련 키워드들]"
IMPORTANT:
- Must complete all three steps in order
- Clearly separate each section with markers
- Never skip or combine steps
- Ensure image prompts align with article content"""


# Build the four-tab UI (DB search, per-country search, global search,
# AI article generation) and wire each button to its handler.
with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css, title="NewsAI 서비스") as iface:
    # Create the SQLite table (if missing) before any DB-backed handler runs.
    init_db()

    with gr.Tabs():
        # ---- Tab: fixed "samsung / United States" search stored in SQLite ----
        with gr.Tab("DB 검색"):
            gr.Markdown("삼성/미국 검색 결과를 DB에 저장하고 불러옵니다.")

            with gr.Row():
                # Named distinctly from the country-tab ``search_button`` below,
                # which previously re-used (and shadowed) the same name.
                db_search_button = gr.Button("검색: samsung/미국", variant="primary")
                load_button = gr.Button("출력: samsung/미국", variant="secondary")

            results_display = gr.Markdown()

            db_search_button.click(
                fn=search_samsung_us,
                outputs=results_display
            )

            load_button.click(
                fn=load_samsung_us,
                outputs=results_display
            )

        # ---- Tab: keyword search within a single country ----
        with gr.Tab("국가별"):
            gr.Markdown("검색어를 입력하고 원하는 국가(한국 제외)를 선택하면, 검색어와 일치하는 24시간 이내 뉴스를 최대 100개 출력합니다.")
            gr.Markdown("국가 선택후 검색어에 '한글'을 입력하면 현지 언어로 번역되어 검색합니다. 예: 'Taiwan' 국가 선택후 '삼성' 입력시 '三星'으로 자동 검색")

            with gr.Column():
                with gr.Row():
                    query = gr.Textbox(label="검색어")
                    country = gr.Dropdown(
                        choices=sorted(list(COUNTRY_LOCATIONS.keys())),
                        label="국가",
                        value="United States"
                    )

                gr.Examples(
                    examples=[
                        "artificial intelligence",
                        "NVIDIA",
                        "OPENAI",
                        "META LLAMA",
                        "black forest labs",
                        "GOOGLE gemini",
                        "anthropic Claude",
                        "X.AI",
                        "HUGGINGFACE",
                        "HYNIX",
                        "Large Language model",
                        "CHATGPT",
                        "StabilityAI",
                        "MISTRALAI",
                        "QWEN",
                        "MIDJOURNEY",
                        "GPU"
                    ],
                    inputs=query,
                    label="자주 사용되는 검색어"
                )

                status_message = gr.Markdown("", visible=True)
                translated_query_display = gr.Markdown(visible=False)
                search_button = gr.Button("검색", variant="primary")

                # NOTE(review): this instance is not referenced anywhere in this
                # block; gr.Progress is normally used as a default argument of
                # an event handler. Confirm whether it can be removed.
                progress = gr.Progress()
                articles_state = gr.State([])

                # Pre-create 100 hidden result slots; each slot is a group
                # holding a title, an image, a snippet and an info line.
                article_components = []
                for i in range(100):
                    with gr.Group(visible=False) as article_group:
                        title = gr.Markdown()
                        image = gr.Image(width=200, height=150)
                        snippet = gr.Markdown()
                        info = gr.Markdown()

                    article_components.append({
                        'group': article_group,
                        'title': title,
                        'image': image,
                        'snippet': snippet,
                        'info': info,
                        'index': i,
                    })

        # ---- Tab: keyword search across a whole region ----
        with gr.Tab("전세계"):
            gr.Markdown("대륙별로 24시간 이내 뉴스를 검색합니다.")

            with gr.Column():
                with gr.Column(elem_id="status_area"):
                    with gr.Row():
                        query_global = gr.Textbox(label="검색어")
                        region_select = gr.Dropdown(
                            choices=REGIONS,
                            label="지역 선택",
                            value="동아시아"
                        )
                        search_button_global = gr.Button("검색", variant="primary")

                    status_message_global = gr.Markdown("")
                    translated_query_display_global = gr.Markdown("")

                with gr.Column(elem_id="results_area"):
                    articles_state_global = gr.State([])

                    # Same hidden-slot layout as the country tab, sized by
                    # MAX_GLOBAL_RESULTS.
                    global_article_components = []
                    for i in range(MAX_GLOBAL_RESULTS):
                        with gr.Group(visible=False) as article_group:
                            title = gr.Markdown()
                            image = gr.Image(width=200, height=150)
                            snippet = gr.Markdown()
                            info = gr.Markdown()

                        global_article_components.append({
                            'group': article_group,
                            'title': title,
                            'image': image,
                            'snippet': snippet,
                            'info': info,
                            'index': i,
                        })

        # ---- Tab: translate a news URL and rewrite it as a Korean article ----
        with gr.Tab("AI 기사 생성"):
            gr.Markdown("뉴스 URL을 입력하면 AI가 한국어로 번역하여 기사 형식으로 작성합니다.")
            gr.Markdown("이미지 생성: https://huggingface.co/spaces/ginipick/FLUXllama ")

            with gr.Column():
                chatbot = gr.Chatbot(height=600)

                with gr.Row():
                    url_input = gr.Textbox(
                        label="뉴스 URL",
                        placeholder="https://..."
                    )

                with gr.Row():
                    translate_button = gr.Button("기사 생성", variant="primary")
                    continue_button = gr.Button("계속 이어서 작성", variant="secondary")

                with gr.Accordion("고급 설정", open=False):
                    system_message = gr.Textbox(
                        value=DEFAULT_SYSTEM_MESSAGE,
                        label="System message"
                    )

                    max_tokens = gr.Slider(
                        minimum=1,
                        maximum=7800,
                        value=7624,
                        step=1,
                        label="Max new tokens"
                    )
                    temperature = gr.Slider(
                        minimum=0.1,
                        maximum=4.0,
                        value=0.7,
                        step=0.1,
                        label="Temperature"
                    )
                    top_p = gr.Slider(
                        minimum=0.1,
                        maximum=1.0,
                        value=0.95,
                        step=0.05,
                        label="Top-P"
                    )

    # ---- Event wiring ----

    # Output order for the country search: two status fields, one hidden
    # Markdown created here as an extra output slot, then the five components
    # of every result slot, then the state.
    # NOTE(review): presumably this mirrors the order of values returned by
    # search_and_display -- verify against that function.
    search_outputs = [status_message, translated_query_display, gr.Markdown(visible=False)]
    for comp in article_components:
        search_outputs.extend([
            comp['group'], comp['title'], comp['image'],
            comp['snippet'], comp['info']
        ])
    search_outputs.append(articles_state)

    search_button.click(
        fn=search_and_display,
        inputs=[query, country, articles_state],
        outputs=search_outputs,
        show_progress=True
    )

    # Output order for the global search: two status fields, the five
    # components of every result slot, then the state.
    global_search_outputs = [status_message_global, translated_query_display_global]
    for comp in global_article_components:
        global_search_outputs.extend([
            comp['group'], comp['title'], comp['image'],
            comp['snippet'], comp['info']
        ])
    global_search_outputs.append(articles_state_global)

    search_button_global.click(
        fn=search_global,
        inputs=[query_global, region_select, articles_state_global],
        outputs=global_search_outputs,
        show_progress=True
    )

    translate_button.click(
        fn=respond,
        inputs=[
            url_input,
            chatbot,
            system_message,
            max_tokens,
            temperature,
            top_p,
        ],
        outputs=chatbot
    )

    continue_button.click(
        fn=continue_writing,
        inputs=[
            chatbot,
            system_message,
            max_tokens,
            temperature,
            top_p,
        ],
        outputs=chatbot
    )
|
|
|
# Start the web server. The login credentials can be overridden through the
# NEWSAI_AUTH_USER / NEWSAI_AUTH_PASSWORD environment variables so that real
# deployments do not have to rely on the values committed in this file; the
# previous hard-coded pair remains the default for backward compatibility.
# NOTE(review): share=True requests a public share link and ssl_verify=False
# disables certificate validation -- confirm both are intended in production.
iface.launch(
    server_name="0.0.0.0",
    server_port=7860,
    share=True,
    auth=(
        os.environ.get("NEWSAI_AUTH_USER", "gini"),
        os.environ.get("NEWSAI_AUTH_PASSWORD", "pick"),
    ),
    ssl_verify=False,
    show_error=True
)