# ainews / app-backup2.py
# Hugging Face Spaces page header captured together with the file:
# uploader "ginipick", commit "Update app-backup2.py" (7c3b41f, verified), size 47.1 kB.
import gradio as gr
import requests
import json
import os
from datetime import datetime, timedelta
from concurrent.futures import ThreadPoolExecutor
from functools import lru_cache
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
from openai import OpenAI
from bs4 import BeautifulSoup
import re # re 모듈 추가
import json
import os
from datetime import datetime
import sqlite3
import pathlib
# Database initialisation.
def init_db():
    """Create the ``searches`` table in ``search_results.db`` if it is missing.

    The database file is resolved relative to the current working directory.
    The statement uses ``CREATE TABLE IF NOT EXISTS``, so calling this function
    repeatedly is harmless.
    """
    db_path = pathlib.Path("search_results.db")
    conn = sqlite3.connect(db_path)
    try:
        conn.execute('''CREATE TABLE IF NOT EXISTS searches
                     (id INTEGER PRIMARY KEY AUTOINCREMENT,
                     keyword TEXT,
                     country TEXT,
                     results TEXT,
                     timestamp DATETIME DEFAULT CURRENT_TIMESTAMP)''')
        conn.commit()
    finally:
        # Close the connection even when the DDL statement raises.
        conn.close()
# Persist one search result set.
def save_to_db(keyword, country, results):
    """Insert a row into ``searches`` with *results* serialised as JSON.

    Args:
        keyword: Search keyword stored in the ``keyword`` column.
        country: Country name stored in the ``country`` column.
        results: JSON-serialisable object (passed to ``json.dumps``).

    Raises:
        sqlite3.Error: If the insert fails (for example, the table is missing).
        TypeError: If *results* cannot be serialised by ``json.dumps``.
    """
    conn = sqlite3.connect("search_results.db")
    try:
        conn.execute(
            "INSERT INTO searches (keyword, country, results) VALUES (?, ?, ?)",
            (keyword, country, json.dumps(results)),
        )
        conn.commit()
    finally:
        # Close the connection even when serialisation or the INSERT raises.
        conn.close()
# Load the most recent stored search result set.
def load_from_db(keyword, country):
    """Return the newest stored results for *keyword* / *country*.

    Args:
        keyword: Value matched against the ``keyword`` column.
        country: Value matched against the ``country`` column.

    Returns:
        ``(results, timestamp)`` where ``results`` is the decoded JSON payload
        and ``timestamp`` is the stored timestamp value, or ``(None, None)``
        when no matching row exists.
    """
    conn = sqlite3.connect("search_results.db")
    try:
        # ``CURRENT_TIMESTAMP`` has one-second resolution, so two saves in the
        # same second tie on ``timestamp``; ``id DESC`` picks the later insert.
        row = conn.execute(
            "SELECT results, timestamp FROM searches WHERE keyword=? AND country=? "
            "ORDER BY timestamp DESC, id DESC LIMIT 1",
            (keyword, country),
        ).fetchone()
    finally:
        conn.close()
    if row:
        return json.loads(row[0]), row[1]
    return None, None
# Samsung / United States search.
def search_samsung_us():
    """Search news for "samsung" in the United States, store and render it.

    Returns:
        Markdown text built by ``display_results`` on success. When
        ``serphouse_search`` reports a message (an error or its own
        "no results" text) that message is returned as-is, so an API failure
        is no longer presented as an empty result set.
    """
    error_message, articles = serphouse_search("samsung", "United States")
    if error_message:
        return error_message
    if not articles:
        return "검색 결과가 없습니다."
    save_to_db("samsung", "United States", articles)
    return display_results(articles)
# Load the stored Samsung / United States results from the database.
def load_samsung_us():
    """Render the latest stored "samsung" / "United States" results.

    Returns:
        The stored timestamp followed by the rendered articles, or a fixed
        message when ``load_from_db`` returns nothing usable.
    """
    stored_articles, saved_at = load_from_db("samsung", "United States")
    if not stored_articles:
        return "저장된 결과가 없습니다."
    header = f"저장 시간: {saved_at}\n\n"
    return header + display_results(stored_articles)
# Render articles as Markdown.
def display_results(articles):
    """Format *articles* as a numbered Markdown listing.

    Each article must provide the keys ``title``, ``channel``, ``time``,
    ``link`` and ``snippet``; a missing key raises ``KeyError``.

    Returns:
        One Markdown section per article, concatenated; ``""`` for no articles.
    """
    sections = []
    for position, item in enumerate(articles, start=1):
        sections.append(
            f"### {position}. {item['title']}\n"
            f"출처: {item['channel']}\n"
            f"시간: {item['time']}\n"
            f"링크: {item['link']}\n"
            f"요약: {item['snippet']}\n\n"
        )
    return "".join(sections)
# Token read from the HF_TOKEN environment variable; used below as the API key
# of the OpenAI-compatible client.
ACCESS_TOKEN = os.getenv("HF_TOKEN")
if not ACCESS_TOKEN:
    # Runs at module level, so a missing token aborts the import/start-up.
    raise ValueError("HF_TOKEN environment variable is not set")
# OpenAI SDK client pointed at the Hugging Face inference endpoint.
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)
MAX_COUNTRY_RESULTS = 100  # maximum number of results per country
MAX_GLOBAL_RESULTS = 1000  # maximum number of results for the worldwide search
def create_article_components(max_results):
    """Create *max_results* hidden article slots in the current Gradio context.

    Each slot is a ``gr.Group`` (initially invisible) holding a title
    Markdown, a 200x150 image, a snippet Markdown and an info Markdown.

    Returns:
        A list of dicts with the keys ``group``, ``title``, ``image``,
        ``snippet``, ``info`` and ``index`` (the zero-based slot position).
    """
    slots = []
    for position in range(max_results):
        with gr.Group(visible=False) as container:
            heading = gr.Markdown()
            thumbnail = gr.Image(width=200, height=150)
            summary = gr.Markdown()
            details = gr.Markdown()
        slots.append(dict(
            group=container,
            title=heading,
            image=thumbnail,
            snippet=summary,
            info=details,
            index=position,
        ))
    return slots
# SERPHouse API key; sent as the bearer token by search_serphouse.
# NOTE(review): unlike HF_TOKEN, a missing value is not rejected here — confirm that is intended.
API_KEY = os.getenv("SERPHOUSE_API_KEY")
# Country name -> language code used for translation and for the search request.
COUNTRY_LANGUAGES = {
    "United States": "en",
    "KOREA": "ko",
    "United Kingdom": "en",
    "Taiwan": "zh-TW",
    "Canada": "en",
    "Australia": "en",
    "Germany": "de",
    "France": "fr",
    "Japan": "ja",
    "China": "zh",
    "India": "hi",
    "Brazil": "pt",
    "Mexico": "es",
    "Russia": "ru",
    "Italy": "it",
    "Spain": "es",
    "Netherlands": "nl",
    "Singapore": "en",
    "Hong Kong": "zh-HK",
    "Indonesia": "id",
    "Malaysia": "ms",
    "Philippines": "tl",
    "Thailand": "th",
    "Vietnam": "vi",
    "Belgium": "nl",
    "Denmark": "da",
    "Finland": "fi",
    "Ireland": "en",
    "Norway": "no",
    "Poland": "pl",
    "Sweden": "sv",
    "Switzerland": "de",
    "Austria": "de",
    "Czech Republic": "cs",
    "Greece": "el",
    "Hungary": "hu",
    "Portugal": "pt",
    "Romania": "ro",
    "Turkey": "tr",
    "Israel": "he",
    "Saudi Arabia": "ar",
    "United Arab Emirates": "ar",
    "South Africa": "en",
    "Argentina": "es",
    "Chile": "es",
    "Colombia": "es",
    "Peru": "es",
    "Venezuela": "es",
    "New Zealand": "en",
    "Bangladesh": "bn",
    "Pakistan": "ur",
    "Egypt": "ar",
    "Morocco": "ar",
    "Nigeria": "en",
    "Kenya": "sw",
    "Ukraine": "uk",
    "Croatia": "hr",
    "Slovakia": "sk",
    "Bulgaria": "bg",
    "Serbia": "sr",
    "Estonia": "et",
    "Latvia": "lv",
    "Lithuania": "lt",
    "Slovenia": "sl",
    "Luxembourg": "fr",
    "Malta": "mt",
    "Cyprus": "el",
    "Iceland": "is",
}
# Country name -> location string for the search request. Every country maps to
# its own name except "KOREA", which maps to "kr".
# NOTE(review): "Papua New Guinea" appears in the regional tables but not here.
COUNTRY_LOCATIONS = {
    country_name: ("kr" if country_name == "KOREA" else country_name)
    for country_name in COUNTRY_LANGUAGES
}
# Region definitions
# East Asia
COUNTRY_LANGUAGES_EAST_ASIA = {
    "KOREA": "ko",
    "Taiwan": "zh-TW",
    "Japan": "ja",
    "China": "zh",
    "Hong Kong": "zh-HK",
}
# Each country maps to its own name (note: "KOREA" maps to "KOREA" here).
COUNTRY_LOCATIONS_EAST_ASIA = {
    country_name: country_name for country_name in COUNTRY_LANGUAGES_EAST_ASIA
}
# Southeast Asia / Oceania
COUNTRY_LANGUAGES_SOUTHEAST_ASIA_OCEANIA = {
    "Indonesia": "id",
    "Malaysia": "ms",
    "Philippines": "tl",
    "Thailand": "th",
    "Vietnam": "vi",
    "Singapore": "en",
    "Papua New Guinea": "en",
    "Australia": "en",
    "New Zealand": "en",
}
# Each country maps to its own name.
COUNTRY_LOCATIONS_SOUTHEAST_ASIA_OCEANIA = {
    country_name: country_name
    for country_name in COUNTRY_LANGUAGES_SOUTHEAST_ASIA_OCEANIA
}
# Eastern Europe
COUNTRY_LANGUAGES_EAST_EUROPE = {
    "Poland": "pl",
    "Czech Republic": "cs",
    "Greece": "el",
    "Hungary": "hu",
    "Romania": "ro",
    "Ukraine": "uk",
    "Croatia": "hr",
    "Slovakia": "sk",
    "Bulgaria": "bg",
    "Serbia": "sr",
    "Estonia": "et",
    "Latvia": "lv",
    "Lithuania": "lt",
    "Slovenia": "sl",
    "Malta": "mt",
    "Cyprus": "el",
    "Iceland": "is",
    "Russia": "ru",
}
# Each country maps to its own name.
COUNTRY_LOCATIONS_EAST_EUROPE = {
    country_name: country_name for country_name in COUNTRY_LANGUAGES_EAST_EUROPE
}
# Western Europe
COUNTRY_LANGUAGES_WEST_EUROPE = {
    "Germany": "de",
    "France": "fr",
    "Italy": "it",
    "Spain": "es",
    "Netherlands": "nl",
    "Belgium": "nl",
    "Ireland": "en",
    "Sweden": "sv",
    "Switzerland": "de",
    "Austria": "de",
    "Portugal": "pt",
    "Luxembourg": "fr",
    "United Kingdom": "en",
}
# Each country maps to its own name.
COUNTRY_LOCATIONS_WEST_EUROPE = {
    country_name: country_name for country_name in COUNTRY_LANGUAGES_WEST_EUROPE
}
# Middle East / Africa
COUNTRY_LANGUAGES_ARAB_AFRICA = {
    "South Africa": "en",
    "Nigeria": "en",
    "Kenya": "sw",
    "Egypt": "ar",
    "Morocco": "ar",
    "Saudi Arabia": "ar",
    "United Arab Emirates": "ar",
    "Israel": "he",
}
# Each country maps to its own name.
COUNTRY_LOCATIONS_ARAB_AFRICA = {
    country_name: country_name for country_name in COUNTRY_LANGUAGES_ARAB_AFRICA
}
# Americas
COUNTRY_LANGUAGES_AMERICA = {
    "United States": "en",
    "Canada": "en",
    "Mexico": "es",
    "Brazil": "pt",
    "Argentina": "es",
    "Chile": "es",
    "Colombia": "es",
    "Peru": "es",
    "Venezuela": "es",
}
# Each country maps to its own name.
COUNTRY_LOCATIONS_AMERICA = {
    country_name: country_name for country_name in COUNTRY_LANGUAGES_AMERICA
}
# Region choices offered in the worldwide-search dropdown. The labels are the
# exact strings compared in get_region_countries.
REGIONS = [
    "동아시아",
    "동남아시아/오세아니아",
    "동유럽",
    "서유럽",
    "중동/아프리카",
    "아메리카"
]
@lru_cache(maxsize=100)
def translate_query(query, country):
    """Translate *query* into the language configured for *country*.

    The query is returned unchanged when it is ASCII-only (``is_english``),
    when *country* has no entry in ``COUNTRY_LANGUAGES``, when *country* is
    Korea, or when the translation request fails.

    Args:
        query: Search text entered by the user.
        country: Country name used as the key into ``COUNTRY_LANGUAGES``.

    Returns:
        The translated text, or *query* itself as a fallback.

    Note:
        Results are memoised by ``lru_cache``; a fallback produced by a failed
        request is cached as well.
    """
    try:
        if is_english(query):
            return query
        if country not in COUNTRY_LANGUAGES:
            return query
        # The table keys Korea as "KOREA"; the previous check only compared
        # against "South Korea" and therefore could never match.
        if country in ("South Korea", "KOREA"):
            return query
        target_lang = COUNTRY_LANGUAGES[country]
        url = "https://translate.googleapis.com/translate_a/single"
        params = {
            "client": "gtx",
            "sl": "auto",
            "tl": target_lang,
            "dt": "t",
            "q": query
        }
        retries = Retry(total=3, backoff_factor=0.5)
        # The context manager releases pooled connections after the request.
        with requests.Session() as session:
            session.mount('https://', HTTPAdapter(max_retries=retries))
            response = session.get(url, params=params, timeout=(5, 10))
            response.raise_for_status()
            payload = response.json()
        # payload[0] holds one entry per translated segment; join them all
        # instead of keeping only the first one.
        segments = [part[0] for part in payload[0] if part and part[0]]
        return "".join(segments) or query
    except Exception as e:
        print(f"번역 오류: {str(e)}")
        return query
@lru_cache(maxsize=200)
def translate_to_korean(text):
    """Translate *text* into Korean, returning *text* unchanged on failure.

    Args:
        text: Source text; the source language is auto-detected by the service.

    Returns:
        The Korean translation with all translated segments joined, or *text*
        itself when it is empty or the request fails.

    Note:
        Results are memoised by ``lru_cache``; a fallback produced by a failed
        request is cached as well.
    """
    if not text:
        # Nothing to translate; avoid a pointless network round trip.
        return text
    try:
        url = "https://translate.googleapis.com/translate_a/single"
        params = {
            "client": "gtx",
            "sl": "auto",
            "tl": "ko",
            "dt": "t",
            "q": text
        }
        retries = Retry(total=3, backoff_factor=0.5)
        # The context manager releases pooled connections after the request.
        with requests.Session() as session:
            session.mount('https://', HTTPAdapter(max_retries=retries))
            response = session.get(url, params=params, timeout=(5, 10))
            response.raise_for_status()
            payload = response.json()
        # Multi-sentence input comes back as several segments; keeping only the
        # first one truncated the summary.
        segments = [part[0] for part in payload[0] if part and part[0]]
        return "".join(segments) or text
    except Exception as e:
        print(f"한글 번역 오류: {str(e)}")
        return text
def is_english(text):
    """Return True when every character of *text* is ASCII (code point < 128).

    Spaces, hyphens and underscores are ASCII themselves, so they need no
    special handling. An empty string yields True.
    """
    return not any(ord(symbol) >= 128 for symbol in text)
def is_korean(text):
    """Return True when *text* contains a character in U+AC00..U+D7A3."""
    for symbol in text:
        if '\uAC00' <= symbol <= '\uD7A3':
            return True
    return False
def search_serphouse(query, country, page=1, num_result=10):
    """Run a live news search against the SERPHouse API.

    The query is first translated with ``translate_query`` and the search is
    limited to the date range from yesterday to today (UTC).

    Args:
        query: Search text entered by the user.
        country: Country name; looked up in ``COUNTRY_LOCATIONS`` and
            ``COUNTRY_LANGUAGES`` (falling back to "United States" / "en").
        page: Result page to request (sent to the API as a string).
        num_result: Currently unused; the request always asks for 100 results.
            Honouring the default of 10 would shrink result sets for existing
            callers, so the hard-coded value is kept.

    Returns:
        ``{"results": <decoded JSON>, "translated_query": <str>}`` on success,
        or ``{"error": <message>, "translated_query": query}`` on failure.
    """
    # Function-scope import: the file only imports ``datetime`` and ``timedelta``.
    from datetime import timezone

    url = "https://api.serphouse.com/serp/live"
    # Timezone-aware replacement for the deprecated ``datetime.utcnow()``.
    now = datetime.now(timezone.utc)
    yesterday = now - timedelta(days=1)
    date_range = f"{yesterday.strftime('%Y-%m-%d')},{now.strftime('%Y-%m-%d')}"
    translated_query = translate_query(query, country)
    payload = {
        "data": {
            "q": translated_query,
            "domain": "google.com",
            "loc": COUNTRY_LOCATIONS.get(country, "United States"),
            "lang": COUNTRY_LANGUAGES.get(country, "en"),
            "device": "desktop",
            "serp_type": "news",
            "page": str(page),
            "num": "100",
            "date_range": date_range,
            "sort_by": "date"
        }
    }
    headers = {
        "accept": "application/json",
        "content-type": "application/json",
        "authorization": f"Bearer {API_KEY}"
    }
    try:
        # Retry transient server errors and rate limiting, including for POST.
        retries = Retry(
            total=5,
            backoff_factor=1,
            status_forcelist=[500, 502, 503, 504, 429],
            allowed_methods=["POST"]
        )
        adapter = HTTPAdapter(max_retries=retries)
        # The context manager closes the session (and its pooled connections)
        # on every exit path.
        with requests.Session() as session:
            session.mount('http://', adapter)
            session.mount('https://', adapter)
            response = session.post(
                url,
                json=payload,
                headers=headers,
                timeout=(30, 30)  # (connect timeout, read timeout) in seconds
            )
            response.raise_for_status()
            return {"results": response.json(), "translated_query": translated_query}
    except requests.exceptions.Timeout:
        return {
            "error": "검색 시간이 초과되었습니다. 잠시 후 다시 시도해주세요.",
            "translated_query": query
        }
    except requests.exceptions.RequestException as e:
        return {
            "error": f"검색 중 오류가 발생했습니다: {str(e)}",
            "translated_query": query
        }
    except Exception as e:
        return {
            "error": f"예기치 않은 오류가 발생했습니다: {str(e)}",
            "translated_query": query
        }
def format_results_from_raw(response_data):
    """Convert a ``search_serphouse`` response into a list of article dicts.

    Articles whose URL or channel contains one of the Korean domain markers,
    or whose title contains one of the Korea-related keywords, are dropped.

    Args:
        response_data: Dict with either an ``"error"`` key, or ``"results"``
            and ``"translated_query"`` keys.

    Returns:
        ``(message, articles)``. ``message`` is ``""`` on success, otherwise an
        error / "no results" text with an empty list. Each article has the keys
        ``index`` (1-based position in the raw result list), ``title``,
        ``link``, ``snippet``, ``channel``, ``time``, ``image_url`` and
        ``translated_query``.
    """
    if "error" in response_data:
        return "Error: " + response_data["error"], []
    try:
        results = response_data["results"]
        translated_query = response_data["translated_query"]
        news_results = results.get('results', {}).get('results', {}).get('news', [])
        if not news_results:
            return "검색 결과가 없습니다.", []
        # Markers used to filter out Korean sources and Korea-related headlines.
        korean_domains = ('.kr', 'korea', 'korean', 'yonhap', 'hankyung', 'chosun',
                          'donga', 'joins', 'hani', 'koreatimes', 'koreaherald')
        korean_keywords = ('korea', 'korean', 'seoul', 'busan', 'incheon', 'daegu',
                           'gwangju', 'daejeon', 'ulsan', 'sejong')
        filtered_articles = []
        for idx, result in enumerate(news_results, 1):
            # Keep the link exactly as delivered: URL paths and query strings
            # are case-sensitive. Lower-cased copies are used for matching only.
            raw_link = result.get("url") or result.get("link") or ""
            url_lc = raw_link.lower()
            # ``or ""`` guards against explicit None values, which previously
            # raised and discarded the whole batch.
            title_lc = (result.get("title") or "").lower()
            channel_lc = (result.get("channel") or result.get("source") or "").lower()
            is_korean_content = (
                any(marker in url_lc or marker in channel_lc for marker in korean_domains)
                or any(keyword in title_lc for keyword in korean_keywords)
            )
            if is_korean_content:
                continue
            filtered_articles.append({
                "index": idx,
                "title": result.get("title", "제목 없음"),
                "link": raw_link,
                "snippet": result.get("snippet", "내용 없음"),
                "channel": result.get("channel", result.get("source", "알 수 없음")),
                "time": result.get("time", result.get("date", "알 수 없는 시간")),
                "image_url": result.get("img", result.get("thumbnail", "")),
                "translated_query": translated_query
            })
        return "", filtered_articles
    except Exception as e:
        return f"결과 처리 중 오류 발생: {str(e)}", []
def serphouse_search(query, country):
    """Search via ``search_serphouse`` and format with ``format_results_from_raw``.

    Returns:
        The ``(message, articles)`` pair produced by ``format_results_from_raw``.
    """
    return format_results_from_raw(search_serphouse(query, country))
def search_and_display(query, country, articles_state, progress=gr.Progress()):
    """Search one country and build the Gradio updates for the country tab.

    Args:
        query: Search text entered by the user.
        country: Selected country name.
        articles_state: Previous value of the articles state (replaced by the
            new result list, or ``[]`` when nothing can be shown).
        progress: Gradio progress tracker.

    Returns:
        A list of output values in this order: status message, translated-query
        display, error display, five updates (group, title, image, snippet,
        info) for every entry of the module-level ``article_components``, and
        finally the new articles state. The length is the same on every path.
    """
    def hidden_slot():
        # Updates for one unused article slot: hide the group, leave the rest.
        return [gr.update(visible=False), gr.update(), gr.update(),
                gr.update(), gr.update()]

    progress(0, desc="검색어 번역 중...")
    translated_query = translate_query(query, country)
    if translated_query != query:
        translated_display = f"**원본 검색어:** {query}\n**번역된 검색어:** {translated_query}"
    else:
        translated_display = f"**검색어:** {query}"

    progress(0.3, desc="검색 중...")
    response_data = search_serphouse(query, country)
    progress(0.6, desc="결과 처리 중...")
    error_message, articles = format_results_from_raw(response_data)
    if not error_message and not articles:
        # Every hit was filtered out. Previously this path emitted no slot
        # updates at all, so the output count did not match the wired outputs.
        error_message = "검색 결과가 없습니다."

    outputs = [
        gr.update(value="", visible=False),  # status message is cleared when done
        gr.update(value=translated_display, visible=True),
    ]

    if error_message:
        outputs.append(gr.update(value=error_message, visible=True))
        for _ in article_components:
            outputs.extend(hidden_slot())
        progress(1.0, desc="완료!")
        outputs.append([])
        return outputs

    outputs.append(gr.update(value="", visible=False))

    # Translate all snippets concurrently; results come back in article order.
    progress(0.8, desc="번역 처리 중...")
    with ThreadPoolExecutor(max_workers=3) as executor:
        summaries = list(executor.map(
            translate_to_korean, [article['snippet'] for article in articles]
        ))
    for article, summary in zip(articles, summaries):
        article['korean_summary'] = summary

    total_articles = len(articles)
    for idx, _comp in enumerate(article_components):
        if idx >= total_articles:
            outputs.extend(hidden_slot())
            continue
        # Progress is reported only for real articles so it never exceeds 1.0.
        progress((idx + 1) / total_articles, desc=f"결과 표시 중... {idx + 1}/{total_articles}")
        article = articles[idx]
        image_url = article['image_url']
        if image_url and not image_url.startswith('data:image'):
            image_update = gr.update(value=image_url, visible=True)
        else:
            # Inline data URIs and missing images are not displayed.
            image_update = gr.update(value=None, visible=False)
        outputs.extend([
            gr.update(visible=True),
            gr.update(value=f"### [{article['title']}]({article['link']})"),
            image_update,
            gr.update(value=f"**요약:** {article['snippet']}\n\n**한글 요약:** {article['korean_summary']}"),
            gr.update(value=f"**출처:** {article['channel']} | **시간:** {article['time']}")
        ])

    progress(1.0, desc="완료!")
    outputs.append(articles)
    return outputs
def get_region_countries(region):
    """Return the (locations, languages) tables for the selected region.

    Unknown regions yield a pair of empty dicts.
    """
    region_tables = (
        ("동아시아", COUNTRY_LOCATIONS_EAST_ASIA, COUNTRY_LANGUAGES_EAST_ASIA),
        ("동남아시아/오세아니아", COUNTRY_LOCATIONS_SOUTHEAST_ASIA_OCEANIA,
         COUNTRY_LANGUAGES_SOUTHEAST_ASIA_OCEANIA),
        ("동유럽", COUNTRY_LOCATIONS_EAST_EUROPE, COUNTRY_LANGUAGES_EAST_EUROPE),
        ("서유럽", COUNTRY_LOCATIONS_WEST_EUROPE, COUNTRY_LANGUAGES_WEST_EUROPE),
        ("중동/아프리카", COUNTRY_LOCATIONS_ARAB_AFRICA, COUNTRY_LANGUAGES_ARAB_AFRICA),
        ("아메리카", COUNTRY_LOCATIONS_AMERICA, COUNTRY_LANGUAGES_AMERICA),
    )
    for label, locations, languages in region_tables:
        if region == label:
            return locations, languages
    return {}, {}
def search_global(query, region, articles_state_global):
    """Search every country of *region* and stream Gradio updates.

    This is a generator. It yields the full output list (status message, query
    display, five updates per entry of the module-level
    ``global_article_components``, and the articles state) before the search,
    before each country, after each country that returned articles, and once
    more at the end.

    Args:
        query: Search text entered by the user.
        region: Region label; resolved with ``get_region_countries``.
        articles_state_global: Previous state value (not read).
    """
    def hidden_slot():
        # Updates for one unused article slot: hide the group, leave the rest.
        return [gr.update(visible=False), gr.update(), gr.update(),
                gr.update(), gr.update()]

    status_msg = f"{region} 지역 검색을 시작합니다..."
    all_results = []
    # Defined up front so the final status works even when no country returned
    # anything (or the region is unknown); this used to raise a NameError.
    unique_results = []
    outputs = [
        gr.update(value=status_msg, visible=True),
        gr.update(value=f"**검색어:** {query}", visible=True),
    ]
    for _ in global_article_components:
        outputs.extend(hidden_slot())
    outputs.append([])
    yield outputs

    # Only the country names are needed; the search helpers do their own lookups.
    locations, _languages = get_region_countries(region)
    total_countries = len(locations)
    for country_idx, country in enumerate(locations, 1):
        try:
            status_msg = f"{region} - {country} 검색 중... ({country_idx}/{total_countries} 국가)"
            outputs[0] = gr.update(value=status_msg, visible=True)
            yield outputs

            error_message, articles = serphouse_search(query, country)
            if error_message or not articles:
                continue

            for article in articles:
                article['source_country'] = country
                article['region'] = region
            all_results.extend(articles)

            # NOTE(review): 'time' is compared as a string here; whether that
            # gives a chronological order depends on the API's format — confirm.
            sorted_results = sorted(all_results, key=lambda x: x.get('time', ''), reverse=True)

            # Keep the first article seen for each link, capped at the global limit.
            seen_urls = set()
            unique_results = []
            for article in sorted_results:
                url = article.get('link', '')
                if url not in seen_urls:
                    seen_urls.add(url)
                    unique_results.append(article)
            unique_results = unique_results[:MAX_GLOBAL_RESULTS]

            outputs = [
                gr.update(value=f"{region} - {country_idx}/{total_countries} 국가 검색 완료\n현재까지 발견된 뉴스: {len(unique_results)}건", visible=True),
                gr.update(value=f"**검색어:** {query} | **지역:** {region}", visible=True),
            ]
            # Separate loop variable: the slot index must not shadow the country index.
            for slot_idx, _comp in enumerate(global_article_components):
                if slot_idx >= len(unique_results):
                    outputs.extend(hidden_slot())
                    continue
                article = unique_results[slot_idx]
                image_url = article.get('image_url', '')
                if image_url and not image_url.startswith('data:image'):
                    image_update = gr.update(value=image_url, visible=True)
                else:
                    image_update = gr.update(value=None, visible=False)
                # Translate each snippet once and keep it on the article, so
                # later passes do not repeat the request.
                if 'korean_summary' not in article:
                    article['korean_summary'] = translate_to_korean(article['snippet'])
                korean_summary = article['korean_summary']
                outputs.extend([
                    gr.update(visible=True),
                    gr.update(value=f"### [{article['title']}]({article['link']})"),
                    image_update,
                    gr.update(value=f"**요약:** {article['snippet']}\n\n**한글 요약:** {korean_summary}"),
                    gr.update(value=f"**출처:** {article['channel']} | **국가:** {article['source_country']} | **지역:** {article['region']} | **시간:** {article['time']}")
                ])
            outputs.append(unique_results)
            yield outputs
        except Exception as e:
            # Best effort: a failure for one country must not stop the others.
            print(f"Error searching {country}: {str(e)}")
            continue

    final_status = f"{region} 검색 완료! 총 {len(unique_results)}개의 뉴스가 발견되었습니다."
    outputs[0] = gr.update(value=final_status, visible=True)
    yield outputs
# Custom stylesheet passed to gr.Blocks(css=...). It hides the footer and styles
# the status/results areas, tabs, buttons, progress bar and examples table.
# NOTE(review): ".group" is given opacity 0 near the end and only ".group.visible"
# restores it — confirm this does not hide the article groups.
css = """
/* 전역 스타일 */
footer {visibility: hidden;}
/* 레이아웃 컨테이너 */
#status_area {
background: rgba(255, 255, 255, 0.9);
padding: 15px;
border-bottom: 1px solid #ddd;
margin-bottom: 20px;
box-shadow: 0 2px 5px rgba(0,0,0,0.1);
}
#results_area {
padding: 10px;
margin-top: 10px;
}
/* 탭 스타일 */
.tabs {
border-bottom: 2px solid #ddd !important;
margin-bottom: 20px !important;
}
.tab-nav {
border-bottom: none !important;
margin-bottom: 0 !important;
}
.tab-nav button {
font-weight: bold !important;
padding: 10px 20px !important;
}
.tab-nav button.selected {
border-bottom: 2px solid #1f77b4 !important;
color: #1f77b4 !important;
}
/* 상태 메시지 */
#status_area .markdown-text {
font-size: 1.1em;
color: #2c3e50;
padding: 10px 0;
}
/* 기본 컨테이너 */
.group {
border: 1px solid #eee;
padding: 15px;
margin-bottom: 15px;
border-radius: 5px;
background: white;
}
/* 버튼 스타일 */
.primary-btn {
background: #1f77b4 !important;
border: none !important;
}
/* 입력 필드 */
.textbox {
border: 1px solid #ddd !important;
border-radius: 4px !important;
}
/* 프로그레스바 컨테이너 */
.progress-container {
position: fixed;
top: 0;
left: 0;
width: 100%;
height: 6px;
background: #e0e0e0;
z-index: 1000;
}
/* 프로그레스바 */
.progress-bar {
height: 100%;
background: linear-gradient(90deg, #2196F3, #00BCD4);
box-shadow: 0 0 10px rgba(33, 150, 243, 0.5);
transition: width 0.3s ease;
animation: progress-glow 1.5s ease-in-out infinite;
}
/* 프로그레스 텍스트 */
.progress-text {
position: fixed;
top: 8px;
left: 50%;
transform: translateX(-50%);
background: #333;
color: white;
padding: 4px 12px;
border-radius: 15px;
font-size: 14px;
z-index: 1001;
box-shadow: 0 2px 5px rgba(0,0,0,0.2);
}
/* 프로그레스바 애니메이션 */
@keyframes progress-glow {
0% {
box-shadow: 0 0 5px rgba(33, 150, 243, 0.5);
}
50% {
box-shadow: 0 0 20px rgba(33, 150, 243, 0.8);
}
100% {
box-shadow: 0 0 5px rgba(33, 150, 243, 0.5);
}
}
/* 반응형 디자인 */
@media (max-width: 768px) {
.group {
padding: 10px;
margin-bottom: 15px;
}
.progress-text {
font-size: 12px;
padding: 3px 10px;
}
}
/* 로딩 상태 표시 개선 */
.loading {
opacity: 0.7;
pointer-events: none;
transition: opacity 0.3s ease;
}
/* 결과 컨테이너 애니메이션 */
.group {
transition: all 0.3s ease;
opacity: 0;
transform: translateY(20px);
}
.group.visible {
opacity: 1;
transform: translateY(0);
}
/* Examples 스타일링 */
.examples-table {
margin-top: 10px !important;
margin-bottom: 20px !important;
}
.examples-table button {
background-color: #f0f0f0 !important;
border: 1px solid #ddd !important;
border-radius: 4px !important;
padding: 5px 10px !important;
margin: 2px !important;
transition: all 0.3s ease !important;
}
.examples-table button:hover {
background-color: #e0e0e0 !important;
transform: translateY(-1px) !important;
box-shadow: 0 2px 5px rgba(0,0,0,0.1) !important;
}
.examples-table .label {
font-weight: bold !important;
color: #444 !important;
margin-bottom: 5px !important;
}
"""
def get_article_content(url):
    """Download *url* and extract title, description and article text.

    Args:
        url: Address of the article page.
            NOTE(review): the URL comes from user input and is fetched as-is;
            consider restricting the reachable hosts.

    Returns:
        A string of the form ``"Title: ...\\n\\nDescription: ...\\n\\nContent:\\n..."``
        with runs of spaces collapsed and paragraph breaks preserved. On any
        failure the string ``"Error crawling content: <reason>"`` is returned
        instead of raising.
    """
    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        retries = Retry(total=3, backoff_factor=0.5)
        # The context manager releases pooled connections after the download.
        with requests.Session() as session:
            session.mount('https://', HTTPAdapter(max_retries=retries))
            response = session.get(url, headers=headers, timeout=30)
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'html.parser')

        # Metadata. An og:title <meta> carries its text in the ``content``
        # attribute, while a plain <title> tag carries it as element text; the
        # previous code read ``content`` from both and lost the <title> text.
        title_tag = soup.find('meta', property='og:title') or soup.find('title')
        if title_tag is None:
            title = ''
        elif title_tag.name == 'meta':
            title = title_tag.get('content', '')
        else:
            title = title_tag.get_text(strip=True)
        description = soup.find('meta', property='og:description') or soup.find('meta', {'name': 'description'})
        description = description.get('content', '') if description else ''

        # Body text: try common article containers first.
        article_content = ''
        content_selectors = [
            'article', '.article-body', '.article-content', '#article-body',
            '.story-body', '.post-content', '.entry-content', '.content-body',
            '[itemprop="articleBody"]', '.story-content'
        ]
        for selector in content_selectors:
            content = soup.select_one(selector)
            if not content:
                continue
            # Drop non-article elements before collecting text.
            for tag in content.find_all(['script', 'style', 'nav', 'header', 'footer', 'aside']):
                tag.decompose()
            paragraphs = content.find_all(['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6'])
            if paragraphs:
                texts = [p.get_text().strip() for p in paragraphs]
                article_content = '\n\n'.join(text for text in texts if text)
                break

        # Fallback: every reasonably long paragraph on the page.
        if not article_content:
            texts = [p.get_text().strip() for p in soup.find_all('p')]
            article_content = '\n\n'.join(text for text in texts if len(text) > 50)

        full_content = f"Title: {title}\n\nDescription: {description}\n\nContent:\n{article_content}"
        # Collapse horizontal whitespace only. Collapsing ``\s+`` also removed
        # the newlines, which erased paragraph breaks and made the blank-line
        # clean-up below a no-op.
        full_content = re.sub(r'[^\S\n]+', ' ', full_content)
        full_content = re.sub(r'\n\s*\n', '\n\n', full_content)
        return full_content.strip()
    except Exception as e:
        print(f"Crawling error details: {str(e)}")  # detailed error output for debugging
        return f"Error crawling content: {str(e)}"
def respond(url, history, system_message, max_tokens, temperature, top_p):
    """Translate the article at *url* and stream a Korean news article.

    This is a generator: every yielded value is the updated chat history, a
    list of ``(user_text, bot_text)`` tuples. The passed-in *history* list is
    extended in place.

    Args:
        url: Article URL; must start with ``http``.
        history: Current chat history (``None`` is treated as empty).
        system_message: System prompt for the model.
        max_tokens: Generation limit passed to the API.
        temperature: Sampling temperature passed to the API.
        top_p: Nucleus-sampling value passed to the API.
    """
    if history is None:
        history = []
    if not url or not url.startswith('http'):
        history.append((url, "올바른 URL을 입력해주세요."))
        # A ``return history`` inside a generator yields nothing, so the
        # message above never reached the UI; yield it explicitly.
        yield history
        return
    try:
        article_content = get_article_content(url)
        translation_prompt = f"""다음 영문 기사를 한국어로 번역하고 기사를 작성해주세요.
1단계: 전문 번역
===번역 시작===
{article_content}
===번역 끝===
2단계: 기사 작성 가이드라인
다음 요구사항에 따라 한국어 기사를 작성하세요:
1. 구조
- 헤드라인: 핵심 내용을 담은 강력한 제목
- 부제목: 헤드라인 보완 설명
- 리드문: 기사의 핵심을 요약한 첫 문단
- 본문: 상세 내용 전개
2. 작성 규칙
- 객관적이고 정확한 사실 전달
- 문장은 '다.'로 종결
- 단락 간 자연스러운 흐름
- 인용구는 따옴표 처리
- 핵심 정보를 앞부분에 배치
- 전문 용어는 적절한 설명 추가
3. 형식
- 적절한 단락 구분
- 읽기 쉬운 문장 길이
- 논리적인 정보 구성
각 단계는 '===번역===', '===기사==='로 명확히 구분하여 출력하세요.
"""
        messages = [
            {
                "role": "system",
                "content": system_message
            },
            {"role": "user", "content": translation_prompt}
        ]
        history.append((url, "번역 및 기사 작성을 시작합니다..."))
        # Show the placeholder immediately instead of waiting for the first token.
        yield history
        full_response = ""
        for message in client.chat.completions.create(
            model="CohereForAI/c4ai-command-r-plus-08-2024",
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
            messages=messages,
        ):
            # Skip chunks that carry no choices or no text.
            if not message.choices:
                continue
            token = getattr(message.choices[0].delta, 'content', None)
            if token:
                full_response += token
                history[-1] = (url, full_response)
                yield history
    except Exception as e:
        error_message = f"처리 중 오류가 발생했습니다: {str(e)}"
        history.append((url, error_message))
        yield history
def continue_writing(history, system_message, max_tokens, temperature, top_p):
    """Ask the model to continue the last answer and stream the result.

    This is a generator. With an empty *history* it yields nothing. Otherwise
    each yielded value is a copy of *history* with one extra entry appended:
    ``("계속 작성", <text so far>)`` while streaming, or ``("오류", <message>)``
    when the request fails. *history* itself is not modified.
    """
    if not history:
        return history

    # The second element of the last entry is the previous model answer.
    previous_answer = history[-1][1]
    continue_prompt = f"""이전 내용을 이어서 계속 작성해주세요.
마지막 응답: {previous_answer}
추가 지침:
1. 이전 내용의 맥락을 유지하며 자연스럽게 이어서 작성
2. 새로운 정보나 상세 설명을 추가
3. 필요한 경우 보충 설명이나 분석 제공
4. 기사 형식과 스타일 유지
5. 필요한 경우 추가적인 이미지 프롬프트 생성
"""
    # The conversation sent to the model starts with the user message.
    system_turn = {"role": "system", "content": system_message}
    user_turn = {"role": "user", "content": continue_prompt}
    messages = [system_turn, user_turn]

    try:
        generated = ""
        stream = client.chat.completions.create(
            model="CohereForAI/c4ai-command-r-plus-08-2024",
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
            messages=messages,
        )
        for chunk in stream:
            piece = getattr(chunk.choices[0].delta, 'content', None)
            if not piece:
                continue
            generated += piece
            # Keep the earlier conversation and append the growing answer.
            snapshot = history.copy()
            snapshot.append(("계속 작성", generated))
            yield snapshot
    except Exception as e:
        error_message = f"계속 작성 중 오류가 발생했습니다: {str(e)}"
        snapshot = history.copy()
        snapshot.append(("오류", error_message))
        yield snapshot
    return history
# User interface: four tabs (stored Samsung/US search, per-country search,
# per-region search, AI article generation) followed by the event wiring.
with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css, title="NewsAI 서비스") as iface:
    init_db()  # make sure the results table exists before any button is used

    with gr.Tabs():
        # Tab: store / load search results in the database
        with gr.Tab("DB 검색"):
            gr.Markdown("삼성/미국 검색 결과를 DB에 저장하고 불러옵니다.")
            with gr.Row():
                # Distinct names: the country tab defines its own ``search_button``.
                db_search_button = gr.Button("검색: samsung/미국", variant="primary")
                db_load_button = gr.Button("출력: samsung/미국", variant="secondary")
            results_display = gr.Markdown()
            db_search_button.click(
                fn=search_samsung_us,
                outputs=results_display
            )
            db_load_button.click(
                fn=load_samsung_us,
                outputs=results_display
            )

        # Tab: search by country
        with gr.Tab("국가별"):
            gr.Markdown("검색어를 입력하고 원하는 국가(한국 제외)를 선택하면, 검색어와 일치하는 24시간 이내 뉴스를 최대 100개 출력합니다.")
            gr.Markdown("국가 선택후 검색어에 '한글'을 입력하면 현지 언어로 번역되어 검색합니다. 예: 'Taiwan' 국가 선택후 '삼성' 입력시 '三星'으로 자동 검색")
            with gr.Column():
                with gr.Row():
                    query = gr.Textbox(label="검색어")
                    country = gr.Dropdown(
                        choices=sorted(COUNTRY_LOCATIONS),
                        label="국가",
                        value="United States"
                    )
                # Frequently used search terms
                gr.Examples(
                    examples=[
                        "artificial intelligence",
                        "NVIDIA",
                        "OPENAI",
                        "META LLAMA",
                        "black forest labs",
                        "GOOGLE gemini",
                        "anthropic Claude",
                        "X.AI",
                        "HUGGINGFACE",
                        "HYNIX",
                        "Large Language model",
                        "CHATGPT",
                        "StabilityAI",
                        "MISTRALAI",
                        "QWEN",
                        "MIDJOURNEY",
                        "GPU"
                    ],
                    inputs=query,
                    label="자주 사용되는 검색어"
                )
                status_message = gr.Markdown("", visible=True)
                translated_query_display = gr.Markdown(visible=False)
                search_button = gr.Button("검색", variant="primary")
                articles_state = gr.State([])
                # Pre-built hidden result slots, created with the shared helper
                # instead of a duplicated loop.
                article_components = create_article_components(MAX_COUNTRY_RESULTS)

        # Tab: search by region
        with gr.Tab("전세계"):
            gr.Markdown("대륙별로 24시간 이내 뉴스를 검색합니다.")
            with gr.Column():
                with gr.Column(elem_id="status_area"):
                    with gr.Row():
                        query_global = gr.Textbox(label="검색어")
                        region_select = gr.Dropdown(
                            choices=REGIONS,
                            label="지역 선택",
                            value="동아시아"
                        )
                        search_button_global = gr.Button("검색", variant="primary")
                    status_message_global = gr.Markdown("")
                    translated_query_display_global = gr.Markdown("")
                with gr.Column(elem_id="results_area"):
                    articles_state_global = gr.State([])
                    global_article_components = create_article_components(MAX_GLOBAL_RESULTS)

        # Tab: AI translation / article generation
        with gr.Tab("AI 기사 생성"):
            gr.Markdown("뉴스 URL을 입력하면 AI가 한국어로 번역하여 기사 형식으로 작성합니다.")
            gr.Markdown("이미지 생성: https://huggingface.co/spaces/ginipick/FLUXllama ")
            with gr.Column():
                chatbot = gr.Chatbot(height=600)
                with gr.Row():
                    url_input = gr.Textbox(
                        label="뉴스 URL",
                        placeholder="https://..."
                    )
                with gr.Row():
                    translate_button = gr.Button("기사 생성", variant="primary")
                    continue_button = gr.Button("계속 이어서 작성", variant="secondary")
                with gr.Accordion("고급 설정", open=False):
                    system_message = gr.Textbox(
                        value="""You are a professional translator and journalist. Follow these steps strictly:
1. TRANSLATION
- Start with ===번역=== marker
- Provide accurate Korean translation
- Maintain original meaning and context
2. ARTICLE WRITING
- Start with ===기사=== marker
- Write a new Korean news article based on the translation
- Follow newspaper article format
- Use formal news writing style
- End sentences with '다.'
- Include headline and subheadline
- Organize paragraphs clearly
- Put key information first
- Use quotes appropriately
3. IMAGE PROMPT GENERATION
- Start with ===이미지 프롬프트=== marker
- Create detailed Korean prompts for image generation
- Prompts should reflect the article's main theme and content
- Include key visual elements mentioned in the article
- Specify style, mood, and composition
- Format: "이미지 설명: [상세 설명]"
- Add style keywords: "스타일: [관련 키워드들]"
- Add mood keywords: "분위기: [관련 키워드들]"
IMPORTANT:
- Must complete all three steps in order
- Clearly separate each section with markers
- Never skip or combine steps
- Ensure image prompts align with article content""",
                        label="System message"
                    )
                    max_tokens = gr.Slider(
                        minimum=1,
                        maximum=7800,
                        value=7624,
                        step=1,
                        label="Max new tokens"
                    )
                    temperature = gr.Slider(
                        minimum=0.1,
                        maximum=4.0,
                        value=0.7,
                        step=0.1,
                        label="Temperature"
                    )
                    top_p = gr.Slider(
                        minimum=0.1,
                        maximum=1.0,
                        value=0.95,
                        step=0.05,
                        label="Top-P"
                    )

    # Event wiring
    # Country tab. The output order must match the list built by
    # search_and_display: status, query display, error display, five components
    # per article slot, then the state.
    search_outputs = [status_message, translated_query_display, gr.Markdown(visible=False)]
    for comp in article_components:
        search_outputs.extend([
            comp['group'], comp['title'], comp['image'],
            comp['snippet'], comp['info']
        ])
    search_outputs.append(articles_state)
    search_button.click(
        fn=search_and_display,
        inputs=[query, country, articles_state],
        outputs=search_outputs,
        show_progress=True
    )

    # Region tab. The output order must match the lists yielded by search_global.
    global_search_outputs = [status_message_global, translated_query_display_global]
    for comp in global_article_components:
        global_search_outputs.extend([
            comp['group'], comp['title'], comp['image'],
            comp['snippet'], comp['info']
        ])
    global_search_outputs.append(articles_state_global)
    search_button_global.click(
        fn=search_global,
        inputs=[query_global, region_select, articles_state_global],
        outputs=global_search_outputs,
        show_progress=True
    )

    # AI article tab: generate an article from a URL
    translate_button.click(
        fn=respond,
        inputs=[
            url_input,
            chatbot,
            system_message,
            max_tokens,
            temperature,
            top_p,
        ],
        outputs=chatbot
    )

    # AI article tab: continue the previous answer
    continue_button.click(
        fn=continue_writing,
        inputs=[
            chatbot,
            system_message,
            max_tokens,
            temperature,
            top_p,
        ],
        outputs=chatbot
    )
# Start the web server on all interfaces, port 7860.
# SECURITY NOTE: the login used to be hard-coded in source. It can now be
# overridden with the APP_AUTH_USER / APP_AUTH_PASSWORD environment variables;
# the previous values remain as defaults so existing deployments keep working,
# but they should be replaced.
# NOTE(review): share=True and ssl_verify=False are kept as-is — confirm both
# are really required for this deployment.
iface.launch(
    server_name="0.0.0.0",
    server_port=7860,
    share=True,
    auth=(
        os.getenv("APP_AUTH_USER", "gini"),
        os.getenv("APP_AUTH_PASSWORD", "pick"),
    ),
    ssl_verify=False,
    show_error=True
)