# Hugging Face Spaces status when captured: Sleeping
# app.py
import gradio as gr | |
import requests | |
from bs4 import BeautifulSoup | |
from transformers import BartForConditionalGeneration, PreTrainedTokenizerFast | |
import torch | |
import re | |
# Load the KoBART summarization tokenizer and model once, at import time.
# Both are read as module-level globals by summarize_news().
_CHECKPOINT = "gogamza/kobart-summarization"
tokenizer = PreTrainedTokenizerFast.from_pretrained(_CHECKPOINT)
model = BartForConditionalGeneration.from_pretrained(_CHECKPOINT)
# Summarization function
# Seconds to wait for the news site before giving up; without a timeout
# requests.get() can block indefinitely.
_REQUEST_TIMEOUT = 10

# NOTE(review): assumes some news sites reject the default python-requests
# User-Agent; a browser-like value is sent to be safe -- confirm it is needed.
_REQUEST_HEADERS = {"User-Agent": "Mozilla/5.0 (compatible; NewsSummarizer/1.0)"}

# Extracted text shorter than this is treated as a failed extraction.
_MIN_ARTICLE_CHARS = 30


def _normalize_whitespace(text):
    """Collapse every run of whitespace in *text* to a single space."""
    return re.sub(r"\s+", " ", text).strip()


def _extract_article_text(soup):
    """Return whitespace-normalized article text from a parsed HTML page.

    Candidates are tried in order: the first ``<article>`` element, a
    ``<div id="articleBody">``, a ``<div class="news_body">``, and finally
    all ``<p>`` elements whose stripped text is longer than 40 characters.
    If the chosen candidate yields fewer than ``_MIN_ARTICLE_CHARS``
    characters, the text of the whole page is used instead.
    """
    container = (
        soup.find("article")
        or soup.find("div", id="articleBody")
        or soup.find("div", class_="news_body")
    )
    if container is not None:
        text = container.get_text()
    else:
        stripped = (p.get_text().strip() for p in soup.find_all("p"))
        text = " ".join(p for p in stripped if len(p) > 40)

    # Normalize before measuring so a whitespace-only container does not
    # count as a successful extraction.
    text = _normalize_whitespace(text)
    if len(text) < _MIN_ARTICLE_CHARS:
        text = _normalize_whitespace(soup.get_text())
    return text


def summarize_news(url, min_len, max_len):
    """Fetch a news article and return a KoBART-generated summary.

    Args:
        url: Article URL; must start with ``http://`` or ``https://``.
        min_len: Minimum summary length, passed to ``generate`` as
            ``min_length`` after conversion to ``int``.
        max_len: Maximum summary length, passed to ``generate`` as
            ``max_length`` after conversion to ``int``.

    Returns:
        The summary string on success. On any failure (invalid input,
        network/HTTP error, extraction failure, model error) a user-facing
        message string is returned instead; this function never raises,
        because its return value is displayed directly in the UI.
    """
    try:
        url = (url or "").strip()
        if not url.lower().startswith(("http://", "https://")):
            return "올바른 뉴스 URL을 입력해주세요. (http:// 또는 https:// 로 시작해야 합니다.)"

        min_len, max_len = int(min_len), int(max_len)
        # The two sliders are independent, so the user can pick min > max.
        if min_len > max_len:
            return "최소 길이는 최대 길이보다 클 수 없습니다."

        res = requests.get(url, headers=_REQUEST_HEADERS, timeout=_REQUEST_TIMEOUT)
        # Do not summarize 4xx/5xx error pages.
        res.raise_for_status()
        # requests falls back to ISO-8859-1 when the Content-Type header has
        # no charset, which garbles Korean text; use the detected encoding.
        if res.encoding is None or res.encoding.lower() == "iso-8859-1":
            res.encoding = res.apparent_encoding

        soup = BeautifulSoup(res.text, "html.parser")
        text = _extract_article_text(soup)
        if len(text) < _MIN_ARTICLE_CHARS:
            return "본문이 너무 짧거나 추출에 실패했습니다. 다른 뉴스 URL을 시도해보세요."

        # Input is truncated to 1024 tokens before being fed to the model.
        input_ids = tokenizer.encode(
            text, return_tensors="pt", max_length=1024, truncation=True
        )
        summary_ids = model.generate(
            input_ids,
            max_length=max_len,
            min_length=min_len,
            num_beams=4,
            early_stopping=True,
            length_penalty=1.2,
            no_repeat_ngram_size=3,
            repetition_penalty=1.5,
        )
        return tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    except Exception as e:
        # UI boundary: report the failure as text rather than crashing the app.
        return f"오류 발생: {e}"
# Gradio UI: a URL box with a submit button, two length sliders, and a
# result box. Components appear in the order they are created below.
soft_theme = gr.themes.Soft()
with gr.Blocks(theme=soft_theme) as demo:
    gr.Markdown(value="## 📰 뉴스 요약기 (KoBART 기반)")
    gr.Markdown(value="뉴스 기사 URL을 입력하면 AI가 요약해줍니다.")

    with gr.Row():
        url_input = gr.Textbox(
            label="뉴스 URL",
            placeholder="https://news.naver.com/article/...",
            lines=1,
        )
        submit_btn = gr.Button(value="요약하기")

    with gr.Row():
        min_len = gr.Slider(
            minimum=20, maximum=200, value=50, step=10, label="최소 길이"
        )
        max_len = gr.Slider(
            minimum=50, maximum=400, value=150, step=10, label="최대 길이"
        )

    output = gr.Textbox(label="요약 결과", lines=10)

    # Clicking the button runs the summarizer on the URL and both slider
    # values, and writes the returned string into the result box.
    submit_btn.click(summarize_news, [url_input, min_len, max_len], output)

# Start the app only when this file is run as a script
# (the original comment notes this is how it is run on Hugging Face Spaces).
if __name__ == "__main__":
    demo.launch()