# AIRider's picture
# Update app.py
# 22bef0a verified
# raw / history / blame — 11.1 kB
# NOTE(review): the four lines above are Hugging Face web-page residue from a
# copy/paste; commented out so the file parses as Python.
import json
import os
import random
import re
import time
from datetime import datetime
from urllib.parse import urlencode

import gradio as gr
import openai
import requests
from bs4 import BeautifulSoup
from fpdf import FPDF as FPDF2
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
# API ํ‚ค ์„ค์ •
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# OpenAI ์„ค์ •
openai.api_key = OPENAI_API_KEY
def setup_session():
    """Create a requests.Session with retry-on-5xx behaviour.

    Returns:
        A configured ``requests.Session``, or ``None`` if setup fails
        (callers check for ``None`` and report a session error).
    """
    try:
        session = requests.Session()
        # Retry up to 5 times with exponential backoff on common gateway errors.
        retries = Retry(total=5, backoff_factor=1, status_forcelist=[502, 503, 504])
        adapter = HTTPAdapter(max_retries=retries)
        # Mount for both schemes so redirects to http:// also get retries.
        session.mount('https://', adapter)
        session.mount('http://', adapter)
        return session
    except Exception:
        # Preserve the original best-effort contract: signal failure via None.
        return None
def generate_naver_search_url(query):
    """Build a Naver blog-tab search URL for *query*.

    The query is percent-encoded via ``urlencode`` so that spaces and
    non-ASCII (e.g. Korean) characters yield a valid URL; the original
    manual string join embedded them raw.
    """
    base_url = "https://search.naver.com/search.naver?"
    params = {"ssc": "tab.blog.all", "sm": "tab_jum", "query": query}
    return base_url + urlencode(params)
def crawl_blog_content(url, session):
    """Fetch a Naver blog post and return its cleaned body text.

    Returns "" on any failure (non-200 status, missing content container,
    or network/parse error) so callers can treat the crawl as best-effort.
    """
    try:
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
            "Accept-Language": "ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
            "Referer": "https://search.naver.com/search.naver",
        }
        # Small random delay to avoid hammering Naver and tripping rate limits.
        delay = random.uniform(1, 2)
        time.sleep(delay)
        # timeout added: without it a stalled connection would block forever.
        response = session.get(url, headers=headers, timeout=10)
        if response.status_code != 200:
            return ""
        soup = BeautifulSoup(response.content, "html.parser")
        # 'se-main-container' appears to be the blog article body container —
        # TODO confirm against current Naver markup.
        content = soup.find("div", attrs={'class': 'se-main-container'})
        if content:
            return clean_text(content.get_text())
        return ""
    except Exception:
        # Best-effort crawl: swallow errors and return empty content.
        return ""
def crawl_naver_search_results(url, session):
    """Scrape blog search results from a Naver search page.

    Returns:
        Up to 10 dicts with keys "์ œ๋ชฉ" (title) and "๋งํฌ" (link);
        an empty list on any failure.
    """
    MAX_RESULTS = 10
    try:
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
            "Accept-Language": "ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
            "Referer": "https://search.naver.com/search.naver",
        }
        # timeout added: without it a stalled connection would block forever.
        response = session.get(url, headers=headers, timeout=10)
        if response.status_code != 200:
            return []
        soup = BeautifulSoup(response.content, "html.parser")
        results = []
        for li in soup.find_all("li", class_=re.compile("bx.*")):
            for div in li.find_all("div", class_="detail_box"):
                for div2 in div.find_all("div", class_="title_area"):
                    title = div2.text.strip()
                    for a in div2.find_all("a", href=True):
                        link = a["href"]
                        if "blog.naver" in link:
                            # Rewrite to the mobile host (m.blog.naver...),
                            # whose markup the content crawler expects.
                            link = link.replace("https://", "https://m.")
                        results.append({"์ œ๋ชฉ": title, "๋งํฌ": link})
                        # Early return replaces the original's four nested
                        # count/break checks; behaviour is identical.
                        if len(results) >= MAX_RESULTS:
                            return results
        return results
    except Exception:
        # Best-effort scrape: any failure yields an empty result set.
        return []
def clean_text(text):
    """Normalize whitespace: collapse every run into one space, trim ends."""
    collapsed = re.sub(r'\s+', ' ', text)
    return collapsed.strip()
def fetch_references(topic):
    """Search Naver blogs for *topic* and return exactly 3 reference texts.

    On session failure or too few search hits, returns the same error
    message three times so downstream unpacking still works.
    """
    session = setup_session()
    if session is None:
        return ["์„ธ์…˜ ์„ค์ • ์‹คํŒจ"] * 3
    search_url = generate_naver_search_url(topic)
    results = crawl_naver_search_results(search_url, session)
    if len(results) < 3:
        return ["์ถฉ๋ถ„ํ•œ ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ๋ฅผ ์ฐพ์ง€ ๋ชปํ–ˆ์Šต๋‹ˆ๋‹ค."] * 3
    # Pick 3 random hits and crawl each one's body text.
    picked = random.sample(results, 3)
    return [
        f"์ œ๋ชฉ: {item['์ œ๋ชฉ']}\n๋‚ด์šฉ: {crawl_blog_content(item['๋งํฌ'], session)}"
        for item in picked
    ]
def fetch_crawl_results(query):
    """Return the three reference texts for *query* as separate values."""
    first, second, third = fetch_references(query)
    return first, second, third
def generate_blog_post(query, prompt_template):
    """Crawl references and ask the model for a review-style blog post.

    Returns:
        (post_text, ref1, ref2, ref3); on failure the first element is an
        error message and the three references are empty strings.
    """
    try:
        # Gather three reference articles for the topic.
        ref1, ref2, ref3 = fetch_references(query)
        combined_content = f"์ฐธ๊ณ ๊ธ€1:\n{ref1}\n\n์ฐธ๊ณ ๊ธ€2:\n{ref2}\n\n์ฐธ๊ณ ๊ธ€3:\n{ref3}"
        full_prompt = f"์ฃผ์ œ: {query}\n\n{prompt_template}\n\n์ฐธ๊ณ  ๋‚ด์šฉ:\n{combined_content}"
        # Fresh random seed per call so repeated runs vary.
        seed_value = random.randint(1, 100000)
        completion = openai.ChatCompletion.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": prompt_template},
                {"role": "user", "content": full_prompt},
            ],
            max_tokens=10000,
            temperature=0.75,
            top_p=1.0,
            frequency_penalty=0.5,
            presence_penalty=0.3,
            seed=seed_value,
        )
        post_body = completion.choices[0].message['content']
        return f"์ฃผ์ œ: {query}\n\n{post_body}", ref1, ref2, ref3
    except Exception as e:
        return f"๋ธ”๋กœ๊ทธ ๊ธ€ ์ƒ์„ฑ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}", "", "", ""
# PDF class and related helper functions
class PDF(FPDF2):
    """FPDF subclass preloaded with NanumGothic fonts for Korean text."""

    # (family, style, ttf filename) — files are expected next to this script.
    _FONTS = (
        ("NanumGothic", "", "NanumGothic.ttf"),
        ("NanumGothic", "B", "NanumGothicBold.ttf"),
        ("NanumGothicExtraBold", "", "NanumGothicExtraBold.ttf"),
        ("NanumGothicLight", "", "NanumGothicLight.ttf"),
    )

    def __init__(self):
        super().__init__()
        base = os.path.dirname(__file__)
        for family, style, ttf in self._FONTS:
            self.add_font(family, style, os.path.join(base, ttf))

    def header(self):
        # Just select the body font; no visible header content.
        self.set_font('NanumGothic', '', 10)

    def footer(self):
        # Centered page number, 15 units up from the bottom edge.
        self.set_y(-15)
        self.set_font('NanumGothic', '', 8)
        self.cell(0, 10, f'Page {self.page_no()}', 0, 0, 'C')
def save_to_pdf(blog_post, user_topic):
    """Render *blog_post* to a PDF file and return the filename.

    The first line of *blog_post* is treated as the title; the rest is body.
    The filename is ``YYMMDD_HHMM_<sanitized topic>.pdf``.
    """
    pdf = PDF()
    pdf.add_page()
    lines = blog_post.split('\n')
    title = lines[0].strip()
    content = '\n'.join(lines[1:]).strip()
    # NOTE(review): datetime.now() is the server's local time; the original
    # comment claimed Korean time (KST) — confirm and pass a tzinfo if needed.
    now = datetime.now()
    date_str = now.strftime("%y%m%d")
    time_str = now.strftime("%H%M")
    filename = f"{date_str}_{time_str}_{format_filename(user_topic)}.pdf"
    pdf.set_font("NanumGothic", 'B', size=14)
    pdf.cell(0, 10, title, ln=True, align='C')
    pdf.ln(10)
    pdf.set_font("NanumGothic", '', size=11)
    pdf.multi_cell(0, 5, content)
    # Fixed: this log line previously printed a literal placeholder
    # ("(unknown)") instead of the actual output filename.
    print(f"Saving PDF as: {filename}")
    pdf.output(filename)
    return filename
def format_filename(text):
    """Drop filesystem-unfriendly characters, cap at 50 chars, trim ends."""
    safe = re.sub(r'[^\w\s-]', '', text)
    truncated = safe[:50]
    return truncated.strip()
def save_content_to_pdf(blog_post, user_topic):
    """Gradio callback: delegate to save_to_pdf and return the file path."""
    pdf_path = save_to_pdf(blog_post, user_topic)
    return pdf_path
# ๊ธฐ๋ณธ ํ”„๋กฌํ”„ํŠธ ํ…œํ”Œ๋ฆฟ
DEFAULT_PROMPT_TEMPLATE = """
[๋ธ”๋กœ๊ทธ ๊ธ€ ์ž‘์„ฑ ๊ธฐ๋ณธ ๊ทœ์น™]
1. ๋ฐ˜๋“œ์‹œ ํ•œ๊ธ€๋กœ ์ž‘์„ฑํ•˜๋ผ
2. ์ฃผ์–ด์ง„ ์ฐธ๊ณ ๊ธ€์„ ๋ฐ”ํƒ•์œผ๋กœ 1๊ฐœ์˜ ์ƒํ’ˆ๋ฆฌ๋ทฐํ˜•(Product Review) ๋ธ”๋กœ๊ทธ๋ฅผ ์ž‘์„ฑ
3. ์ฃผ์ œ์™€ ์ œ๋ชฉ์„ ์ œ์™ธํ•œ ๊ธ€์ด 1500๋‹จ์–ด ์ด์ƒ์ด ๋˜๋„๋ก ์ž‘์„ฑ
4. ๊ธ€์˜ ์ œ๋ชฉ์„ ์ƒํ’ˆ๋ฆฌ๋ทฐํ˜• ๋ธ”๋กœ๊ทธ ํ˜•ํƒœ์— ๋งž๋Š” ์ ์ ˆํ•œ ์ œ๋ชฉ์œผ๋กœ ์ถœ๋ ฅ
- ์ฐธ๊ณ ๊ธ€์˜ ์ œ๋ชฉ๋„ ์ฐธ๊ณ ํ•˜๋˜, ๋™์ผํ•˜๊ฒŒ ์ž‘์„ฑํ•˜์ง€ ๋ง ๊ฒƒ
5. ๋ฐ˜๋“œ์‹œ ๋งˆํฌ๋‹ค์šด ํ˜•์‹์ด ์•„๋‹Œ ์ˆœ์ˆ˜ํ•œ ํ…์ŠคํŠธ๋กœ๋งŒ ์ถœ๋ ฅํ•˜๋ผ
6. ๋‹ค์‹œํ•œ๋ฒˆ ์ฐธ๊ณ ๊ธ€์„ ๊ฒ€ํ† ํ•˜์—ฌ ๋‚ด์šฉ์„ ์ถฉ๋ถ„ํžˆ ๋ฐ˜์˜ํ•˜๋˜, ์ฐธ๊ณ ๊ธ€์˜ ๊ธ€์„ ๊ทธ๋Œ€๋กœ ์žฌ์ž‘์„ฑํ•˜์ง€๋Š” ๋ง ๊ฒƒ
[๋ธ”๋กœ๊ทธ ๊ธ€ ์ž‘์„ฑ ์„ธ๋ถ€ ๊ทœ์น™]
1. ์‚ฌ์šฉ์ž๊ฐ€ ์ž…๋ ฅํ•œ ์ฃผ์ œ์™€ ์ฃผ์–ด์ง„ ์ฐธ๊ณ ๊ธ€ 3๊ฐœ๋ฅผ ๋ฐ”ํƒ•์œผ๋กœ ์ƒํ’ˆ๋ฆฌ๋ทฐํ˜• ๋ธ”๋กœ๊ทธ ๊ธ€ 1๊ฐœ๋ฅผ ์ž‘์„ฑํ•˜๋ผ
2. ์ฃผ์–ด์ง„ ๋ชจ๋“  ๊ธ€์„ ๋ถ„์„ํ•˜์—ฌ ํ•˜๋‚˜์˜ ๋Œ€์ฃผ์ œ๋ฅผ ์„ ์ •ํ•˜๋ผ(1๊ฐœ์˜ ์ฐธ๊ณ ๊ธ€์— ์น˜์šฐ์น˜์ง€ ๋ง๊ณ  ๋‹ค์–‘ํ•œ ๋‚ด์š”์„ ๋‹ด์„๊ฒƒ)
3. ์—ฌ๋Ÿฌ๊ฐ€์ง€ ์ƒํ’ˆ์ด๋ผ๋ฉด ์ƒํ’ˆ 1๊ฐœ์— ์น˜์šฐ์นœ ๋ฆฌ๋ทฐ๋ฅผ ์ž‘์„ฑํ•˜์ง€ ๋ง ๊ฒƒ.
4. ๋Œ€์ฃผ์ œ์— ๋งž๊ฒŒ ๊ธ€์˜ ๋งฅ๋ฝ์„ ์œ ์ง€ํ•˜๋ผ
5. ์ฐธ๊ณ ๊ธ€์— ์ž‘์„ฑ๋œ ์ƒํ’ˆ๊ณผ ๊ธฐ๋Šฅ์— ์ง‘์ค‘ํ•˜์—ฌ ์ž‘์„ฑํ•˜๋ผ
6. ์‹ค์ œ ๋‚ด๊ฐ€ ์‚ฌ์šฉํ•ด๋ณด๊ณ  ๊ฒฝํ—˜ํ•œ ๋‚ด์šฉ์„ ์ž‘์„ฑํ•œ ๋ฆฌ๋ทฐ ํ˜•ํƒœ๋กœ ๊ธ€์„ ์ž‘์„ฑ
7. ๋‚ด์šฉ์€ ๊ธ์ •์ ์œผ๋กœ ์ž‘์„ฑํ•˜๋˜, ์ƒํ’ˆ์ด ๋‹๋ณด์ด๋„๋ก ์ž‘์„ฑ(ํ•˜๋‚˜์˜ ์ƒํ’ˆ์— ์น˜์šฐ์น˜์ง€ ๋ง ๊ฒƒ)
8. ์ƒํ’ˆ์˜ ๊ฐ€์น˜๋ฅผ ๊ณ ๊ฐ์—๊ฒŒ ์–ดํ•„ํ•˜๋ผ.
9. ๊ธ€์˜ ์•ž, ๋’ค ๋ฌธ์žฅ์ด ์ž์—ฐ์Šค๋Ÿฝ๊ฒŒ ์ด์–ด์ง€๋„๋ก ์ž‘์„ฑ
10. ์–ดํˆฌ๋Š” ์ฃผ์–ด์ง„ ์ฐธ๊ณ ๊ธ€ 3๊ฐ€์ง€์˜ ์–ดํˆฌ๋ฅผ ์ ์ ˆํžˆ ๋ฐ˜์˜ํ•˜๋ผ
- ํŠนํžˆ ๋ฌธ์žฅ์˜ ๋ ๋ถ€๋ถ„์„ ์ ์ ˆํžˆ ๋ฐ˜์˜(๊ฐ€๊ธ‰์  '~์š”'๋กœ ๋๋‚˜๋„๋ก ์ž‘์„ฑ)
- ๋„ˆ๋ฌด ๋”ฑ๋”ฑํ•˜์ง€ ์•Š๊ฒŒ ํŽธ์•ˆํ•˜๊ฒŒ ์ฝ์„ ์ˆ˜ ์žˆ๋„๋ก ์ž์—ฐ์Šค๋Ÿฌ์šด ๋Œ€ํ™”์ฒด๋ฅผ ๋ฐ˜์˜
[์ œ์™ธ ๊ทœ์น™]
1. ๋ฐ˜๋“œ์‹œ ์ฐธ๊ณ ๊ธ€์˜ ํฌํ•จ๋œ ๋งํฌ(URL)๋Š” ์ œ์™ธ
2. ์ฐธ๊ณ ๊ธ€์—์„œ '๋งํฌ๋ฅผ ํ™•์ธํ•ด์ฃผ์„ธ์š”'์™€ ๊ฐ™์€ ๋งํฌ ์ด๋™์˜ ๋ฌธ๊ตฌ๋Š” ์ œ์™ธ
3. ์ฐธ๊ณ ๊ธ€์— ์žˆ๋Š” ์ž‘์„ฑ์ž, ํ™”์ž, ์œ ํŠœ๋ฒ„, ๊ธฐ์ž(Writer, speaker, YouTuber, reporter)์˜ ์ด๋ฆ„, ์• ์นญ, ๋‹‰๋„ค์ž„(Name, Nkickname)์€ ๋ฐ˜๋“œ์‹œ ์ œ์™ธ
4. '์—…์ฒด๋กœ ๋ถ€ํ„ฐ ์ œ๊ณต ๋ฐ›์•„์„œ ์ž‘์„ฑ', '์ฟ ํŒก ํŒŒํŠธ๋„ˆ์Šค'๋“ฑ์˜ ํ‘œํ˜„์„ ๋ฐ˜๋“œ์‹œ ์ œ์™ธํ•˜๋ผ.
5. ๊ธ€์˜ ๊ตฌ์กฐ๊ฐ€ ๋“œ๋Ÿฌ๋‚˜๊ฒŒ ์ž‘์„ฑํ•˜์ง€ ๋ง ๊ฒƒ(์‹œ์ž‘, ๋์— ๋Œ€ํ•œ ํ‘œํ˜„)
"""
# Gradio ์•ฑ ์ƒ์„ฑ
with gr.Blocks() as iface:
gr.Markdown("# ๋ธ”๋กœ๊ทธ ๊ธ€ ์ž‘์„ฑ๊ธฐ_๋ฆฌ๋ทฐ_๊ธฐ๋Šฅ์ง‘์ค‘ํ˜•")
gr.Markdown("์ฃผ์ œ๋ฅผ ์ž…๋ ฅํ•˜๊ณ  ๋ธ”๋กœ๊ทธ ๊ธ€ ์ƒ์„ฑ ๋ฒ„ํŠผ์„ ๋ˆ„๋ฅด๋ฉด ์ž๋™์œผ๋กœ ๋ธ”๋กœ๊ทธ ๊ธ€์„ ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค.")
query_input = gr.Textbox(lines=1, placeholder="๋ธ”๋กœ๊ทธ ๊ธ€์˜ ์ฃผ์ œ๋ฅผ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”...", label="์ฃผ์ œ")
prompt_input = gr.Textbox(lines=10, value=DEFAULT_PROMPT_TEMPLATE, label="ํ”„๋กฌํ”„ํŠธ ํ…œํ”Œ๋ฆฟ", visible=True)
generate_button = gr.Button("๋ธ”๋กœ๊ทธ ๊ธ€ ์ƒ์„ฑ")
output_text = gr.Textbox(label="์ƒ์„ฑ๋œ ๋ธ”๋กœ๊ทธ ๊ธ€")
ref1_text = gr.Textbox(label="์ฐธ๊ณ ๊ธ€ 1", lines=10, visible=True)
ref2_text = gr.Textbox(label="์ฐธ๊ณ ๊ธ€ 2", lines=10, visible=True)
ref3_text = gr.Textbox(label="์ฐธ๊ณ ๊ธ€ 3", lines=10, visible=True)
save_pdf_button = gr.Button("PDF๋กœ ์ €์žฅ")
pdf_output = gr.File(label="์ƒ์„ฑ๋œ PDF ํŒŒ์ผ")
generate_button.click(
generate_blog_post,
inputs=[query_input, prompt_input],
outputs=[output_text, ref1_text, ref2_text, ref3_text],
show_progress=True
)
save_pdf_button.click(
save_content_to_pdf,
inputs=[output_text, query_input],
outputs=[pdf_output],
show_progress=True
)
# Gradio ์•ฑ ์‹คํ–‰
if __name__ == "__main__":
iface.launch()