Spaces:
Sleeping
Sleeping
Upload 2 files
Browse files- app.py +273 -0
- requirements.txt +6 -0
app.py
ADDED
@@ -0,0 +1,273 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import random
|
3 |
+
import time
|
4 |
+
import re
|
5 |
+
import json
|
6 |
+
import requests
|
7 |
+
from bs4 import BeautifulSoup
|
8 |
+
from requests.adapters import HTTPAdapter
|
9 |
+
from requests.packages.urllib3.util.retry import Retry
|
10 |
+
import openai
|
11 |
+
import gradio as gr
|
12 |
+
from fpdf import FPDF as FPDF2
|
13 |
+
from datetime import datetime
|
14 |
+
|
15 |
+
# API key configuration: read the OpenAI key from the environment
# (os.getenv yields None when the variable is not set).
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# OpenAI configuration: hand the key to the openai module.
openai.api_key = OPENAI_API_KEY
|
20 |
+
|
21 |
+
def setup_session():
    """Build a ``requests`` session that retries failed HTTPS requests.

    Returns:
        A ``requests.Session`` whose ``https://`` adapter retries up to five
        times (backoff factor 1) on 502/503/504 responses, or ``None`` when
        building the session raised an exception.
    """
    try:
        http = requests.Session()
        retry_policy = Retry(
            total=5,
            backoff_factor=1,
            status_forcelist=[502, 503, 504],
        )
        adapter = HTTPAdapter(max_retries=retry_policy)
        http.mount('https://', adapter)
    except Exception:
        # Callers check for None instead of handling an exception.
        return None
    return http
|
29 |
+
|
30 |
+
def generate_naver_search_url(query):
    """Build the Naver blog-tab search URL for ``query``.

    The query string is percent-encoded, so spaces, ``&``, ``=``, ``#`` and
    non-ASCII characters in ``query`` cannot corrupt the URL or inject
    extra parameters.

    Args:
        query: Search phrase entered by the user.

    Returns:
        The full search URL as a string.
    """
    from urllib.parse import urlencode

    base_url = "https://search.naver.com/search.naver?"
    params = {"ssc": "tab.blog.all", "sm": "tab_jum", "query": query}
    return base_url + urlencode(params)
|
35 |
+
|
36 |
+
def crawl_blog_content(url, session):
    """Fetch one blog post page and return its main text.

    Args:
        url: Address of the blog post.
        session: ``requests``-style session used for the GET request.

    Returns:
        The whitespace-normalised text of the ``div.se-main-container``
        element, or ``""`` when the request fails, times out, returns a
        non-200 status, or the page has no such container.
    """
    # NOTE(review): "br" is advertised in Accept-Encoding; requests can only
    # decode brotli when a brotli package is installed -- confirm it is.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        "Accept-Language": "ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
        "Referer": "https://search.naver.com/search.naver",
    }
    try:
        # Random 1-2 s pause before every request (presumably to avoid
        # hammering the server).
        time.sleep(random.uniform(1, 2))

        # Bound the request so a stalled server cannot hang the caller.
        response = session.get(url, headers=headers, timeout=10)
        if response.status_code != 200:
            return ""

        soup = BeautifulSoup(response.content, "html.parser")
        container = soup.find("div", attrs={'class': 'se-main-container'})
        if container is None:
            return ""
        return clean_text(container.get_text())
    except Exception:
        # Best-effort crawl: any network or parsing failure yields no text.
        return ""
|
63 |
+
|
64 |
+
def crawl_naver_search_results(url, session):
    """Collect up to ten Naver blog hits from a search-results page.

    Args:
        url: Search URL (as built by ``generate_naver_search_url``).
        session: ``requests``-style session used for the GET request.

    Returns:
        A list of at most ten dicts with the keys ``"제목"`` (title) and
        ``"링크"`` (link, rewritten to the mobile ``m.`` host). An empty list
        is returned when the request fails, times out, or is not a 200.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        "Accept-Language": "ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
        "Referer": "https://search.naver.com/search.naver",
    }
    max_results = 10
    try:
        # Bound the request so a stalled server cannot hang the caller.
        response = session.get(url, headers=headers, timeout=10)
        if response.status_code != 200:
            return []

        soup = BeautifulSoup(response.content, "html.parser")
        results = []
        for item in soup.find_all("li", class_=re.compile("bx.*")):
            for box in item.find_all("div", class_="detail_box"):
                for title_area in box.find_all("div", class_="title_area"):
                    title = title_area.text.strip()
                    for anchor in title_area.find_all("a", href=True):
                        link = anchor["href"]
                        # NOTE(review): only Naver blog links are kept;
                        # confirm this matches the intended filter.
                        if "blog.naver" not in link:
                            continue
                        # Switch to the mobile host once; leave links that
                        # are already mobile untouched.
                        if "://m.blog.naver" not in link:
                            link = link.replace("https://", "https://m.", 1)
                        results.append({"제목": title, "링크": link})
                        # Returning here replaces the chain of per-level
                        # "count >= 10" break checks.
                        if len(results) >= max_results:
                            return results
        return results
    except Exception:
        # Best-effort crawl: any network or parsing failure yields no hits.
        return []


def clean_text(text):
    """Collapse each run of whitespace in ``text`` to one space and trim the ends."""
    return re.sub(r'\s+', ' ', text).strip()


def fetch_references(topic):
    """Crawl three reference blog posts for ``topic``.

    Args:
        topic: Search phrase used to find blog posts.

    Returns:
        A list of exactly three strings. Normally each is
        ``"제목: <title>\\n내용: <text>"`` for a randomly chosen search hit;
        when the session cannot be created or fewer than three hits are
        found, the same explanatory message is repeated three times.
    """
    session = setup_session()
    if session is None:
        return ["세션 설정 실패"] * 3
    try:
        search_url = generate_naver_search_url(topic)
        results = crawl_naver_search_results(search_url, session)
        if len(results) < 3:
            return ["충분한 검색 결과를 찾지 못했습니다."] * 3

        references = []
        for result in random.sample(results, 3):
            content = crawl_blog_content(result["링크"], session)
            references.append(f"제목: {result['제목']}\n내용: {content}")
        return references
    finally:
        # Release the pooled connections once the crawl is finished.
        session.close()
|
124 |
+
|
125 |
+
def fetch_crawl_results(query):
    """Return the first three crawled references for ``query`` as a tuple."""
    refs = fetch_references(query)
    return tuple(refs[position] for position in range(3))
|
128 |
+
|
129 |
+
def generate_blog_post(query, prompt_template):
    """Generate a blog post about ``query`` from three crawled references.

    Args:
        query: Topic entered by the user; also used as the search phrase.
        prompt_template: Writing rules. They are sent as the system message
            and repeated inside the user message.

    Returns:
        A 4-tuple ``(post, ref1, ref2, ref3)``. ``post`` starts with a
        ``"주제: <query>"`` line followed by the model output. If anything
        raises, the tuple is ``(error message, "", "", "")``.
    """
    try:
        # Crawl the reference posts.
        ref1, ref2, ref3 = fetch_references(query)

        combined_content = f"참고글1:\n{ref1}\n\n참고글2:\n{ref2}\n\n참고글3:\n{ref3}"
        full_prompt = f"주제: {query}\n\n{prompt_template}\n\n참고 내용:\n{combined_content}"

        response = openai.ChatCompletion.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": prompt_template},
                {"role": "user", "content": full_prompt},
            ],
            max_tokens=10000,
            temperature=0.75,
            top_p=1.0,
            frequency_penalty=0.5,
        )

        return f"주제: {query}\n\n{response.choices[0].message['content']}", ref1, ref2, ref3
    except Exception as e:
        # The error text goes into the first output; the three reference
        # outputs are left empty.
        return f"블로그 글 생성 중 오류 발생: {str(e)}", "", "", ""
|
157 |
+
|
158 |
+
# PDF class and related helper functions
|
159 |
+
class PDF(FPDF2):
    """FPDF document with the NanumGothic fonts registered.

    The font files are loaded from the directory that contains this module.
    """

    def __init__(self):
        super().__init__()
        font_dir = os.path.dirname(__file__)
        # (family, style, file name), registered in this order.
        font_table = (
            ("NanumGothic", "", "NanumGothic.ttf"),
            ("NanumGothic", "B", "NanumGothicBold.ttf"),
            ("NanumGothicExtraBold", "", "NanumGothicExtraBold.ttf"),
            ("NanumGothicLight", "", "NanumGothicLight.ttf"),
        )
        for family, style, file_name in font_table:
            self.add_font(family, style, os.path.join(font_dir, file_name))

    def header(self):
        """Page header hook: only selects the 10 pt regular font."""
        self.set_font('NanumGothic', '', 10)

    def footer(self):
        """Page footer hook: centred ``Page N`` at y = -15 in the 8 pt font."""
        self.set_y(-15)
        self.set_font('NanumGothic', '', 8)
        page_label = f'Page {self.page_no()}'
        self.cell(0, 10, page_label, 0, 0, 'C')
|
175 |
+
|
176 |
+
def save_to_pdf(blog_post, user_topic):
    """Render ``blog_post`` to a PDF file and return the file name.

    The first line of ``blog_post`` becomes the centred bold title; the rest
    becomes the body text. The file is written to the current working
    directory as ``<yymmdd>_<HHMM>_<sanitised topic>.pdf``.

    Args:
        blog_post: Full post text, title on the first line.
        user_topic: Topic string used to build the file name.

    Returns:
        The name of the PDF file that was written.
    """
    from datetime import timedelta, timezone

    pdf = PDF()
    pdf.add_page()

    head, _, rest = blog_post.partition('\n')
    title = head.strip()
    content = rest.strip()

    # Current date and time in Korea Standard Time (UTC+9), independent of
    # the time zone the server happens to run in.
    now = datetime.now(timezone(timedelta(hours=9)))
    date_str = now.strftime("%y%m%d")
    time_str = now.strftime("%H%M")

    # Build the file name.
    filename = f"{date_str}_{time_str}_{format_filename(user_topic)}.pdf"

    pdf.set_font("NanumGothic", 'B', size=14)
    pdf.cell(0, 10, title, ln=True, align='C')
    pdf.ln(10)
    pdf.set_font("NanumGothic", '', size=11)
    pdf.multi_cell(0, 5, content)
    print(f"Saving PDF as: {filename}")
    pdf.output(filename)
    return filename
|
199 |
+
|
200 |
+
def format_filename(text):
    """Turn ``text`` into a short file-name fragment.

    Every character that is not a word character, whitespace or ``-`` is
    removed; the result is cut to 50 characters and then stripped of
    surrounding whitespace.
    """
    allowed_only = re.compile(r'[^\w\s-]').sub('', text)
    truncated = allowed_only[:50]
    return truncated.strip()
|
203 |
+
|
204 |
+
def save_content_to_pdf(blog_post, user_topic):
    """Write ``blog_post`` to a PDF via ``save_to_pdf`` and return the file name."""
    pdf_name = save_to_pdf(blog_post, user_topic)
    return pdf_name
|
206 |
+
|
207 |
+
# Default prompt template
|
208 |
+
DEFAULT_PROMPT_TEMPLATE = """
|
209 |
+
[๋ธ๋ก๊ทธ ๊ธ ์์ฑ ๊ธฐ๋ณธ ๊ท์น]
|
210 |
+
1. ๋ฐ๋์ ํ๊ธ๋ก ์์ฑํ๋ผ
|
211 |
+
2. ์ฃผ์ด์ง ์ฐธ๊ณ ๊ธ์ ๋ฐํ์ผ๋ก 1๊ฐ์ ์ํ๋ฆฌ๋ทฐํ(Product Review) ๋ธ๋ก๊ทธ๋ฅผ ์์ฑ
|
212 |
+
3. ์ฃผ์ ์ ์ ๋ชฉ์ ์ ์ธํ ๊ธ์ด 1500๋จ์ด ์ด์์ด ๋๋๋ก ์์ฑ
|
213 |
+
4. ๊ธ์ ์ ๋ชฉ์ ์ํ๋ฆฌ๋ทฐํ ๋ธ๋ก๊ทธ ํํ์ ๋ง๋ ์ ์ ํ ์ ๋ชฉ์ผ๋ก ์ถ๋ ฅ
|
214 |
+
- ์ฐธ๊ณ ๊ธ์ ์ ๋ชฉ๋ ์ฐธ๊ณ ํ๋, ๋์ผํ๊ฒ ์์ฑํ์ง ๋ง ๊ฒ
|
215 |
+
5. ๋ฐ๋์ ๋งํฌ๋ค์ด ํ์์ด ์๋ ์์ํ ํ
์คํธ๋ก๋ง ์ถ๋ ฅํ๋ผ
|
216 |
+
6. ๋ค์ํ๋ฒ ์ฐธ๊ณ ๊ธ์ ๊ฒํ ํ์ฌ ๋ด์ฉ์ ์ถฉ๋ถํ ๋ฐ์ํ๋, ์ฐธ๊ณ ๊ธ์ ๊ธ์ ๊ทธ๋๋ก ์ฌ์์ฑํ์ง๋ ๋ง ๊ฒ
|
217 |
+
|
218 |
+
[๋ธ๋ก๊ทธ ๊ธ ์์ฑ ์ธ๋ถ ๊ท์น]
|
219 |
+
1. ์ฌ์ฉ์๊ฐ ์
๋ ฅํ ์ฃผ์ ์ ์ฃผ์ด์ง ์ฐธ๊ณ ๊ธ 3๊ฐ๋ฅผ ๋ฐํ์ผ๋ก ์ํ๋ฆฌ๋ทฐํ ๋ธ๋ก๊ทธ ๊ธ 1๊ฐ๋ฅผ ์์ฑํ๋ผ
|
220 |
+
2. ์ฃผ์ด์ง ๋ชจ๋ ๊ธ์ ๋ถ์ํ์ฌ ํ๋์ ๋์ฃผ์ ๋ฅผ ์ ์ ํ๋ผ(1๊ฐ์ ์ฐธ๊ณ ๊ธ์ ์น์ฐ์น์ง ๋ง๊ณ ๋ค์ํ ๋ด์์ ๋ด์๊ฒ)
|
221 |
+
3. ์ฌ๋ฌ๊ฐ์ง ์ํ์ด๋ผ๋ฉด ์ํ 1๊ฐ์ ์น์ฐ์น ๋ฆฌ๋ทฐ๋ฅผ ์์ฑํ์ง ๋ง ๊ฒ.
|
222 |
+
4. ๋์ฃผ์ ์ ๋ง๊ฒ ๊ธ์ ๋งฅ๋ฝ์ ์ ์งํ๋ผ
|
223 |
+
5. ์ฐธ๊ณ ๊ธ์ ์์ฑ๋ ์ํ๊ณผ ๊ธฐ๋ฅ์ ์ง์คํ์ฌ ์์ฑํ๋ผ
|
224 |
+
6. ์ค์ ๋ด๊ฐ ์ฌ์ฉํด๋ณด๊ณ ๊ฒฝํํ ๋ด์ฉ์ ์์ฑํ ๋ฆฌ๋ทฐ ํํ๋ก ๊ธ์ ์์ฑ
|
225 |
+
7. ๋ด์ฉ์ ๊ธ์ ์ ์ผ๋ก ์์ฑํ๋, ์ํ์ด ๋๋ณด์ด๋๋ก ์์ฑ(ํ๋์ ์ํ์ ์น์ฐ์น์ง ๋ง ๊ฒ)
|
226 |
+
8. ์ํ์ ๊ฐ์น๋ฅผ ๊ณ ๊ฐ์๊ฒ ์ดํํ๋ผ.
|
227 |
+
9. ๊ธ์ ์, ๋ค ๋ฌธ์ฅ์ด ์์ฐ์ค๋ฝ๊ฒ ์ด์ด์ง๋๋ก ์์ฑ
|
228 |
+
10. ์ดํฌ๋ ์ฃผ์ด์ง ์ฐธ๊ณ ๊ธ 3๊ฐ์ง์ ์ดํฌ๋ฅผ ์ ์ ํ ๋ฐ์ํ๋ผ
|
229 |
+
- ํนํ ๋ฌธ์ฅ์ ๋ ๋ถ๋ถ์ ์ ์ ํ ๋ฐ์(๊ฐ๊ธ์ '~์'๋ก ๋๋๋๋ก ์์ฑ)
|
230 |
+
- ๋๋ฌด ๋ฑ๋ฑํ์ง ์๊ฒ ํธ์ํ๊ฒ ์ฝ์ ์ ์๋๋ก ์์ฐ์ค๋ฌ์ด ๋ํ์ฒด๋ฅผ ๋ฐ์
|
231 |
+
|
232 |
+
[์ ์ธ ๊ท์น]
|
233 |
+
1. ๋ฐ๋์ ์ฐธ๊ณ ๊ธ์ ํฌํจ๋ ๋งํฌ(URL)๋ ์ ์ธ
|
234 |
+
2. ์ฐธ๊ณ ๊ธ์์ '๋งํฌ๋ฅผ ํ์ธํด์ฃผ์ธ์'์ ๊ฐ์ ๋งํฌ ์ด๋์ ๋ฌธ๊ตฌ๋ ์ ์ธ
|
235 |
+
3. ์ฐธ๊ณ ๊ธ์ ์๋ ์์ฑ์, ํ์, ์ ํ๋ฒ, ๊ธฐ์(Writer, speaker, YouTuber, reporter)์ ์ด๋ฆ, ์ ์นญ, ๋๋ค์(Name, Nkickname)์ ๋ฐ๋์ ์ ์ธ
|
236 |
+
4. '์
์ฒด๋ก ๋ถํฐ ์ ๊ณต ๋ฐ์์ ์์ฑ', '์ฟ ํก ํํธ๋์ค'๋ฑ์ ํํ์ ๋ฐ๋์ ์ ์ธํ๋ผ.
|
237 |
+
5. ๊ธ์ ๊ตฌ์กฐ๊ฐ ๋๋ฌ๋๊ฒ ์์ฑํ์ง ๋ง ๊ฒ(์์, ๋์ ๋ํ ํํ)
|
238 |
+
"""
|
239 |
+
|
240 |
+
# Gradio ์ฑ ์์ฑ
|
241 |
+
with gr.Blocks() as iface:
|
242 |
+
gr.Markdown("# ๋ธ๋ก๊ทธ ๊ธ ์์ฑ๊ธฐ_๋ฆฌ๋ทฐ_์ผ๋ฐ")
|
243 |
+
gr.Markdown("์ฃผ์ ๋ฅผ ์
๋ ฅํ๊ณ ๋ธ๋ก๊ทธ ๊ธ ์์ฑ ๋ฒํผ์ ๋๋ฅด๋ฉด ์๋์ผ๋ก ๋ธ๋ก๊ทธ ๊ธ์ ์์ฑํฉ๋๋ค.")
|
244 |
+
|
245 |
+
query_input = gr.Textbox(lines=1, placeholder="๋ธ๋ก๊ทธ ๊ธ์ ์ฃผ์ ๋ฅผ ์
๋ ฅํด์ฃผ์ธ์...", label="์ฃผ์ ")
|
246 |
+
|
247 |
+
prompt_input = gr.Textbox(lines=10, value=DEFAULT_PROMPT_TEMPLATE, label="ํ๋กฌํํธ ํ
ํ๋ฆฟ", visible=True)
|
248 |
+
generate_button = gr.Button("๋ธ๋ก๊ทธ ๊ธ ์์ฑ")
|
249 |
+
|
250 |
+
output_text = gr.Textbox(label="์์ฑ๋ ๋ธ๋ก๊ทธ ๊ธ")
|
251 |
+
ref1_text = gr.Textbox(label="์ฐธ๊ณ ๊ธ 1", lines=10, visible=True)
|
252 |
+
ref2_text = gr.Textbox(label="์ฐธ๊ณ ๊ธ 2", lines=10, visible=True)
|
253 |
+
ref3_text = gr.Textbox(label="์ฐธ๊ณ ๊ธ 3", lines=10, visible=True)
|
254 |
+
|
255 |
+
save_pdf_button = gr.Button("PDF๋ก ์ ์ฅ")
|
256 |
+
pdf_output = gr.File(label="์์ฑ๋ PDF ํ์ผ")
|
257 |
+
|
258 |
+
generate_button.click(
|
259 |
+
generate_blog_post,
|
260 |
+
inputs=[query_input, prompt_input],
|
261 |
+
outputs=[output_text, ref1_text, ref2_text, ref3_text],
|
262 |
+
show_progress=True
|
263 |
+
)
|
264 |
+
|
265 |
+
save_pdf_button.click(
|
266 |
+
save_content_to_pdf,
|
267 |
+
inputs=[output_text, query_input],
|
268 |
+
outputs=[pdf_output],
|
269 |
+
show_progress=True
|
270 |
+
)
|
271 |
+
# Gradio ์ฑ ์คํ
|
272 |
+
if __name__ == "__main__":
|
273 |
+
iface.launch()
|
requirements.txt
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
gradio
|
2 |
+
openai==0.28
|
3 |
+
fpdf2
|
4 |
+
requests
|
5 |
+
beautifulsoup4
|
6 |
+
pytz
|