AIRider committed on
Commit
a76e765
·
verified ·
1 Parent(s): d1b24f1

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +273 -0
  2. requirements.txt +6 -0
app.py ADDED
@@ -0,0 +1,273 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import random
3
+ import time
4
+ import re
5
+ import json
6
+ import requests
7
+ from bs4 import BeautifulSoup
8
+ from requests.adapters import HTTPAdapter
9
+ from requests.packages.urllib3.util.retry import Retry
10
+ import openai
11
+ import gradio as gr
12
+ from fpdf import FPDF as FPDF2
13
+ from datetime import datetime
14
+
15
# API key configuration: the OpenAI key is read from the process environment
# (None when the variable is not set).
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

# OpenAI configuration: register the key on the openai module.
openai.api_key = OPENAI_API_KEY
20
+
21
def setup_session():
    """Create a ``requests`` session that retries transient gateway errors.

    The retry policy (up to 5 attempts, backoff factor 1, retrying on HTTP
    502/503/504) is mounted for both ``https://`` and ``http://`` so every
    URL fetched through the session gets the same treatment.

    Returns:
        The configured ``requests.Session``, or ``None`` if the session
        could not be set up (callers test for ``None``).
    """
    try:
        session = requests.Session()
        retries = Retry(total=5, backoff_factor=1, status_forcelist=[502, 503, 504])
        adapter = HTTPAdapter(max_retries=retries)
        for scheme in ("https://", "http://"):
            session.mount(scheme, adapter)
        return session
    except Exception:
        # Best-effort by design: callers handle a missing session themselves.
        return None
29
+
30
def generate_naver_search_url(query):
    """Build the Naver blog-tab search URL for *query*.

    The parameters are percent-encoded, so queries containing spaces,
    ``&``, ``=``, ``#`` or non-ASCII text produce a well-formed URL instead
    of a corrupted query string.

    Args:
        query: The search phrase.

    Returns:
        The full search URL as a string.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import urlencode

    base_url = "https://search.naver.com/search.naver?"
    params = {"ssc": "tab.blog.all", "sm": "tab_jum", "query": query}
    return base_url + urlencode(params)
35
+
36
def crawl_blog_content(url, session):
    """Fetch one blog page and return the cleaned text of its main container.

    Sleeps a random 1-2 seconds before the request, then downloads *url*
    with browser-like headers and extracts the text of the first
    ``<div class="se-main-container">``.

    Args:
        url: Address of the blog post.
        session: Object exposing ``get(url, headers=..., timeout=...)``,
            normally the session built by ``setup_session``.

    Returns:
        The whitespace-normalised text, or ``""`` when the response status
        is not 200, the container is missing, or any error occurs.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        "Accept-Language": "ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
        "Referer": "https://search.naver.com/search.naver",
    }
    try:
        # Random pause so consecutive requests are not fired back-to-back.
        time.sleep(random.uniform(1, 2))

        # requests never times out by default; without this a stalled
        # connection would block the caller indefinitely.
        response = session.get(url, headers=headers, timeout=10)
        if response.status_code != 200:
            return ""

        soup = BeautifulSoup(response.content, "html.parser")
        content = soup.find("div", attrs={'class': 'se-main-container'})
        if not content:
            return ""
        return clean_text(content.get_text())
    except Exception:
        # Best-effort crawl: any failure is reported as "no content".
        return ""
63
+
64
def crawl_naver_search_results(url, session):
    """Collect up to 10 blog hits from a Naver search results page.

    Walks ``li`` elements whose class matches ``bx.*``, then their
    ``div.detail_box`` / ``div.title_area`` descendants, and records every
    anchor whose ``href`` contains ``"blog.naver"``. Each recorded link has
    ``https://`` rewritten to ``https://m.``.

    Args:
        url: Search results URL.
        session: Object exposing ``get(url, headers=..., timeout=...)``.

    Returns:
        A list of at most 10 dicts, each holding the title-area text and
        the rewritten link. Returns ``[]`` when the response status is not
        200 or any error occurs.
    """
    max_results = 10
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        "Accept-Language": "ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
        "Referer": "https://search.naver.com/search.naver",
    }
    try:
        # requests never times out by default; bound the wait explicitly.
        response = session.get(url, headers=headers, timeout=10)
        if response.status_code != 200:
            return []

        soup = BeautifulSoup(response.content, "html.parser")
        results = []
        for li in soup.find_all("li", class_=re.compile("bx.*")):
            for div in li.find_all("div", class_="detail_box"):
                for div2 in div.find_all("div", class_="title_area"):
                    title = div2.text.strip()
                    for a in div2.find_all("a", href=True):
                        link = a["href"]
                        if "blog.naver" not in link:
                            continue
                        link = link.replace("https://", "https://m.")
                        results.append({"์ œ๋ชฉ": title, "๋งํฌ": link})
                        # Returning here replaces the counter and the
                        # chain of breaks at every loop level.
                        if len(results) >= max_results:
                            return results
        return results
    except Exception:
        # Best-effort crawl: any failure is reported as "no results".
        return []
103
+
104
def clean_text(text):
    """Collapse each run of whitespace in *text* to one space and trim both ends."""
    collapsed = re.sub(r'\s+', ' ', text)
    return collapsed.strip()
107
+
108
def fetch_references(topic):
    """Search Naver for *topic* and return three reference texts.

    Three of the crawled search hits are picked at random and each one's
    blog content is downloaded. The HTTP session is closed before
    returning, whichever path is taken.

    Args:
        topic: The search phrase.

    Returns:
        A list of exactly three strings. Normally each combines a hit's
        title and crawled content. If the session cannot be created, or
        fewer than three hits are found, the list holds three copies of a
        fixed message instead.
    """
    search_url = generate_naver_search_url(topic)
    session = setup_session()
    if session is None:
        return ["์„ธ์…˜ ์„ค์ • ์‹คํŒจ"] * 3

    try:
        results = crawl_naver_search_results(search_url, session)
        if len(results) < 3:
            return ["์ถฉ๋ถ„ํ•œ ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ๋ฅผ ์ฐพ์ง€ ๋ชปํ–ˆ์Šต๋‹ˆ๋‹ค."] * 3

        references = []
        for result in random.sample(results, 3):
            content = crawl_blog_content(result['๋งํฌ'], session)
            references.append(f"์ œ๋ชฉ: {result['์ œ๋ชฉ']}\n๋‚ด์šฉ: {content}")
        return references
    finally:
        # Release pooled connections; the session is not reused after this call.
        session.close()
124
+
125
def fetch_crawl_results(query):
    """Return the first three reference texts for *query* as a 3-tuple."""
    refs = fetch_references(query)
    first, second, third = refs[0], refs[1], refs[2]
    return first, second, third
128
+
129
def generate_blog_post(query, prompt_template):
    """Generate a blog post for *query* from three crawled reference texts.

    The references are fetched with ``fetch_references``, combined with the
    topic and *prompt_template* into one user message, and sent to
    ``openai.ChatCompletion.create`` (model ``gpt-4o-mini``) with
    *prompt_template* also used as the system message.

    Args:
        query: Topic of the post.
        prompt_template: Writing rules passed to the model.

    Returns:
        A 4-tuple ``(post, ref1, ref2, ref3)``. On any exception the first
        element is an error message containing the exception text and the
        three references are empty strings.
    """
    try:
        # Crawl the reference posts.
        ref1, ref2, ref3 = fetch_references(query)

        combined_content = f"์ฐธ๊ณ ๊ธ€1:\n{ref1}\n\n์ฐธ๊ณ ๊ธ€2:\n{ref2}\n\n์ฐธ๊ณ ๊ธ€3:\n{ref3}"
        full_prompt = f"์ฃผ์ œ: {query}\n\n{prompt_template}\n\n์ฐธ๊ณ  ๋‚ด์šฉ:\n{combined_content}"

        response = openai.ChatCompletion.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": prompt_template},
                {"role": "user", "content": full_prompt},
            ],
            max_tokens=10000,
            temperature=0.75,
            top_p=1.0,
            frequency_penalty=0.5,
        )

        return f"์ฃผ์ œ: {query}\n\n{response.choices[0].message['content']}", ref1, ref2, ref3
    except Exception as e:
        # UI boundary: surface the failure in the output box instead of raising.
        return f"๋ธ”๋กœ๊ทธ ๊ธ€ ์ƒ์„ฑ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}", "", "", ""
157
+
158
# PDF class and related function definitions
class PDF(FPDF2):
    """FPDF document with the NanumGothic fonts registered.

    The font files are loaded from the directory containing this module.
    """

    def __init__(self):
        super().__init__()
        font_dir = os.path.dirname(__file__)
        # (family, style, file name), registered in this order.
        font_table = (
            ("NanumGothic", "", "NanumGothic.ttf"),
            ("NanumGothic", "B", "NanumGothicBold.ttf"),
            ("NanumGothicExtraBold", "", "NanumGothicExtraBold.ttf"),
            ("NanumGothicLight", "", "NanumGothicLight.ttf"),
        )
        for family, style, font_file in font_table:
            self.add_font(family, style, os.path.join(font_dir, font_file))

    def header(self):
        """Page header hook: only selects the 10pt regular font."""
        self.set_font('NanumGothic', '', 10)

    def footer(self):
        """Page footer hook: centred page number, positioned 15 units above the bottom edge."""
        self.set_y(-15)
        self.set_font('NanumGothic', '', 8)
        page_label = f'Page {self.page_no()}'
        self.cell(0, 10, page_label, 0, 0, 'C')
175
+
176
def save_to_pdf(blog_post, user_topic):
    """Render *blog_post* to a PDF file and return the file name.

    The first line of *blog_post* becomes the centred bold title and the
    remaining lines become the body. The file is written to the current
    working directory as ``<yymmdd>_<HHMM>_<sanitised topic>.pdf``, with
    the timestamp taken in Korea Standard Time (UTC+9).

    Args:
        blog_post: Full text of the post; the title is on the first line.
        user_topic: Topic text, sanitised with ``format_filename``.

    Returns:
        The name of the PDF file that was written.
    """
    # Local import keeps this block self-contained.
    from datetime import timedelta, timezone

    pdf = PDF()
    pdf.add_page()
    lines = blog_post.split('\n')
    title = lines[0].strip()
    content = '\n'.join(lines[1:]).strip()

    # Current date and time in Korean time. An explicit UTC+9 offset is used
    # so the stamp does not depend on the server's local timezone; Korea
    # observes no DST, so a fixed offset suffices.
    now = datetime.now(timezone(timedelta(hours=9)))
    date_str = now.strftime("%y%m%d")
    time_str = now.strftime("%H%M")

    # Build the file name.
    filename = f"{date_str}_{time_str}_{format_filename(user_topic)}.pdf"

    pdf.set_font("NanumGothic", 'B', size=14)
    pdf.cell(0, 10, title, ln=True, align='C')
    pdf.ln(10)
    pdf.set_font("NanumGothic", '', size=11)
    pdf.multi_cell(0, 5, content)
    print(f"Saving PDF as: {filename}")
    pdf.output(filename)
    return filename
199
+
200
def format_filename(text):
    """Make *text* usable as part of a file name.

    Removes every character that is not a word character, whitespace or
    a hyphen, keeps the first 50 characters, then trims surrounding
    whitespace.
    """
    sanitized = re.sub(r'[^\w\s-]', '', text)
    truncated = sanitized[:50]
    return truncated.strip()
203
+
204
def save_content_to_pdf(blog_post, user_topic):
    """Write the post to a PDF via ``save_to_pdf`` and return its result."""
    pdf_path = save_to_pdf(blog_post, user_topic)
    return pdf_path
206
+
207
# Default prompt template.
# Used as the initial value of the prompt textbox in the UI. The text has
# three bracketed sections of numbered rules; it is a runtime string and is
# kept exactly as-is.
DEFAULT_PROMPT_TEMPLATE = """
[๋ธ”๋กœ๊ทธ ๊ธ€ ์ž‘์„ฑ ๊ธฐ๋ณธ ๊ทœ์น™]
1. ๋ฐ˜๋“œ์‹œ ํ•œ๊ธ€๋กœ ์ž‘์„ฑํ•˜๋ผ
2. ์ฃผ์–ด์ง„ ์ฐธ๊ณ ๊ธ€์„ ๋ฐ”ํƒ•์œผ๋กœ 1๊ฐœ์˜ ์ƒํ’ˆ๋ฆฌ๋ทฐํ˜•(Product Review) ๋ธ”๋กœ๊ทธ๋ฅผ ์ž‘์„ฑ
3. ์ฃผ์ œ์™€ ์ œ๋ชฉ์„ ์ œ์™ธํ•œ ๊ธ€์ด 1500๋‹จ์–ด ์ด์ƒ์ด ๋˜๋„๋ก ์ž‘์„ฑ
4. ๊ธ€์˜ ์ œ๋ชฉ์„ ์ƒํ’ˆ๋ฆฌ๋ทฐํ˜• ๋ธ”๋กœ๊ทธ ํ˜•ํƒœ์— ๋งž๋Š” ์ ์ ˆํ•œ ์ œ๋ชฉ์œผ๋กœ ์ถœ๋ ฅ
- ์ฐธ๊ณ ๊ธ€์˜ ์ œ๋ชฉ๋„ ์ฐธ๊ณ ํ•˜๋˜, ๋™์ผํ•˜๊ฒŒ ์ž‘์„ฑํ•˜์ง€ ๋ง ๊ฒƒ
5. ๋ฐ˜๋“œ์‹œ ๋งˆํฌ๋‹ค์šด ํ˜•์‹์ด ์•„๋‹Œ ์ˆœ์ˆ˜ํ•œ ํ…์ŠคํŠธ๋กœ๋งŒ ์ถœ๋ ฅํ•˜๋ผ
6. ๋‹ค์‹œํ•œ๋ฒˆ ์ฐธ๊ณ ๊ธ€์„ ๊ฒ€ํ† ํ•˜์—ฌ ๋‚ด์šฉ์„ ์ถฉ๋ถ„ํžˆ ๋ฐ˜์˜ํ•˜๋˜, ์ฐธ๊ณ ๊ธ€์˜ ๊ธ€์„ ๊ทธ๋Œ€๋กœ ์žฌ์ž‘์„ฑํ•˜์ง€๋Š” ๋ง ๊ฒƒ

[๋ธ”๋กœ๊ทธ ๊ธ€ ์ž‘์„ฑ ์„ธ๋ถ€ ๊ทœ์น™]
1. ์‚ฌ์šฉ์ž๊ฐ€ ์ž…๋ ฅํ•œ ์ฃผ์ œ์™€ ์ฃผ์–ด์ง„ ์ฐธ๊ณ ๊ธ€ 3๊ฐœ๋ฅผ ๋ฐ”ํƒ•์œผ๋กœ ์ƒํ’ˆ๋ฆฌ๋ทฐํ˜• ๋ธ”๋กœ๊ทธ ๊ธ€ 1๊ฐœ๋ฅผ ์ž‘์„ฑํ•˜๋ผ
2. ์ฃผ์–ด์ง„ ๋ชจ๋“  ๊ธ€์„ ๋ถ„์„ํ•˜์—ฌ ํ•˜๋‚˜์˜ ๋Œ€์ฃผ์ œ๋ฅผ ์„ ์ •ํ•˜๋ผ(1๊ฐœ์˜ ์ฐธ๊ณ ๊ธ€์— ์น˜์šฐ์น˜์ง€ ๋ง๊ณ  ๋‹ค์–‘ํ•œ ๋‚ด์š”์„ ๋‹ด์„๊ฒƒ)
3. ์—ฌ๋Ÿฌ๊ฐ€์ง€ ์ƒํ’ˆ์ด๋ผ๋ฉด ์ƒํ’ˆ 1๊ฐœ์— ์น˜์šฐ์นœ ๋ฆฌ๋ทฐ๋ฅผ ์ž‘์„ฑํ•˜์ง€ ๋ง ๊ฒƒ.
4. ๋Œ€์ฃผ์ œ์— ๋งž๊ฒŒ ๊ธ€์˜ ๋งฅ๋ฝ์„ ์œ ์ง€ํ•˜๋ผ
5. ์ฐธ๊ณ ๊ธ€์— ์ž‘์„ฑ๋œ ์ƒํ’ˆ๊ณผ ๊ธฐ๋Šฅ์— ์ง‘์ค‘ํ•˜์—ฌ ์ž‘์„ฑํ•˜๋ผ
6. ์‹ค์ œ ๋‚ด๊ฐ€ ์‚ฌ์šฉํ•ด๋ณด๊ณ  ๊ฒฝํ—˜ํ•œ ๋‚ด์šฉ์„ ์ž‘์„ฑํ•œ ๋ฆฌ๋ทฐ ํ˜•ํƒœ๋กœ ๊ธ€์„ ์ž‘์„ฑ
7. ๋‚ด์šฉ์€ ๊ธ์ •์ ์œผ๋กœ ์ž‘์„ฑํ•˜๋˜, ์ƒํ’ˆ์ด ๋‹๋ณด์ด๋„๋ก ์ž‘์„ฑ(ํ•˜๋‚˜์˜ ์ƒํ’ˆ์— ์น˜์šฐ์น˜์ง€ ๋ง ๊ฒƒ)
8. ์ƒํ’ˆ์˜ ๊ฐ€์น˜๋ฅผ ๊ณ ๊ฐ์—๊ฒŒ ์–ดํ•„ํ•˜๋ผ.
9. ๊ธ€์˜ ์•ž, ๋’ค ๋ฌธ์žฅ์ด ์ž์—ฐ์Šค๋Ÿฝ๊ฒŒ ์ด์–ด์ง€๋„๋ก ์ž‘์„ฑ
10. ์–ดํˆฌ๋Š” ์ฃผ์–ด์ง„ ์ฐธ๊ณ ๊ธ€ 3๊ฐ€์ง€์˜ ์–ดํˆฌ๋ฅผ ์ ์ ˆํžˆ ๋ฐ˜์˜ํ•˜๋ผ
- ํŠนํžˆ ๋ฌธ์žฅ์˜ ๋ ๋ถ€๋ถ„์„ ์ ์ ˆํžˆ ๋ฐ˜์˜(๊ฐ€๊ธ‰์  '~์š”'๋กœ ๋๋‚˜๋„๋ก ์ž‘์„ฑ)
- ๋„ˆ๋ฌด ๋”ฑ๋”ฑํ•˜์ง€ ์•Š๊ฒŒ ํŽธ์•ˆํ•˜๊ฒŒ ์ฝ์„ ์ˆ˜ ์žˆ๋„๋ก ์ž์—ฐ์Šค๋Ÿฌ์šด ๋Œ€ํ™”์ฒด๋ฅผ ๋ฐ˜์˜

[์ œ์™ธ ๊ทœ์น™]
1. ๋ฐ˜๋“œ์‹œ ์ฐธ๊ณ ๊ธ€์˜ ํฌํ•จ๋œ ๋งํฌ(URL)๋Š” ์ œ์™ธ
2. ์ฐธ๊ณ ๊ธ€์—์„œ '๋งํฌ๋ฅผ ํ™•์ธํ•ด์ฃผ์„ธ์š”'์™€ ๊ฐ™์€ ๋งํฌ ์ด๋™์˜ ๋ฌธ๊ตฌ๋Š” ์ œ์™ธ
3. ์ฐธ๊ณ ๊ธ€์— ์žˆ๋Š” ์ž‘์„ฑ์ž, ํ™”์ž, ์œ ํŠœ๋ฒ„, ๊ธฐ์ž(Writer, speaker, YouTuber, reporter)์˜ ์ด๋ฆ„, ์• ์นญ, ๋‹‰๋„ค์ž„(Name, Nkickname)์€ ๋ฐ˜๋“œ์‹œ ์ œ์™ธ
4. '์—…์ฒด๋กœ ๋ถ€ํ„ฐ ์ œ๊ณต ๋ฐ›์•„์„œ ์ž‘์„ฑ', '์ฟ ํŒก ํŒŒํŠธ๋„ˆ์Šค'๋“ฑ์˜ ํ‘œํ˜„์„ ๋ฐ˜๋“œ์‹œ ์ œ์™ธํ•˜๋ผ.
5. ๊ธ€์˜ ๊ตฌ์กฐ๊ฐ€ ๋“œ๋Ÿฌ๋‚˜๊ฒŒ ์ž‘์„ฑํ•˜์ง€ ๋ง ๊ฒƒ(์‹œ์ž‘, ๋์— ๋Œ€ํ•œ ํ‘œํ˜„)
"""
239
+
240
# Build the Gradio app.
# Components appear in the order they are created inside the Blocks context.
with gr.Blocks() as iface:
    gr.Markdown("# ๋ธ”๋กœ๊ทธ ๊ธ€ ์ž‘์„ฑ๊ธฐ_๋ฆฌ๋ทฐ_์ผ๋ฐ˜")
    gr.Markdown("์ฃผ์ œ๋ฅผ ์ž…๋ ฅํ•˜๊ณ  ๋ธ”๋กœ๊ทธ ๊ธ€ ์ƒ์„ฑ ๋ฒ„ํŠผ์„ ๋ˆ„๋ฅด๋ฉด ์ž๋™์œผ๋กœ ๋ธ”๋กœ๊ทธ ๊ธ€์„ ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค.")

    # Inputs: the topic and the (editable) prompt template.
    query_input = gr.Textbox(lines=1, placeholder="๋ธ”๋กœ๊ทธ ๊ธ€์˜ ์ฃผ์ œ๋ฅผ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”...", label="์ฃผ์ œ")

    prompt_input = gr.Textbox(lines=10, value=DEFAULT_PROMPT_TEMPLATE, label="ํ”„๋กฌํ”„ํŠธ ํ…œํ”Œ๋ฆฟ", visible=True)
    generate_button = gr.Button("๋ธ”๋กœ๊ทธ ๊ธ€ ์ƒ์„ฑ")

    # Outputs: the generated post followed by the three reference texts.
    output_text = gr.Textbox(label="์ƒ์„ฑ๋œ ๋ธ”๋กœ๊ทธ ๊ธ€")
    ref1_text = gr.Textbox(label="์ฐธ๊ณ ๊ธ€ 1", lines=10, visible=True)
    ref2_text = gr.Textbox(label="์ฐธ๊ณ ๊ธ€ 2", lines=10, visible=True)
    ref3_text = gr.Textbox(label="์ฐธ๊ณ ๊ธ€ 3", lines=10, visible=True)

    save_pdf_button = gr.Button("PDF๋กœ ์ €์žฅ")
    pdf_output = gr.File(label="์ƒ์„ฑ๋œ PDF ํŒŒ์ผ")

    # generate_blog_post returns four values, mapped onto the four outputs in order.
    generate_button.click(
        generate_blog_post,
        inputs=[query_input, prompt_input],
        outputs=[output_text, ref1_text, ref2_text, ref3_text],
        show_progress=True
    )

    # The PDF is built from whatever text is currently in the output box.
    save_pdf_button.click(
        save_content_to_pdf,
        inputs=[output_text, query_input],
        outputs=[pdf_output],
        show_progress=True
    )

# Run the Gradio app.
if __name__ == "__main__":
    iface.launch()
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ gradio
2
+ openai==0.28
3
+ fpdf2
4
+ requests
5
+ beautifulsoup4
6
+ pytz