AIRider committed on
Commit
842635e
·
verified ·
1 Parent(s): 1aab0fc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -27
app.py CHANGED
@@ -40,12 +40,13 @@ def get_youtube_script(url):
40
  title = data.get("title", "제목 없음")
41
  description = data.get("description", "설명 없음")
42
  transcription_text = data.get("transcriptionAsText", "")
 
43
 
44
  if not transcription_text:
45
  raise ValueError("추출된 스크립트가 없습니다.")
46
 
47
  logging.info("스크립트 추출 완료")
48
- return title, description, transcription_text
49
  except Exception as e:
50
  logging.exception("스크립트 추출 중 오류 발생")
51
  raise
@@ -69,7 +70,8 @@ def summarize_text(title, description, text):
69
  [유튜브 요약 규칙]
70
  1. 너는 유튜브 영상 전문 해설가로서 지침에 맞게 이 글을 작성하라
71
  2. 아래의 제목과 설명은 이 유튜브 영상의 원본 메타데이터이다.
72
- 3. 반드시 제목과 설명으로 주제와 문맥을 먼저 파악하고, 아래의 대본을 반드시 지침에 맞게 상세하게 요약하라
 
73
  4. 반드시 한글로 작성하라
74
  5. 반드시 '이 유튜브 대본은', '이 영상은', '이 유튜브는'등의 소개식 표현은 제외하라
75
  6. 요약문만으로도 영상을 직접 시청한 것과 동일한 수준으로 내용을 이해할 수 있도록 상세히 작성
@@ -80,7 +82,6 @@ def summarize_text(title, description, text):
80
  11. 등장인물, 장소, 사건 등 중요한 요소를 정확하게 작성
81
  12. 대본에서 전달하는 감정이나 분위기도 포함
82
  13. 반드시 기술적 용어나 전문 용어가 있을 경우, 이를 정확하게 사용
83
-
84
  14. 반드시 핵심 섹션(소주제)를 파악하여 섹션에 맞게 글을 요약하라(글의 양을 고려하여 섹션의 개수를 탄력적으로 설정)
85
  15. 각 섹션의 제목(소주제)에는 내용과 어울리는 적절한 이모지로 소주제를 시작하라
86
  16. 각 섹션의 내용은 Bullet Point를 사용하여 가독성을 높여라(문장 단위로 구분)
@@ -125,57 +126,72 @@ def split_sentences(text):
125
  def display_script(title, script):
126
  script_sentences = split_sentences(script)
127
  formatted_script = "\n\n".join(script_sentences)
128
- return f"""<div style="background-color: #f0f0f0; padding: 20px; border-radius: 10px;">
129
- <h3>원문 스크립트</h3>
130
  <details>
131
  <summary>클릭하여 펼치기</summary>
132
- <h2>{title}</h2>
133
- <pre style="white-space: pre-wrap;">{formatted_script}</pre>
134
  </details>
135
  </div>"""
136
 
137
  def display_summary(title, summary):
138
- return f"""<div style="background-color: #e6f3ff; padding: 20px; border-radius: 10px; margin-top: 20px;">
139
- <h3>요약</h3>
140
- <h2>{title}</h2>
141
  {summary}
142
  </div>"""
143
 
 
 
 
 
 
 
144
  def analyze(url):
145
  # 스크립트 추출
146
- yield "스크립트 추출 중...", "스크립트 추출 중..."
147
- title, description, script = get_youtube_script(url)
148
  script_content = display_script(title, script)
 
149
 
150
  # 원문 스크립트 표시 및 요약 시작
151
- yield script_content, "요약 생성 중..."
152
 
153
  # 요약 생성
154
  summary = summarize_text(title, description, script)
155
 
 
156
  lines = summary.split('\n')
157
  formatted_lines = []
158
  for line in lines:
159
- if line.startswith('# '):
160
- line = f"<h1>{html.escape(line[2:])}</h1>"
161
- elif line.startswith('## '):
162
- line = f"<h2>{html.escape(line[3:])}</h2>"
163
- elif line.startswith('### '):
164
- line = f"<h3>{html.escape(line[4:])}</h3>"
165
- else:
166
- line = f"<p>{html.escape(line)}</p>"
167
- formatted_lines.append(line)
 
 
 
 
 
 
 
 
 
 
168
 
169
  formatted_summary = '\n'.join(formatted_lines)
170
 
171
- summary_content = f"""<div style="background-color: #e6f3ff; padding: 20px; border-radius: 10px; margin-top: 20px;">
172
- <h3>요약</h3>
173
- <h2>{html.escape(title)}</h2>
174
  {formatted_summary}
175
  </div>"""
176
 
177
  # 최종 결과 표시
178
- yield script_content, summary_content
179
 
180
  # Gradio 인터페이스
181
  with gr.Blocks() as demo:
@@ -184,11 +200,12 @@ with gr.Blocks() as demo:
184
  analyze_button = gr.Button("분석하기")
185
  script_output = gr.HTML(label="원문 스크립트")
186
  summary_output = gr.HTML(label="요약")
 
187
 
188
  analyze_button.click(
189
  analyze,
190
  inputs=[youtube_url_input],
191
- outputs=[script_output, summary_output]
192
  )
193
 
194
  if __name__ == "__main__":
 
40
  title = data.get("title", "제목 없음")
41
  description = data.get("description", "설명 없음")
42
  transcription_text = data.get("transcriptionAsText", "")
43
+ thumbnails = data.get("thumbnails", [])
44
 
45
  if not transcription_text:
46
  raise ValueError("추출된 스크립트가 없습니다.")
47
 
48
  logging.info("스크립트 추출 완료")
49
+ return title, description, transcription_text, thumbnails
50
  except Exception as e:
51
  logging.exception("스크립트 추출 중 오류 발생")
52
  raise
 
70
  [유튜브 요약 규칙]
71
  1. 너는 유튜브 영상 전문 해설가로서 지침에 맞게 이 글을 작성하라
72
  2. 아래의 제목과 설명은 이 유튜브 영상의 원본 메타데이터이다.
73
+ 3. 반드시 제목과 설명으로 주제와 문맥, 철자(Spelling)을 먼저 파악하고, 아래의 대본을 반드시 지침에 맞게 상세하게 요약하라
74
+ - 반드시 주어진 제목, 설명에 있는 철자(Spelling)를 요약에 반영하라(원문 대본에는 오탈자가 있을 수 있다)
75
  4. 반드시 한글로 작성하라
76
  5. 반드시 '이 유튜브 대본은', '이 영상은', '이 유튜브는'등의 소개식 표현은 제외하라
77
  6. 요약문만으로도 영상을 직접 시청한 것과 동일한 수준으로 내용을 이해할 수 있도록 상세히 작성
 
82
  11. 등장인물, 장소, 사건 등 중요한 요소를 정확하게 작성
83
  12. 대본에서 전달하는 감정이나 분위기도 포함
84
  13. 반드시 기술적 용어나 전문 용어가 있을 경우, 이를 정확하게 사용
 
85
  14. 반드시 핵심 섹션(소주제)를 파악하여 섹션에 맞게 글을 요약하라(글의 양을 고려하여 섹션의 개수를 탄력적으로 설정)
86
  15. 각 섹션의 제목(소주제)에는 내용과 어울리는 적절한 이모지로 소주제를 시작하라
87
  16. 각 섹션의 내용은 Bullet Point를 사용하여 가독성을 높여라(문장 단위로 구분)
 
126
  def display_script(title, script):
127
  script_sentences = split_sentences(script)
128
  formatted_script = "\n\n".join(script_sentences)
129
+ return f"""<div class="script-box">
 
130
  <details>
131
  <summary>클릭하여 펼치기</summary>
132
+ <div class="output-title">{title}</div>
133
+ <p style="white-space: pre-wrap;">{formatted_script}</p>
134
  </details>
135
  </div>"""
136
 
137
  def display_summary(title, summary):
138
+ return f"""<div class="script-box">
139
+ <div class="output-title">{title}</div>
 
140
  {summary}
141
  </div>"""
142
 
143
+ def get_thumbnail_url(thumbnails):
144
+ for thumbnail in thumbnails:
145
+ if thumbnail.get("width") == 640 and thumbnail.get("height") == 480:
146
+ return thumbnail.get("url")
147
+ return "640x480 크기의 썸네일을 찾을 수 없습니다."
148
+
149
  def analyze(url):
150
  # 스크립트 추출
151
+ yield "스크립트 추출 중...", "스크립트 추출 중...", ""
152
+ title, description, script, thumbnails = get_youtube_script(url)
153
  script_content = display_script(title, script)
154
+ thumbnail_url = get_thumbnail_url(thumbnails)
155
 
156
  # 원문 스크립트 표시 및 요약 시작
157
+ yield script_content, "요약 생성 중...", thumbnail_url
158
 
159
  # 요약 생성
160
  summary = summarize_text(title, description, script)
161
 
162
+ # HTML로 변환 (convert_to_html 로직을 직접 통합)
163
  lines = summary.split('\n')
164
  formatted_lines = []
165
  for line in lines:
166
+ line = line.strip()
167
+ if line.startswith('####'):
168
+ formatted_lines.append(f"<h4>{html.escape(line[4:].strip())}</h4>")
169
+ elif line.startswith('###'):
170
+ formatted_lines.append(f"<h3>{html.escape(line[3:].strip())}</h3>")
171
+ elif line.startswith('##'):
172
+ formatted_lines.append(f"<h2>{html.escape(line[2:].strip())}</h2>")
173
+ elif line.startswith('#'):
174
+ formatted_lines.append(f"<h1>{html.escape(line[1:].strip())}</h1>")
175
+ elif line.startswith('- '): # 리스트 아이템
176
+ content = html.escape(line[2:])
177
+ bold_content = re.sub(r'\*\*(.*?)\*\*', r'<strong>\1</strong>', content)
178
+ formatted_lines.append(f"<li>{bold_content}</li>")
179
+ elif line: # 일반 텍스트 (빈 줄 제외)
180
+ content = html.escape(line)
181
+ bold_content = re.sub(r'\*\*(.*?)\*\*', r'<strong>\1</strong>', content)
182
+ formatted_lines.append(f"<p>{bold_content}</p>")
183
+ else: # 빈 줄
184
+ formatted_lines.append("<br>")
185
 
186
  formatted_summary = '\n'.join(formatted_lines)
187
 
188
+ summary_content = f"""<div class="script-box">
189
+ <div class="output-title">{html.escape(title)}</div>
 
190
  {formatted_summary}
191
  </div>"""
192
 
193
  # 최종 결과 표시
194
+ yield script_content, summary_content, thumbnail_url
195
 
196
  # Gradio 인터페이스
197
  with gr.Blocks() as demo:
 
200
  analyze_button = gr.Button("분석하기")
201
  script_output = gr.HTML(label="원문 스크립트")
202
  summary_output = gr.HTML(label="요약")
203
+ thumbnail_output = gr.Textbox(label="썸네일 URL (640x480)") # 이 줄 추가
204
 
205
  analyze_button.click(
206
  analyze,
207
  inputs=[youtube_url_input],
208
+ outputs=[script_output, summary_output, thumbnail_output] # thumbnail_output 추가
209
  )
210
 
211
  if __name__ == "__main__":