Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -40,12 +40,13 @@ def get_youtube_script(url):
|
|
40 |
title = data.get("title", "μ λͺ© μμ")
|
41 |
description = data.get("description", "μ€λͺ
μμ")
|
42 |
transcription_text = data.get("transcriptionAsText", "")
|
|
|
43 |
|
44 |
if not transcription_text:
|
45 |
raise ValueError("μΆμΆλ μ€ν¬λ¦½νΈκ° μμ΅λλ€.")
|
46 |
|
47 |
logging.info("μ€ν¬λ¦½νΈ μΆμΆ μλ£")
|
48 |
-
return title, description, transcription_text
|
49 |
except Exception as e:
|
50 |
logging.exception("μ€ν¬λ¦½νΈ μΆμΆ μ€ μ€λ₯ λ°μ")
|
51 |
raise
|
@@ -69,7 +70,8 @@ def summarize_text(title, description, text):
|
|
69 |
[μ νλΈ μμ½ κ·μΉ]
|
70 |
1. λλ μ νλΈ μμ μ λ¬Έ ν΄μ€κ°λ‘μ μ§μΉ¨μ λ§κ² μ΄ κΈμ μμ±νλΌ
|
71 |
2. μλμ μ λͺ©κ³Ό μ€λͺ
μ μ΄ μ νλΈ μμμ μλ³Έ λ©νλ°μ΄ν°μ΄λ€.
|
72 |
-
3. λ°λμ μ λͺ©κ³Ό μ€λͺ
μΌλ‘ μ£Όμ μ
|
|
|
73 |
4. λ°λμ νκΈλ‘ μμ±νλΌ
|
74 |
5. λ°λμ 'μ΄ μ νλΈ λλ³Έμ', 'μ΄ μμμ', 'μ΄ μ νλΈλ'λ±μ μκ°μ ννμ μ μΈνλΌ
|
75 |
6. μμ½λ¬Έλ§μΌλ‘λ μμμ μ§μ μμ²ν κ²κ³Ό λμΌν μμ€μΌλ‘ λ΄μ©μ μ΄ν΄ν μ μλλ‘ μμΈν μμ±
|
@@ -80,7 +82,6 @@ def summarize_text(title, description, text):
|
|
80 |
11. λ±μ₯μΈλ¬Ό, μ₯μ, μ¬κ±΄ λ± μ€μν μμλ₯Ό μ ννκ² μμ±
|
81 |
12. λλ³Έμμ μ λ¬νλ κ°μ μ΄λ λΆμκΈ°λ ν¬ν¨
|
82 |
13. λ°λμ κΈ°μ μ μ©μ΄λ μ λ¬Έ μ©μ΄κ° μμ κ²½μ°, μ΄λ₯Ό μ ννκ² μ¬μ©
|
83 |
-
|
84 |
14. λ°λμ ν΅μ¬ μΉμ
(μμ£Όμ )λ₯Ό νμ
νμ¬ μΉμ
μ λ§κ² κΈμ μμ½νλΌ(κΈμ μμ κ³ λ €νμ¬ μΉμ
μ κ°μλ₯Ό νλ ₯μ μΌλ‘ μ€μ )
|
85 |
15. κ° μΉμ
μ μ λͺ©(μμ£Όμ )μλ λ΄μ©κ³Ό μ΄μΈλ¦¬λ μ μ ν μ΄λͺ¨μ§λ‘ μμ£Όμ λ₯Ό μμνλΌ
|
86 |
16. κ° μΉμ
μ λ΄μ©μ Bullet Pointλ₯Ό μ¬μ©νμ¬ κ°λ
μ±μ λμ¬λΌ(λ¬Έμ₯ λ¨μλ‘ κ΅¬λΆ)
|
@@ -125,57 +126,72 @@ def split_sentences(text):
|
|
125 |
def display_script(title, script):
|
126 |
script_sentences = split_sentences(script)
|
127 |
formatted_script = "\n\n".join(script_sentences)
|
128 |
-
return f"""<div
|
129 |
-
<h3>μλ¬Έ μ€ν¬λ¦½νΈ</h3>
|
130 |
<details>
|
131 |
<summary>ν΄λ¦νμ¬ νΌμΉκΈ°</summary>
|
132 |
-
<
|
133 |
-
<
|
134 |
</details>
|
135 |
</div>"""
|
136 |
|
137 |
def display_summary(title, summary):
|
138 |
-
return f"""<div
|
139 |
-
<
|
140 |
-
<h2>{title}</h2>
|
141 |
{summary}
|
142 |
</div>"""
|
143 |
|
|
|
|
|
|
|
|
|
|
|
|
|
144 |
def analyze(url):
|
145 |
# μ€ν¬λ¦½νΈ μΆμΆ
|
146 |
-
yield "μ€ν¬λ¦½νΈ μΆμΆ μ€...", "μ€ν¬λ¦½νΈ μΆμΆ μ€..."
|
147 |
-
title, description, script = get_youtube_script(url)
|
148 |
script_content = display_script(title, script)
|
|
|
149 |
|
150 |
# μλ¬Έ μ€ν¬λ¦½νΈ νμ λ° μμ½ μμ
|
151 |
-
yield script_content, "μμ½ μμ± μ€..."
|
152 |
|
153 |
# μμ½ μμ±
|
154 |
summary = summarize_text(title, description, script)
|
155 |
|
|
|
156 |
lines = summary.split('\n')
|
157 |
formatted_lines = []
|
158 |
for line in lines:
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
168 |
|
169 |
formatted_summary = '\n'.join(formatted_lines)
|
170 |
|
171 |
-
summary_content = f"""<div
|
172 |
-
<
|
173 |
-
<h2>{html.escape(title)}</h2>
|
174 |
{formatted_summary}
|
175 |
</div>"""
|
176 |
|
177 |
# μ΅μ’
κ²°κ³Ό νμ
|
178 |
-
yield script_content, summary_content
|
179 |
|
180 |
# Gradio μΈν°νμ΄μ€
|
181 |
with gr.Blocks() as demo:
|
@@ -184,11 +200,12 @@ with gr.Blocks() as demo:
|
|
184 |
analyze_button = gr.Button("λΆμνκΈ°")
|
185 |
script_output = gr.HTML(label="μλ¬Έ μ€ν¬λ¦½νΈ")
|
186 |
summary_output = gr.HTML(label="μμ½")
|
|
|
187 |
|
188 |
analyze_button.click(
|
189 |
analyze,
|
190 |
inputs=[youtube_url_input],
|
191 |
-
outputs=[script_output, summary_output]
|
192 |
)
|
193 |
|
194 |
if __name__ == "__main__":
|
|
|
40 |
title = data.get("title", "μ λͺ© μμ")
|
41 |
description = data.get("description", "μ€λͺ
μμ")
|
42 |
transcription_text = data.get("transcriptionAsText", "")
|
43 |
+
thumbnails = data.get("thumbnails", [])
|
44 |
|
45 |
if not transcription_text:
|
46 |
raise ValueError("μΆμΆλ μ€ν¬λ¦½νΈκ° μμ΅λλ€.")
|
47 |
|
48 |
logging.info("μ€ν¬λ¦½νΈ μΆμΆ μλ£")
|
49 |
+
return title, description, transcription_text, thumbnails
|
50 |
except Exception as e:
|
51 |
logging.exception("μ€ν¬λ¦½νΈ μΆμΆ μ€ μ€λ₯ λ°μ")
|
52 |
raise
|
|
|
70 |
[μ νλΈ μμ½ κ·μΉ]
|
71 |
1. λλ μ νλΈ μμ μ λ¬Έ ν΄μ€κ°λ‘μ μ§μΉ¨μ λ§κ² μ΄ κΈμ μμ±νλΌ
|
72 |
2. μλμ μ λͺ©κ³Ό μ€λͺ
μ μ΄ μ νλΈ μμμ μλ³Έ λ©νλ°μ΄ν°μ΄λ€.
|
73 |
+
3. λ°λμ μ λͺ©κ³Ό μ€λͺ
μΌλ‘ μ£Όμ μ λ¬Έλ§₯, μ² μ(Spelling)μ λ¨Όμ νμ
νκ³ , μλμ λλ³Έμ λ°λμ μ§μΉ¨μ λ§κ² μμΈνκ² μμ½νλΌ
|
74 |
+
- λ°λμ μ£Όμ΄μ§ μ λͺ©, μ€λͺ
μ μλ μ² μ(Spelling)λ₯Ό μμ½μ λ°μνλΌ(μλ¬Έ λλ³Έμλ μ€νμκ° μμ μ μλ€)
|
75 |
4. λ°λμ νκΈλ‘ μμ±νλΌ
|
76 |
5. λ°λμ 'μ΄ μ νλΈ λλ³Έμ', 'μ΄ μμμ', 'μ΄ μ νλΈλ'λ±μ μκ°μ ννμ μ μΈνλΌ
|
77 |
6. μμ½λ¬Έλ§μΌλ‘λ μμμ μ§μ μμ²ν κ²κ³Ό λμΌν μμ€μΌλ‘ λ΄μ©μ μ΄ν΄ν μ μλλ‘ μμΈν μμ±
|
|
|
82 |
11. λ±μ₯μΈλ¬Ό, μ₯μ, μ¬κ±΄ λ± μ€μν μμλ₯Ό μ ννκ² μμ±
|
83 |
12. λλ³Έμμ μ λ¬νλ κ°μ μ΄λ λΆμκΈ°λ ν¬ν¨
|
84 |
13. λ°λμ κΈ°μ μ μ©μ΄λ μ λ¬Έ μ©μ΄κ° μμ κ²½μ°, μ΄λ₯Ό μ ννκ² μ¬μ©
|
|
|
85 |
14. λ°λμ ν΅μ¬ μΉμ
(μμ£Όμ )λ₯Ό νμ
νμ¬ μΉμ
μ λ§κ² κΈμ μμ½νλΌ(κΈμ μμ κ³ λ €νμ¬ μΉμ
μ κ°μλ₯Ό νλ ₯μ μΌλ‘ μ€μ )
|
86 |
15. κ° μΉμ
μ μ λͺ©(μμ£Όμ )μλ λ΄μ©κ³Ό μ΄μΈλ¦¬λ μ μ ν μ΄λͺ¨μ§λ‘ μμ£Όμ λ₯Ό μμνλΌ
|
87 |
16. κ° μΉμ
μ λ΄μ©μ Bullet Pointλ₯Ό μ¬μ©νμ¬ κ°λ
μ±μ λμ¬λΌ(λ¬Έμ₯ λ¨μλ‘ κ΅¬λΆ)
|
|
|
126 |
def display_script(title, script):
    """Render the transcript as a collapsible HTML block.

    The script is split with ``split_sentences`` and the pieces are joined
    with blank lines; the ``white-space: pre-wrap`` paragraph keeps those
    line breaks visible in the browser.

    Args:
        title: Video title shown inside the collapsible section.
        script: Raw transcript text.

    Returns:
        An HTML string wrapped in a ``script-box`` div.
    """
    # NOTE(review): assumes split_sentences returns an iterable of strings;
    # confirm against its definition.
    script_sentences = split_sentences(script)
    # Title and transcript come from remote video metadata, so escape them
    # before interpolating into markup; otherwise a stray "<" or "&" breaks
    # the layout or injects HTML. Escaping leaves the newlines untouched.
    formatted_script = html.escape("\n\n".join(script_sentences))
    safe_title = html.escape(str(title))
    return f"""<div class="script-box">
<details>
<summary>ν΄λ¦νμ¬ νΌμΉκΈ°</summary>
<div class="output-title">{safe_title}</div>
<p style="white-space: pre-wrap;">{formatted_script}</p>
</details>
</div>"""
|
136 |
|
137 |
def display_summary(title, summary):
    """Wrap a summary in the shared ``script-box`` HTML container.

    Args:
        title: Video title; HTML-escaped before it is placed in the
            ``output-title`` div.
        summary: Summary body, inserted verbatim.
            NOTE(review): presumably already-rendered HTML; confirm with
            callers before escaping it here as well.

    Returns:
        An HTML string containing the title div followed by the summary.
    """
    # The title is plain text from video metadata, so it must not be able
    # to inject markup.
    safe_title = html.escape(str(title))
    return f"""<div class="script-box">
<div class="output-title">{safe_title}</div>
{summary}
</div>"""
|
142 |
|
143 |
+
def get_thumbnail_url(thumbnails):
    """Return the URL of the 640x480 thumbnail, or a fallback message.

    Args:
        thumbnails: Iterable of dicts with ``width``, ``height`` and ``url``
            keys. ``None`` or an empty iterable is treated as "no thumbnails".

    Returns:
        The ``url`` of the first 640x480 entry that has one; otherwise the
        not-found message string, which callers display as-is.
    """
    # ``or ()`` guards against a null "thumbnails" field, which would
    # otherwise raise TypeError when iterated.
    for thumbnail in thumbnails or ():
        # Skip malformed entries instead of failing on ``.get``.
        if not isinstance(thumbnail, dict):
            continue
        if thumbnail.get("width") == 640 and thumbnail.get("height") == 480:
            url = thumbnail.get("url")
            # A matching size without a URL is useless to the caller; keep
            # looking rather than returning None.
            if url:
                return url
    return "640x480 ν¬κΈ°μ μΈλ€μΌμ μ°Ύμ μ μμ΅λλ€."
|
148 |
+
|
149 |
def analyze(url):
    """Stream transcript, summary and thumbnail results for a video URL.

    This is a generator that yields three times, each time a
    ``(script_html, summary_html, thumbnail_url)`` tuple:

    1. placeholder text while the transcript is being fetched,
    2. the rendered transcript with a "summarizing" placeholder,
    3. the rendered transcript together with the finished summary.

    Args:
        url: Video URL passed through to ``get_youtube_script``.

    Yields:
        Tuples of three strings matching the three output components.
    """
    # Stage 1: tell the UI that extraction has started.
    extracting = "μ€ν¬λ¦½νΈ μΆμΆ μ€..."
    yield extracting, extracting, ""

    title, description, script, thumbnails = get_youtube_script(url)
    script_content = display_script(title, script)
    thumbnail_url = get_thumbnail_url(thumbnails)

    # Stage 2: show the transcript while the summary is generated.
    yield script_content, "μμ½ μμ± μ€...", thumbnail_url

    summary = summarize_text(title, description, script)

    # Longest marker first so "####" is not mistaken for "#".
    heading_tags = (("####", "h4"), ("###", "h3"), ("##", "h2"), ("#", "h1"))

    def escape_and_bold(text):
        # Escape first, then turn **bold** markers into <strong> tags.
        return re.sub(r'\*\*(.*?)\*\*', r'<strong>\1</strong>', html.escape(text))

    # Convert the markdown-like summary to HTML one line at a time.
    formatted_lines = []
    for raw_line in summary.split('\n'):
        text = raw_line.strip()
        if not text:
            # Blank line.
            formatted_lines.append("<br>")
            continue

        match = next(
            ((marker, tag) for marker, tag in heading_tags if text.startswith(marker)),
            None,
        )
        if match is not None:
            marker, tag = match
            heading = html.escape(text[len(marker):].strip())
            formatted_lines.append(f"<{tag}>{heading}</{tag}>")
        elif text.startswith('- '):
            # List item.
            formatted_lines.append(f"<li>{escape_and_bold(text[2:])}</li>")
        else:
            # Ordinary paragraph text.
            formatted_lines.append(f"<p>{escape_and_bold(text)}</p>")

    formatted_summary = '\n'.join(formatted_lines)

    summary_content = f"""<div class="script-box">
<div class="output-title">{html.escape(title)}</div>
{formatted_summary}
</div>"""

    # Stage 3: final result.
    yield script_content, summary_content, thumbnail_url
|
195 |
|
196 |
# Gradio μΈν°νμ΄μ€
|
197 |
with gr.Blocks() as demo:
|
|
|
200 |
analyze_button = gr.Button("λΆμνκΈ°")
|
201 |
script_output = gr.HTML(label="μλ¬Έ μ€ν¬λ¦½νΈ")
|
202 |
summary_output = gr.HTML(label="μμ½")
|
203 |
+
thumbnail_output = gr.Textbox(label="μΈλ€μΌ URL (640x480)") # μ΄ μ€ μΆκ°
|
204 |
|
205 |
analyze_button.click(
|
206 |
analyze,
|
207 |
inputs=[youtube_url_input],
|
208 |
+
outputs=[script_output, summary_output, thumbnail_output] # thumbnail_output μΆκ°
|
209 |
)
|
210 |
|
211 |
if __name__ == "__main__":
|