Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -8,6 +8,8 @@ from transformers.pipelines.audio_utils import ffmpeg_read
|
|
8 |
|
9 |
import tempfile
|
10 |
import os
|
|
|
|
|
11 |
|
12 |
MODEL_NAME = "openai/whisper-large-v3"
|
13 |
BATCH_SIZE = 8
|
@@ -25,8 +27,6 @@ pipe = pipeline(
|
|
25 |
|
26 |
|
27 |
@spaces.GPU
|
28 |
-
import re
|
29 |
-
|
30 |
def transcribe(inputs, task):
|
31 |
if inputs is None:
|
32 |
raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
|
@@ -65,8 +65,6 @@ def transcribe(inputs, task):
|
|
65 |
return " ".join(final_text), timestamp_str
|
66 |
|
67 |
|
68 |
-
|
69 |
-
|
70 |
def _return_yt_html_embed(yt_url):
|
71 |
video_id = yt_url.split("?v=")[-1]
|
72 |
HTML_str = (
|
@@ -75,6 +73,7 @@ def _return_yt_html_embed(yt_url):
|
|
75 |
)
|
76 |
return HTML_str
|
77 |
|
|
|
78 |
def download_yt_audio(yt_url, filename):
|
79 |
info_loader = youtube_dl.YoutubeDL()
|
80 |
|
@@ -106,6 +105,7 @@ def download_yt_audio(yt_url, filename):
|
|
106 |
except youtube_dl.utils.ExtractorError as err:
|
107 |
raise gr.Error(str(err))
|
108 |
|
|
|
109 |
@spaces.GPU
|
110 |
def yt_transcribe(yt_url, task, max_filesize=75.0):
|
111 |
html_embed_str = _return_yt_html_embed(yt_url)
|
@@ -123,9 +123,34 @@ def yt_transcribe(yt_url, task, max_filesize=75.0):
|
|
123 |
text = result["text"]
|
124 |
timestamps = result["chunks"]
|
125 |
|
126 |
-
|
127 |
-
|
128 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
129 |
|
130 |
|
131 |
demo = gr.Blocks()
|
|
|
8 |
|
9 |
import tempfile
|
10 |
import os
|
11 |
+
import re
|
12 |
+
import time
|
13 |
|
14 |
MODEL_NAME = "openai/whisper-large-v3"
|
15 |
BATCH_SIZE = 8
|
|
|
27 |
|
28 |
|
29 |
@spaces.GPU
|
|
|
|
|
30 |
def transcribe(inputs, task):
|
31 |
if inputs is None:
|
32 |
raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
|
|
|
65 |
return " ".join(final_text), timestamp_str
|
66 |
|
67 |
|
|
|
|
|
68 |
def _return_yt_html_embed(yt_url):
|
69 |
video_id = yt_url.split("?v=")[-1]
|
70 |
HTML_str = (
|
|
|
73 |
)
|
74 |
return HTML_str
|
75 |
|
76 |
+
|
77 |
def download_yt_audio(yt_url, filename):
|
78 |
info_loader = youtube_dl.YoutubeDL()
|
79 |
|
|
|
105 |
except youtube_dl.utils.ExtractorError as err:
|
106 |
raise gr.Error(str(err))
|
107 |
|
108 |
+
|
109 |
@spaces.GPU
|
110 |
def yt_transcribe(yt_url, task, max_filesize=75.0):
|
111 |
html_embed_str = _return_yt_html_embed(yt_url)
|
|
|
123 |
text = result["text"]
|
124 |
timestamps = result["chunks"]
|
125 |
|
126 |
+
# List that collects the text segments, joined with the proper separators
|
127 |
+
final_text = []
|
128 |
+
timestamp_str = ""
|
129 |
+
|
130 |
+
current_chunk = []
|
131 |
+
current_timestamp = None
|
132 |
+
|
133 |
+
for chunk in timestamps:
|
134 |
+
# Text of the current chunk
|
135 |
+
chunk_text = chunk["text"]
|
136 |
+
chunk_timestamp = chunk["timestamp"]
|
137 |
+
|
138 |
+
# Check whether the text ends with a period, exclamation mark, or question mark
|
139 |
+
if re.search(r'[.!?]$', chunk_text):
|
140 |
+
current_chunk.append(chunk_text)
|
141 |
+
final_text.append(" ".join(current_chunk))
|
142 |
+
timestamp_str += f"[{chunk_timestamp}] " + " ".join(current_chunk) + "\n"
|
143 |
+
current_chunk = []
|
144 |
+
else:
|
145 |
+
# If the text is not finished yet, keep accumulating it in the current chunk
|
146 |
+
current_chunk.append(chunk_text)
|
147 |
+
|
148 |
+
# If unfinished chunks remain (e.g. the last piece of text does not end with punctuation)
|
149 |
+
if current_chunk:
|
150 |
+
final_text.append(" ".join(current_chunk))
|
151 |
+
timestamp_str += f"[{chunk_timestamp}] " + " ".join(current_chunk) + "\n"
|
152 |
+
|
153 |
+
return html_embed_str, " ".join(final_text), timestamp_str
|
154 |
|
155 |
|
156 |
demo = gr.Blocks()
|