Update app.py
app.py CHANGED
@@ -32,11 +32,13 @@ print(device)
 
 asr_model_base = whisper.load_model("base", device=device)
 asr_model_small = whisper.load_model("small", device=device)
-
+whisper_models_dict = {
     'base': asr_model_base,
     'small': asr_model_small
 }
 
+whisper_models = ["base", "small"]
+
 transcribe_options = dict(beam_size=3, best_of=3, without_timestamps=False)
 
 source_languages = {
@@ -189,7 +191,7 @@ def get_youtube(video_url):
 
     return abs_video_path
 
-async def speech_to_text(video_file_path, selected_translation_lang, whisper_model):
+def speech_to_text(video_file_path, selected_translation_lang, whisper_model):
     """
     # Youtube with translated subtitles using OpenAI Whisper and Opus-MT models.
     # Currently supports only English audio
@@ -216,14 +218,10 @@ async def speech_to_text(video_file_path, selected_translation_lang, whisper_mod
 
     try:
         print(f'Transcribing via local model')
-        transcribe_options = dict(beam_size=5, best_of=5, without_timestamps=False)
 
-
+        transcribe_options = dict(beam_size=5, best_of=5, without_timestamps=False)
+        transcription = whisper_models_dict.get(whisper_model).transcribe(audio, **transcribe_options)
 
-
-        #translation_options = dict(language=selected_translation_lang, beam_size=5, best_of=5, without_timestamps=False)
-        #translations = asr_model.transcribe(audio, **translation_options)
-
         df = pd.DataFrame(columns=['start','end','text'])
 
 
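For review context, here is a minimal sketch of the pattern the commit settles on: preload the Whisper models once into `whisper_models_dict`, expose their names via `whisper_models`, and pick a model by name at transcription time. It is a sketch under assumptions, not the Space's full code: the `device` line stands in for whatever `app.py` computes before `print(device)`, audio extraction from `video_file_path` is elided (OpenAI Whisper's `transcribe` also accepts a media file path directly), and the `transcribe_file` wrapper plus its unknown-key guard are illustrative additions.

import torch
import whisper

device = "cuda" if torch.cuda.is_available() else "cpu"  # stand-in for app.py's device setup

# Preload once at startup, keyed by name, as in the diff.
whisper_models_dict = {
    'base': whisper.load_model("base", device=device),
    'small': whisper.load_model("small", device=device),
}
whisper_models = ["base", "small"]  # e.g. dropdown choices in the UI

def transcribe_file(media_path, whisper_model):
    # Illustrative wrapper, not in the diff: guard the lookup, since
    # dict.get() returns None for a model name that was never loaded.
    model = whisper_models_dict.get(whisper_model)
    if model is None:
        raise ValueError(f"unknown whisper model: {whisper_model!r}")
    transcribe_options = dict(beam_size=5, best_of=5, without_timestamps=False)
    return model.transcribe(media_path, **transcribe_options)

One caveat the diff leaves open: `whisper_models_dict.get(whisper_model)` returns `None` for an unexpected model name, so the chained `.transcribe(...)` call would fail with `AttributeError`; indexing with `whisper_models_dict[whisper_model]`, or guarding as above, fails more legibly.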