Update app.py
app.py CHANGED
@@ -15,7 +15,8 @@ LOCAL_DIR.mkdir(exist_ok=True)
 SAVE_DIR = LOCAL_DIR / "transcripts"
 SAVE_DIR.mkdir(exist_ok=True)
 LANGS = [lang.capitalize() for lang in list(LANGUAGES.values())]
-DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
+# DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
+DEVICE = 'cuda'
 loaded_model = whisper.load_model("medium", "cpu")
 current_size = "None"
 
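For context on the device change: on a ZeroGPU Space, CUDA is only attached while a function decorated with @spaces.GPU is running, so the app hardcodes DEVICE = 'cuda', loads the model on the CPU at import time, and only moves it to the GPU inside the decorated function. A minimal sketch of that pattern (the function name and model size here are illustrative, not the app's exact code):

import spaces    # Hugging Face ZeroGPU helper
import whisper

DEVICE = "cuda"                               # GPU exists only inside @spaces.GPU calls
model = whisper.load_model("medium", "cpu")   # load on CPU at import time

@spaces.GPU
def transcribe(audio_path: str) -> dict:
    # The decorator attaches a GPU for the duration of this call,
    # so moving the model to CUDA is deferred until here.
    model.to(DEVICE)
    return model.transcribe(audio_path)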
@@ -27,10 +28,9 @@ def generate_random_filename():
     return filename
 
 @spaces.GPU
-def get_transcript(audio_path, task_selection:str, language:str, max_line_width=0
+def get_transcript(audio_path, task_selection:str, language:str, output_format:str, max_line_width=0
                    , max_line_count=0, max_words_per_line=0):
-
-    writer = get_writer(output_format, SAVE_DIR)
+    writer = get_writer(output_format.lower(), SAVE_DIR)
     writer_args = {
         "max_line_width": max_line_width if max_line_width > 0 else None,
         "max_line_count": max_line_count if max_line_count > 0 else None,
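The writer path added here goes through whisper.utils.get_writer, which in recent openai-whisper releases returns a callable for a given output format ("txt", "vtt", "srt", "tsv", "json", or "all") and an output directory; the subtitle layout options are then forwarded as keyword arguments, with 0 mapped to None to mean "disabled". A hedged, self-contained sketch of that call chain (file names and option values are placeholders):

from pathlib import Path
import whisper
from whisper.utils import get_writer

SAVE_DIR = Path("transcripts")
SAVE_DIR.mkdir(exist_ok=True)

model = whisper.load_model("base")
result = model.transcribe("sample.wav", word_timestamps=True)  # placeholder audio file

writer = get_writer("srt", str(SAVE_DIR))   # one of: txt, vtt, srt, tsv, json, all
writer_args = {
    "max_line_width": 42,        # None disables an option; the app maps 0 -> None
    "max_line_count": 2,
    "max_words_per_line": None,
}
writer(result, "sample", **writer_args)      # writes transcripts/sample.srt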
@@ -41,12 +41,10 @@ def get_transcript(audio_path, task_selection:str, language:str, max_line_width=
     options = dict(task=task_selection.lower(), best_of=5, language=TO_LANGUAGE_CODE[language.lower()])
     loaded_model.to(DEVICE)
     results = loaded_model.transcribe(audio_path, verbose=True, word_timestamps=True, **options)
-    # sample_rate, audio = audiodata
-    # results = loaded_model.transcribe(audio, verbose=True, word_timestamps=True, **options)
     filename = generate_random_filename()
     writer(results, filename, **writer_args)
 
-    return str(SAVE_DIR / f"{filename}.
+    return str(SAVE_DIR / f"{filename}.{output_format.lower()}")
 
 title="""
 <div style="text-align: center; max-width: 500px; margin: 0 auto;">
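The language handling in this hunk leans on whisper.tokenizer: LANGUAGES supplies the display names for the dropdown, and TO_LANGUAGE_CODE maps a lowercased name back to the code that transcribe() expects. A small sketch of that round trip (the helper function is illustrative):

from whisper.tokenizer import LANGUAGES, TO_LANGUAGE_CODE

# Display names for the UI, e.g. "English", "French", "Japanese", ...
LANGS = [lang.capitalize() for lang in LANGUAGES.values()]

def to_code(display_name: str) -> str:
    # "English" -> "english" -> "en"
    return TO_LANGUAGE_CODE[display_name.lower()]

assert to_code("English") == "en"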
@@ -76,23 +74,18 @@ with gr.Blocks() as monapp:
     with gr.Accordion("Transcribe options", open=True):
         task_selection = gr.Radio(["Transcribe", "Translate"], value="Transcribe", label="Select a Task")
         language = gr.Dropdown(choices=LANGS, value="English", label="Language spoken in the audio")
+        output_format = gr.Radio(["TXT", "VTT", "SRT", "TSV", "JSON"], value="TXT", label="Format of the output file")
     with gr.Column():
         gr.HTML("<p>keep at 0 to <strong>don't use</strong></p>\n<p>max_words_per_line has no effect with max_line_width activated\nWord-level timestamps on translations may not be reliable.</p>")
-        # gr.HTML("<p>max_words_per_line has no effect with max_line_width activated</p>")
         max_line_width = gr.Number(label="Maximum number of characters in a line before breaking the line", minimum=0, precision=0, value=0, step=1)
         max_line_count = gr.Number(label="Maximum number of lines in a segment", minimum=0, precision=0, value=0, step=1)
         max_words_per_line = gr.Number(label="Maximum number of words in a segment", minimum=0, precision=0, value=0, step=1)
-        # with gr.Group():
-        #     active_img_bg= gr.Checkbox(False, label="Enable Background image")
-        #     img_bg = gr.Textbox(None, label="Background image", placeholder="Background image path", show_label=False)
     submit_btn = gr.Button("Transcribe")
 
     with gr.Column():
-
-        transcript_srt = gr.File(height=50)
-        transcript_vtt = gr.File(height=50)
+        transcript = gr.File(height=50)
 
-    submit_btn.click(fn=get_transcript, inputs=[audio_input, task_selection, language, max_line_width, max_line_count, max_words_per_line], outputs=[
+    submit_btn.click(fn=get_transcript, inputs=[audio_input, task_selection, language, output_format, max_line_width, max_line_count, max_words_per_line], outputs=[transcript])
 
 monapp.launch(debug=True, show_error=True)
 
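On the UI side, the two format-specific gr.File outputs are collapsed into a single file component, the new output_format radio is added to the inputs, and the click handler's single return value (a path string) fills that component. A minimal, self-contained sketch of the same wiring, with a stub standing in for the real transcription (the stub and labels are illustrative):

import gradio as gr

FORMATS = ["TXT", "VTT", "SRT", "TSV", "JSON"]

def fake_transcribe(audio_path, output_format):
    # Stand-in for get_transcript: write a placeholder file in the chosen format.
    out_path = f"demo.{output_format.lower()}"
    with open(out_path, "w") as f:
        f.write("placeholder transcript\n")
    return out_path    # a path string is enough to populate a gr.File output

with gr.Blocks() as demo:
    audio_input = gr.Audio(type="filepath", label="Audio")
    output_format = gr.Radio(FORMATS, value="TXT", label="Format of the output file")
    submit_btn = gr.Button("Transcribe")
    transcript = gr.File(height=50)
    submit_btn.click(fn=fake_transcribe,
                     inputs=[audio_input, output_format],
                     outputs=[transcript])

demo.launch()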