CharlieAmalet committed on
Commit
8b01502
·
verified ·
1 Parent(s): c6ed614

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -15
app.py CHANGED
@@ -15,7 +15,8 @@ LOCAL_DIR.mkdir(exist_ok=True)
15
  SAVE_DIR = LOCAL_DIR / "transcripts"
16
  SAVE_DIR.mkdir(exist_ok=True)
17
  LANGS = [lang.capitalize() for lang in list(LANGUAGES.values())]
18
- DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
 
19
  loaded_model = whisper.load_model("medium", "cpu")
20
  current_size = "None"
21
 
@@ -27,10 +28,9 @@ def generate_random_filename():
27
  return filename
28
 
29
  @spaces.GPU
30
- def get_transcript(audio_path, task_selection:str, language:str, max_line_width=0
31
  , max_line_count=0, max_words_per_line=0):
32
- output_format = "all"
33
- writer = get_writer(output_format, SAVE_DIR)
34
  writer_args = {
35
  "max_line_width": max_line_width if max_line_width > 0 else None,
36
  "max_line_count": max_line_count if max_line_count > 0 else None,
@@ -41,12 +41,10 @@ def get_transcript(audio_path, task_selection:str, language:str, max_line_width=
41
  options = dict(task=task_selection.lower(), best_of=5, language=TO_LANGUAGE_CODE[language.lower()])
42
  loaded_model.to(DEVICE)
43
  results = loaded_model.transcribe(audio_path, verbose=True, word_timestamps=True, **options)
44
- # sample_rate, audio = audiodata
45
- # results = loaded_model.transcribe(audio, verbose=True, word_timestamps=True, **options)
46
  filename = generate_random_filename()
47
  writer(results, filename, **writer_args)
48
 
49
- return str(SAVE_DIR / f"{filename}.txt"), str(SAVE_DIR / f"{filename}.srt"), str(SAVE_DIR / f"{filename}.vtt")
50
 
51
  title="""
52
  <div style="text-align: center; max-width: 500px; margin: 0 auto;">
@@ -76,23 +74,18 @@ with gr.Blocks() as monapp:
76
  with gr.Accordion("Transcribe options", open=True):
77
  task_selection = gr.Radio(["Transcribe", "Translate"], value="Transcribe", label="Select a Task")
78
  language = gr.Dropdown(choices=LANGS, value="English", label="Language spoken in the audio")
 
79
  with gr.Column():
80
  gr.HTML("<p>keep at 0 to <strong>don't use</strong></p>\n<p>max_words_per_line has no effect with max_line_width activated\nWord-level timestamps on translations may not be reliable.</p>")
81
- # gr.HTML("<p>max_words_per_line has no effect with max_line_width activated</p>")
82
  max_line_width = gr.Number(label="Maximum number of characters in a line before breaking the line", minimum=0, precision=0, value=0, step=1)
83
  max_line_count = gr.Number(label="Maximum number of lines in a segment", minimum=0, precision=0, value=0, step=1)
84
  max_words_per_line = gr.Number(label="Maximum number of words in a segment", minimum=0, precision=0, value=0, step=1)
85
- # with gr.Group():
86
- # active_img_bg= gr.Checkbox(False, label="Enable Background image")
87
- # img_bg = gr.Textbox(None, label="Background image", placeholder="Background image path", show_label=False)
88
  submit_btn = gr.Button("Transcribe")
89
 
90
  with gr.Column():
91
- transcript_txt = gr.File(height=50)
92
- transcript_srt = gr.File(height=50)
93
- transcript_vtt = gr.File(height=50)
94
 
95
- submit_btn.click(fn=get_transcript, inputs=[audio_input, task_selection, language, max_line_width, max_line_count, max_words_per_line], outputs=[transcript_txt, transcript_srt, transcript_vtt])
96
 
97
  monapp.launch(debug=True, show_error=True)
98
 
 
15
  SAVE_DIR = LOCAL_DIR / "transcripts"
16
  SAVE_DIR.mkdir(exist_ok=True)
17
  LANGS = [lang.capitalize() for lang in list(LANGUAGES.values())]
18
+ # DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
19
+ DEVICE = 'cuda'
20
  loaded_model = whisper.load_model("medium", "cpu")
21
  current_size = "None"
22
 
 
28
  return filename
29
 
30
  @spaces.GPU
31
+ def get_transcript(audio_path, task_selection:str, language:str, output_format:str, max_line_width=0
32
  , max_line_count=0, max_words_per_line=0):
33
+ writer = get_writer(output_format.lower(), SAVE_DIR)
 
34
  writer_args = {
35
  "max_line_width": max_line_width if max_line_width > 0 else None,
36
  "max_line_count": max_line_count if max_line_count > 0 else None,
 
41
  options = dict(task=task_selection.lower(), best_of=5, language=TO_LANGUAGE_CODE[language.lower()])
42
  loaded_model.to(DEVICE)
43
  results = loaded_model.transcribe(audio_path, verbose=True, word_timestamps=True, **options)
 
 
44
  filename = generate_random_filename()
45
  writer(results, filename, **writer_args)
46
 
47
+ return str(SAVE_DIR / f"{filename}.{output_format.lower()}")
48
 
49
  title="""
50
  <div style="text-align: center; max-width: 500px; margin: 0 auto;">
 
74
  with gr.Accordion("Transcribe options", open=True):
75
  task_selection = gr.Radio(["Transcribe", "Translate"], value="Transcribe", label="Select a Task")
76
  language = gr.Dropdown(choices=LANGS, value="English", label="Language spoken in the audio")
77
+ output_format = gr.Radio(["TXT", "VTT", "SRT", "TSV", "JSON"], value="TXT", label="Format of the output file")
78
  with gr.Column():
79
  gr.HTML("<p>keep at 0 to <strong>don't use</strong></p>\n<p>max_words_per_line has no effect with max_line_width activated\nWord-level timestamps on translations may not be reliable.</p>")
 
80
  max_line_width = gr.Number(label="Maximum number of characters in a line before breaking the line", minimum=0, precision=0, value=0, step=1)
81
  max_line_count = gr.Number(label="Maximum number of lines in a segment", minimum=0, precision=0, value=0, step=1)
82
  max_words_per_line = gr.Number(label="Maximum number of words in a segment", minimum=0, precision=0, value=0, step=1)
 
 
 
83
  submit_btn = gr.Button("Transcribe")
84
 
85
  with gr.Column():
86
+ transcript = gr.File(height=50)
 
 
87
 
88
+ submit_btn.click(fn=get_transcript, inputs=[audio_input, task_selection, language, output_format, max_line_width, max_line_count, max_words_per_line], outputs=[transcript])
89
 
90
  monapp.launch(debug=True, show_error=True)
91