whisper-demo-french

Sleeping

bofenghuang committed on Jul 24, 2023

Commit

0585497

•

1 Parent(s): 2eface4

up

Files changed (2) hide show

run_demo_ct2.py CHANGED Viewed

@@ -149,8 +149,7 @@ def infer(model, filename, with_timestamps, return_df=False):
     else:
         # text = model.transcribe(filename, without_timestamps=True, **GEN_KWARGS)["text"]
         model_outputs, _ = model.transcribe(filename, without_timestamps=True, **GEN_KWARGS)
-        model_outputs = list(model_outputs)
-        text = model_outputs[0].text
         if return_df:
             return pd.DataFrame({"text": sent_tokenize(text)})
         else:
@@ -201,7 +200,7 @@ def video_transcribe(video_file_path, with_timestamps, model_name=DEFAULT_MODEL_
         raise ValueError("Failed to transcribe video as no video_file_path has been defined")
     audio_file_path = re.sub(r"\.mp4$", ".wav", video_file_path)
-    os.system(f'ffmpeg -i "{video_file_path}" -ar 16000 -ac 1 -c:a pcm_s16le "{audio_file_path}"')
     model = maybe_load_cached_pipeline(model_name)
     # text = model.transcribe("audio.mp3", **GEN_KWARGS)["text"]

     else:
         # text = model.transcribe(filename, without_timestamps=True, **GEN_KWARGS)["text"]
         model_outputs, _ = model.transcribe(filename, without_timestamps=True, **GEN_KWARGS)
+        text = " ".join([segment.text for segment in model_outputs])
         if return_df:
             return pd.DataFrame({"text": sent_tokenize(text)})
         else:
         raise ValueError("Failed to transcribe video as no video_file_path has been defined")
     audio_file_path = re.sub(r"\.mp4$", ".wav", video_file_path)
+    os.system(f'ffmpeg -hide_banner -loglevel error -y -i "{video_file_path}" -ar 16000 -ac 1 -c:a pcm_s16le "{audio_file_path}"')
     model = maybe_load_cached_pipeline(model_name)
     # text = model.transcribe("audio.mp3", **GEN_KWARGS)["text"]

run_demo_openai_layout.py CHANGED Viewed

@@ -216,7 +216,7 @@ with gr.Blocks() as demo:
               </div>
               Transcribe long-form microphone or audio inputs!
-              Demo uses the fine-tuned checkpoint: <a href='https://huggingface.co/{DEFAULT_MODEL_NAME}' target='_blank'><b>{DEFAULT_MODEL_NAME}</b></a> to transcribe audio files of arbitrary length.
           """
         )
@@ -252,7 +252,7 @@ with gr.Blocks() as demo:
     #           </div>
     #           Transcribe long-form YouTube videos!
-    #           Demo uses the fine-tuned checkpoint: <a href='https://huggingface.co/{DEFAULT_MODEL_NAME}' target='_blank'><b>{DEFAULT_MODEL_NAME}</b></a> to transcribe video files of arbitrary length.
     #       """
     #     )
@@ -282,7 +282,7 @@ with gr.Blocks() as demo:
               </div>
               Transcribe long-form YouTube videos or uploaded video inputs!
-              Demo uses the fine-tuned checkpoint: <a href='https://huggingface.co/{DEFAULT_MODEL_NAME}' target='_blank'><b>{DEFAULT_MODEL_NAME}</b></a> to transcribe video files of arbitrary length.
           """
         )

               </div>
               Transcribe long-form microphone or audio inputs!
+              Demo uses the fine-tuned checkpoint <a href='https://huggingface.co/{DEFAULT_MODEL_NAME}' target='_blank'><b>{DEFAULT_MODEL_NAME}</b></a> to transcribe audio files of arbitrary length.
           """
         )
     #           </div>
     #           Transcribe long-form YouTube videos!
+    #           Demo uses the fine-tuned checkpoint <a href='https://huggingface.co/{DEFAULT_MODEL_NAME}' target='_blank'><b>{DEFAULT_MODEL_NAME}</b></a> to transcribe video files of arbitrary length.
     #       """
     #     )
               </div>
               Transcribe long-form YouTube videos or uploaded video inputs!
+              Demo uses the fine-tuned checkpoint <a href='https://huggingface.co/{DEFAULT_MODEL_NAME}' target='_blank'><b>{DEFAULT_MODEL_NAME}</b></a> to transcribe video files of arbitrary length.
           """
         )