Realtime-whisper-large-v3-turbo-german

Paused

MR-Eder committed on Oct 11

Commit

a628e83

•

1 Parent(s): a80ba5c

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -17,7 +17,7 @@ subprocess.run(
 device = "cuda" if torch.cuda.is_available() else "cpu"
 torch_dtype = torch.float16
-MODEL_NAME = "openai/whisper-large-v3-turbo"
 model = AutoModelForSpeechSeq2Seq.from_pretrained(
     MODEL_NAME, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True, attn_implementation="flash_attention_2"
@@ -79,7 +79,7 @@ def clear():
 with gr.Blocks() as microphone:
     with gr.Column():
-        gr.Markdown(f"# Realtime Whisper Large V3 Turbo: \n Transcribe Audio in Realtime. This Demo uses the Checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers.\n Note: The first token takes about 5 seconds. After that, it works flawlessly.")
         with gr.Row():
             input_audio_microphone = gr.Audio(streaming=True)
             output = gr.Textbox(label="Transcription", value="")
@@ -92,7 +92,7 @@ with gr.Blocks() as microphone:
 with gr.Blocks() as file:
     with gr.Column():
-        gr.Markdown(f"# Realtime Whisper Large V3 Turbo: \n Transcribe Audio in Realtime. This Demo uses the Checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers.\n Note: The first token takes about 5 seconds. After that, it works flawlessly.")
         with gr.Row():
             input_audio_microphone = gr.Audio(sources="upload", type="numpy")
             output = gr.Textbox(label="Transcription", value="")

 device = "cuda" if torch.cuda.is_available() else "cpu"
 torch_dtype = torch.float16
+MODEL_NAME = "primeline/whisper-large-v3-turbo-german"
 model = AutoModelForSpeechSeq2Seq.from_pretrained(
     MODEL_NAME, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True, attn_implementation="flash_attention_2"
 with gr.Blocks() as microphone:
     with gr.Column():
+        gr.Markdown(f"# Realtime Whisper Large V3 Turbo German: \n Transcribe Audio in Realtime. This Demo uses the Checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers.\n Note: The first token takes about 5 seconds. After that, it works flawlessly.")
         with gr.Row():
             input_audio_microphone = gr.Audio(streaming=True)
             output = gr.Textbox(label="Transcription", value="")
 with gr.Blocks() as file:
     with gr.Column():
+        gr.Markdown(f"# Realtime Whisper Large V3 Turbo German: \n Transcribe Audio in Realtime. This Demo uses the Checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers.\n Note: The first token takes about 5 seconds. After that, it works flawlessly.")
         with gr.Row():
             input_audio_microphone = gr.Audio(sources="upload", type="numpy")
             output = gr.Textbox(label="Transcription", value="")