Spaces:

litagin
/

anime-whisper-demo

Running on Zero

App Files Files Community

litagin committed on Nov 12, 2024

Commit

0b6b418

1 Parent(s): 0d7f0d9

delete large-v3 since it's large

Browse files

Files changed (1) hide show

app.py +7 -12

app.py CHANGED Viewed

@@ -26,8 +26,7 @@ generate_kwargs = {
 model_dict = {
-    "whisper-large-v2": "openai/whisper-large-v2",
-    "whisper-large-v3": "openai/whisper-large-v3",
     "kotoba-whisper-v2.0": "kotoba-tech/kotoba-whisper-v2.0",
     "anime-whisper": "litagin/anime-whisper",
 }
@@ -75,11 +74,10 @@ def transcribe_common(audio: str, model: str) -> str:
     return result
-def transcribe_others(audio) -> tuple[str, str, str]:
-    result_v2 = transcribe_common(audio, "whisper-large-v2")
-    result_v3 = transcribe_common(audio, "whisper-large-v3")
     result_kotoba_v2 = transcribe_common(audio, "kotoba-whisper-v2.0")
-    return result_v2, result_v3, result_kotoba_v2
 def transcribe_anime_whisper(audio) -> str:
@@ -94,9 +92,9 @@ initial_md = """
 - ベースモデル: [kotoba-whisper-v2.0](https://huggingface.co/kotoba-tech/kotoba-whisper-v2.0)
 - デモでは**音声は15秒まで**しか受け付けません
 - 日本語のみ対応 (Japanese only)
-- 比較のために [openai/whisper-large-v2](https://huggingface.co/openai/whisper-large-v2) と [openai/whisper-large-v3](https://huggingface.co/openai/whisper-large-v3) と [kotoba-tech/kotoba-whisper-v2.0](https://huggingface.co/kotoba-tech/kotoba-whisper-v2.0) も用意しています
-pipeに渡しているkwargsは以下の最低限のもの:
 ```python
 generate_kwargs = {
     "language": "Japanese",
@@ -119,9 +117,6 @@ with gr.Blocks() as app:
     gr.Markdown("### Comparison")
     button_others = gr.Button("Transcribe with other models")
     with gr.Row():
-        with gr.Column():
-            gr.Markdown("### Whisper-Large-V2")
-            output_v2 = gr.Textbox(label="Result")
         with gr.Column():
             gr.Markdown("### Whisper-Large-V3")
             output_v3 = gr.Textbox(label="Result")
@@ -137,7 +132,7 @@ with gr.Blocks() as app:
     button_others.click(
         transcribe_others,
         inputs=[audio],
-        outputs=[output_v2, output_v3, output_kotoba_v2],
     )
 app.launch(inbrowser=True)

 model_dict = {
+    "whisper-large-v3-turbo": "openai/whisper-large-v3-turbo",
     "kotoba-whisper-v2.0": "kotoba-tech/kotoba-whisper-v2.0",
     "anime-whisper": "litagin/anime-whisper",
 }
     return result
+def transcribe_others(audio) -> tuple[str, str]:
+    result_v3 = transcribe_common(audio, "whisper-large-v3-turbo")
     result_kotoba_v2 = transcribe_common(audio, "kotoba-whisper-v2.0")
+    return result_v3, result_kotoba_v2
 def transcribe_anime_whisper(audio) -> str:
 - ベースモデル: [kotoba-whisper-v2.0](https://huggingface.co/kotoba-tech/kotoba-whisper-v2.0)
 - デモでは**音声は15秒まで**しか受け付けません
 - 日本語のみ対応 (Japanese only)
+- 比較のために [openai/whisper-large-v3-turbo](https://huggingface.co/openai/whisper-large-v3-turbo) と [kotoba-tech/kotoba-whisper-v2.0](https://huggingface.co/kotoba-tech/kotoba-whisper-v2.0) も用意しています
+pipeに渡しているkwargsは以下:
 ```python
 generate_kwargs = {
     "language": "Japanese",
     gr.Markdown("### Comparison")
     button_others = gr.Button("Transcribe with other models")
     with gr.Row():
         with gr.Column():
             gr.Markdown("### Whisper-Large-V3")
             output_v3 = gr.Textbox(label="Result")
     button_others.click(
         transcribe_others,
         inputs=[audio],
+        outputs=[output_v3, output_kotoba_v2],
     )
 app.launch(inbrowser=True)