Spaces:
Running
on
Zero
Running
on
Zero
delete large-v3 since it's large
Browse files
app.py
CHANGED
@@ -26,8 +26,7 @@ generate_kwargs = {
|
|
26 |
|
27 |
|
28 |
model_dict = {
|
29 |
-
"whisper-large-v2": "openai/whisper-large-v2",
|
30 |
-
"whisper-large-v3": "openai/whisper-large-v3",
|
31 |
"kotoba-whisper-v2.0": "kotoba-tech/kotoba-whisper-v2.0",
|
32 |
"anime-whisper": "litagin/anime-whisper",
|
33 |
}
|
@@ -75,11 +74,10 @@ def transcribe_common(audio: str, model: str) -> str:
|
|
75 |
return result
|
76 |
|
77 |
|
78 |
-
def transcribe_others(audio) -> tuple[str, str, str]:
|
79 |
-
|
80 |
-
result_v3 = transcribe_common(audio, "whisper-large-v3")
|
81 |
result_kotoba_v2 = transcribe_common(audio, "kotoba-whisper-v2.0")
|
82 |
-
return result_v2, result_v3, result_kotoba_v2
|
83 |
|
84 |
|
85 |
def transcribe_anime_whisper(audio) -> str:
|
@@ -94,9 +92,9 @@ initial_md = """
|
|
94 |
- ベースモデル: [kotoba-whisper-v2.0](https://huggingface.co/kotoba-tech/kotoba-whisper-v2.0)
|
95 |
- デモでは**音声は15秒まで**しか受け付けません
|
96 |
- 日本語のみ対応 (Japanese only)
|
97 |
-
- 比較のために [openai/whisper-large-
|
98 |
|
99 |
-
pipeに渡しているkwargs
|
100 |
```python
|
101 |
generate_kwargs = {
|
102 |
"language": "Japanese",
|
@@ -119,9 +117,6 @@ with gr.Blocks() as app:
|
|
119 |
gr.Markdown("### Comparison")
|
120 |
button_others = gr.Button("Transcribe with other models")
|
121 |
with gr.Row():
|
122 |
-
with gr.Column():
|
123 |
-
gr.Markdown("### Whisper-Large-V2")
|
124 |
-
output_v2 = gr.Textbox(label="Result")
|
125 |
with gr.Column():
|
126 |
gr.Markdown("### Whisper-Large-V3")
|
127 |
output_v3 = gr.Textbox(label="Result")
|
@@ -137,7 +132,7 @@ with gr.Blocks() as app:
|
|
137 |
button_others.click(
|
138 |
transcribe_others,
|
139 |
inputs=[audio],
|
140 |
-
outputs=[output_v2, output_v3, output_kotoba_v2],
|
141 |
)
|
142 |
|
143 |
app.launch(inbrowser=True)
|
|
|
26 |
|
27 |
|
28 |
model_dict = {
|
29 |
+
"whisper-large-v3-turbo": "openai/whisper-large-v3-turbo",
|
|
|
30 |
"kotoba-whisper-v2.0": "kotoba-tech/kotoba-whisper-v2.0",
|
31 |
"anime-whisper": "litagin/anime-whisper",
|
32 |
}
|
|
|
74 |
return result
|
75 |
|
76 |
|
77 |
+
def transcribe_others(audio) -> tuple[str, str]:
|
78 |
+
result_v3 = transcribe_common(audio, "whisper-large-v3-turbo")
|
|
|
79 |
result_kotoba_v2 = transcribe_common(audio, "kotoba-whisper-v2.0")
|
80 |
+
return result_v3, result_kotoba_v2
|
81 |
|
82 |
|
83 |
def transcribe_anime_whisper(audio) -> str:
|
|
|
92 |
- ベースモデル: [kotoba-whisper-v2.0](https://huggingface.co/kotoba-tech/kotoba-whisper-v2.0)
|
93 |
- デモでは**音声は15秒まで**しか受け付けません
|
94 |
- 日本語のみ対応 (Japanese only)
|
95 |
+
- 比較のために [openai/whisper-large-v3-turbo](https://huggingface.co/openai/whisper-large-v3-turbo) と [kotoba-tech/kotoba-whisper-v2.0](https://huggingface.co/kotoba-tech/kotoba-whisper-v2.0) も用意しています
|
96 |
|
97 |
+
pipeに渡しているkwargsは以下:
|
98 |
```python
|
99 |
generate_kwargs = {
|
100 |
"language": "Japanese",
|
|
|
117 |
gr.Markdown("### Comparison")
|
118 |
button_others = gr.Button("Transcribe with other models")
|
119 |
with gr.Row():
|
|
|
|
|
|
|
120 |
with gr.Column():
|
121 |
gr.Markdown("### Whisper-Large-V3")
|
122 |
output_v3 = gr.Textbox(label="Result")
|
|
|
132 |
button_others.click(
|
133 |
transcribe_others,
|
134 |
inputs=[audio],
|
135 |
+
outputs=[output_v3, output_kotoba_v2],
|
136 |
)
|
137 |
|
138 |
app.launch(inbrowser=True)
|