litagin committed on
Commit
0b6b418
·
1 Parent(s): 0d7f0d9

delete large-v3 since it's large

Browse files
Files changed (1) hide show
  1. app.py +7 -12
app.py CHANGED
@@ -26,8 +26,7 @@ generate_kwargs = {
26
 
27
 
28
  model_dict = {
29
- "whisper-large-v2": "openai/whisper-large-v2",
30
- "whisper-large-v3": "openai/whisper-large-v3",
31
  "kotoba-whisper-v2.0": "kotoba-tech/kotoba-whisper-v2.0",
32
  "anime-whisper": "litagin/anime-whisper",
33
  }
@@ -75,11 +74,10 @@ def transcribe_common(audio: str, model: str) -> str:
75
  return result
76
 
77
 
78
- def transcribe_others(audio) -> tuple[str, str, str]:
79
- result_v2 = transcribe_common(audio, "whisper-large-v2")
80
- result_v3 = transcribe_common(audio, "whisper-large-v3")
81
  result_kotoba_v2 = transcribe_common(audio, "kotoba-whisper-v2.0")
82
- return result_v2, result_v3, result_kotoba_v2
83
 
84
 
85
  def transcribe_anime_whisper(audio) -> str:
@@ -94,9 +92,9 @@ initial_md = """
94
  - ベースモデル: [kotoba-whisper-v2.0](https://huggingface.co/kotoba-tech/kotoba-whisper-v2.0)
95
  - デモでは**音声は15秒まで**しか受け付けません
96
  - 日本語のみ対応 (Japanese only)
97
- - 比較のために [openai/whisper-large-v2](https://huggingface.co/openai/whisper-large-v2) と [openai/whisper-large-v3](https://huggingface.co/openai/whisper-large-v3) と [kotoba-tech/kotoba-whisper-v2.0](https://huggingface.co/kotoba-tech/kotoba-whisper-v2.0) も用意しています
98
 
99
- pipeに渡しているkwargsは以下の最低限のもの:
100
  ```python
101
  generate_kwargs = {
102
  "language": "Japanese",
@@ -119,9 +117,6 @@ with gr.Blocks() as app:
119
  gr.Markdown("### Comparison")
120
  button_others = gr.Button("Transcribe with other models")
121
  with gr.Row():
122
- with gr.Column():
123
- gr.Markdown("### Whisper-Large-V2")
124
- output_v2 = gr.Textbox(label="Result")
125
  with gr.Column():
126
  gr.Markdown("### Whisper-Large-V3")
127
  output_v3 = gr.Textbox(label="Result")
@@ -137,7 +132,7 @@ with gr.Blocks() as app:
137
  button_others.click(
138
  transcribe_others,
139
  inputs=[audio],
140
- outputs=[output_v2, output_v3, output_kotoba_v2],
141
  )
142
 
143
  app.launch(inbrowser=True)
 
26
 
27
 
28
  model_dict = {
29
+ "whisper-large-v3-turbo": "openai/whisper-large-v3-turbo",
 
30
  "kotoba-whisper-v2.0": "kotoba-tech/kotoba-whisper-v2.0",
31
  "anime-whisper": "litagin/anime-whisper",
32
  }
 
74
  return result
75
 
76
 
77
+ def transcribe_others(audio) -> tuple[str, str]:
78
+ result_v3 = transcribe_common(audio, "whisper-large-v3-turbo")
 
79
  result_kotoba_v2 = transcribe_common(audio, "kotoba-whisper-v2.0")
80
+ return result_v3, result_kotoba_v2
81
 
82
 
83
  def transcribe_anime_whisper(audio) -> str:
 
92
  - ベースモデル: [kotoba-whisper-v2.0](https://huggingface.co/kotoba-tech/kotoba-whisper-v2.0)
93
  - デモでは**音声は15秒まで**しか受け付けません
94
  - 日本語のみ対応 (Japanese only)
95
+ - 比較のために [openai/whisper-large-v3-turbo](https://huggingface.co/openai/whisper-large-v3-turbo) と [kotoba-tech/kotoba-whisper-v2.0](https://huggingface.co/kotoba-tech/kotoba-whisper-v2.0) も用意しています
96
 
97
+ pipeに渡しているkwargsは以下:
98
  ```python
99
  generate_kwargs = {
100
  "language": "Japanese",
 
117
  gr.Markdown("### Comparison")
118
  button_others = gr.Button("Transcribe with other models")
119
  with gr.Row():
 
 
 
120
  with gr.Column():
121
  gr.Markdown("### Whisper-Large-V3")
122
  output_v3 = gr.Textbox(label="Result")
 
132
  button_others.click(
133
  transcribe_others,
134
  inputs=[audio],
135
+ outputs=[output_v3, output_kotoba_v2],
136
  )
137
 
138
  app.launch(inbrowser=True)