aliosha committed
Commit 705befd · 1 Parent(s): e6e142f

updating app + requirements

Files changed (2):
  1. app.py +19 -58
  2. requirements.txt +1 -0
app.py CHANGED
@@ -1,35 +1,40 @@
 
 import gradio as gr
 import os
-os.system("pip install git+https://github.com/openai/whisper.git")
+# os.system("pip install git+https://github.com/openai/whisper.git")
 import whisper
 
 model = whisper.load_model("small")
 model_en = whisper.load_model("small.en")
+current_size = 'base'
 
 # model = whisper.load_model("medium")
 
 
+def change_model(size):
+    if size == current_size:
+        return
+    model = whisper.load_model(size)
+    model_en = whisper.load_model(f"{size}.en")
+    current_size = size
 
-def inference(audio):
-    audio = whisper.load_audio(audio)
+def inference(audio_file):
+    audio = whisper.load_audio(audio_file)
     audio = whisper.pad_or_trim(audio)
-
     mel = whisper.log_mel_spectrogram(audio).to(model.device)
-
     _, probs = model.detect_language(mel)
-
     if max(probs, key=probs.get) == "en":
         _model = model_en
     else:
         _model = model
 
-    options = whisper.DecodingOptions(fp16=False)
-    result = whisper.decode(_model, mel, options)
+    # options = whisper.DecodingOptions(fp16=False)
+    # result = whisper.decode(_model, mel, options)
 
+    result = _model.transcribe(audio_file)
     segmented_text_list = []
 
-    for segment in result["segments"]:
+    for segment in result.result:
         segmented_text_list.append(
             f'{segment["start"]:.4f} - {segment["end"]:.4f}: {segment["text"]}')
     segmented_text = "\n".join(segmented_text_list)
@@ -112,61 +117,20 @@ block = gr.Blocks(css=css)
 with block:
     gr.HTML(
         """
-        <div style="text-align: center; max-width: 650px; margin: 0 auto;">
-          <div
-            style="
-              display: inline-flex;
-              align-items: center;
-              gap: 0.8rem;
-              font-size: 1.75rem;
-            "
-          >
-            <svg
-              width="0.65em"
-              height="0.65em"
-              viewBox="0 0 115 115"
-              fill="none"
-              xmlns="http://www.w3.org/2000/svg"
-            >
-              <rect width="23" height="23" fill="white"></rect>
-              <rect y="69" width="23" height="23" fill="white"></rect>
-              <rect x="23" width="23" height="23" fill="#AEAEAE"></rect>
-              <rect x="23" y="69" width="23" height="23" fill="#AEAEAE"></rect>
-              <rect x="46" width="23" height="23" fill="white"></rect>
-              <rect x="46" y="69" width="23" height="23" fill="white"></rect>
-              <rect x="69" width="23" height="23" fill="black"></rect>
-              <rect x="69" y="69" width="23" height="23" fill="black"></rect>
-              <rect x="92" width="23" height="23" fill="#D9D9D9"></rect>
-              <rect x="92" y="69" width="23" height="23" fill="#AEAEAE"></rect>
-              <rect x="115" y="46" width="23" height="23" fill="white"></rect>
-              <rect x="115" y="115" width="23" height="23" fill="white"></rect>
-              <rect x="115" y="69" width="23" height="23" fill="#D9D9D9"></rect>
-              <rect x="92" y="46" width="23" height="23" fill="#AEAEAE"></rect>
-              <rect x="92" y="115" width="23" height="23" fill="#AEAEAE"></rect>
-              <rect x="92" y="69" width="23" height="23" fill="white"></rect>
-              <rect x="69" y="46" width="23" height="23" fill="white"></rect>
-              <rect x="69" y="115" width="23" height="23" fill="white"></rect>
-              <rect x="69" y="69" width="23" height="23" fill="#D9D9D9"></rect>
-              <rect x="46" y="46" width="23" height="23" fill="black"></rect>
-              <rect x="46" y="115" width="23" height="23" fill="black"></rect>
-              <rect x="46" y="69" width="23" height="23" fill="black"></rect>
-              <rect x="23" y="46" width="23" height="23" fill="#D9D9D9"></rect>
-              <rect x="23" y="115" width="23" height="23" fill="#AEAEAE"></rect>
-              <rect x="23" y="69" width="23" height="23" fill="black"></rect>
-            </svg>
         <h1 style="font-weight: 900; margin-bottom: 7px;">
           Audio Transcription using OpenAI Whisper
         </h1>
-        </div>
         <p style="margin-bottom: 10px; font-size: 94%">
           Whisper is a general-purpose speech recognition model.
           Simple wrapping to be used as an API.
         </p>
-      </div>
         """
     )
     with gr.Group():
         with gr.Box():
+            sz = gr.Dropdown(label="Model Size", choices=[
+                'base', 'small', 'medium', 'large'], value='base')
+
             with gr.Row().style(mobile_collapse=False, equal_height=True):
                 audio = gr.Audio(
                     label="Input Audio",
@@ -180,11 +144,8 @@ with block:
 
     btn.click(inference, inputs=[audio], outputs=[
         text], api_name="transcription")
+
+    sz.change(change_model, inputs=[sz], outputs=[])
 
-    gr.HTML('''
-        <div class="footer">
-        </p>
-        </div>
-    ''')
 
 block.launch()
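
As committed, two parts of the new app.py will fail at runtime: change_model assigns model, model_en, and current_size as function locals, so the size == current_size check raises UnboundLocalError and a swap would never persist; and transcribe() returns a plain dict, so result.result raises AttributeError (the segments live under result["segments"], as in the removed decode path). Below is a minimal corrected sketch of the two functions, assuming the upstream openai/whisper API; it also aligns the preloaded models with the dropdown's 'base' default, which the committed code declares while actually loading "small". Whisper ships no large.en checkpoint, so the f"{size}.en" load would still fail for the 'large' choice.

import whisper

# Preload the default pair; 'base' matches the gr.Dropdown default.
model = whisper.load_model("base")
model_en = whisper.load_model("base.en")
current_size = "base"


def change_model(size):
    # `global` is required: without it these assignments create locals
    # and the module-level models are never replaced.
    global model, model_en, current_size
    if size == current_size:
        return
    model = whisper.load_model(size)
    model_en = whisper.load_model(f"{size}.en")  # no "large.en" exists
    current_size = size


def inference(audio_file):
    # Detect the language on a 30-second log-Mel spectrogram.
    audio = whisper.load_audio(audio_file)
    audio = whisper.pad_or_trim(audio)
    mel = whisper.log_mel_spectrogram(audio).to(model.device)
    _, probs = model.detect_language(mel)
    _model = model_en if max(probs, key=probs.get) == "en" else model

    # transcribe() re-reads the file and returns a dict with a
    # "segments" list; there is no .result attribute.
    result = _model.transcribe(audio_file)
    return "\n".join(
        f'{seg["start"]:.4f} - {seg["end"]:.4f}: {seg["text"]}'
        for seg in result["segments"]
    )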
requirements.txt ADDED
@@ -0,0 +1 @@
+git+https://github.com/openai/whisper.git
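
Because btn.click registers api_name="transcription", the Space doubles as an API endpoint, matching the "Simple wrapping to be used as an API" blurb in the page HTML. A sketch of a programmatic call using the gradio_client package; the Space id below is a placeholder, and depending on the client version the audio argument may need to be wrapped with gradio_client.handle_file.

from gradio_client import Client

# Placeholder Space id; substitute the real "user/space-name".
client = Client("user/whisper-transcription")

# The endpoint name mirrors api_name="transcription" in app.py.
text = client.predict("sample.wav", api_name="/transcription")
print(text)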