csukuangfj committed on
Commit
27c18ec
1 Parent(s): f5b3bb8

add whisper

Browse files
Files changed (2) hide show
  1. app.py +0 -6
  2. model.py +37 -0
app.py CHANGED
@@ -19,7 +19,6 @@
19
  # References:
20
  # https://gradio.app/docs/#dropdown
21
 
22
- import base64
23
  import logging
24
  import os
25
  import tempfile
@@ -47,11 +46,6 @@ def convert_to_wav(in_filename: str) -> str:
47
  f"ffmpeg -hide_banner -loglevel error -i '{in_filename}' -ar 16000 '{out_filename}.flac'"
48
  )
49
 
50
- with open(out_filename + ".flac", "rb") as f:
51
- s = "\n" + out_filename + "\n"
52
- s += base64.b64encode(f.read()).decode()
53
- logging.info(s)
54
-
55
  return out_filename
56
 
57
 
 
19
  # References:
20
  # https://gradio.app/docs/#dropdown
21
 
 
22
  import logging
23
  import os
24
  import tempfile
 
46
  f"ffmpeg -hide_banner -loglevel error -i '{in_filename}' -ar 16000 '{out_filename}.flac'"
47
  )
48
 
 
 
 
 
 
49
  return out_filename
50
 
51
 
model.py CHANGED
@@ -269,6 +269,39 @@ def _get_aishell2_pretrained_model(
269
  return recognizer
270
 
271
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
272
  @lru_cache(maxsize=10)
273
  def _get_gigaspeech_pre_trained_model(
274
  repo_id: str,
@@ -839,6 +872,10 @@ chinese_models = {
839
  }
840
 
841
  english_models = {
 
 
 
 
842
  "wgb14/icefall-asr-gigaspeech-pruned-transducer-stateless2": _get_gigaspeech_pre_trained_model, # noqa
843
  "yfyeung/icefall-asr-multidataset-pruned_transducer_stateless7-2023-05-04": _get_english_model, # noqa
844
  "yfyeung/icefall-asr-finetune-mux-pruned_transducer_stateless7-2023-05-19": _get_english_model, # noqa
 
269
  return recognizer
270
 
271
 
272
+ @lru_cache(maxsize=10)
273
+ def _get_whisper_model(
274
+ repo_id: str, decoding_method: str, num_active_paths: int
275
+ ) -> sherpa_onnx.OfflineRecognizer:
276
+ assert repo_id in ("tiny.en", "base.en", "small.en", "medium.en"), repo_id
277
+ name = repo_id
278
+ full_repo_id = "csukuangfj/sherpa-onnx-whisper-" + name
279
+ encoder = _get_nn_model_filename(
280
+ repo_id=full_repo_id,
281
+ filename=f"{name}-encoder.int8.ort",
282
+ subfolder=".",
283
+ )
284
+
285
+ decoder = _get_nn_model_filename(
286
+ repo_id=full_repo_id,
287
+ filename=f"{name}-decoder.int8.ort",
288
+ subfolder=".",
289
+ )
290
+
291
+ tokens = _get_token_filename(
292
+ repo_id=full_repo_id, subfolder=".", filename=f"{name}-tokens.txt"
293
+ )
294
+
295
+ recognizer = sherpa_onnx.OfflineRecognizer.from_whisper(
296
+ encoder=encoder,
297
+ decoder=decoder,
298
+ tokens=tokens,
299
+ num_threads=2,
300
+ )
301
+
302
+ return recognizer
303
+
304
+
305
  @lru_cache(maxsize=10)
306
  def _get_gigaspeech_pre_trained_model(
307
  repo_id: str,
 
872
  }
873
 
874
  english_models = {
875
+ "whisper-tiny.en": _get_whisper_model,
876
+ "whisper-base.en": _get_whisper_model,
877
+ "whisper-small.en": _get_whisper_model,
878
+ "whisper-medium.en": _get_whisper_model,
879
  "wgb14/icefall-asr-gigaspeech-pruned-transducer-stateless2": _get_gigaspeech_pre_trained_model, # noqa
880
  "yfyeung/icefall-asr-multidataset-pruned_transducer_stateless7-2023-05-04": _get_english_model, # noqa
881
  "yfyeung/icefall-asr-finetune-mux-pruned_transducer_stateless7-2023-05-19": _get_english_model, # noqa