csukuangfj committed on
Commit
b26be81
1 Parent(s): bf79b4c

update examples

Browse files
Files changed (2) hide show
  1. app.py +10 -1
  2. model.py +41 -0
app.py CHANGED
@@ -62,7 +62,16 @@ examples = [
62
  "whisper-tiny.en",
63
  "President-Obama-on-the-Importance-of-Education.mp4",
64
  ],
65
- ["English", "whisper-tiny.en", "jobs-at-stanford.mp4"],
 
 
 
 
 
 
 
 
 
66
  ]
67
 
68
  for _, _, name in examples:
 
62
  "whisper-tiny.en",
63
  "President-Obama-on-the-Importance-of-Education.mp4",
64
  ],
65
+ [
66
+ "English",
67
+ "whisper-tiny.en",
68
+ "jobs-at-stanford.mp4",
69
+ ],
70
+ [
71
+ "English",
72
+ "yfyeung/icefall-asr-multidataset-pruned_transducer_stateless7-2023-05-04",
73
+ "obama's-message-for-america's-students.mp4",
74
+ ],
75
  ]
76
 
77
  for _, _, name in examples:
model.py CHANGED
@@ -243,6 +243,46 @@ def _get_wenetspeech_pre_trained_model(repo_id):
243
  return recognizer
244
 
245
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
246
  chinese_models = {
247
  "csukuangfj/sherpa-onnx-conformer-zh-stateless2-2023-05-23": _get_wenetspeech_pre_trained_model, # noqa
248
  }
@@ -251,6 +291,7 @@ english_models = {
251
  "whisper-tiny.en": _get_whisper_model,
252
  "whisper-base.en": _get_whisper_model,
253
  "whisper-small.en": _get_whisper_model,
 
254
  }
255
 
256
  chinese_english_mixed_models = {
 
243
  return recognizer
244
 
245
 
246
+ def _get_english_model(repo_id: str) -> sherpa_onnx.OfflineRecognizer:
247
+ assert (
248
+ repo_id
249
+ == "yfyeung/icefall-asr-multidataset-pruned_transducer_stateless7-2023-05-04"
250
+ ), repo_id
251
+
252
+ encoder_model = _get_nn_model_filename(
253
+ repo_id=repo_id,
254
+ filename="encoder-epoch-30-avg-4.onnx",
255
+ subfolder="exp",
256
+ )
257
+
258
+ decoder_model = _get_nn_model_filename(
259
+ repo_id=repo_id,
260
+ filename="decoder-epoch-30-avg-4.onnx",
261
+ subfolder="exp",
262
+ )
263
+
264
+ joiner_model = _get_nn_model_filename(
265
+ repo_id=repo_id,
266
+ filename="joiner-epoch-30-avg-4.onnx",
267
+ subfolder="exp",
268
+ )
269
+
270
+ tokens = _get_token_filename(repo_id=repo_id, subfolder="lang_bpe_500")
271
+
272
+ recognizer = sherpa_onnx.OfflineRecognizer.from_transducer(
273
+ tokens=tokens,
274
+ encoder=encoder_model,
275
+ decoder=decoder_model,
276
+ joiner=joiner_model,
277
+ num_threads=2,
278
+ sample_rate=16000,
279
+ feature_dim=80,
280
+ decoding_method="greedy_search",
281
+ )
282
+
283
+ return recognizer
284
+
285
+
286
  chinese_models = {
287
  "csukuangfj/sherpa-onnx-conformer-zh-stateless2-2023-05-23": _get_wenetspeech_pre_trained_model, # noqa
288
  }
 
291
  "whisper-tiny.en": _get_whisper_model,
292
  "whisper-base.en": _get_whisper_model,
293
  "whisper-small.en": _get_whisper_model,
294
+ "yfyeung/icefall-asr-multidataset-pruned_transducer_stateless7-2023-05-04": _get_english_model, # noqa
295
  }
296
 
297
  chinese_english_mixed_models = {