csukuangfj committed on
Commit
b30f90b
1 Parent(s): f968199

add moonshine

Browse files
Files changed (2) hide show
  1. model.py +55 -0
  2. requirements.txt +2 -2
model.py CHANGED
@@ -363,6 +363,59 @@ def _get_multi_zh_hans_pre_trained_model(repo_id):
363
  return recognizer
364
 
365
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
366
  def _get_english_model(repo_id: str) -> sherpa_onnx.OfflineRecognizer:
367
  assert (
368
  repo_id
@@ -564,6 +617,8 @@ chinese_models = {
564
 
565
  english_models = {
566
  "whisper-tiny.en": _get_whisper_model,
 
 
567
  "whisper-base.en": _get_whisper_model,
568
  "whisper-small.en": _get_whisper_model,
569
  "whisper-distil-small.en": _get_whisper_model,
 
363
  return recognizer
364
 
365
 
366
@lru_cache(maxsize=10)
def _get_moonshine_model(repo_id: str) -> sherpa_onnx.OfflineRecognizer:
    """Create an offline recognizer for one of the Moonshine models.

    Args:
      repo_id:
        Short model name. Must be "moonshine-tiny" or "moonshine-base".

    Returns:
      The recognizer built by ``sherpa_onnx.OfflineRecognizer.from_moonshine``.
      The function is wrapped in ``lru_cache``, so calling it again with the
      same ``repo_id`` returns the previously built recognizer.

    Raises:
      ValueError: If ``repo_id`` is not one of the supported short names.
    """
    # Single source of truth: short model name -> repository holding the files.
    full_repo_ids = {
        "moonshine-tiny": "csukuangfj/sherpa-onnx-moonshine-tiny-en-int8",
        "moonshine-base": "csukuangfj/sherpa-onnx-moonshine-base-en-int8",
    }

    # Validate with an explicit raise rather than ``assert``: assertions are
    # removed when Python runs with -O, and this keeps one error path.
    if repo_id not in full_repo_ids:
        raise ValueError(f"Unknown repo_id: {repo_id}")
    full_repo_id = full_repo_ids[repo_id]

    # The four model files are looked up in the same order as before:
    # preprocessor, encoder, uncached decoder, cached decoder.
    preprocessor, encoder, uncached_decoder, cached_decoder = (
        _get_nn_model_filename(
            repo_id=full_repo_id,
            filename=filename,
            subfolder=".",
        )
        for filename in (
            "preprocess.onnx",
            "encode.int8.onnx",
            "uncached_decode.int8.onnx",
            "cached_decode.int8.onnx",
        )
    )

    tokens = _get_token_filename(
        repo_id=full_repo_id,
        subfolder=".",
        filename="tokens.txt",
    )

    return sherpa_onnx.OfflineRecognizer.from_moonshine(
        preprocessor=preprocessor,
        encoder=encoder,
        uncached_decoder=uncached_decoder,
        cached_decoder=cached_decoder,
        tokens=tokens,
        num_threads=2,
    )
417
+
418
+
419
  def _get_english_model(repo_id: str) -> sherpa_onnx.OfflineRecognizer:
420
  assert (
421
  repo_id
 
617
 
618
  english_models = {
619
  "whisper-tiny.en": _get_whisper_model,
620
+ "moonshine-tiny": _get_moonshine_model,
621
+ "moonshine-base": _get_moonshine_model,
622
  "whisper-base.en": _get_whisper_model,
623
  "whisper-small.en": _get_whisper_model,
624
  "whisper-distil-small.en": _get_whisper_model,
requirements.txt CHANGED
@@ -1,4 +1,4 @@
1
 
2
- #sherpa-onnx>=1.9.21
3
  ffmpeg-python
4
- https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/cpu/1.10.28/sherpa_onnx-1.10.28-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
 
1
 
2
+ sherpa-onnx>=1.10.30
3
  ffmpeg-python
4
+ #https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/cpu/1.10.28/sherpa_onnx-1.10.28-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl