Spaces:

jkang
/

espnet2_asr_librispeech_100h

Runtime error

App Files Files Community

jaekookang committed on Feb 16, 2022

Commit

49041a5

1 Parent(s): a07e2df

first upload

Browse files

Files changed (6) hide show

.gitignore +8 -0
examples/gentleman.wav +0 -0
examples/jaekoo_numbers.wav +0 -0
examples/maybe_next_time.wav +0 -0
examples/old_oily_rag.wav +0 -0
gradio_asr_en_libri100.py +82 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,8 @@

+*~
+__pycache__
+*.log
+*.db
+*.nohup
+.vscode
+keyble_ssl/*
+README_github.md

examples/gentleman.wav ADDED Viewed

Binary file (153 kB). View file

examples/jaekoo_numbers.wav ADDED Viewed

Binary file (218 kB). View file

examples/maybe_next_time.wav ADDED Viewed

Binary file (25.7 kB). View file

examples/old_oily_rag.wav ADDED Viewed

Binary file (67.8 kB). View file

gradio_asr_en_libri100.py ADDED Viewed

	@@ -0,0 +1,82 @@

+'''Librispeech 100h English ASR demo
+@ML2
+2022-02-11
+'''
+import os
+from glob import glob
+from loguru import logger
+import soundfile as sf
+import gradio as gr
+from espnet_model_zoo.downloader import ModelDownloader
+from espnet2.bin.asr_inference import Speech2Text
+# ---------- Settings ----------
+GPU_ID = '-1'
+os.environ['CUDA_VISIBLE_DEVICES'] = GPU_ID
+DEVICE = 'cuda' if GPU_ID != '-1' else 'cpu'
+SERVER_PORT = 42208
+SERVER_NAME = "0.0.0.0"
+SSL_DIR = './keyble_ssl'
+MODEL_DIR = '/home/jkang/HDD4T/jkang/huggingface'
+EXAMPLE_DIR = './examples'
+examples = sorted(glob(os.path.join(EXAMPLE_DIR, '*.wav')))
+# ---------- Logging ----------
+logger.add('app.log', mode='a')
+logger.info('============================= App restarted =============================')
+# ---------- Model ----------
+logger.info('download model')
+d = ModelDownloader(MODEL_DIR)
+out = d.download_and_unpack("jkang/espnet2_librispeech_100_conformer")
+logger.info('model downloaded')
+model = Speech2Text.from_pretrained(
+    asr_train_config=out['asr_train_config'],
+    asr_model_file=out['asr_model_file']
+)
+logger.info('model loaded')
+def predict(wav_file):
+    logger.info('wav file loaded')
+    speech, rate = sf.read(wav_file)
+    nbests = model(speech)
+    text, *_ = nbests[0]
+    logger.info('predicted')
+    return text
+iface = gr.Interface(
+    predict,
+    title='영어 음성인식 데모 (espnet libri100) -- 프로토타입',
+    description='영어 음성 파일을 업로드하면 텍스트 내용을 결과로 보여줍니다.',
+    inputs=[
+        gr.inputs.Audio(label='영어 음성', source='upload', type='filepath')
+    ],
+    outputs=[
+        gr.outputs.Textbox(label='음성 인식 디코딩결과'),
+    ],
+    examples=examples,
+    article='<p style="text-align:center">i-Scream AI</p>',
+)
+if __name__ == '__main__':
+    try:
+        iface.launch(debug=True,
+                     server_name=SERVER_NAME,
+                     server_port=SERVER_PORT,
+                     enable_queue=True,
+                    #  ssl_keyfile=SSL_DIR,
+                    #  ssl_certfile=SSL_DIR
+                     )
+    except KeyboardInterrupt as e:
+        print(e)
+    finally:
+        iface.close()