John6666 committed
Commit db5bcc3
1 Parent(s): 7d3a7e3

Upload 5 files

Files changed (5)
  1. README.md +13 -12
  2. app.py +53 -0
  3. packages.txt +1 -0
  4. pre-requirements.txt +1 -0
  5. requirements.txt +51 -0
README.md CHANGED
@@ -1,12 +1,13 @@
- ---
- title: Testnemoasr
- emoji: 💻
- colorFrom: gray
- colorTo: red
- sdk: gradio
- sdk_version: 5.5.0
- app_file: app.py
- pinned: false
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ ---
+ title: test nemo
+ emoji: 🙄
+ colorFrom: indigo
+ colorTo: purple
+ sdk: gradio
+ sdk_version: 4.44.0
+ app_file: app.py
+ pinned: false
+ license: mit
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,53 @@
+ import os
+ if os.environ.get("SPACES_ZERO_GPU") is not None:
+     import spaces
+ else:
+     class spaces:
+         @staticmethod
+         def GPU(func):
+             def wrapper(*args, **kwargs):
+                 return func(*args, **kwargs)
+             return wrapper
+ import gradio as gr
+ import subprocess
+
+ #subprocess.run("git clone https://github.com/AI4Bharat/NeMo.git && cd NeMo && git checkout nemo-v2 && bash reinstall.sh", shell=True)
+
+ import torch
+ import nemo.collections.asr as nemo_asr
+
+ from pathlib import Path
+
+ model = nemo_asr.models.ASRModel.from_pretrained("ai4bharat/indicconformer_stt_ml_hybrid_rnnt_large")
+
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ model.freeze() # inference mode
+ model = model.to(device) # transfer model to device
+
+ @spaces.GPU
+ def infer(srcfile: str):
+     tmpfile = "sample_audio_infer_ready.wav"
+
+     subprocess.run(f"ffmpeg -i {srcfile} -ac 1 -ar 16000 {tmpfile}", shell=True)
+     model.cur_decoder = "ctc"
+     ctc_text = model.transcribe([tmpfile], batch_size=1, logprobs=False, language_id='ml')[0]
+     print(ctc_text)
+
+     model.cur_decoder = "rnnt"
+     rnnt_text = model.transcribe([tmpfile], batch_size=1, language_id='ml')[0]
+     print(rnnt_text)
+
+     if Path(tmpfile).exists(): Path(tmpfile).unlink()
+
+     return ctc_text, rnnt_text
+
+ with gr.Blocks() as demo:
+     input_audio = gr.Audio(label="Input", type="filepath", sources=["upload", "microphone"], format="wav")
+     run_button = gr.Button("Run", variant="primary")
+     with gr.Row():
+         ctc_text = gr.Textbox(label="CTC", value="", show_copy_button=True)
+         rnnt_text = gr.Textbox(label="RNNT", value="", show_copy_button=True)
+
+     run_button.click(infer, [input_audio], [ctc_text, rnnt_text])
+
+ demo.launch()
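
For reference, the conversion step in `infer` interpolates the uploaded file path into a shell string. Below is a minimal sketch of the same 16 kHz mono ffmpeg conversion using an argument list instead of `shell=True`; it is not part of this commit, and the helper name `to_mono_16k` is hypothetical. Passing the arguments as a list avoids quoting problems when the Gradio temp path contains spaces, and `check=True` raises on a non-zero ffmpeg exit status instead of silently transcribing a stale temp file.

```py
import subprocess

def to_mono_16k(srcfile: str, tmpfile: str = "sample_audio_infer_ready.wav") -> str:
    # Argument-list invocation: no shell quoting needed for paths with spaces.
    # -y overwrites an existing temp file rather than prompting.
    subprocess.run(
        ["ffmpeg", "-y", "-i", srcfile, "-ac", "1", "-ar", "16000", tmpfile],
        check=True,
    )
    return tmpfile
```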
packages.txt ADDED
@@ -0,0 +1 @@
+ ffmpeg
pre-requirements.txt ADDED
@@ -0,0 +1 @@
+ pip>=24.1
requirements.txt ADDED
@@ -0,0 +1,51 @@
+ huggingface_hub===0.20.3
+ torch
+ git+https://github.com/AI4Bharat/NeMo
+ numba
+ numpy<2
+ onnx>=1.7.0
+ python-dateutil
+ ruamel.yaml
+ scikit-learn
+ setuptools>=65.5.1
+ tensorboard
+ text-unidecode
+ torch
+ tqdm>=4.41.0
+ triton
+ wget
+ wrapt
+ datasets
+ inflect
+ pandas
+ sacremoses>=0.0.43
+ sentencepiece<1.0.0
+ braceexpand
+ editdistance
+ g2p_en
+ ipywidgets
+ jiwer
+ kaldi-python-io
+ kaldiio
+ lhotse>=1.20.0
+ librosa>=0.10.0
+ marshmallow
+ matplotlib
+ packaging
+ pyannote.core
+ pyannote.metrics
+ pydub
+ pyloudnorm
+ resampy
+ ruamel.yaml
+ scipy>=0.14
+ soundfile
+ sox
+ texterrors
+ hydra-core>1.3,<=1.3.2
+ omegaconf<=2.3
+ pytorch-lightning>=2.2.1
+ torchmetrics>=0.11.0
+ transformers>=4.36.0
+ wandb
+ webdataset>=0.2.86