Upload 5 files
Browse files- README.md +13 -12
- app.py +53 -0
- packages.txt +1 -0
- pre-requirements.txt +1 -0
- requirements.txt +51 -0
README.md
CHANGED
@@ -1,12 +1,13 @@
|
|
1 |
-
---
|
2 |
-
title:
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
-
sdk: gradio
|
7 |
-
sdk_version:
|
8 |
-
app_file: app.py
|
9 |
-
pinned: false
|
10 |
-
|
11 |
-
|
12 |
-
|
|
|
|
1 |
+
---
|
2 |
+
title: test nemo
|
3 |
+
emoji: 🙄
|
4 |
+
colorFrom: indigo
|
5 |
+
colorTo: purple
|
6 |
+
sdk: gradio
|
7 |
+
sdk_version: 4.44.0
|
8 |
+
app_file: app.py
|
9 |
+
pinned: false
|
10 |
+
license: mit
|
11 |
+
---
|
12 |
+
|
13 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
ADDED
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os

# On Hugging Face ZeroGPU Spaces the real `spaces` package must wrap GPU
# functions; anywhere else we install a no-op stand-in so that `@spaces.GPU`
# below works unchanged in both environments.
if os.environ.get("SPACES_ZERO_GPU") is not None:
    import spaces
else:
    import functools

    class spaces:
        """Minimal stand-in exposing a pass-through ``GPU`` decorator."""

        @staticmethod
        def GPU(func):
            # functools.wraps preserves __name__/__doc__/__module__ of the
            # decorated function, which the original shim discarded; Gradio
            # uses the function name in its API surface.
            @functools.wraps(func)
            def wrapper(*args, **kwargs):
                return func(*args, **kwargs)
            return wrapper
import gradio as gr
import subprocess

# One-time source install of the AI4Bharat NeMo fork — kept for reference,
# normally disabled because requirements.txt installs it from git instead:
#subprocess.run("git clone https://github.com/AI4Bharat/NeMo.git && cd NeMo && git checkout nemo-v2 && bash reinstall.sh", shell=True)

import torch
import nemo.collections.asr as nemo_asr

from pathlib import Path

# Malayalam hybrid CTC/RNNT Conformer ASR model from AI4Bharat.
# NOTE(review): from_pretrained downloads the checkpoint from the HF Hub at
# process start — first launch needs network access and can take a while.
model = nemo_asr.models.ASRModel.from_pretrained("ai4bharat/indicconformer_stt_ml_hybrid_rnnt_large")

# Prefer GPU when available; on ZeroGPU the CUDA device only exists inside
# @spaces.GPU-decorated calls, so CPU is selected here at import time.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.freeze() # inference mode
model = model.to(device) # transfer model to device
@spaces.GPU
def infer(srcfile: str):
    """Transcribe an audio file with both decoder heads of the model.

    Parameters
    ----------
    srcfile : str
        Path to the uploaded/recorded audio file (any format ffmpeg reads).

    Returns
    -------
    tuple[str, str]
        ``(ctc_text, rnnt_text)`` — transcripts from the CTC and RNNT heads.

    Raises
    ------
    subprocess.CalledProcessError
        If ffmpeg fails to convert the input file.
    """
    tmpfile = "sample_audio_infer_ready.wav"
    try:
        # List-form argv with shell=False avoids shell injection through the
        # user-controlled file name (the original f-string + shell=True was
        # exploitable and broke on paths with spaces). `-y` overwrites a
        # stale temp file instead of letting ffmpeg block on a y/N prompt,
        # and check=True surfaces conversion failures instead of silently
        # transcribing a missing file. Model expects mono 16 kHz input.
        subprocess.run(
            ["ffmpeg", "-y", "-i", srcfile, "-ac", "1", "-ar", "16000", tmpfile],
            check=True,
        )

        model.cur_decoder = "ctc"
        ctc_text = model.transcribe([tmpfile], batch_size=1, logprobs=False, language_id='ml')[0]
        print(ctc_text)

        model.cur_decoder = "rnnt"
        rnnt_text = model.transcribe([tmpfile], batch_size=1, language_id='ml')[0]
        print(rnnt_text)
    finally:
        # Clean up the temp file even when conversion/transcription raises;
        # the original only deleted it on the success path.
        if Path(tmpfile).exists():
            Path(tmpfile).unlink()

    return ctc_text, rnnt_text
# --- Gradio UI ---------------------------------------------------------------
with gr.Blocks() as demo:
    # Accepts either an uploaded file or a microphone recording;
    # type="filepath" hands infer() a path on disk, re-encoded as WAV.
    input_audio = gr.Audio(label="Input", type="filepath", sources=["upload", "microphone"], format="wav")
    run_button = gr.Button("Run", variant="primary")
    with gr.Row():
        # Side-by-side transcripts from the two decoder heads.
        ctc_text = gr.Textbox(label="CTC", value="", show_copy_button=True)
        rnnt_text = gr.Textbox(label="RNNT", value="", show_copy_button=True)

    # Wire the button: one audio input -> two text outputs.
    run_button.click(infer, [input_audio], [ctc_text, rnnt_text])

demo.launch()
packages.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
ffmpeg
|
pre-requirements.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
pip>=24.1
|
requirements.txt
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
huggingface_hub===0.20.3
|
2 |
+
torch
|
3 |
+
git+https://github.com/AI4Bharat/NeMo
|
4 |
+
numba
|
5 |
+
numpy<2
|
6 |
+
onnx>=1.7.0
|
7 |
+
python-dateutil
|
8 |
+
ruamel.yaml
|
9 |
+
scikit-learn
|
10 |
+
setuptools>=65.5.1
|
11 |
+
tensorboard
|
12 |
+
text-unidecode
|
13 |
+
# torch  (duplicate entry — already listed above)
|
14 |
+
tqdm>=4.41.0
|
15 |
+
triton
|
16 |
+
wget
|
17 |
+
wrapt
|
18 |
+
datasets
|
19 |
+
inflect
|
20 |
+
pandas
|
21 |
+
sacremoses>=0.0.43
|
22 |
+
sentencepiece<1.0.0
|
23 |
+
braceexpand
|
24 |
+
editdistance
|
25 |
+
g2p_en
|
26 |
+
ipywidgets
|
27 |
+
jiwer
|
28 |
+
kaldi-python-io
|
29 |
+
kaldiio
|
30 |
+
lhotse>=1.20.0
|
31 |
+
librosa>=0.10.0
|
32 |
+
marshmallow
|
33 |
+
matplotlib
|
34 |
+
packaging
|
35 |
+
pyannote.core
|
36 |
+
pyannote.metrics
|
37 |
+
pydub
|
38 |
+
pyloudnorm
|
39 |
+
resampy
|
40 |
+
ruamel.yaml
|
41 |
+
scipy>=0.14
|
42 |
+
soundfile
|
43 |
+
sox
|
44 |
+
texterrors
|
45 |
+
hydra-core>1.3,<=1.3.2
|
46 |
+
omegaconf<=2.3
|
47 |
+
pytorch-lightning>=2.2.1
|
48 |
+
torchmetrics>=0.11.0
|
49 |
+
transformers>=4.36.0
|
50 |
+
wandb
|
51 |
+
webdataset>=0.2.86
|