Spaces:
Runtime error
Runtime error
AkshatJain1402
commited on
Commit
•
5b0d6da
1
Parent(s):
f9714e1
first commit
Browse files- .gitignore +1 -0
- __pycache__/config.cpython-310.pyc +0 -0
- __pycache__/server.cpython-310.pyc +0 -0
- audio.wav +0 -0
- audio_cache/0177c590-2a31-492d-b837-934fa7e81022.wav +0 -0
- audio_cache/023cd2de-44d3-46d2-a25f-33ff62394cfb.wav +0 -0
- audio_cache/61132cdd-ebfa-45c6-a200-940ff4c71861.wav +0 -0
- audio_cache/ebf8d4cd-ab30-4ed7-ad7d-9d949456a54f.wav +0 -0
- config.py +19 -0
- dependency/audio_logs/english/00e6a918-780f-4781-a015-b91a326c64d9/transcript.txt +1 -0
- dependency/audio_logs/english/06308abc-142a-444c-863c-46a84a6adc0c/transcript.txt +1 -0
- dependency/audio_logs/english/153dadc1-1ee7-4b46-b76f-62a415aa1dc8/audio.wav +0 -0
- dependency/audio_logs/english/153dadc1-1ee7-4b46-b76f-62a415aa1dc8/transcript.txt +1 -0
- dependency/audio_logs/english/2c6e8916-012f-44d3-8e0a-cea7f40ff8f5/transcript.txt +1 -0
- dependency/audio_logs/english/2fd17592-cc9e-433b-83a5-8e64fcd98715/transcript.txt +1 -0
- dependency/audio_logs/english/3cf9bb5f-51f8-4d95-ac99-f11920fc6c9d/transcript.txt +1 -0
- dependency/audio_logs/english/41751b79-03c7-4dea-8f2e-4aa6da74d647/transcript.txt +1 -0
- dependency/audio_logs/english/495c7c6c-a18d-48f5-a597-16389c1d366c/transcript.txt +1 -0
- dependency/audio_logs/english/4ee9a133-4b38-424b-9561-5c870c723d50/transcript.txt +1 -0
- dependency/audio_logs/english/54645247-f260-42f9-b475-d890bd0f5e00/transcript.txt +1 -0
- dependency/audio_logs/english/61ca0ac4-72f7-4a5c-b82a-02327710d4ce/transcript.txt +1 -0
- dependency/audio_logs/english/716694d5-64ff-42ac-90dd-b259e40d2ec2/transcript.txt +1 -0
- dependency/audio_logs/english/95396c57-c169-4f8d-bd8b-124f4224676a/transcript.txt +1 -0
- dependency/audio_logs/english/DemoTranscript.txt +1 -0
- dependency/audio_logs/english/be445b40-f504-4ffe-9b36-0da83e312f15/transcript.txt +1 -0
- dependency/audio_logs/english/c75ea509-a258-423d-845b-4daf922d38ba/transcript.txt +1 -0
- dependency/audio_logs/english/d1bb4f14-e186-40bd-96bb-a3347f6179d1/transcript.txt +1 -0
- dependency/audio_logs/english/dc6cc1a6-c3da-4fc1-a79d-1482acf8f993/transcript.txt +1 -0
- dependency/audio_logs/english/e03026bb-412b-48ac-ba17-52402ee25a4e/transcript.txt +1 -0
- dependency/audio_logs/english/f2aed9e2-e2a7-4aae-b4a8-24d7b2487704/transcript.txt +1 -0
- dependency/audio_logs/english/f3264a23-1bba-44e6-a03c-8ce3515a040f/transcript.txt +1 -0
- dependency/audio_logs/english/fa344aa0-9a59-48b0-b360-fe95280bdc59/transcript.txt +1 -0
- dependency/audio_logs/hindi/07e6146b-50f2-4f80-b88d-b4aa84e2458c/audio.wav +0 -0
- dependency/audio_logs/hindi/4bdc92e1-7c6d-4e96-a513-e7afb95ed962/audio.wav +0 -0
- dependency/audio_logs/hindi/544fc9bc-f092-4eee-85dd-d390cfa56c52/audio.wav +0 -0
- dependency/audio_logs/hindi/5cc27cdc-b839-41f1-ab7e-fbc0f5b1112d/audio.wav +0 -0
- dependency/audio_logs/hindi/ca4f49d5-f622-4945-a54e-de53be8f1d51/audio.wav +0 -0
- dependency/audio_logs/hindi/fc2c7d9d-d9dd-4d44-87c3-f811448cb43e/audio.wav +0 -0
- dependency/audio_logs/hindi/fe00b82d-c412-4537-b81e-1ff45e21b042/transcript.txt +1 -0
- recorded_audio.wav +0 -0
- requirements.txt +204 -0
- server.py +133 -0
- streamlit_app.py +95 -0
- streamlit_audio_recorder_app.py +44 -0
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
WSL
|
__pycache__/config.cpython-310.pyc
ADDED
Binary file (365 Bytes). View file
|
|
__pycache__/server.cpython-310.pyc
ADDED
Binary file (3.4 kB). View file
|
|
audio.wav
ADDED
Binary file (238 kB). View file
|
|
audio_cache/0177c590-2a31-492d-b837-934fa7e81022.wav
ADDED
Binary file (165 kB). View file
|
|
audio_cache/023cd2de-44d3-46d2-a25f-33ff62394cfb.wav
ADDED
Binary file (152 kB). View file
|
|
audio_cache/61132cdd-ebfa-45c6-a200-940ff4c71861.wav
ADDED
Binary file (165 kB). View file
|
|
audio_cache/ebf8d4cd-ab30-4ed7-ad7d-9d949456a54f.wav
ADDED
Binary file (163 kB). View file
|
|
config.py
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Author: Tanmay Jain
|
3 |
+
Email: csetanmayjain@gmail.com
|
4 |
+
"""
|
5 |
+
import os
|
6 |
+
import subprocess
|
7 |
+
|
8 |
+
|
9 |
+
# Root folder that receives one sub-folder per language and per request.
# Derived from this file's location rather than a developer-specific absolute
# path, so the application also starts on machines other than the author's.
# NOTE(review): assumes config.py lives next to the "dependency" folder, as
# the committed file list suggests -- confirm.
upload_files_path = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), "dependency", "audio_logs"
)

# Directories holding the Hindi / English acoustic-model checkpoints.
hi_am_model_path = "./hi_am_model"
en_am_model_path = "./en_am_model"
|
14 |
+
|
15 |
+
|
16 |
+
|
17 |
+
# upload_files_path = '/home/tanmay/zb/docker_files_donot_delete/v1/audio_logs'
|
18 |
+
# hi_am_model_path = "/home/tanmay/zb/Conformer-CTC-BPE-Large.nemo"
|
19 |
+
# en_am_model_path = "/home/tanmay/zb/Conformer-CTC-BPE-Large.nemo"
|
dependency/audio_logs/english/00e6a918-780f-4781-a015-b91a326c64d9/transcript.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
well i dont wish to see it anymre observed f b turning away her eyes it is certainly very like old portrad
|
dependency/audio_logs/english/06308abc-142a-444c-863c-46a84a6adc0c/transcript.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
well i dont wish to see it anymre observed f b turning away her eyes it is certainly very like old portrad
|
dependency/audio_logs/english/153dadc1-1ee7-4b46-b76f-62a415aa1dc8/audio.wav
ADDED
Binary file (238 kB). View file
|
|
dependency/audio_logs/english/153dadc1-1ee7-4b46-b76f-62a415aa1dc8/transcript.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
well i dont wish to see it anymre observed f b turning away her eyes it is certainly very like old portrad
|
dependency/audio_logs/english/2c6e8916-012f-44d3-8e0a-cea7f40ff8f5/transcript.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
well i dont wish to see it anymre observed f b turning away her eyes it is certainly very like old portrad
|
dependency/audio_logs/english/2fd17592-cc9e-433b-83a5-8e64fcd98715/transcript.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
well i dont wish to see it anymre observed f b turning away her eyes it is certainly very like old portrad
|
dependency/audio_logs/english/3cf9bb5f-51f8-4d95-ac99-f11920fc6c9d/transcript.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
well i dont wish to see it anymre observed f b turning away her eyes it is certainly very like old portrad
|
dependency/audio_logs/english/41751b79-03c7-4dea-8f2e-4aa6da74d647/transcript.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
well i dont wish to see it anymre observed f b turning away her eyes it is certainly very like old portrad
|
dependency/audio_logs/english/495c7c6c-a18d-48f5-a597-16389c1d366c/transcript.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
well i dont wish to see it anymre observed f b turning away her eyes it is certainly very like old portrad
|
dependency/audio_logs/english/4ee9a133-4b38-424b-9561-5c870c723d50/transcript.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
well i dont wish to see it anymre observed f b turning away her eyes it is certainly very like old portrad
|
dependency/audio_logs/english/54645247-f260-42f9-b475-d890bd0f5e00/transcript.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
well i dont wish to see it anymre observed f b turning away her eyes it is certainly very like old portrad
|
dependency/audio_logs/english/61ca0ac4-72f7-4a5c-b82a-02327710d4ce/transcript.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
well i dont wish to see it anymre observed f b turning away her eyes it is certainly very like old portrad
|
dependency/audio_logs/english/716694d5-64ff-42ac-90dd-b259e40d2ec2/transcript.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
well i dont wish to see it anymre observed f b turning away her eyes it is certainly very like old portrad
|
dependency/audio_logs/english/95396c57-c169-4f8d-bd8b-124f4224676a/transcript.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
well i dont wish to see it anymre observed f b turning away her eyes it is certainly very like old portrad
|
dependency/audio_logs/english/DemoTranscript.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
well i dont wish to see it anymre observed f b turning away her eyes it is certainly very like old portrad
|
dependency/audio_logs/english/be445b40-f504-4ffe-9b36-0da83e312f15/transcript.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
well i dont wish to see it anymre observed f b turning away her eyes it is certainly very like old portrad
|
dependency/audio_logs/english/c75ea509-a258-423d-845b-4daf922d38ba/transcript.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
well i dont wish to see it anymre observed f b turning away her eyes it is certainly very like old portrad
|
dependency/audio_logs/english/d1bb4f14-e186-40bd-96bb-a3347f6179d1/transcript.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
well i dont wish to see it anymre observed f b turning away her eyes it is certainly very like old portrad
|
dependency/audio_logs/english/dc6cc1a6-c3da-4fc1-a79d-1482acf8f993/transcript.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
well i dont wish to see it anymre observed f b turning away her eyes it is certainly very like old portrad
|
dependency/audio_logs/english/e03026bb-412b-48ac-ba17-52402ee25a4e/transcript.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
well i dont wish to see it anymre observed f b turning away her eyes it is certainly very like old portrad
|
dependency/audio_logs/english/f2aed9e2-e2a7-4aae-b4a8-24d7b2487704/transcript.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
well i dont wish to see it anymre observed f b turning away her eyes it is certainly very like old portrad
|
dependency/audio_logs/english/f3264a23-1bba-44e6-a03c-8ce3515a040f/transcript.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
well i dont wish to see it anymre observed f b turning away her eyes it is certainly very like old portrad
|
dependency/audio_logs/english/fa344aa0-9a59-48b0-b360-fe95280bdc59/transcript.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
well i dont wish to see it anymre observed f b turning away her eyes it is certainly very like old portrad
|
dependency/audio_logs/hindi/07e6146b-50f2-4f80-b88d-b4aa84e2458c/audio.wav
ADDED
Binary file (238 kB). View file
|
|
dependency/audio_logs/hindi/4bdc92e1-7c6d-4e96-a513-e7afb95ed962/audio.wav
ADDED
Binary file (238 kB). View file
|
|
dependency/audio_logs/hindi/544fc9bc-f092-4eee-85dd-d390cfa56c52/audio.wav
ADDED
Binary file (238 kB). View file
|
|
dependency/audio_logs/hindi/5cc27cdc-b839-41f1-ab7e-fbc0f5b1112d/audio.wav
ADDED
Binary file (238 kB). View file
|
|
dependency/audio_logs/hindi/ca4f49d5-f622-4945-a54e-de53be8f1d51/audio.wav
ADDED
Binary file (238 kB). View file
|
|
dependency/audio_logs/hindi/fc2c7d9d-d9dd-4d44-87c3-f811448cb43e/audio.wav
ADDED
Binary file (238 kB). View file
|
|
dependency/audio_logs/hindi/fe00b82d-c412-4537-b81e-1ff45e21b042/transcript.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
वेलेश सी एनीोर
|
recorded_audio.wav
ADDED
File without changes
|
requirements.txt
ADDED
@@ -0,0 +1,204 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
absl-py==2.1.0
|
2 |
+
aiohttp==3.9.3
|
3 |
+
aioice==0.9.0
|
4 |
+
aiortc==1.8.0
|
5 |
+
aiosignal==1.3.1
|
6 |
+
alembic==1.13.1
|
7 |
+
altair==5.2.0
|
8 |
+
annotated-types==0.6.0
|
9 |
+
antlr4-python3-runtime==4.9.3
|
10 |
+
asteroid-filterbanks==0.4.0
|
11 |
+
asttokens==2.4.1
|
12 |
+
async-timeout==4.0.3
|
13 |
+
attrs==23.2.0
|
14 |
+
audioread==3.0.1
|
15 |
+
av==11.0.0
|
16 |
+
blinker==1.7.0
|
17 |
+
braceexpand==0.1.7
|
18 |
+
cachetools==5.3.3
|
19 |
+
certifi==2024.2.2
|
20 |
+
cffi==1.16.0
|
21 |
+
charset-normalizer==3.3.2
|
22 |
+
click==8.1.7
|
23 |
+
colorama==0.4.6
|
24 |
+
colorlog==6.8.2
|
25 |
+
contourpy==1.2.0
|
26 |
+
cryptography==42.0.5
|
27 |
+
cycler==0.12.1
|
28 |
+
Cython
|
29 |
+
cytoolz==0.12.3
|
30 |
+
datasets==2.18.0
|
31 |
+
decorator==5.1.1
|
32 |
+
dill==0.3.8
|
33 |
+
dnspython==2.6.1
|
34 |
+
docopt==0.6.2
|
35 |
+
editdistance==0.8.1
|
36 |
+
einops==0.7.0
|
37 |
+
exceptiongroup==1.2.0
|
38 |
+
executing==2.0.1
|
39 |
+
filelock==3.13.1
|
40 |
+
Flask==3.0.2
|
41 |
+
Flask-Cors==4.0.0
|
42 |
+
fonttools==4.49.0
|
43 |
+
frozenlist==1.4.1
|
44 |
+
fsspec==2024.2.0
|
45 |
+
gitdb==4.0.11
|
46 |
+
GitPython==3.1.42
|
47 |
+
google-crc32c==1.5.0
|
48 |
+
greenlet==3.0.3
|
49 |
+
grpcio==1.62.1
|
50 |
+
huggingface-hub==0.21.4
|
51 |
+
hydra-core==1.3.2
|
52 |
+
HyperPyYAML==1.2.2
|
53 |
+
idna==3.6
|
54 |
+
ifaddr==0.2.0
|
55 |
+
inflect==7.0.0
|
56 |
+
intervaltree==3.1.0
|
57 |
+
ipython
|
58 |
+
itsdangerous==2.1.2
|
59 |
+
jedi==0.19.1
|
60 |
+
Jinja2==3.1.3
|
61 |
+
jiwer==3.0.3
|
62 |
+
joblib==1.3.2
|
63 |
+
jsonschema==4.21.1
|
64 |
+
jsonschema-specifications==2023.12.1
|
65 |
+
julius==0.2.7
|
66 |
+
kaldi-io==0.9.8
|
67 |
+
kiwisolver==1.4.5
|
68 |
+
lazy_loader==0.3
|
69 |
+
lhotse==1.22.0
|
70 |
+
librosa==0.10.1
|
71 |
+
lightning==2.2.1
|
72 |
+
lightning-utilities==0.10.1
|
73 |
+
lilcom==1.7
|
74 |
+
llvmlite==0.42.0
|
75 |
+
Mako==1.3.2
|
76 |
+
Markdown==3.5.2
|
77 |
+
markdown-it-py==3.0.0
|
78 |
+
MarkupSafe==2.1.5
|
79 |
+
marshmallow==3.21.1
|
80 |
+
matplotlib==3.8.3
|
81 |
+
matplotlib-inline==0.1.6
|
82 |
+
mdurl==0.1.2
|
83 |
+
mpmath==1.3.0
|
84 |
+
msgpack==1.0.8
|
85 |
+
multidict==6.0.5
|
86 |
+
multiprocess==0.70.16
|
87 |
+
nemo-asr==0.9.0
|
88 |
+
nemo_toolkit==1.23.0
|
89 |
+
networkx==3.2.1
|
90 |
+
num2words==0.5.13
|
91 |
+
numba==0.59.0
|
92 |
+
numpy==1.26.4
|
93 |
+
# nvidia-cublas-cu12==12.1.3.1
|
94 |
+
# nvidia-cuda-cupti-cu12==12.1.105
|
95 |
+
# nvidia-cuda-nvrtc-cu12==12.1.105
|
96 |
+
# nvidia-cuda-runtime-cu12==12.1.105
|
97 |
+
# nvidia-cudnn-cu12==8.9.2.26
|
98 |
+
# nvidia-cufft-cu12==11.0.2.54
|
99 |
+
# nvidia-curand-cu12==10.3.2.106
|
100 |
+
# nvidia-cusolver-cu12==11.4.5.107
|
101 |
+
# nvidia-cusparse-cu12==12.1.0.106
|
102 |
+
# nvidia-nccl-cu12==2.19.3
|
103 |
+
# nvidia-nvjitlink-cu12==12.4.99
|
104 |
+
# nvidia-nvtx-cu12==12.1.105
|
105 |
+
omegaconf==2.3.0
|
106 |
+
onnx==1.15.0
|
107 |
+
optuna==3.5.0
|
108 |
+
packaging==23.2
|
109 |
+
pandas==2.2.1
|
110 |
+
parso==0.8.3
|
111 |
+
pexpect==4.9.0
|
112 |
+
pillow==10.2.0
|
113 |
+
platformdirs==4.2.0
|
114 |
+
pooch==1.8.1
|
115 |
+
primePy==1.3
|
116 |
+
prompt-toolkit==3.0.43
|
117 |
+
protobuf==4.25.3
|
118 |
+
ptyprocess==0.7.0
|
119 |
+
pure-eval==0.2.2
|
120 |
+
pyannote.audio==3.1.1
|
121 |
+
pyannote.core==5.0.0
|
122 |
+
pyannote.database==5.0.1
|
123 |
+
pyannote.metrics==3.2.1
|
124 |
+
pyannote.pipeline==3.0.1
|
125 |
+
pyarrow==15.0.1
|
126 |
+
pyarrow-hotfix==0.6
|
127 |
+
pycparser==2.21
|
128 |
+
pydantic==2.6.3
|
129 |
+
pydantic_core==2.16.3
|
130 |
+
pydeck==0.8.1b0
|
131 |
+
pydub==0.25.1
|
132 |
+
pyee==11.1.0
|
133 |
+
Pygments==2.17.2
|
134 |
+
pylibsrtp==0.10.0
|
135 |
+
pyOpenSSL==24.1.0
|
136 |
+
pyparsing==3.1.2
|
137 |
+
python-dateutil==2.9.0.post0
|
138 |
+
pytorch-lightning==2.2.1
|
139 |
+
pytorch-metric-learning==2.4.1
|
140 |
+
pytz==2024.1
|
141 |
+
PyYAML==6.0.1
|
142 |
+
rapidfuzz==3.6.2
|
143 |
+
referencing==0.33.0
|
144 |
+
regex==2023.12.25
|
145 |
+
requests==2.31.0
|
146 |
+
rich==13.7.1
|
147 |
+
rpds-py==0.18.0
|
148 |
+
ruamel.yaml==0.18.6
|
149 |
+
ruamel.yaml.clib==0.2.8
|
150 |
+
safetensors==0.4.2
|
151 |
+
scikit-learn==1.4.1.post1
|
152 |
+
scipy==1.12.0
|
153 |
+
semver==3.0.2
|
154 |
+
sentencepiece==0.2.0
|
155 |
+
shellingham==1.5.4
|
156 |
+
six==1.16.0
|
157 |
+
smmap==5.0.1
|
158 |
+
sortedcontainers==2.4.0
|
159 |
+
sounddevice==0.4.6
|
160 |
+
soundfile==0.12.1
|
161 |
+
sox==1.4.1
|
162 |
+
soxr==0.3.7
|
163 |
+
speechbrain==1.0.0
|
164 |
+
SQLAlchemy==2.0.28
|
165 |
+
stack-data==0.6.3
|
166 |
+
streamlit==1.32.1
|
167 |
+
streamlit-audiorec==0.1.3
|
168 |
+
streamlit-webrtc==0.47.6
|
169 |
+
sympy==1.12
|
170 |
+
tabulate==0.9.0
|
171 |
+
tenacity==8.2.3
|
172 |
+
tensorboard==2.16.2
|
173 |
+
tensorboard-data-server==0.7.2
|
174 |
+
tensorboardX==2.6.2.2
|
175 |
+
text-unidecode==1.3
|
176 |
+
threadpoolctl==3.3.0
|
177 |
+
tokenizers==0.15.2
|
178 |
+
toml==0.10.2
|
179 |
+
toolz==0.12.1
|
180 |
+
# torch==2.2.1
|
181 |
+
# torch-audiomentations==0.11.1
|
182 |
+
# torch-pitch-shift==1.2.4
|
183 |
+
# torch-stft==0.1.4
|
184 |
+
# torchaudio==2.2.1
|
185 |
+
# torchmetrics==1.3.1
|
186 |
+
tornado==6.4
|
187 |
+
tqdm==4.66.2
|
188 |
+
traitlets==5.14.1
|
189 |
+
transformers==4.38.2
|
190 |
+
triton==2.2.0
|
191 |
+
typer==0.9.0
|
192 |
+
typing_extensions==4.10.0
|
193 |
+
tzdata==2024.1
|
194 |
+
Unidecode==1.3.8
|
195 |
+
urllib3==2.2.1
|
196 |
+
watchdog==4.0.0
|
197 |
+
wcwidth==0.2.13
|
198 |
+
webdataset==0.2.86
|
199 |
+
Werkzeug==3.0.1
|
200 |
+
wget==3.2
|
201 |
+
wrapt==1.16.0
|
202 |
+
xxhash==3.4.1
|
203 |
+
yarl==1.9.4
|
204 |
+
youtokentome
|
server.py
ADDED
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Author: Tanmay Jain
|
3 |
+
Email: csetanmayjain@gmail.com
|
4 |
+
"""
|
5 |
+
|
6 |
+
import os
|
7 |
+
import threading
|
8 |
+
import uuid
|
9 |
+
|
10 |
+
from flask import Flask, jsonify, request, send_file
|
11 |
+
from flask_cors import CORS
|
12 |
+
import nemo.collections.asr as nemo_asr
|
13 |
+
import subprocess
|
14 |
+
import config
|
15 |
+
|
16 |
+
|
17 |
+
hi_asr_model = None
|
18 |
+
en_asr_model = None
|
19 |
+
en_upload_files_path=None
|
20 |
+
hi_upload_files_path=None
|
21 |
+
def initialize_path():
    """Publish the configured paths as module globals and create the upload folders.

    Copies the two model directories from ``config`` and points the Hindi and
    English upload globals at the "hindi" / "english" sub-folders of
    ``config.upload_files_path``; both sub-folders are created when missing.
    """
    global hi_upload_files_path, en_upload_files_path, hi_am_model_path, en_am_model_path

    hi_am_model_path = config.hi_am_model_path
    en_am_model_path = config.en_am_model_path

    language_dirs = [
        os.path.join(config.upload_files_path, folder_name)
        for folder_name in ("hindi", "english")
    ]
    hi_upload_files_path, en_upload_files_path = language_dirs

    for language_dir in language_dirs:
        os.makedirs(language_dir, exist_ok=True)
|
32 |
+
|
33 |
+
|
34 |
+
def load_model(lang):
    """Load the ASR model for ``lang`` into its module-level global, once.

    Nothing happens when ``lang`` is neither "hi" nor "en", or when the
    requested model has already been loaded.  A missing checkpoint file is
    first downloaded with ``wget`` into the configured model directory.

    Relies on ``hi_am_model_path`` / ``en_am_model_path`` having been set by
    ``initialize_path()``.

    Args:
        lang: language code, "hi" (Hindi) or "en" (English).

    Raises:
        RuntimeError: if the ``wget`` download exits with a non-zero status
            (``RuntimeError`` is a subclass of the ``Exception`` raised before).
    """
    global hi_asr_model, en_asr_model

    if lang == "hi" and hi_asr_model is None:
        print("Loading Hindi Model")
        hi_asr_model = _download_and_restore(
            hi_am_model_path,
            "https://storage.googleapis.com/vakyansh-open-models/conformer_models/hindi/filtered_v1_ssl_2022-07-08_19-43-25/Conformer-CTC-BPE-Large.nemo",
        )
    if lang == "en" and en_asr_model is None:
        print("Loading English Model")
        en_asr_model = _download_and_restore(
            en_am_model_path,
            "https://storage.googleapis.com/vakyansh-open-models/conformer_models/english/2022-09-13_15-50-48/Conformer-CTC-BPE-Large.nemo",
        )


def _download_and_restore(model_dir, url):
    """Return the model restored from ``model_dir``, downloading ``url`` there first if needed."""
    checkpoint_path = os.path.join(model_dir, os.path.basename(url))
    # Look for the checkpoint file itself: the directory may exist without it,
    # in which case skipping the download would make the restore fail.
    if not os.path.exists(checkpoint_path):
        print(f"{checkpoint_path} not found, downloading")
        download = subprocess.run(
            ["wget", "-P", model_dir, url], capture_output=True, text=True
        )
        # Report success only after the exit status has been checked.
        if download.returncode != 0:
            raise RuntimeError(f"wget download failed: {download.stderr}")
        print("done Downloading")
    return nemo_asr.models.EncDecCTCModelBPE.restore_from(checkpoint_path)
|
66 |
+
|
67 |
+
|
68 |
+
def transcribe(audio_file_path, transcription_file_path, lang, logprobs=False):
    """Transcribe one audio file and write the text to ``transcription_file_path``.

    Args:
        audio_file_path: path of the audio file handed to the ASR model.
        transcription_file_path: file that receives the transcription text.
        lang: language code, "hi" or "en"; selects which model is used.
        logprobs: forwarded unchanged to the model's ``transcribe`` call.

    Raises:
        ValueError: if ``lang`` is not "hi" or "en".
    """
    if lang not in ("hi", "en"):
        # Fail before any file is created instead of hitting an unbound
        # ``transcription`` variable further down.
        raise ValueError(f"Unsupported language code: {lang!r}")

    # The model globals start out as None; load_model only does work the
    # first time a language is requested.
    load_model(lang)
    asr_model = hi_asr_model if lang == "hi" else en_asr_model
    transcription = asr_model.transcribe([audio_file_path], logprobs=logprobs)[0]

    # Explicit UTF-8 so Hindi text is written the same way on every platform;
    # the context manager closes the file even if the write fails.
    with open(transcription_file_path, "w", encoding="utf-8") as transcript_file:
        transcript_file.write(transcription)
|
79 |
+
|
80 |
+
def asr(audio_file_path,lang):
    """Transcribe ``audio_file_path`` into a new per-request folder.

    The upload folders are (re)initialised first.  For a valid language a
    random UUID names a new folder below that language's upload folder, the
    transcript is written there as ``transcript.txt`` and the UUID string is
    returned.

    Args:
        audio_file_path: path of the audio file to transcribe.
        lang: language code, "hi" or "en".

    Returns:
        The generated client id, or a ``(jsonify(...), 400)`` pair when the
        language code is missing or unsupported.
    """
    initialize_path()

    # NOTE(review): jsonify normally requires an active Flask application
    # context -- confirm these error paths are only reached inside one.
    if lang is None:
        return jsonify({'error': 'no language code provided'}), 400
    if lang not in ("hi", "en"):
        return jsonify({'error': 'Invalid language code'}), 400

    client_id = str(uuid.uuid4())
    language_root = hi_upload_files_path if lang == "hi" else en_upload_files_path
    client_id_dir = os.path.join(language_root, client_id)
    transcription_file_path = os.path.join(client_id_dir, "transcript.txt")

    os.makedirs(client_id_dir)
    transcribe(audio_file_path, transcription_file_path, lang)
    return client_id
|
102 |
+
|
103 |
+
def get_transcription(client_id,lang):
    """Return the path of a finished transcript, or a status message.

    Args:
        client_id: name of the per-request folder to look up.
        lang: language code, "hi" or "en".

    Returns:
        The path of ``transcript.txt`` inside the request folder when that
        file exists.  Otherwise one of the strings 'No Client ID Provided',
        'no language code provided', 'Invalid language code',
        "Invalid Client ID" or 'your transcription file is not ready'.
    """
    if client_id is None:
        return 'No Client ID Provided'
    if lang is None:
        return 'no language code provided'
    if lang not in ("hi", "en"):
        return 'Invalid language code'

    # The id becomes a folder name and may come straight from user input:
    # accept a single path component only, so values such as ".." or "a/b"
    # cannot reach files outside the language folder.
    if client_id in ("", ".", "..") or os.path.basename(client_id) != client_id:
        return "Invalid Client ID"

    # NOTE(review): asr() stores transcripts below config.upload_files_path,
    # while this lookup uses a path relative to the working directory --
    # confirm both resolve to the same folder in deployment.
    if lang == "hi":
        print('looking in hindi file path')
        client_dir = os.path.join('dependency/audio_logs/hindi', client_id)
    else:
        print('looking in english file path')
        client_dir = os.path.join('dependency/audio_logs/english', client_id)

    transcript_file_path = os.path.join(client_dir,'transcript.txt')
    if not os.path.exists(client_dir):
        return "Invalid Client ID"
    if os.path.exists(transcript_file_path):
        print(transcript_file_path,"transcript file path")
        return transcript_file_path
    return 'your transcription file is not ready'
|
128 |
+
|
129 |
+
|
130 |
+
|
131 |
+
# if __name__ == '__main__':
|
132 |
+
# initialize_path()
|
133 |
+
# app.run(host='0.0.0.0', port=5000)
|
streamlit_app.py
ADDED
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#start
|
2 |
+
#to see changes in your local browser just run $ streamlit run streamlit_app.py
|
3 |
+
import streamlit as st
|
4 |
+
from streamlit_webrtc import webrtc_streamer, WebRtcMode, ClientSettings
|
5 |
+
import sounddevice as sd
|
6 |
+
import server
|
7 |
+
from st_audiorec import st_audiorec
|
8 |
+
import nemo.collections.asr as nemo_asr
|
9 |
+
from pydub import AudioSegment
|
10 |
+
import io
|
11 |
+
import os
|
12 |
+
import uuid
|
13 |
+
|
14 |
+
# Page heading.  ``language`` is assigned at module level further down, so no
# ``global`` declaration is needed here (it has no effect at module scope).
st.title("Customer Care")
|
18 |
+
|
19 |
+
|
20 |
+
@st.cache_resource
def get_model():
    """Ensure the ``audio_cache`` folder exists and return the English ASR model.

    The model is restored from the ``en_am_model`` folder relative to the
    working directory.
    """
    try:
        os.makedirs("audio_cache", exist_ok=True)
    except OSError:
        # Still best effort, but only file-system errors are ignored now; a
        # bare ``except`` would also hide KeyboardInterrupt and SystemExit.
        pass

    return nemo_asr.models.EncDecCTCModelBPE.restore_from("en_am_model/Conformer-CTC-BPE-Large.nemo")
|
29 |
+
|
30 |
+
en_asr_model = get_model()

st.title("💬 Vocalize: Empower Your Voice ")

# Exactly one language selector: two identical select boxes without a ``key``
# share the same generated widget id, which Streamlit rejects.
language = st.selectbox(
    'Enter your preferred language',
    ('en', 'hi'))

"""
Hi record the audio, and get the transcription in real time!
Note: Works best for smaller audios"""

if language is None:
    st.error("Please enter a valid language.")

# --- Live recording: transcribed immediately with the English model ---------
wav_audio_data = st_audiorec()
if wav_audio_data:
    audio_location = "audio_cache/" + str(uuid.uuid4()) + ".wav"
    audio_file = io.BytesIO(wav_audio_data)
    audio = AudioSegment.from_file(audio_file)
    # Re-encode as 16-bit, mono, 16 kHz WAV before handing it to the model.
    audio = audio.set_sample_width(2)
    audio = audio.set_channels(1)
    audio = audio.set_frame_rate(16000)
    audio.export(audio_location, format="wav")

    text = en_asr_model.transcribe([audio_location], logprobs=False)[0]
    print(text)
    st.write(text)

# --- File upload: transcribed through server.asr, result fetched by id ------
st.header("Want to transcribe files larger than 2 minutes?")
uploaded_file = st.file_uploader("upload your recording ", disabled=False, label_visibility="visible")
if uploaded_file is not None:
    # Keep only the final path component so the uploaded name cannot point
    # outside the working directory.
    upload_path = os.path.basename(uploaded_file.name)
    with open(upload_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    isTranslate = st.button("Click me for translation")
    # Trigger transcription:
    if isTranslate and language is not None:
        client_id = server.asr(upload_path, language)
        st.write("your id for recieving the transcribed File", client_id)

st.header('Results')

client_id = st.text_input("Enter your id")
clicked = st.button("Click me for transcribed File")
if clicked:
    resp = server.get_transcription(str(client_id), language)
    print(resp, "this is the requested file path")
    if os.path.isfile(resp):
        with open(resp, "rb") as file:
            st.download_button(
                "Download transcription",
                file,
                file_name="transcription.txt"
            )
    else:
        # get_transcription returns a status message rather than a path when
        # there is nothing to download; show it instead of trying to open it.
        st.error(resp)
|
streamlit_audio_recorder_app.py
ADDED
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# pip install streamlit
|
2 |
+
# pip install audio-recorder-streamlit
|
3 |
+
|
4 |
+
import streamlit as st
|
5 |
+
from st_audiorec import st_audiorec
|
6 |
+
import nemo.collections.asr as nemo_asr
|
7 |
+
from pydub import AudioSegment
|
8 |
+
import io
|
9 |
+
import os
|
10 |
+
import uuid
|
11 |
+
|
12 |
+
@st.cache_resource
def get_model():
    """Ensure the ``audio_cache`` folder exists and return the English ASR model.

    The checkpoint is looked up in an ``en_am_model`` folder next to this
    script instead of under a developer-specific absolute path.
    """
    try:
        os.makedirs("audio_cache", exist_ok=True)
    except OSError:
        # Still best effort, but only file-system errors are ignored now; a
        # bare ``except`` would also hide KeyboardInterrupt and SystemExit.
        pass

    # NOTE(review): assumes this script sits in the folder that contains
    # "en_am_model" (the original path was /home/akshat/v1/en_am_model) -- confirm.
    model_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)),
        "en_am_model",
        "Conformer-CTC-BPE-Large.nemo",
    )
    return nemo_asr.models.EncDecCTCModelBPE.restore_from(model_path)
|
21 |
+
|
22 |
+
en_asr_model = get_model()

st.title("💬 Vocalize: Empower Your Voice ")

"""
Hi record the audio, and get the transcription in real time!
Note: Works best for smaller audios
"""

wav_audio_data = st_audiorec()

if wav_audio_data:
    # Every recording gets its own uniquely named file in the cache folder.
    audio_location = f"audio_cache/{uuid.uuid4()}.wav"

    # Re-encode the recording as 16-bit, mono, 16 kHz WAV for the model.
    audio = (
        AudioSegment.from_file(io.BytesIO(wav_audio_data))
        .set_sample_width(2)
        .set_channels(1)
        .set_frame_rate(16000)
    )
    audio.export(audio_location, format="wav")

    # One file in, one transcript out: take the first (only) result.
    transcripts = en_asr_model.transcribe([audio_location], logprobs=False)
    text = transcripts[0]
    print(text)
    st.write(text)
|