AkshatJain1402 committed on
Commit
5b0d6da
1 Parent(s): f9714e1

first commit

Browse files
Files changed (44) hide show
  1. .gitignore +1 -0
  2. __pycache__/config.cpython-310.pyc +0 -0
  3. __pycache__/server.cpython-310.pyc +0 -0
  4. audio.wav +0 -0
  5. audio_cache/0177c590-2a31-492d-b837-934fa7e81022.wav +0 -0
  6. audio_cache/023cd2de-44d3-46d2-a25f-33ff62394cfb.wav +0 -0
  7. audio_cache/61132cdd-ebfa-45c6-a200-940ff4c71861.wav +0 -0
  8. audio_cache/ebf8d4cd-ab30-4ed7-ad7d-9d949456a54f.wav +0 -0
  9. config.py +19 -0
  10. dependency/audio_logs/english/00e6a918-780f-4781-a015-b91a326c64d9/transcript.txt +1 -0
  11. dependency/audio_logs/english/06308abc-142a-444c-863c-46a84a6adc0c/transcript.txt +1 -0
  12. dependency/audio_logs/english/153dadc1-1ee7-4b46-b76f-62a415aa1dc8/audio.wav +0 -0
  13. dependency/audio_logs/english/153dadc1-1ee7-4b46-b76f-62a415aa1dc8/transcript.txt +1 -0
  14. dependency/audio_logs/english/2c6e8916-012f-44d3-8e0a-cea7f40ff8f5/transcript.txt +1 -0
  15. dependency/audio_logs/english/2fd17592-cc9e-433b-83a5-8e64fcd98715/transcript.txt +1 -0
  16. dependency/audio_logs/english/3cf9bb5f-51f8-4d95-ac99-f11920fc6c9d/transcript.txt +1 -0
  17. dependency/audio_logs/english/41751b79-03c7-4dea-8f2e-4aa6da74d647/transcript.txt +1 -0
  18. dependency/audio_logs/english/495c7c6c-a18d-48f5-a597-16389c1d366c/transcript.txt +1 -0
  19. dependency/audio_logs/english/4ee9a133-4b38-424b-9561-5c870c723d50/transcript.txt +1 -0
  20. dependency/audio_logs/english/54645247-f260-42f9-b475-d890bd0f5e00/transcript.txt +1 -0
  21. dependency/audio_logs/english/61ca0ac4-72f7-4a5c-b82a-02327710d4ce/transcript.txt +1 -0
  22. dependency/audio_logs/english/716694d5-64ff-42ac-90dd-b259e40d2ec2/transcript.txt +1 -0
  23. dependency/audio_logs/english/95396c57-c169-4f8d-bd8b-124f4224676a/transcript.txt +1 -0
  24. dependency/audio_logs/english/DemoTranscript.txt +1 -0
  25. dependency/audio_logs/english/be445b40-f504-4ffe-9b36-0da83e312f15/transcript.txt +1 -0
  26. dependency/audio_logs/english/c75ea509-a258-423d-845b-4daf922d38ba/transcript.txt +1 -0
  27. dependency/audio_logs/english/d1bb4f14-e186-40bd-96bb-a3347f6179d1/transcript.txt +1 -0
  28. dependency/audio_logs/english/dc6cc1a6-c3da-4fc1-a79d-1482acf8f993/transcript.txt +1 -0
  29. dependency/audio_logs/english/e03026bb-412b-48ac-ba17-52402ee25a4e/transcript.txt +1 -0
  30. dependency/audio_logs/english/f2aed9e2-e2a7-4aae-b4a8-24d7b2487704/transcript.txt +1 -0
  31. dependency/audio_logs/english/f3264a23-1bba-44e6-a03c-8ce3515a040f/transcript.txt +1 -0
  32. dependency/audio_logs/english/fa344aa0-9a59-48b0-b360-fe95280bdc59/transcript.txt +1 -0
  33. dependency/audio_logs/hindi/07e6146b-50f2-4f80-b88d-b4aa84e2458c/audio.wav +0 -0
  34. dependency/audio_logs/hindi/4bdc92e1-7c6d-4e96-a513-e7afb95ed962/audio.wav +0 -0
  35. dependency/audio_logs/hindi/544fc9bc-f092-4eee-85dd-d390cfa56c52/audio.wav +0 -0
  36. dependency/audio_logs/hindi/5cc27cdc-b839-41f1-ab7e-fbc0f5b1112d/audio.wav +0 -0
  37. dependency/audio_logs/hindi/ca4f49d5-f622-4945-a54e-de53be8f1d51/audio.wav +0 -0
  38. dependency/audio_logs/hindi/fc2c7d9d-d9dd-4d44-87c3-f811448cb43e/audio.wav +0 -0
  39. dependency/audio_logs/hindi/fe00b82d-c412-4537-b81e-1ff45e21b042/transcript.txt +1 -0
  40. recorded_audio.wav +0 -0
  41. requirements.txt +204 -0
  42. server.py +133 -0
  43. streamlit_app.py +95 -0
  44. streamlit_audio_recorder_app.py +44 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ WSL
__pycache__/config.cpython-310.pyc ADDED
Binary file (365 Bytes). View file
 
__pycache__/server.cpython-310.pyc ADDED
Binary file (3.4 kB). View file
 
audio.wav ADDED
Binary file (238 kB). View file
 
audio_cache/0177c590-2a31-492d-b837-934fa7e81022.wav ADDED
Binary file (165 kB). View file
 
audio_cache/023cd2de-44d3-46d2-a25f-33ff62394cfb.wav ADDED
Binary file (152 kB). View file
 
audio_cache/61132cdd-ebfa-45c6-a200-940ff4c71861.wav ADDED
Binary file (165 kB). View file
 
audio_cache/ebf8d4cd-ab30-4ed7-ad7d-9d949456a54f.wav ADDED
Binary file (163 kB). View file
 
config.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Author: Tanmay Jain
3
+ Email: csetanmayjain@gmail.com
4
+ """
5
+ import os
6
+ import subprocess
7
+
8
+
9
# Root directory under which transcription output is stored. server.py joins
# "hindi" / "english" onto this path and creates those sub-directories.
# The default is the original developer location; set the
# ASR_UPLOAD_FILES_PATH environment variable to relocate it without editing
# this file.
upload_files_path = os.environ.get(
    "ASR_UPLOAD_FILES_PATH", '/home/akshat/v1/dependency/audio_logs'
)

# Directory of the Hindi acoustic-model checkpoint.
hi_am_model_path = "./hi_am_model"

# Directory of the English acoustic-model checkpoint.
en_am_model_path = "./en_am_model"
14
+
15
+
16
+
17
+ # upload_files_path = '/home/tanmay/zb/docker_files_donot_delete/v1/audio_logs'
18
+ # hi_am_model_path = "/home/tanmay/zb/Conformer-CTC-BPE-Large.nemo"
19
+ # en_am_model_path = "/home/tanmay/zb/Conformer-CTC-BPE-Large.nemo"
dependency/audio_logs/english/00e6a918-780f-4781-a015-b91a326c64d9/transcript.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ well i dont wish to see it anymre observed f b turning away her eyes it is certainly very like old portrad
dependency/audio_logs/english/06308abc-142a-444c-863c-46a84a6adc0c/transcript.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ well i dont wish to see it anymre observed f b turning away her eyes it is certainly very like old portrad
dependency/audio_logs/english/153dadc1-1ee7-4b46-b76f-62a415aa1dc8/audio.wav ADDED
Binary file (238 kB). View file
 
dependency/audio_logs/english/153dadc1-1ee7-4b46-b76f-62a415aa1dc8/transcript.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ well i dont wish to see it anymre observed f b turning away her eyes it is certainly very like old portrad
dependency/audio_logs/english/2c6e8916-012f-44d3-8e0a-cea7f40ff8f5/transcript.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ well i dont wish to see it anymre observed f b turning away her eyes it is certainly very like old portrad
dependency/audio_logs/english/2fd17592-cc9e-433b-83a5-8e64fcd98715/transcript.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ well i dont wish to see it anymre observed f b turning away her eyes it is certainly very like old portrad
dependency/audio_logs/english/3cf9bb5f-51f8-4d95-ac99-f11920fc6c9d/transcript.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ well i dont wish to see it anymre observed f b turning away her eyes it is certainly very like old portrad
dependency/audio_logs/english/41751b79-03c7-4dea-8f2e-4aa6da74d647/transcript.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ well i dont wish to see it anymre observed f b turning away her eyes it is certainly very like old portrad
dependency/audio_logs/english/495c7c6c-a18d-48f5-a597-16389c1d366c/transcript.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ well i dont wish to see it anymre observed f b turning away her eyes it is certainly very like old portrad
dependency/audio_logs/english/4ee9a133-4b38-424b-9561-5c870c723d50/transcript.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ well i dont wish to see it anymre observed f b turning away her eyes it is certainly very like old portrad
dependency/audio_logs/english/54645247-f260-42f9-b475-d890bd0f5e00/transcript.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ well i dont wish to see it anymre observed f b turning away her eyes it is certainly very like old portrad
dependency/audio_logs/english/61ca0ac4-72f7-4a5c-b82a-02327710d4ce/transcript.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ well i dont wish to see it anymre observed f b turning away her eyes it is certainly very like old portrad
dependency/audio_logs/english/716694d5-64ff-42ac-90dd-b259e40d2ec2/transcript.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ well i dont wish to see it anymre observed f b turning away her eyes it is certainly very like old portrad
dependency/audio_logs/english/95396c57-c169-4f8d-bd8b-124f4224676a/transcript.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ well i dont wish to see it anymre observed f b turning away her eyes it is certainly very like old portrad
dependency/audio_logs/english/DemoTranscript.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ well i dont wish to see it anymre observed f b turning away her eyes it is certainly very like old portrad
dependency/audio_logs/english/be445b40-f504-4ffe-9b36-0da83e312f15/transcript.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ well i dont wish to see it anymre observed f b turning away her eyes it is certainly very like old portrad
dependency/audio_logs/english/c75ea509-a258-423d-845b-4daf922d38ba/transcript.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ well i dont wish to see it anymre observed f b turning away her eyes it is certainly very like old portrad
dependency/audio_logs/english/d1bb4f14-e186-40bd-96bb-a3347f6179d1/transcript.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ well i dont wish to see it anymre observed f b turning away her eyes it is certainly very like old portrad
dependency/audio_logs/english/dc6cc1a6-c3da-4fc1-a79d-1482acf8f993/transcript.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ well i dont wish to see it anymre observed f b turning away her eyes it is certainly very like old portrad
dependency/audio_logs/english/e03026bb-412b-48ac-ba17-52402ee25a4e/transcript.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ well i dont wish to see it anymre observed f b turning away her eyes it is certainly very like old portrad
dependency/audio_logs/english/f2aed9e2-e2a7-4aae-b4a8-24d7b2487704/transcript.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ well i dont wish to see it anymre observed f b turning away her eyes it is certainly very like old portrad
dependency/audio_logs/english/f3264a23-1bba-44e6-a03c-8ce3515a040f/transcript.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ well i dont wish to see it anymre observed f b turning away her eyes it is certainly very like old portrad
dependency/audio_logs/english/fa344aa0-9a59-48b0-b360-fe95280bdc59/transcript.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ well i dont wish to see it anymre observed f b turning away her eyes it is certainly very like old portrad
dependency/audio_logs/hindi/07e6146b-50f2-4f80-b88d-b4aa84e2458c/audio.wav ADDED
Binary file (238 kB). View file
 
dependency/audio_logs/hindi/4bdc92e1-7c6d-4e96-a513-e7afb95ed962/audio.wav ADDED
Binary file (238 kB). View file
 
dependency/audio_logs/hindi/544fc9bc-f092-4eee-85dd-d390cfa56c52/audio.wav ADDED
Binary file (238 kB). View file
 
dependency/audio_logs/hindi/5cc27cdc-b839-41f1-ab7e-fbc0f5b1112d/audio.wav ADDED
Binary file (238 kB). View file
 
dependency/audio_logs/hindi/ca4f49d5-f622-4945-a54e-de53be8f1d51/audio.wav ADDED
Binary file (238 kB). View file
 
dependency/audio_logs/hindi/fc2c7d9d-d9dd-4d44-87c3-f811448cb43e/audio.wav ADDED
Binary file (238 kB). View file
 
dependency/audio_logs/hindi/fe00b82d-c412-4537-b81e-1ff45e21b042/transcript.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ वेलेश सी एनीोर
recorded_audio.wav ADDED
File without changes
requirements.txt ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ absl-py==2.1.0
2
+ aiohttp==3.9.3
3
+ aioice==0.9.0
4
+ aiortc==1.8.0
5
+ aiosignal==1.3.1
6
+ alembic==1.13.1
7
+ altair==5.2.0
8
+ annotated-types==0.6.0
9
+ antlr4-python3-runtime==4.9.3
10
+ asteroid-filterbanks==0.4.0
11
+ asttokens==2.4.1
12
+ async-timeout==4.0.3
13
+ attrs==23.2.0
14
+ audioread==3.0.1
15
+ av==11.0.0
16
+ blinker==1.7.0
17
+ braceexpand==0.1.7
18
+ cachetools==5.3.3
19
+ certifi==2024.2.2
20
+ cffi==1.16.0
21
+ charset-normalizer==3.3.2
22
+ click==8.1.7
23
+ colorama==0.4.6
24
+ colorlog==6.8.2
25
+ contourpy==1.2.0
26
+ cryptography==42.0.5
27
+ cycler==0.12.1
28
+ Cython
29
+ cytoolz==0.12.3
30
+ datasets==2.18.0
31
+ decorator==5.1.1
32
+ dill==0.3.8
33
+ dnspython==2.6.1
34
+ docopt==0.6.2
35
+ editdistance==0.8.1
36
+ einops==0.7.0
37
+ exceptiongroup==1.2.0
38
+ executing==2.0.1
39
+ filelock==3.13.1
40
+ Flask==3.0.2
41
+ Flask-Cors==4.0.0
42
+ fonttools==4.49.0
43
+ frozenlist==1.4.1
44
+ fsspec==2024.2.0
45
+ gitdb==4.0.11
46
+ GitPython==3.1.42
47
+ google-crc32c==1.5.0
48
+ greenlet==3.0.3
49
+ grpcio==1.62.1
50
+ huggingface-hub==0.21.4
51
+ hydra-core==1.3.2
52
+ HyperPyYAML==1.2.2
53
+ idna==3.6
54
+ ifaddr==0.2.0
55
+ inflect==7.0.0
56
+ intervaltree==3.1.0
57
+ ipython
58
+ itsdangerous==2.1.2
59
+ jedi==0.19.1
60
+ Jinja2==3.1.3
61
+ jiwer==3.0.3
62
+ joblib==1.3.2
63
+ jsonschema==4.21.1
64
+ jsonschema-specifications==2023.12.1
65
+ julius==0.2.7
66
+ kaldi-io==0.9.8
67
+ kiwisolver==1.4.5
68
+ lazy_loader==0.3
69
+ lhotse==1.22.0
70
+ librosa==0.10.1
71
+ lightning==2.2.1
72
+ lightning-utilities==0.10.1
73
+ lilcom==1.7
74
+ llvmlite==0.42.0
75
+ Mako==1.3.2
76
+ Markdown==3.5.2
77
+ markdown-it-py==3.0.0
78
+ MarkupSafe==2.1.5
79
+ marshmallow==3.21.1
80
+ matplotlib==3.8.3
81
+ matplotlib-inline==0.1.6
82
+ mdurl==0.1.2
83
+ mpmath==1.3.0
84
+ msgpack==1.0.8
85
+ multidict==6.0.5
86
+ multiprocess==0.70.16
87
+ nemo-asr==0.9.0
88
+ nemo_toolkit==1.23.0
89
+ networkx==3.2.1
90
+ num2words==0.5.13
91
+ numba==0.59.0
92
+ numpy==1.26.4
93
+ # nvidia-cublas-cu12==12.1.3.1
94
+ # nvidia-cuda-cupti-cu12==12.1.105
95
+ # nvidia-cuda-nvrtc-cu12==12.1.105
96
+ # nvidia-cuda-runtime-cu12==12.1.105
97
+ # nvidia-cudnn-cu12==8.9.2.26
98
+ # nvidia-cufft-cu12==11.0.2.54
99
+ # nvidia-curand-cu12==10.3.2.106
100
+ # nvidia-cusolver-cu12==11.4.5.107
101
+ # nvidia-cusparse-cu12==12.1.0.106
102
+ # nvidia-nccl-cu12==2.19.3
103
+ # nvidia-nvjitlink-cu12==12.4.99
104
+ # nvidia-nvtx-cu12==12.1.105
105
+ omegaconf==2.3.0
106
+ onnx==1.15.0
107
+ optuna==3.5.0
108
+ packaging==23.2
109
+ pandas==2.2.1
110
+ parso==0.8.3
111
+ pexpect==4.9.0
112
+ pillow==10.2.0
113
+ platformdirs==4.2.0
114
+ pooch==1.8.1
115
+ primePy==1.3
116
+ prompt-toolkit==3.0.43
117
+ protobuf==4.25.3
118
+ ptyprocess==0.7.0
119
+ pure-eval==0.2.2
120
+ pyannote.audio==3.1.1
121
+ pyannote.core==5.0.0
122
+ pyannote.database==5.0.1
123
+ pyannote.metrics==3.2.1
124
+ pyannote.pipeline==3.0.1
125
+ pyarrow==15.0.1
126
+ pyarrow-hotfix==0.6
127
+ pycparser==2.21
128
+ pydantic==2.6.3
129
+ pydantic_core==2.16.3
130
+ pydeck==0.8.1b0
131
+ pydub==0.25.1
132
+ pyee==11.1.0
133
+ Pygments==2.17.2
134
+ pylibsrtp==0.10.0
135
+ pyOpenSSL==24.1.0
136
+ pyparsing==3.1.2
137
+ python-dateutil==2.9.0.post0
138
+ pytorch-lightning==2.2.1
139
+ pytorch-metric-learning==2.4.1
140
+ pytz==2024.1
141
+ PyYAML==6.0.1
142
+ rapidfuzz==3.6.2
143
+ referencing==0.33.0
144
+ regex==2023.12.25
145
+ requests==2.31.0
146
+ rich==13.7.1
147
+ rpds-py==0.18.0
148
+ ruamel.yaml==0.18.6
149
+ ruamel.yaml.clib==0.2.8
150
+ safetensors==0.4.2
151
+ scikit-learn==1.4.1.post1
152
+ scipy==1.12.0
153
+ semver==3.0.2
154
+ sentencepiece==0.2.0
155
+ shellingham==1.5.4
156
+ six==1.16.0
157
+ smmap==5.0.1
158
+ sortedcontainers==2.4.0
159
+ sounddevice==0.4.6
160
+ soundfile==0.12.1
161
+ sox==1.4.1
162
+ soxr==0.3.7
163
+ speechbrain==1.0.0
164
+ SQLAlchemy==2.0.28
165
+ stack-data==0.6.3
166
+ streamlit==1.32.1
167
+ streamlit-audiorec==0.1.3
168
+ streamlit-webrtc==0.47.6
169
+ sympy==1.12
170
+ tabulate==0.9.0
171
+ tenacity==8.2.3
172
+ tensorboard==2.16.2
173
+ tensorboard-data-server==0.7.2
174
+ tensorboardX==2.6.2.2
175
+ text-unidecode==1.3
176
+ threadpoolctl==3.3.0
177
+ tokenizers==0.15.2
178
+ toml==0.10.2
179
+ toolz==0.12.1
180
+ # torch==2.2.1
181
+ # torch-audiomentations==0.11.1
182
+ # torch-pitch-shift==1.2.4
183
+ # torch-stft==0.1.4
184
+ # torchaudio==2.2.1
185
+ # torchmetrics==1.3.1
186
+ tornado==6.4
187
+ tqdm==4.66.2
188
+ traitlets==5.14.1
189
+ transformers==4.38.2
190
+ triton==2.2.0
191
+ typer==0.9.0
192
+ typing_extensions==4.10.0
193
+ tzdata==2024.1
194
+ Unidecode==1.3.8
195
+ urllib3==2.2.1
196
+ watchdog==4.0.0
197
+ wcwidth==0.2.13
198
+ webdataset==0.2.86
199
+ Werkzeug==3.0.1
200
+ wget==3.2
201
+ wrapt==1.16.0
202
+ xxhash==3.4.1
203
+ yarl==1.9.4
204
+ youtokentome
server.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Author: Tanmay Jain
3
+ Email: csetanmayjain@gmail.com
4
+ """
5
+
6
+ import os
7
+ import threading
8
+ import uuid
9
+
10
+ from flask import Flask, jsonify, request, send_file
11
+ from flask_cors import CORS
12
+ import nemo.collections.asr as nemo_asr
13
+ import subprocess
14
+ import config
15
+
16
+
17
# Lazily loaded ASR models; they stay None until load_model() assigns them.
hi_asr_model = en_asr_model = None

# Per-language upload directories; assigned by initialize_path().
en_upload_files_path = hi_upload_files_path = None


def initialize_path():
    """Copy path settings from ``config`` into module globals and create upload dirs.

    Sets ``hi_am_model_path`` / ``en_am_model_path`` from the config module,
    derives the ``hindi`` and ``english`` upload directories from
    ``config.upload_files_path`` and makes sure both directories exist.
    """
    global hi_upload_files_path, en_upload_files_path, hi_am_model_path, en_am_model_path

    hi_am_model_path, en_am_model_path = config.hi_am_model_path, config.en_am_model_path

    hi_upload_files_path = os.path.join(config.upload_files_path, "hindi")
    en_upload_files_path = os.path.join(config.upload_files_path, "english")

    # exist_ok=True keeps repeated calls harmless.
    for language_dir in (hi_upload_files_path, en_upload_files_path):
        os.makedirs(language_dir, exist_ok=True)
32
+
33
+
34
_HI_MODEL_URL = "https://storage.googleapis.com/vakyansh-open-models/conformer_models/hindi/filtered_v1_ssl_2022-07-08_19-43-25/Conformer-CTC-BPE-Large.nemo"
_EN_MODEL_URL = "https://storage.googleapis.com/vakyansh-open-models/conformer_models/english/2022-09-13_15-50-48/Conformer-CTC-BPE-Large.nemo"


def _ensure_model_file(model_dir, model_url):
    """Return the checkpoint path inside *model_dir*, downloading it with wget if missing.

    Args:
        model_dir: directory that should contain the checkpoint.
        model_url: URL of the checkpoint; its last path component is the file name.

    Returns:
        Path of the checkpoint file inside *model_dir*.

    Raises:
        RuntimeError: if ``wget`` exits with a non-zero status.
    """
    model_file = os.path.join(model_dir, os.path.basename(model_url))
    # Check the checkpoint file itself: an existing but empty directory must
    # still trigger the download.
    if not os.path.exists(model_file):
        print(model_file)
        print('not found downloading model')
        download = subprocess.run(
            ["wget", "-P", model_dir, model_url],
            capture_output=True,
            text=True,
        )
        if download.returncode != 0:
            raise RuntimeError(f"wget download failed: {download.stderr}")
        print('done Downloading')
    return model_file


def load_model(lang):
    """Load the ASR model for *lang* into its module global if not loaded yet.

    Reads the module globals ``hi_am_model_path`` / ``en_am_model_path``, which
    are assigned by ``initialize_path()``, so that function must have run first.
    Any *lang* other than ``"hi"`` or ``"en"`` is a no-op.

    Args:
        lang: language code, ``"hi"`` or ``"en"``.

    Raises:
        RuntimeError: if the checkpoint download fails.
    """
    global hi_asr_model, en_asr_model

    if lang == "hi" and hi_asr_model is None:
        print("Loading Hindi Model")
        hi_asr_model = nemo_asr.models.EncDecCTCModelBPE.restore_from(
            _ensure_model_file(hi_am_model_path, _HI_MODEL_URL)
        )
    if lang == "en" and en_asr_model is None:
        print("Loading English Model")
        en_asr_model = nemo_asr.models.EncDecCTCModelBPE.restore_from(
            _ensure_model_file(en_am_model_path, _EN_MODEL_URL)
        )
66
+
67
+
68
def transcribe(audio_file_path, transcription_file_path, lang, logprobs=False):
    """Transcribe one audio file and write the result to a text file.

    Uses the module-level model for *lang*; the model must already have been
    loaded (see ``load_model``).

    Args:
        audio_file_path: path of the audio file to transcribe.
        transcription_file_path: path of the text file to (over)write.
        lang: language code, ``"hi"`` or ``"en"``.
        logprobs: forwarded unchanged to the model's ``transcribe`` call.

    Raises:
        ValueError: if *lang* is not ``"hi"`` or ``"en"``.
    """
    # Validate first so an unsupported code fails with a clear error instead
    # of an unbound local variable further down.
    if lang == "hi":
        asr_model = hi_asr_model
    elif lang == "en":
        asr_model = en_asr_model
    else:
        raise ValueError(f"Unsupported language code: {lang!r}")

    transcription = asr_model.transcribe([audio_file_path], logprobs=logprobs)[0]

    # Explicit UTF-8 so Devanagari output is written correctly regardless of
    # the platform's default encoding; `with` closes the file even on error.
    with open(transcription_file_path, "w", encoding="utf-8") as f:
        f.write(transcription)
79
+
80
def asr(audio_file_path, lang):
    """Transcribe *audio_file_path* and return the id under which the result is stored.

    A fresh UUID is generated, a directory with that name is created under the
    upload directory of the language, and the transcript is written to
    ``transcript.txt`` inside it.

    Args:
        audio_file_path: path of the audio file to transcribe.
        lang: language code, ``"hi"`` or ``"en"``.

    Returns:
        The generated client id (a UUID string) on success. For a missing or
        unsupported *lang*, a ``(jsonify(...), 400)`` tuple is returned.
    """
    # NOTE(review): jsonify needs an active Flask application context; confirm
    # the error branches are only reached from inside a Flask request.
    if lang is None:
        return jsonify({'error': 'no language code provided'}), 400
    if lang not in ("hi", "en"):
        return jsonify({'error': 'Invalid language code'}), 400

    initialize_path()
    # transcribe() reads the module-level model objects, which stay None until
    # load_model() has run; it is a no-op once the model is loaded.
    load_model(lang)

    client_id = str(uuid.uuid4())
    uploads_root = hi_upload_files_path if lang == "hi" else en_upload_files_path
    client_id_dir = os.path.join(uploads_root, client_id)
    transcription_file_path = os.path.join(client_id_dir, "transcript.txt")

    os.makedirs(client_id_dir)

    transcribe(audio_file_path, transcription_file_path, lang)
    return client_id
102
+
103
def get_transcription(client_id, lang):
    """Return the transcript path for *client_id*, or a message explaining why not.

    Args:
        client_id: id previously returned by ``asr`` (a UUID string).
        lang: language code, ``"hi"`` or ``"en"``.

    Returns:
        The path of ``transcript.txt`` when it exists; otherwise one of the
        message strings ``'No Client ID Provided'``,
        ``'no language code provided'``, ``'Invalid language code'``,
        ``"Invalid Client ID"`` or ``'your transcription file is not ready'``.
    """
    if client_id is None:
        return 'No Client ID Provided'
    if lang is None:
        return 'no language code provided'
    if lang not in ("hi", "en"):
        return 'Invalid language code'

    # Ids are generated with uuid4 in asr(); rejecting anything else keeps
    # user-supplied text such as "../.." out of the filesystem path.
    try:
        canonical_id = str(uuid.UUID(str(client_id).strip()))
    except ValueError:
        return "Invalid Client ID"

    if lang == "hi":
        print('looking in hindi file path')
        language_dir = "hindi"
    else:
        print('looking in english file path')
        language_dir = "english"

    # Look where asr() writes: under config.upload_files_path, not relative to
    # the current working directory.
    client_dir = os.path.join(config.upload_files_path, language_dir, canonical_id)
    transcript_file_path = os.path.join(client_dir, 'transcript.txt')

    if not os.path.exists(client_dir):
        return "Invalid Client ID"
    if os.path.exists(transcript_file_path):
        print(transcript_file_path, "transcript file path")
        return transcript_file_path
    return 'your transcription file is not ready'
128
+
129
+
130
+
131
+ # if __name__ == '__main__':
132
+ # initialize_path()
133
+ # app.run(host='0.0.0.0', port=5000)
streamlit_app.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #start
2
+ #to see changes in your local browser just run $ streamlit run streamlit_app.py
3
+ import streamlit as st
4
+ from streamlit_webrtc import webrtc_streamer, WebRtcMode, ClientSettings
5
+ import sounddevice as sd
6
+ import server
7
+ from st_audiorec import st_audiorec
8
+ import nemo.collections.asr as nemo_asr
9
+ from pydub import AudioSegment
10
+ import io
11
+ import os
12
+ import uuid
13
+
14
# Page heading. (A `global` statement at module scope has no effect, so the
# former `global language` line was dropped.)
st.title("Customer Care")
18
+
19
+
20
@st.cache_resource
def get_model():
    """Create the ``audio_cache`` directory and return the English ASR model.

    Decorated with ``st.cache_resource``; the model is restored from
    ``en_am_model/Conformer-CTC-BPE-Large.nemo``.
    """
    # exist_ok=True tolerates an already existing directory without hiding
    # other failures (e.g. permission errors) the way a bare `except` did.
    os.makedirs("audio_cache", exist_ok=True)
    en_asr_model = nemo_asr.models.EncDecCTCModelBPE.restore_from("en_am_model/Conformer-CTC-BPE-Large.nemo")

    return en_asr_model
29
+
30
# English ASR model used for the live-recording transcription below.
en_asr_model = get_model()

st.title("💬 Vocalize: Empower Your Voice ")

# One language selector only: a second selectbox with the same label and
# options (and no explicit key) collides with the first one's widget id.
language = st.selectbox(
    'Enter your preferred language',
    ('en', 'hi'))

# Bare string literal, left as a standalone expression on purpose so that
# Streamlit displays it on the page.
"""
Hi record the audio, and get the transcription in real time!
Note: Works best for smaller audios"""

client_id = None

if language is None:
    st.error("Please enter a valid language.")

# --- Live recording -------------------------------------------------------
wav_audio_data = st_audiorec()
if wav_audio_data:
    audio_location = "audio_cache/" + str(uuid.uuid4()) + ".wav"
    audio_file = io.BytesIO(wav_audio_data)
    # Convert the recording to 16-bit, mono, 16 kHz before writing it out.
    audio = AudioSegment.from_file(audio_file)
    audio = audio.set_sample_width(2)
    audio = audio.set_channels(1)
    audio = audio.set_frame_rate(16000)
    audio.export(audio_location, format="wav")

    # NOTE(review): this always uses the English model, even when 'hi' is
    # selected above -- confirm whether that is intended.
    text = en_asr_model.transcribe([audio_location], logprobs=False)[0]
    print(text)
    st.write(text)

# --- File upload ----------------------------------------------------------
st.header("Want to transcribe files larger than 2 minutes?")
uploaded_file = st.file_uploader("upload your recording ", disabled=False, label_visibility="visible")
if uploaded_file is not None:
    # Store the uploaded file in the working directory; basename() drops any
    # directory components of the client-supplied name.
    upload_path = os.path.basename(uploaded_file.name)
    with open(upload_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    isTranslate = st.button("Click me for translation")
    # Trigger transcription:
    if isTranslate and language is not None:
        client_id = server.asr(upload_path, language)
        st.write("your id for recieving the transcribed File", client_id)


# --- Result download ------------------------------------------------------
st.header('Results')

client_id = st.text_input("Enter your id")
clicked = st.button("Click me for transcribed File")
if clicked:
    resp = server.get_transcription(str(client_id), language)
    print(resp, "this is the requested file path")
    # get_transcription returns either a file path or a plain message such as
    # "Invalid Client ID"; only the former can be opened.
    if os.path.isfile(resp):
        with open(resp, "rb") as file:
            st.download_button(
                "Download transcription",
                file,
                file_name="transcription.txt"
            )
    else:
        st.error(resp)
streamlit_audio_recorder_app.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # pip install streamlit
2
+ # pip install streamlit-audiorec
3
+
4
+ import streamlit as st
5
+ from st_audiorec import st_audiorec
6
+ import nemo.collections.asr as nemo_asr
7
+ from pydub import AudioSegment
8
+ import io
9
+ import os
10
+ import uuid
11
+
12
@st.cache_resource
def get_model():
    """Create the ``audio_cache`` directory and return the English ASR model.

    Decorated with ``st.cache_resource``; the model is restored from the
    absolute checkpoint path below.
    """
    # exist_ok=True tolerates an already existing directory without hiding
    # other failures (e.g. permission errors) the way a bare `except` did.
    os.makedirs("audio_cache", exist_ok=True)
    # NOTE(review): machine-specific absolute path; streamlit_app.py loads the
    # same checkpoint via the relative path "en_am_model/..." -- confirm which
    # one is intended before changing it.
    en_asr_model = nemo_asr.models.EncDecCTCModelBPE.restore_from("/home/akshat/v1/en_am_model/Conformer-CTC-BPE-Large.nemo")

    return en_asr_model
21
+
22
# English ASR model shared by every transcription on this page.
en_asr_model = get_model()

st.title("💬 Vocalize: Empower Your Voice ")

# Bare string literal, left as a standalone expression on purpose so that
# Streamlit displays it on the page.
"""
Hi record the audio, and get the transcription in real time!
Note: Works best for smaller audios
"""

wav_audio_data = st_audiorec()

if wav_audio_data:
    # Each recording is stored under a unique name inside audio_cache/.
    audio_location = f"audio_cache/{uuid.uuid4()}.wav"

    # Normalise the recording to 16-bit, mono, 16 kHz and save it as WAV.
    audio = (
        AudioSegment.from_file(io.BytesIO(wav_audio_data))
        .set_sample_width(2)
        .set_channels(1)
        .set_frame_rate(16000)
    )
    audio.export(audio_location, format="wav")

    # transcribe() takes a list of paths; take the single result.
    text = en_asr_model.transcribe([audio_location], logprobs=False)[0]
    print(text)
    st.write(text)