anonymoussubmitter222 committed on
Commit
3485e08
1 Parent(s): bf7e6b5

now handles stereo input

Browse files
TunisianASR/results/14epoch_tunisian/1234/app.py CHANGED
@@ -744,18 +744,9 @@ mixer.checkpointer.recover_if_possible(device="cpu")
744
  mixer.modules.eval()
745
 
746
 
747
- label_encoder = sb.dataio.encoder.CTCTextEncoder()
748
 
749
 
750
- # We dynamicaly add the tokenizer to our brain class.
751
- # NB: This tokenizer corresponds to the one used for the LM!!
752
 
753
- decoder = build_ctcdecoder(
754
- labels,
755
- kenlm_model_path= "arpas/everything.arpa", # either .arpa or .bin file
756
- alpha=0.5, # tuned on a val set
757
- beta=1, # tuned on a val set
758
- )
759
 
760
 
761
 
@@ -785,7 +776,12 @@ def treat_wav_file(file_mic,file_upload ,asr=mixer, device="cpu") :
785
  wav = file_mic
786
  else:
787
  wav = file_upload
788
- sig, sr = torchaudio.load(wav)
 
 
 
 
 
789
  tensor_wav = sig.to(device)
790
  resampled = torchaudio.functional.resample( tensor_wav, sr, 16000)
791
  sentence = asr.treat_wav(resampled)
 
744
  mixer.modules.eval()
745
 
746
 
 
747
 
748
 
 
 
749
 
 
 
 
 
 
 
750
 
751
 
752
 
 
776
  wav = file_mic
777
  else:
778
  wav = file_upload
779
+ info = torchaudio.info(wav)
780
+ sr = info.sample_rate
781
+ sig = sb.dataio.dataio.read_audio(wav)
782
+ if len(sig.shape)>1 :
783
+ sig = torch.mean(sig, dim=1)
784
+ sig = torch.unsqueeze(sig, 0)
785
  tensor_wav = sig.to(device)
786
  resampled = torchaudio.functional.resample( tensor_wav, sr, 16000)
787
  sentence = asr.treat_wav(resampled)
TunisianASR/results/14epoch_tunisian/1234/env.log CHANGED
@@ -473,7 +473,7 @@ youtube-dl==2021.6.6
473
  zipp==3.6.0
474
  ==============================
475
  Git revision:
476
- 0fdcdc4
477
  ==============================
478
  CUDA version:
479
  11.7
 
473
  zipp==3.6.0
474
  ==============================
475
  Git revision:
476
+ bf7e6b5
477
  ==============================
478
  CUDA version:
479
  11.7
TunisianASR/results/14epoch_tunisian/1234/log.txt CHANGED
@@ -1830,3 +1830,494 @@ zipp==3.6.0
1830
  2023-09-25 12:30:16,221 - speechbrain.core - INFO - 314.4M trainable parameters in ASR
1831
  2023-09-25 12:30:16,224 - speechbrain.utils.checkpoints - INFO - Loading a checkpoint from TunisianASR/results/14epoch_tunisian/1234/save/CKPT+2023-08-03+01-38-38+00
1832
  2023-09-25 12:30:16,534 - speechbrain.utils.distributed - INFO - distributed_launch flag is disabled, this experiment will be executed without DDP.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1830
  2023-09-25 12:30:16,221 - speechbrain.core - INFO - 314.4M trainable parameters in ASR
1831
  2023-09-25 12:30:16,224 - speechbrain.utils.checkpoints - INFO - Loading a checkpoint from TunisianASR/results/14epoch_tunisian/1234/save/CKPT+2023-08-03+01-38-38+00
1832
  2023-09-25 12:30:16,534 - speechbrain.utils.distributed - INFO - distributed_launch flag is disabled, this experiment will be executed without DDP.
1833
+ 2023-09-25 15:14:42,745 - speechbrain.core - INFO - Beginning experiment!
1834
+ 2023-09-25 15:14:42,745 - speechbrain.core - INFO - Experiment folder: TunisianASR/results/14epoch_tunisian/1234/
1835
+ 2023-09-25 15:14:43,231 - speechbrain.utils.superpowers - DEBUG - abkhazia==1.0
1836
+ absl-py==0.11.0
1837
+ aiofiles==23.2.1
1838
+ aiohttp==3.8.0
1839
+ aiosignal==1.2.0
1840
+ alabaster==0.7.12
1841
+ alembic==1.7.4
1842
+ altair==4.2.0
1843
+ altgraph==0.17
1844
+ antlr4-python3-runtime==4.9.3
1845
+ anyio==3.6.2
1846
+ appdirs==1.4.4
1847
+ argcomplete==1.12.2
1848
+ argon2-cffi==20.1.0
1849
+ arrow==1.2.3
1850
+ asgiref==3.6.0
1851
+ asteroid-filterbanks==0.4.0
1852
+ astunparse==1.6.3
1853
+ async-generator==1.10
1854
+ async-timeout==4.0.0
1855
+ attrdict==2.0.1
1856
+ attrs==20.3.0
1857
+ audeer==1.16.0
1858
+ audformat==0.11.5
1859
+ audinterface==0.7.0
1860
+ audiofile==1.0.0
1861
+ audiomentations==0.25.0
1862
+ audioread==2.1.9
1863
+ audobject==0.4.14
1864
+ audresample==0.1.6
1865
+ -e git+https://github.com/facebookresearch/WavAugment.git@54afcdb00ccc852c2f030f239f8532c9562b550e#egg=augment
1866
+ autopage==0.4.0
1867
+ Babel==2.9.0
1868
+ backcall==0.2.0
1869
+ backports.cached-property==1.0.2
1870
+ beautifulsoup4==4.10.0
1871
+ black==19.10b0
1872
+ bleach==3.3.0
1873
+ blessed==1.20.0
1874
+ boto3==1.20.2
1875
+ botocore==1.23.2
1876
+ bpemb==0.3.4
1877
+ braceexpand==0.1.7
1878
+ cachetools==4.2.0
1879
+ certifi @ file:///croot/certifi_1671487769961/work/certifi
1880
+ cffi==1.14.3
1881
+ cfgv==3.2.0
1882
+ chardet==3.0.4
1883
+ charset-normalizer==2.0.7
1884
+ click==7.1.2
1885
+ cliff==3.9.0
1886
+ clldutils==3.5.4
1887
+ cloudpickle==2.2.1
1888
+ cmaes==0.8.2
1889
+ cmake==3.18.4.post1
1890
+ cmd2==2.2.0
1891
+ colorama==0.4.4
1892
+ colorlog==4.6.2
1893
+ configparser==5.1.0
1894
+ conllu==4.5.3
1895
+ croniter==1.3.15
1896
+ cryptography==38.0.4
1897
+ csrgraph==0.1.28
1898
+ csvw==1.8.1
1899
+ cycler==0.10.0
1900
+ Cython==0.29.21
1901
+ dataclasses==0.6
1902
+ dateutils==0.6.12
1903
+ decorator==4.4.2
1904
+ deepdiff==6.3.0
1905
+ deepspeech==0.9.1
1906
+ defusedxml==0.7.1
1907
+ Deprecated==1.2.14
1908
+ dill==0.3.3
1909
+ Distance==0.1.3
1910
+ distlib==0.3.1
1911
+ Django==3.2.16
1912
+ django-auditlog==2.2.1
1913
+ django-filter==22.1
1914
+ django-js-asset==1.2.2
1915
+ django-mptt==0.14.0
1916
+ djangorestframework==3.14.0
1917
+ docker-pycreds==0.4.0
1918
+ docopt==0.6.2
1919
+ docutils==0.16
1920
+ drf-excel==2.2.0
1921
+ drf-flex-fields==1.0.0
1922
+ drf-renderer-xlsx==0.4.1
1923
+ easyocr==1.2.1
1924
+ editdistance==0.6.0
1925
+ einops==0.3.2
1926
+ emoji==2.2.0
1927
+ entrypoints==0.3
1928
+ et-xmlfile==1.1.0
1929
+ exceptiongroup==1.1.0
1930
+ farasapy==0.0.14
1931
+ fastapi==0.98.0
1932
+ fastjsonschema==2.17.1
1933
+ fasttext==0.9.2
1934
+ ffmpeg-python==0.2.0
1935
+ ffmpy==0.3.0
1936
+ filelock==3.0.12
1937
+ flair==0.12.2
1938
+ flake8==3.7.9
1939
+ flatbuffers==1.12
1940
+ frozendict==2.0.7
1941
+ frozenlist==1.2.0
1942
+ fsspec==2021.11.0
1943
+ ftfy==6.1.1
1944
+ future==0.18.2
1945
+ g2p-en==2.1.0
1946
+ gast==0.3.3
1947
+ gdown==4.4.0
1948
+ gdrive==0.1.5
1949
+ gensim==4.0.1
1950
+ gitdb==4.0.9
1951
+ GitPython==3.1.24
1952
+ google-api-core==2.11.1
1953
+ google-api-python-client==2.43.0
1954
+ google-auth==1.24.0
1955
+ google-auth-httplib2==0.1.0
1956
+ google-auth-oauthlib==0.5.3
1957
+ google-pasta==0.2.0
1958
+ googleapis-common-protos==1.59.1
1959
+ gradio==3.44.4
1960
+ gradio-client==0.5.1
1961
+ greenlet==1.1.2
1962
+ grpcio==1.32.0
1963
+ h11==0.14.0
1964
+ h5features==1.3.2
1965
+ h5py==2.10.0
1966
+ hierarchy==0.4.0
1967
+ hmmlearn==0.2.8
1968
+ htk-io==0.5
1969
+ httpcore==0.16.3
1970
+ httplib2==0.22.0
1971
+ httpx==0.23.3
1972
+ huggingface-hub==0.15.1
1973
+ hydra-colorlog==0.1.4
1974
+ hydra-core==1.3.2
1975
+ hyperopt==0.2.7
1976
+ HyperPyYAML==1.1.0
1977
+ hypothesis==6.61.2
1978
+ identify==1.5.10
1979
+ idna==2.10
1980
+ imageio==2.9.0
1981
+ imagesize==1.2.0
1982
+ importlib-metadata==4.8.1
1983
+ importlib-resources==5.2.2
1984
+ inflect==5.3.0
1985
+ inquirer==3.1.3
1986
+ ipadic==1.0.0
1987
+ ipyevents==2.0.1
1988
+ ipykernel==5.3.4
1989
+ ipython==7.19.0
1990
+ ipython-genutils==0.2.0
1991
+ ipywebrtc==0.6.0
1992
+ ipywidgets==7.6.3
1993
+ iso-639==0.4.5
1994
+ isodate==0.6.0
1995
+ isort==4.3.21
1996
+ itsdangerous==2.1.2
1997
+ Janome==0.5.0
1998
+ jedi==0.17.2
1999
+ jeepney==0.8.0
2000
+ jieba==0.42.1
2001
+ Jinja2==3.0.3
2002
+ jiwer==2.2.0
2003
+ jmespath==0.10.0
2004
+ joblib==0.17.0
2005
+ jsonschema==3.2.0
2006
+ julius==0.2.7
2007
+ jupyter-client==6.1.7
2008
+ jupyter-core==4.7.0
2009
+ jupyterlab-pygments==0.1.2
2010
+ jupyterlab-widgets==1.0.0
2011
+ kaitaistruct==0.9
2012
+ kaldi-io==0.9.4
2013
+ kaldi-python-io==1.2.2
2014
+ kaldiio==2.17.2
2015
+ kenlm @ https://github.com/kpu/kenlm/archive/master.zip
2016
+ Keras-Preprocessing==1.1.2
2017
+ kiwisolver==1.3.1
2018
+ lang-trans==0.6.0
2019
+ langdetect==1.0.9
2020
+ latexcodec==2.0.1
2021
+ ldap3==2.9.1
2022
+ librosa==0.9.0
2023
+ lightning-cloud==0.5.37
2024
+ lightning-utilities==0.8.0
2025
+ linkify-it-py==1.0.3
2026
+ lit==16.0.6
2027
+ llvmlite==0.35.0
2028
+ lxml==4.9.0
2029
+ Mako==1.1.5
2030
+ Markdown==3.3.3
2031
+ markdown-it-py==3.0.0
2032
+ MarkupSafe==2.1.3
2033
+ marshmallow==3.14.0
2034
+ matplotlib==3.3.3
2035
+ mccabe==0.6.1
2036
+ mcd==0.4
2037
+ mdit-py-plugins==0.3.3
2038
+ mdurl==0.1.2
2039
+ mecab-python3==1.0.3
2040
+ megatron-lm==2.2.0
2041
+ metrics==0.3.3
2042
+ mido==1.2.10
2043
+ mistune==0.8.4
2044
+ more-itertools==8.6.0
2045
+ mpld3==0.3
2046
+ mpmath==1.2.1
2047
+ multidict==5.2.0
2048
+ multiprocess==0.70.11.1
2049
+ nbclient==0.5.3
2050
+ nbconvert==5.6.1
2051
+ nbformat==5.9.0
2052
+ NEMO==4.3.2
2053
+ nemo-toolkit==1.4.0
2054
+ nest-asyncio==1.5.1
2055
+ networkx==2.8.8
2056
+ nltk==3.2.4
2057
+ nodeenv==1.5.0
2058
+ normalize==2.0.2
2059
+ notebook==6.3.0
2060
+ numba==0.52.0
2061
+ numpy==1.19.4
2062
+ nvidia-cublas-cu11==11.10.3.66
2063
+ nvidia-cuda-cupti-cu11==11.7.101
2064
+ nvidia-cuda-nvrtc-cu11==11.7.99
2065
+ nvidia-cuda-runtime-cu11==11.7.99
2066
+ nvidia-cudnn-cu11==8.5.0.96
2067
+ nvidia-cufft-cu11==10.9.0.58
2068
+ nvidia-curand-cu11==10.2.10.91
2069
+ nvidia-cusolver-cu11==11.4.0.1
2070
+ nvidia-cusparse-cu11==11.7.4.91
2071
+ nvidia-nccl-cu11==2.14.3
2072
+ nvidia-nvtx-cu11==11.7.91
2073
+ oauthlib==3.1.0
2074
+ omegaconf==2.3.0
2075
+ onnx==1.10.2
2076
+ OpenCC==1.1.2
2077
+ opencv-python==4.4.0.46
2078
+ openpyxl==3.0.9
2079
+ opensmile==2.2.0
2080
+ opt-einsum==3.3.0
2081
+ optuna==2.10.0
2082
+ ordered-set==4.1.0
2083
+ orjson==3.8.4
2084
+ oyaml==1.0
2085
+ packaging==22.0
2086
+ pandas==1.2.5
2087
+ pandocfilters==1.4.3
2088
+ pangu==4.0.6.1
2089
+ parameterized==0.8.1
2090
+ parso==0.7.1
2091
+ pathlib2==2.3.7.post1
2092
+ pathspec==0.5.5
2093
+ pathtools==0.1.2
2094
+ pbr==5.6.0
2095
+ pefile==2019.4.18
2096
+ pescador==2.1.0
2097
+ pesq==0.0.3
2098
+ pexpect==4.8.0
2099
+ phonemizer==2.2.1
2100
+ pickleshare==0.7.5
2101
+ Pillow==9.3.0
2102
+ pip-api==0.0.23
2103
+ pipreqs==0.4.11
2104
+ pluggy==0.13.1
2105
+ pooch==1.3.0
2106
+ portalocker==2.3.2
2107
+ pptree==3.1
2108
+ pre-commit==2.9.0
2109
+ preprocessing==0.1.13
2110
+ pretty-midi==0.2.9
2111
+ prettytable==2.2.1
2112
+ primePy==1.3
2113
+ progressbar2==3.53.1
2114
+ prometheus-client==0.10.1
2115
+ promise==2.3
2116
+ prompt-toolkit==3.0.8
2117
+ protobuf==3.20.3
2118
+ psutil==5.6.6
2119
+ ptyprocess==0.6.0
2120
+ py==1.9.0
2121
+ py-espeak-ng==0.1.8
2122
+ py4j==0.10.9.7
2123
+ pyannote.audio==2.1.1
2124
+ pyannote.core==4.5
2125
+ pyannote.database==4.1.3
2126
+ pyannote.metrics==3.2.1
2127
+ pyannote.pipeline==2.3
2128
+ pyannotebook==0.1.0.dev0
2129
+ PyArabic==0.6.15
2130
+ pyarrow==3.0.0
2131
+ pyasn1==0.4.8
2132
+ pyasn1-modules==0.2.8
2133
+ pybind11==2.8.1
2134
+ pybtex==0.24.0
2135
+ pybtex-docutils==1.0.1
2136
+ pycodestyle==2.5.0
2137
+ pycparser==2.20
2138
+ pycryptodome==3.16.0
2139
+ pyctcdecode==0.4.0
2140
+ pydantic==1.10.4
2141
+ pyDeprecate==0.3.1
2142
+ pydub==0.25.1
2143
+ pyflakes==2.1.1
2144
+ Pygments==2.15.1
2145
+ pygtrie==2.5.0
2146
+ PyJWT==2.7.0
2147
+ pymodbus==2.5.3
2148
+ pyparsing==2.4.7
2149
+ pyperclip==1.8.2
2150
+ pypinyin==0.43.0
2151
+ pyrsistent==0.17.3
2152
+ pyserial==3.5
2153
+ PySocks==1.7.1
2154
+ pystoi==0.3.3
2155
+ pytest==5.4.1
2156
+ pytest-runner==5.3.1
2157
+ python-bidi==0.4.2
2158
+ python-crfsuite==0.9.7
2159
+ python-dateutil==2.8.2
2160
+ python-editor==1.0.4
2161
+ python-Levenshtein==0.12.2
2162
+ python-multipart==0.0.5
2163
+ python-utils==2.4.0
2164
+ pytorch-lightning==1.6.5
2165
+ pytorch-metric-learning==1.7.3
2166
+ pytorch-revgrad==0.2.0
2167
+ pytube==11.0.1
2168
+ pytz==2022.6
2169
+ PyWavelets==1.1.1
2170
+ PyYAML==6.0
2171
+ pyzmq==20.0.0
2172
+ rapidfuzz==1.8.2
2173
+ readchar==4.0.5
2174
+ regex==2020.11.13
2175
+ requests==2.28.1
2176
+ requests-oauthlib==1.3.0
2177
+ resampy==0.2.2
2178
+ rfc3986==1.4.0
2179
+ rich==13.4.2
2180
+ richenum==1.3.1
2181
+ rsa==4.7
2182
+ ruamel.yaml==0.17.21
2183
+ ruamel.yaml.clib==0.2.7
2184
+ s3m==1.1.0
2185
+ s3transfer==0.5.0
2186
+ sacrebleu==2.0.0
2187
+ sacremoses==0.0.44
2188
+ safetensors==0.3.1
2189
+ scikit-image==0.18.1
2190
+ scikit-learn==0.23.2
2191
+ scipy==1.5.4
2192
+ -e git+https://github.com/sanghack81/SDCIT@00d060dde733fde9345154a494f81e97fb395ca7#egg=SDCIT
2193
+ seaborn==0.11.1
2194
+ SecretStorage==3.3.3
2195
+ segments==2.1.3
2196
+ segtok==1.5.11
2197
+ semantic-version==2.10.0
2198
+ semver==2.13.0
2199
+ Send2Trash==1.5.0
2200
+ sentencepiece==0.1.99
2201
+ sentry-sdk==1.4.3
2202
+ shellingham==1.4.0
2203
+ shortuuid==1.0.7
2204
+ SIDEKIT==1.3.8.5.2
2205
+ simplejson==3.17.5
2206
+ singledispatchmethod==1.0
2207
+ six==1.15.0
2208
+ smart-open==5.0.0
2209
+ smmap==5.0.0
2210
+ sniffio==1.3.0
2211
+ snowballstemmer==2.0.0
2212
+ sortedcollections==2.1.0
2213
+ sortedcontainers==2.4.0
2214
+ sounddevice==0.4.5
2215
+ SoundFile==0.10.3.post1
2216
+ soupsieve==2.3
2217
+ sox==1.4.1
2218
+ sparsemax==0.1.9
2219
+ speechbrain==0.5.14
2220
+ sphfile==1.0.3
2221
+ Sphinx==3.3.1
2222
+ sphinx-rtd-theme==0.2.4
2223
+ sphinxcontrib-applehelp==1.0.2
2224
+ sphinxcontrib-bibtex==2.4.1
2225
+ sphinxcontrib-devhelp==1.0.2
2226
+ sphinxcontrib-htmlhelp==1.0.3
2227
+ sphinxcontrib-jsmath==1.0.1
2228
+ sphinxcontrib-qthelp==1.0.3
2229
+ sphinxcontrib-serializinghtml==1.1.4
2230
+ SQLAlchemy==1.4.25
2231
+ sqlitedict==2.1.0
2232
+ sqlparse==0.4.2
2233
+ stanza==1.4.2
2234
+ starlette==0.27.0
2235
+ starsessions==1.3.0
2236
+ stevedore==3.4.0
2237
+ subprocess32==3.5.4
2238
+ sympy==1.9
2239
+ tabulate==0.8.9
2240
+ tensorboard==2.4.0
2241
+ tensorboard-plugin-wit==1.7.0
2242
+ tensorboardX==2.6.1
2243
+ tensorflow==2.4.0
2244
+ tensorflow-estimator==2.4.0
2245
+ termcolor==1.1.0
2246
+ terminado==0.9.4
2247
+ testpath==0.4.4
2248
+ threadpoolctl==2.1.0
2249
+ tifffile==2020.12.8
2250
+ tikzplotlib==0.9.8
2251
+ tinycss2==1.2.1
2252
+ tkseem==0.0.3
2253
+ tokenizers==0.13.3
2254
+ toml==0.10.2
2255
+ toolz==0.12.0
2256
+ torch==1.13.1
2257
+ torch-audiomentations==0.11.0
2258
+ torch-pitch-shift==1.2.4
2259
+ torch-stft==0.1.4
2260
+ torchaudio==0.13.1
2261
+ torchmetrics==0.11.4
2262
+ torchvision==0.14.1
2263
+ tornado==6.1
2264
+ tqdm==4.61.1
2265
+ trackrip==1.2.1
2266
+ traitlets==5.9.0
2267
+ transformer-smaller-training-vocab==0.3.1
2268
+ transformers==4.30.2
2269
+ triton==2.0.0
2270
+ typed-ast==1.4.1
2271
+ typer==0.4.0
2272
+ typing-extensions==4.4.0
2273
+ uc-micro-py==1.0.1
2274
+ Unidecode==1.3.2
2275
+ uritemplate==3.0.1
2276
+ urllib3==1.26.2
2277
+ uvicorn==0.20.0
2278
+ versioneer==0.28
2279
+ virtualenv==20.2.1
2280
+ wandb==0.12.6
2281
+ wcwidth==0.2.5
2282
+ webdataset==0.1.62
2283
+ webencodings==0.5.1
2284
+ websocket-client==1.6.1
2285
+ websockets==10.4
2286
+ Werkzeug==1.0.1
2287
+ wget==3.2
2288
+ widgetsnbextension==3.5.1
2289
+ Wikipedia-API==0.6.0
2290
+ wordninja==2.0.0
2291
+ wrapt==1.12.1
2292
+ xmltodict==0.13.0
2293
+ xxhash==2.0.0
2294
+ yamllint==1.23.0
2295
+ yarg==0.1.9
2296
+ yarl==1.7.2
2297
+ yaspin==2.1.0
2298
+ youtokentome==1.0.6
2299
+ youtube-dl==2021.6.6
2300
+ zipp==3.6.0
2301
+
2302
+
2303
+ 2023-09-25 15:14:43,263 - speechbrain.utils.superpowers - DEBUG - bf7e6b5
2304
+
2305
+
2306
+ 2023-09-25 15:14:43,325 - speechbrain.pretrained.fetching - INFO - Fetch hyperparams.yaml: Using existing file/symlink in pretrained_models/asr-wav2vec2-commonvoice-fr/hyperparams.yaml.
2307
+ 2023-09-25 15:14:43,325 - speechbrain.pretrained.fetching - INFO - Fetch custom.py: Linking to local file in /home/salah/Code-Switched-Tunisian-SpeechToText/asr-wav2vec2-commonvoice-fr/custom.py.
2308
+ 2023-09-25 15:14:46,203 - speechbrain.lobes.models.huggingface_wav2vec - WARNING - speechbrain.lobes.models.huggingface_wav2vec - wav2vec 2.0 is frozen.
2309
+ 2023-09-25 15:14:46,206 - speechbrain.utils.parameter_transfer - DEBUG - Collecting files (or symlinks) for pretraining in pretrained_models/asr-wav2vec2-commonvoice-fr.
2310
+ 2023-09-25 15:14:46,207 - speechbrain.pretrained.fetching - INFO - Fetch wav2vec2.ckpt: Using existing file/symlink in pretrained_models/asr-wav2vec2-commonvoice-fr/wav2vec2.ckpt.
2311
+ 2023-09-25 15:14:46,207 - speechbrain.pretrained.fetching - INFO - Fetch asr.ckpt: Using existing file/symlink in pretrained_models/asr-wav2vec2-commonvoice-fr/asr.ckpt.
2312
+ 2023-09-25 15:14:46,207 - speechbrain.pretrained.fetching - INFO - Fetch tokenizer.ckpt: Using existing file/symlink in pretrained_models/asr-wav2vec2-commonvoice-fr/tokenizer.ckpt.
2313
+ 2023-09-25 15:14:46,208 - speechbrain.utils.parameter_transfer - INFO - Loading pretrained files for: wav2vec2, asr, tokenizer
2314
+ 2023-09-25 15:14:50,143 - speechbrain.lobes.models.huggingface_wav2vec - WARNING - speechbrain.lobes.models.huggingface_wav2vec - wav2vec 2.0 feature extractor is frozen.
2315
+ 2023-09-25 15:14:50,144 - speechbrain.core - INFO - Info: auto_mix_prec arg from hparam file is used
2316
+ 2023-09-25 15:14:50,145 - speechbrain.core - INFO - Info: ckpt_interval_minutes arg from hparam file is used
2317
+ 2023-09-25 15:14:50,148 - speechbrain.core - INFO - 314.4M trainable parameters in ASRCV
2318
+ 2023-09-25 15:14:50,157 - speechbrain.utils.checkpoints - INFO - Loading a checkpoint from EnglishCV/results/wav2vec2_ctc_en/1234/save/CKPT+2023-09-06+22-56-31+00
2319
+ 2023-09-25 15:14:51,202 - speechbrain.core - INFO - Info: auto_mix_prec arg from hparam file is used
2320
+ 2023-09-25 15:14:51,202 - speechbrain.core - INFO - Info: ckpt_interval_minutes arg from hparam file is used
2321
+ 2023-09-25 15:14:51,207 - speechbrain.core - INFO - 314.4M trainable parameters in ASR
2322
+ 2023-09-25 15:14:51,211 - speechbrain.utils.checkpoints - INFO - Loading a checkpoint from TunisianASR/results/14epoch_tunisian/1234/save/CKPT+2023-08-03+01-38-38+00
2323
+ 2023-09-25 15:14:52,244 - speechbrain.utils.distributed - INFO - distributed_launch flag is disabled, this experiment will be executed without DDP.
app.py CHANGED
@@ -744,18 +744,9 @@ mixer.checkpointer.recover_if_possible(device="cpu")
744
  mixer.modules.eval()
745
 
746
 
747
- label_encoder = sb.dataio.encoder.CTCTextEncoder()
748
 
749
 
750
- # We dynamicaly add the tokenizer to our brain class.
751
- # NB: This tokenizer corresponds to the one used for the LM!!
752
 
753
- decoder = build_ctcdecoder(
754
- labels,
755
- kenlm_model_path= "arpas/everything.arpa", # either .arpa or .bin file
756
- alpha=0.5, # tuned on a val set
757
- beta=1, # tuned on a val set
758
- )
759
 
760
 
761
 
@@ -785,7 +776,12 @@ def treat_wav_file(file_mic,file_upload ,asr=mixer, device="cpu") :
785
  wav = file_mic
786
  else:
787
  wav = file_upload
788
- sig, sr = torchaudio.load(wav)
 
 
 
 
 
789
  tensor_wav = sig.to(device)
790
  resampled = torchaudio.functional.resample( tensor_wav, sr, 16000)
791
  sentence = asr.treat_wav(resampled)
 
744
  mixer.modules.eval()
745
 
746
 
 
747
 
748
 
 
 
749
 
 
 
 
 
 
 
750
 
751
 
752
 
 
776
  wav = file_mic
777
  else:
778
  wav = file_upload
779
+ info = torchaudio.info(wav)
780
+ sr = info.sample_rate
781
+ sig = sb.dataio.dataio.read_audio(wav)
782
+ if len(sig.shape)>1 :
783
+ sig = torch.mean(sig, dim=1)
784
+ sig = torch.unsqueeze(sig, 0)
785
  tensor_wav = sig.to(device)
786
  resampled = torchaudio.functional.resample( tensor_wav, sr, 16000)
787
  sentence = asr.treat_wav(resampled)
results/non_semi_final_stac/app.py CHANGED
@@ -744,18 +744,9 @@ mixer.checkpointer.recover_if_possible(device="cpu")
744
  mixer.modules.eval()
745
 
746
 
747
- label_encoder = sb.dataio.encoder.CTCTextEncoder()
748
 
749
 
750
- # We dynamicaly add the tokenizer to our brain class.
751
- # NB: This tokenizer corresponds to the one used for the LM!!
752
 
753
- decoder = build_ctcdecoder(
754
- labels,
755
- kenlm_model_path= "arpas/everything.arpa", # either .arpa or .bin file
756
- alpha=0.5, # tuned on a val set
757
- beta=1, # tuned on a val set
758
- )
759
 
760
 
761
 
@@ -785,7 +776,12 @@ def treat_wav_file(file_mic,file_upload ,asr=mixer, device="cpu") :
785
  wav = file_mic
786
  else:
787
  wav = file_upload
788
- sig, sr = torchaudio.load(wav)
 
 
 
 
 
789
  tensor_wav = sig.to(device)
790
  resampled = torchaudio.functional.resample( tensor_wav, sr, 16000)
791
  sentence = asr.treat_wav(resampled)
 
744
  mixer.modules.eval()
745
 
746
 
 
747
 
748
 
 
 
749
 
 
 
 
 
 
 
750
 
751
 
752
 
 
776
  wav = file_mic
777
  else:
778
  wav = file_upload
779
+ info = torchaudio.info(wav)
780
+ sr = info.sample_rate
781
+ sig = sb.dataio.dataio.read_audio(wav)
782
+ if len(sig.shape)>1 :
783
+ sig = torch.mean(sig, dim=1)
784
+ sig = torch.unsqueeze(sig, 0)
785
  tensor_wav = sig.to(device)
786
  resampled = torchaudio.functional.resample( tensor_wav, sr, 16000)
787
  sentence = asr.treat_wav(resampled)
results/non_semi_final_stac/env.log CHANGED
@@ -473,7 +473,7 @@ youtube-dl==2021.6.6
473
  zipp==3.6.0
474
  ==============================
475
  Git revision:
476
- 0fdcdc4
477
  ==============================
478
  CUDA version:
479
  11.7
 
473
  zipp==3.6.0
474
  ==============================
475
  Git revision:
476
+ bf7e6b5
477
  ==============================
478
  CUDA version:
479
  11.7
results/non_semi_final_stac/log.txt CHANGED
The diff for this file is too large to render. See raw diff