anonymoussubmitter222 committed on
Commit
bf7e6b5
1 Parent(s): 0fdcdc4

better description

Browse files
TunisianASR/results/14epoch_tunisian/1234/app.py CHANGED
@@ -356,7 +356,7 @@ english_asr_model = ASRCV(
356
  )
357
  english_asr_model.modules.to("cpu")
358
  english_asr_model.device="cpu"
359
- english_asr_model.checkpointer.recover_if_possible()
360
  run_opts["device"]="cpu"
361
  print("moving to tunisian model")
362
  asr_brain = ASR(
@@ -366,7 +366,7 @@ asr_brain = ASR(
366
  checkpointer=hparams["checkpointer"],
367
  )
368
  asr_brain.modules.to("cpu")
369
- asr_brain.checkpointer.recover_if_possible()
370
  asr_brain.modules.eval()
371
  english_asr_model.modules.eval()
372
  french_asr_model.mods.eval()
@@ -701,6 +701,33 @@ if hparams["language_modelling"]:
701
  beta=1, # tuned on a val set
702
  )
703
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
704
 
705
 
706
  run_opts["device"]="cpu"
@@ -713,7 +740,7 @@ mixer = Mixer(
713
  )
714
  mixer.tokenizer = label_encoder
715
  mixer.device = "cpu"
716
- mixer.checkpointer.recover_if_possible()
717
  mixer.modules.eval()
718
 
719
 
@@ -766,6 +793,8 @@ def treat_wav_file(file_mic,file_upload ,asr=mixer, device="cpu") :
766
 
767
  gr.Interface(
768
  fn=treat_wav_file,
 
 
769
  inputs=[gr.Audio(source="microphone", type='filepath', label = "record", optional = True),
770
  gr.Audio(source="upload", type='filepath', label="filein", optional=True)]
771
  ,outputs="text").launch()
 
356
  )
357
  english_asr_model.modules.to("cpu")
358
  english_asr_model.device="cpu"
359
+ english_asr_model.checkpointer.recover_if_possible(device="cpu")
360
  run_opts["device"]="cpu"
361
  print("moving to tunisian model")
362
  asr_brain = ASR(
 
366
  checkpointer=hparams["checkpointer"],
367
  )
368
  asr_brain.modules.to("cpu")
369
+ asr_brain.checkpointer.recover_if_possible(device="cpu")
370
  asr_brain.modules.eval()
371
  english_asr_model.modules.eval()
372
  french_asr_model.mods.eval()
 
701
  beta=1, # tuned on a val set
702
  )
703
 
704
+ description = """This is a SpeechBrain-based Automatic Speech Recognition (ASR) model for Tunisian Arabic. It outputs code-switched Tunisian transcriptions written in Arabic and Latin characters. It handles Tunisian Arabic, English and French outputs.
705
+ Code-switching is notoriously hard to handle for speech recognition models; the main errors you may encounter using this model are spelling/language identification errors due to code-switching. We may work on improving this in future models. However, if you do not need code-switching in your transcripts, you would be better off using the non-code-switched model, available in another space from the same author. (https://huggingface.co/spaces/SalahZa/Tunisian-Speech-Recognition)
706
+
707
+ Inference runs on CPU to keep this space free. This leads to quite long running times on long sequences. If, for your project or research, you want to transcribe long sequences, you would be better off using the model directly from its page; some instructions for inference on a test set are provided there. (https://huggingface.co/SalahZa/Code_Switched_Tunisian_Speech_Recognition). If you need help, feel free to send an email to: zaiemsalah@gmail.com
708
+
709
+ Authors :
710
+ * [Salah Zaiem](https://fr.linkedin.com/in/salah-zaiem)
711
+ * [Ahmed Amine Ben Abdallah](https://www.linkedin.com/in/aabenz/)
712
+ * [Ata Kaboudi](https://www.linkedin.com/in/ata-kaboudi-63365b1a8)
713
+ * [Amir Kanoun](https://tn.linkedin.com/in/ahmed-amir-kanoun)
714
+
715
+ More in-depth details and insights are available in a released preprint. Please find the paper [here](https://arxiv.org/abs/2309.11327).
716
+ If you use or refer to this model, please cite :
717
+
718
+ ```
719
+ @misc{abdallah2023leveraging,
720
+ title={Leveraging Data Collection and Unsupervised Learning for Code-switched Tunisian Arabic Automatic Speech Recognition},
721
+ author={Ahmed Amine Ben Abdallah and Ata Kabboudi and Amir Kanoun and Salah Zaiem},
722
+ year={2023},
723
+ eprint={2309.11327},
724
+ archivePrefix={arXiv},
725
+ primaryClass={eess.AS}
726
+ }
727
+ ```
728
+
729
+ """
730
+ title = "Code-Switched Tunisian Speech Recognition"
731
 
732
 
733
  run_opts["device"]="cpu"
 
740
  )
741
  mixer.tokenizer = label_encoder
742
  mixer.device = "cpu"
743
+ mixer.checkpointer.recover_if_possible(device="cpu")
744
  mixer.modules.eval()
745
 
746
 
 
793
 
794
  gr.Interface(
795
  fn=treat_wav_file,
796
+ title = title,
797
+ description = description,
798
  inputs=[gr.Audio(source="microphone", type='filepath', label = "record", optional = True),
799
  gr.Audio(source="upload", type='filepath', label="filein", optional=True)]
800
  ,outputs="text").launch()
TunisianASR/results/14epoch_tunisian/1234/env.log CHANGED
@@ -473,7 +473,7 @@ youtube-dl==2021.6.6
473
  zipp==3.6.0
474
  ==============================
475
  Git revision:
476
- be9098b
477
  ==============================
478
  CUDA version:
479
  11.7
 
473
  zipp==3.6.0
474
  ==============================
475
  Git revision:
476
+ 0fdcdc4
477
  ==============================
478
  CUDA version:
479
  11.7
TunisianASR/results/14epoch_tunisian/1234/log.txt CHANGED
@@ -848,3 +848,985 @@ zipp==3.6.0
848
  2023-09-25 11:13:04,509 - speechbrain.core - INFO - 314.4M trainable parameters in ASR
849
  2023-09-25 11:13:04,513 - speechbrain.utils.checkpoints - INFO - Loading a checkpoint from TunisianASR/results/14epoch_tunisian/1234/save/CKPT+2023-08-03+01-38-38+00
850
  2023-09-25 11:13:05,900 - speechbrain.utils.distributed - INFO - distributed_launch flag is disabled, this experiment will be executed without DDP.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
848
  2023-09-25 11:13:04,509 - speechbrain.core - INFO - 314.4M trainable parameters in ASR
849
  2023-09-25 11:13:04,513 - speechbrain.utils.checkpoints - INFO - Loading a checkpoint from TunisianASR/results/14epoch_tunisian/1234/save/CKPT+2023-08-03+01-38-38+00
850
  2023-09-25 11:13:05,900 - speechbrain.utils.distributed - INFO - distributed_launch flag is disabled, this experiment will be executed without DDP.
851
+ 2023-09-25 12:27:42,070 - speechbrain.core - INFO - Beginning experiment!
852
+ 2023-09-25 12:27:42,070 - speechbrain.core - INFO - Experiment folder: TunisianASR/results/14epoch_tunisian/1234/
853
+ 2023-09-25 12:27:42,557 - speechbrain.utils.superpowers - DEBUG - abkhazia==1.0
854
+ absl-py==0.11.0
855
+ aiofiles==23.2.1
856
+ aiohttp==3.8.0
857
+ aiosignal==1.2.0
858
+ alabaster==0.7.12
859
+ alembic==1.7.4
860
+ altair==4.2.0
861
+ altgraph==0.17
862
+ antlr4-python3-runtime==4.9.3
863
+ anyio==3.6.2
864
+ appdirs==1.4.4
865
+ argcomplete==1.12.2
866
+ argon2-cffi==20.1.0
867
+ arrow==1.2.3
868
+ asgiref==3.6.0
869
+ asteroid-filterbanks==0.4.0
870
+ astunparse==1.6.3
871
+ async-generator==1.10
872
+ async-timeout==4.0.0
873
+ attrdict==2.0.1
874
+ attrs==20.3.0
875
+ audeer==1.16.0
876
+ audformat==0.11.5
877
+ audinterface==0.7.0
878
+ audiofile==1.0.0
879
+ audiomentations==0.25.0
880
+ audioread==2.1.9
881
+ audobject==0.4.14
882
+ audresample==0.1.6
883
+ -e git+https://github.com/facebookresearch/WavAugment.git@54afcdb00ccc852c2f030f239f8532c9562b550e#egg=augment
884
+ autopage==0.4.0
885
+ Babel==2.9.0
886
+ backcall==0.2.0
887
+ backports.cached-property==1.0.2
888
+ beautifulsoup4==4.10.0
889
+ black==19.10b0
890
+ bleach==3.3.0
891
+ blessed==1.20.0
892
+ boto3==1.20.2
893
+ botocore==1.23.2
894
+ bpemb==0.3.4
895
+ braceexpand==0.1.7
896
+ cachetools==4.2.0
897
+ certifi @ file:///croot/certifi_1671487769961/work/certifi
898
+ cffi==1.14.3
899
+ cfgv==3.2.0
900
+ chardet==3.0.4
901
+ charset-normalizer==2.0.7
902
+ click==7.1.2
903
+ cliff==3.9.0
904
+ clldutils==3.5.4
905
+ cloudpickle==2.2.1
906
+ cmaes==0.8.2
907
+ cmake==3.18.4.post1
908
+ cmd2==2.2.0
909
+ colorama==0.4.4
910
+ colorlog==4.6.2
911
+ configparser==5.1.0
912
+ conllu==4.5.3
913
+ croniter==1.3.15
914
+ cryptography==38.0.4
915
+ csrgraph==0.1.28
916
+ csvw==1.8.1
917
+ cycler==0.10.0
918
+ Cython==0.29.21
919
+ dataclasses==0.6
920
+ dateutils==0.6.12
921
+ decorator==4.4.2
922
+ deepdiff==6.3.0
923
+ deepspeech==0.9.1
924
+ defusedxml==0.7.1
925
+ Deprecated==1.2.14
926
+ dill==0.3.3
927
+ Distance==0.1.3
928
+ distlib==0.3.1
929
+ Django==3.2.16
930
+ django-auditlog==2.2.1
931
+ django-filter==22.1
932
+ django-js-asset==1.2.2
933
+ django-mptt==0.14.0
934
+ djangorestframework==3.14.0
935
+ docker-pycreds==0.4.0
936
+ docopt==0.6.2
937
+ docutils==0.16
938
+ drf-excel==2.2.0
939
+ drf-flex-fields==1.0.0
940
+ drf-renderer-xlsx==0.4.1
941
+ easyocr==1.2.1
942
+ editdistance==0.6.0
943
+ einops==0.3.2
944
+ emoji==2.2.0
945
+ entrypoints==0.3
946
+ et-xmlfile==1.1.0
947
+ exceptiongroup==1.1.0
948
+ farasapy==0.0.14
949
+ fastapi==0.98.0
950
+ fastjsonschema==2.17.1
951
+ fasttext==0.9.2
952
+ ffmpeg-python==0.2.0
953
+ ffmpy==0.3.0
954
+ filelock==3.0.12
955
+ flair==0.12.2
956
+ flake8==3.7.9
957
+ flatbuffers==1.12
958
+ frozendict==2.0.7
959
+ frozenlist==1.2.0
960
+ fsspec==2021.11.0
961
+ ftfy==6.1.1
962
+ future==0.18.2
963
+ g2p-en==2.1.0
964
+ gast==0.3.3
965
+ gdown==4.4.0
966
+ gdrive==0.1.5
967
+ gensim==4.0.1
968
+ gitdb==4.0.9
969
+ GitPython==3.1.24
970
+ google-api-core==2.11.1
971
+ google-api-python-client==2.43.0
972
+ google-auth==1.24.0
973
+ google-auth-httplib2==0.1.0
974
+ google-auth-oauthlib==0.5.3
975
+ google-pasta==0.2.0
976
+ googleapis-common-protos==1.59.1
977
+ gradio==3.44.4
978
+ gradio-client==0.5.1
979
+ greenlet==1.1.2
980
+ grpcio==1.32.0
981
+ h11==0.14.0
982
+ h5features==1.3.2
983
+ h5py==2.10.0
984
+ hierarchy==0.4.0
985
+ hmmlearn==0.2.8
986
+ htk-io==0.5
987
+ httpcore==0.16.3
988
+ httplib2==0.22.0
989
+ httpx==0.23.3
990
+ huggingface-hub==0.15.1
991
+ hydra-colorlog==0.1.4
992
+ hydra-core==1.3.2
993
+ hyperopt==0.2.7
994
+ HyperPyYAML==1.1.0
995
+ hypothesis==6.61.2
996
+ identify==1.5.10
997
+ idna==2.10
998
+ imageio==2.9.0
999
+ imagesize==1.2.0
1000
+ importlib-metadata==4.8.1
1001
+ importlib-resources==5.2.2
1002
+ inflect==5.3.0
1003
+ inquirer==3.1.3
1004
+ ipadic==1.0.0
1005
+ ipyevents==2.0.1
1006
+ ipykernel==5.3.4
1007
+ ipython==7.19.0
1008
+ ipython-genutils==0.2.0
1009
+ ipywebrtc==0.6.0
1010
+ ipywidgets==7.6.3
1011
+ iso-639==0.4.5
1012
+ isodate==0.6.0
1013
+ isort==4.3.21
1014
+ itsdangerous==2.1.2
1015
+ Janome==0.5.0
1016
+ jedi==0.17.2
1017
+ jeepney==0.8.0
1018
+ jieba==0.42.1
1019
+ Jinja2==3.0.3
1020
+ jiwer==2.2.0
1021
+ jmespath==0.10.0
1022
+ joblib==0.17.0
1023
+ jsonschema==3.2.0
1024
+ julius==0.2.7
1025
+ jupyter-client==6.1.7
1026
+ jupyter-core==4.7.0
1027
+ jupyterlab-pygments==0.1.2
1028
+ jupyterlab-widgets==1.0.0
1029
+ kaitaistruct==0.9
1030
+ kaldi-io==0.9.4
1031
+ kaldi-python-io==1.2.2
1032
+ kaldiio==2.17.2
1033
+ kenlm @ https://github.com/kpu/kenlm/archive/master.zip
1034
+ Keras-Preprocessing==1.1.2
1035
+ kiwisolver==1.3.1
1036
+ lang-trans==0.6.0
1037
+ langdetect==1.0.9
1038
+ latexcodec==2.0.1
1039
+ ldap3==2.9.1
1040
+ librosa==0.9.0
1041
+ lightning-cloud==0.5.37
1042
+ lightning-utilities==0.8.0
1043
+ linkify-it-py==1.0.3
1044
+ lit==16.0.6
1045
+ llvmlite==0.35.0
1046
+ lxml==4.9.0
1047
+ Mako==1.1.5
1048
+ Markdown==3.3.3
1049
+ markdown-it-py==3.0.0
1050
+ MarkupSafe==2.1.3
1051
+ marshmallow==3.14.0
1052
+ matplotlib==3.3.3
1053
+ mccabe==0.6.1
1054
+ mcd==0.4
1055
+ mdit-py-plugins==0.3.3
1056
+ mdurl==0.1.2
1057
+ mecab-python3==1.0.3
1058
+ megatron-lm==2.2.0
1059
+ metrics==0.3.3
1060
+ mido==1.2.10
1061
+ mistune==0.8.4
1062
+ more-itertools==8.6.0
1063
+ mpld3==0.3
1064
+ mpmath==1.2.1
1065
+ multidict==5.2.0
1066
+ multiprocess==0.70.11.1
1067
+ nbclient==0.5.3
1068
+ nbconvert==5.6.1
1069
+ nbformat==5.9.0
1070
+ NEMO==4.3.2
1071
+ nemo-toolkit==1.4.0
1072
+ nest-asyncio==1.5.1
1073
+ networkx==2.8.8
1074
+ nltk==3.2.4
1075
+ nodeenv==1.5.0
1076
+ normalize==2.0.2
1077
+ notebook==6.3.0
1078
+ numba==0.52.0
1079
+ numpy==1.19.4
1080
+ nvidia-cublas-cu11==11.10.3.66
1081
+ nvidia-cuda-cupti-cu11==11.7.101
1082
+ nvidia-cuda-nvrtc-cu11==11.7.99
1083
+ nvidia-cuda-runtime-cu11==11.7.99
1084
+ nvidia-cudnn-cu11==8.5.0.96
1085
+ nvidia-cufft-cu11==10.9.0.58
1086
+ nvidia-curand-cu11==10.2.10.91
1087
+ nvidia-cusolver-cu11==11.4.0.1
1088
+ nvidia-cusparse-cu11==11.7.4.91
1089
+ nvidia-nccl-cu11==2.14.3
1090
+ nvidia-nvtx-cu11==11.7.91
1091
+ oauthlib==3.1.0
1092
+ omegaconf==2.3.0
1093
+ onnx==1.10.2
1094
+ OpenCC==1.1.2
1095
+ opencv-python==4.4.0.46
1096
+ openpyxl==3.0.9
1097
+ opensmile==2.2.0
1098
+ opt-einsum==3.3.0
1099
+ optuna==2.10.0
1100
+ ordered-set==4.1.0
1101
+ orjson==3.8.4
1102
+ oyaml==1.0
1103
+ packaging==22.0
1104
+ pandas==1.2.5
1105
+ pandocfilters==1.4.3
1106
+ pangu==4.0.6.1
1107
+ parameterized==0.8.1
1108
+ parso==0.7.1
1109
+ pathlib2==2.3.7.post1
1110
+ pathspec==0.5.5
1111
+ pathtools==0.1.2
1112
+ pbr==5.6.0
1113
+ pefile==2019.4.18
1114
+ pescador==2.1.0
1115
+ pesq==0.0.3
1116
+ pexpect==4.8.0
1117
+ phonemizer==2.2.1
1118
+ pickleshare==0.7.5
1119
+ Pillow==9.3.0
1120
+ pip-api==0.0.23
1121
+ pipreqs==0.4.11
1122
+ pluggy==0.13.1
1123
+ pooch==1.3.0
1124
+ portalocker==2.3.2
1125
+ pptree==3.1
1126
+ pre-commit==2.9.0
1127
+ preprocessing==0.1.13
1128
+ pretty-midi==0.2.9
1129
+ prettytable==2.2.1
1130
+ primePy==1.3
1131
+ progressbar2==3.53.1
1132
+ prometheus-client==0.10.1
1133
+ promise==2.3
1134
+ prompt-toolkit==3.0.8
1135
+ protobuf==3.20.3
1136
+ psutil==5.6.6
1137
+ ptyprocess==0.6.0
1138
+ py==1.9.0
1139
+ py-espeak-ng==0.1.8
1140
+ py4j==0.10.9.7
1141
+ pyannote.audio==2.1.1
1142
+ pyannote.core==4.5
1143
+ pyannote.database==4.1.3
1144
+ pyannote.metrics==3.2.1
1145
+ pyannote.pipeline==2.3
1146
+ pyannotebook==0.1.0.dev0
1147
+ PyArabic==0.6.15
1148
+ pyarrow==3.0.0
1149
+ pyasn1==0.4.8
1150
+ pyasn1-modules==0.2.8
1151
+ pybind11==2.8.1
1152
+ pybtex==0.24.0
1153
+ pybtex-docutils==1.0.1
1154
+ pycodestyle==2.5.0
1155
+ pycparser==2.20
1156
+ pycryptodome==3.16.0
1157
+ pyctcdecode==0.4.0
1158
+ pydantic==1.10.4
1159
+ pyDeprecate==0.3.1
1160
+ pydub==0.25.1
1161
+ pyflakes==2.1.1
1162
+ Pygments==2.15.1
1163
+ pygtrie==2.5.0
1164
+ PyJWT==2.7.0
1165
+ pymodbus==2.5.3
1166
+ pyparsing==2.4.7
1167
+ pyperclip==1.8.2
1168
+ pypinyin==0.43.0
1169
+ pyrsistent==0.17.3
1170
+ pyserial==3.5
1171
+ PySocks==1.7.1
1172
+ pystoi==0.3.3
1173
+ pytest==5.4.1
1174
+ pytest-runner==5.3.1
1175
+ python-bidi==0.4.2
1176
+ python-crfsuite==0.9.7
1177
+ python-dateutil==2.8.2
1178
+ python-editor==1.0.4
1179
+ python-Levenshtein==0.12.2
1180
+ python-multipart==0.0.5
1181
+ python-utils==2.4.0
1182
+ pytorch-lightning==1.6.5
1183
+ pytorch-metric-learning==1.7.3
1184
+ pytorch-revgrad==0.2.0
1185
+ pytube==11.0.1
1186
+ pytz==2022.6
1187
+ PyWavelets==1.1.1
1188
+ PyYAML==6.0
1189
+ pyzmq==20.0.0
1190
+ rapidfuzz==1.8.2
1191
+ readchar==4.0.5
1192
+ regex==2020.11.13
1193
+ requests==2.28.1
1194
+ requests-oauthlib==1.3.0
1195
+ resampy==0.2.2
1196
+ rfc3986==1.4.0
1197
+ rich==13.4.2
1198
+ richenum==1.3.1
1199
+ rsa==4.7
1200
+ ruamel.yaml==0.17.21
1201
+ ruamel.yaml.clib==0.2.7
1202
+ s3m==1.1.0
1203
+ s3transfer==0.5.0
1204
+ sacrebleu==2.0.0
1205
+ sacremoses==0.0.44
1206
+ safetensors==0.3.1
1207
+ scikit-image==0.18.1
1208
+ scikit-learn==0.23.2
1209
+ scipy==1.5.4
1210
+ -e git+https://github.com/sanghack81/SDCIT@00d060dde733fde9345154a494f81e97fb395ca7#egg=SDCIT
1211
+ seaborn==0.11.1
1212
+ SecretStorage==3.3.3
1213
+ segments==2.1.3
1214
+ segtok==1.5.11
1215
+ semantic-version==2.10.0
1216
+ semver==2.13.0
1217
+ Send2Trash==1.5.0
1218
+ sentencepiece==0.1.99
1219
+ sentry-sdk==1.4.3
1220
+ shellingham==1.4.0
1221
+ shortuuid==1.0.7
1222
+ SIDEKIT==1.3.8.5.2
1223
+ simplejson==3.17.5
1224
+ singledispatchmethod==1.0
1225
+ six==1.15.0
1226
+ smart-open==5.0.0
1227
+ smmap==5.0.0
1228
+ sniffio==1.3.0
1229
+ snowballstemmer==2.0.0
1230
+ sortedcollections==2.1.0
1231
+ sortedcontainers==2.4.0
1232
+ sounddevice==0.4.5
1233
+ SoundFile==0.10.3.post1
1234
+ soupsieve==2.3
1235
+ sox==1.4.1
1236
+ sparsemax==0.1.9
1237
+ speechbrain==0.5.14
1238
+ sphfile==1.0.3
1239
+ Sphinx==3.3.1
1240
+ sphinx-rtd-theme==0.2.4
1241
+ sphinxcontrib-applehelp==1.0.2
1242
+ sphinxcontrib-bibtex==2.4.1
1243
+ sphinxcontrib-devhelp==1.0.2
1244
+ sphinxcontrib-htmlhelp==1.0.3
1245
+ sphinxcontrib-jsmath==1.0.1
1246
+ sphinxcontrib-qthelp==1.0.3
1247
+ sphinxcontrib-serializinghtml==1.1.4
1248
+ SQLAlchemy==1.4.25
1249
+ sqlitedict==2.1.0
1250
+ sqlparse==0.4.2
1251
+ stanza==1.4.2
1252
+ starlette==0.27.0
1253
+ starsessions==1.3.0
1254
+ stevedore==3.4.0
1255
+ subprocess32==3.5.4
1256
+ sympy==1.9
1257
+ tabulate==0.8.9
1258
+ tensorboard==2.4.0
1259
+ tensorboard-plugin-wit==1.7.0
1260
+ tensorboardX==2.6.1
1261
+ tensorflow==2.4.0
1262
+ tensorflow-estimator==2.4.0
1263
+ termcolor==1.1.0
1264
+ terminado==0.9.4
1265
+ testpath==0.4.4
1266
+ threadpoolctl==2.1.0
1267
+ tifffile==2020.12.8
1268
+ tikzplotlib==0.9.8
1269
+ tinycss2==1.2.1
1270
+ tkseem==0.0.3
1271
+ tokenizers==0.13.3
1272
+ toml==0.10.2
1273
+ toolz==0.12.0
1274
+ torch==1.13.1
1275
+ torch-audiomentations==0.11.0
1276
+ torch-pitch-shift==1.2.4
1277
+ torch-stft==0.1.4
1278
+ torchaudio==0.13.1
1279
+ torchmetrics==0.11.4
1280
+ torchvision==0.14.1
1281
+ tornado==6.1
1282
+ tqdm==4.61.1
1283
+ trackrip==1.2.1
1284
+ traitlets==5.9.0
1285
+ transformer-smaller-training-vocab==0.3.1
1286
+ transformers==4.30.2
1287
+ triton==2.0.0
1288
+ typed-ast==1.4.1
1289
+ typer==0.4.0
1290
+ typing-extensions==4.4.0
1291
+ uc-micro-py==1.0.1
1292
+ Unidecode==1.3.2
1293
+ uritemplate==3.0.1
1294
+ urllib3==1.26.2
1295
+ uvicorn==0.20.0
1296
+ versioneer==0.28
1297
+ virtualenv==20.2.1
1298
+ wandb==0.12.6
1299
+ wcwidth==0.2.5
1300
+ webdataset==0.1.62
1301
+ webencodings==0.5.1
1302
+ websocket-client==1.6.1
1303
+ websockets==10.4
1304
+ Werkzeug==1.0.1
1305
+ wget==3.2
1306
+ widgetsnbextension==3.5.1
1307
+ Wikipedia-API==0.6.0
1308
+ wordninja==2.0.0
1309
+ wrapt==1.12.1
1310
+ xmltodict==0.13.0
1311
+ xxhash==2.0.0
1312
+ yamllint==1.23.0
1313
+ yarg==0.1.9
1314
+ yarl==1.7.2
1315
+ yaspin==2.1.0
1316
+ youtokentome==1.0.6
1317
+ youtube-dl==2021.6.6
1318
+ zipp==3.6.0
1319
+
1320
+
1321
+ 2023-09-25 12:27:42,586 - speechbrain.utils.superpowers - DEBUG - 0fdcdc4
1322
+
1323
+
1324
+ 2023-09-25 12:27:42,617 - speechbrain.pretrained.fetching - INFO - Fetch hyperparams.yaml: Using existing file/symlink in pretrained_models/asr-wav2vec2-commonvoice-fr/hyperparams.yaml.
1325
+ 2023-09-25 12:27:42,617 - speechbrain.pretrained.fetching - INFO - Fetch custom.py: Linking to local file in /home/salah/Code-Switched-Tunisian-SpeechToText/asr-wav2vec2-commonvoice-fr/custom.py.
1326
+ 2023-09-25 12:27:45,390 - speechbrain.lobes.models.huggingface_wav2vec - WARNING - speechbrain.lobes.models.huggingface_wav2vec - wav2vec 2.0 is frozen.
1327
+ 2023-09-25 12:27:45,393 - speechbrain.utils.parameter_transfer - DEBUG - Collecting files (or symlinks) for pretraining in pretrained_models/asr-wav2vec2-commonvoice-fr.
1328
+ 2023-09-25 12:27:45,394 - speechbrain.pretrained.fetching - INFO - Fetch wav2vec2.ckpt: Using existing file/symlink in pretrained_models/asr-wav2vec2-commonvoice-fr/wav2vec2.ckpt.
1329
+ 2023-09-25 12:27:45,394 - speechbrain.pretrained.fetching - INFO - Fetch asr.ckpt: Using existing file/symlink in pretrained_models/asr-wav2vec2-commonvoice-fr/asr.ckpt.
1330
+ 2023-09-25 12:27:45,395 - speechbrain.pretrained.fetching - INFO - Fetch tokenizer.ckpt: Using existing file/symlink in pretrained_models/asr-wav2vec2-commonvoice-fr/tokenizer.ckpt.
1331
+ 2023-09-25 12:27:45,395 - speechbrain.utils.parameter_transfer - INFO - Loading pretrained files for: wav2vec2, asr, tokenizer
1332
+ 2023-09-25 12:27:49,225 - speechbrain.lobes.models.huggingface_wav2vec - WARNING - speechbrain.lobes.models.huggingface_wav2vec - wav2vec 2.0 feature extractor is frozen.
1333
+ 2023-09-25 12:27:49,226 - speechbrain.core - INFO - Info: auto_mix_prec arg from hparam file is used
1334
+ 2023-09-25 12:27:49,226 - speechbrain.core - INFO - Info: ckpt_interval_minutes arg from hparam file is used
1335
+ 2023-09-25 12:27:49,229 - speechbrain.core - INFO - 314.4M trainable parameters in ASRCV
1336
+ 2023-09-25 12:27:49,232 - speechbrain.utils.checkpoints - INFO - Loading a checkpoint from EnglishCV/results/wav2vec2_ctc_en/1234/save/CKPT+2023-09-06+22-56-31+00
1337
+ 2023-09-25 12:27:50,282 - speechbrain.core - INFO - Info: auto_mix_prec arg from hparam file is used
1338
+ 2023-09-25 12:27:50,282 - speechbrain.core - INFO - Info: ckpt_interval_minutes arg from hparam file is used
1339
+ 2023-09-25 12:27:50,286 - speechbrain.core - INFO - 314.4M trainable parameters in ASR
1340
+ 2023-09-25 12:27:50,290 - speechbrain.utils.checkpoints - INFO - Loading a checkpoint from TunisianASR/results/14epoch_tunisian/1234/save/CKPT+2023-08-03+01-38-38+00
1341
+ 2023-09-25 12:27:51,290 - speechbrain.utils.distributed - INFO - distributed_launch flag is disabled, this experiment will be executed without DDP.
1342
+ 2023-09-25 12:30:08,036 - speechbrain.core - INFO - Beginning experiment!
1343
+ 2023-09-25 12:30:08,037 - speechbrain.core - INFO - Experiment folder: TunisianASR/results/14epoch_tunisian/1234/
1344
+ 2023-09-25 12:30:08,556 - speechbrain.utils.superpowers - DEBUG - abkhazia==1.0
1345
+ absl-py==0.11.0
1346
+ aiofiles==23.2.1
1347
+ aiohttp==3.8.0
1348
+ aiosignal==1.2.0
1349
+ alabaster==0.7.12
1350
+ alembic==1.7.4
1351
+ altair==4.2.0
1352
+ altgraph==0.17
1353
+ antlr4-python3-runtime==4.9.3
1354
+ anyio==3.6.2
1355
+ appdirs==1.4.4
1356
+ argcomplete==1.12.2
1357
+ argon2-cffi==20.1.0
1358
+ arrow==1.2.3
1359
+ asgiref==3.6.0
1360
+ asteroid-filterbanks==0.4.0
1361
+ astunparse==1.6.3
1362
+ async-generator==1.10
1363
+ async-timeout==4.0.0
1364
+ attrdict==2.0.1
1365
+ attrs==20.3.0
1366
+ audeer==1.16.0
1367
+ audformat==0.11.5
1368
+ audinterface==0.7.0
1369
+ audiofile==1.0.0
1370
+ audiomentations==0.25.0
1371
+ audioread==2.1.9
1372
+ audobject==0.4.14
1373
+ audresample==0.1.6
1374
+ -e git+https://github.com/facebookresearch/WavAugment.git@54afcdb00ccc852c2f030f239f8532c9562b550e#egg=augment
1375
+ autopage==0.4.0
1376
+ Babel==2.9.0
1377
+ backcall==0.2.0
1378
+ backports.cached-property==1.0.2
1379
+ beautifulsoup4==4.10.0
1380
+ black==19.10b0
1381
+ bleach==3.3.0
1382
+ blessed==1.20.0
1383
+ boto3==1.20.2
1384
+ botocore==1.23.2
1385
+ bpemb==0.3.4
1386
+ braceexpand==0.1.7
1387
+ cachetools==4.2.0
1388
+ certifi @ file:///croot/certifi_1671487769961/work/certifi
1389
+ cffi==1.14.3
1390
+ cfgv==3.2.0
1391
+ chardet==3.0.4
1392
+ charset-normalizer==2.0.7
1393
+ click==7.1.2
1394
+ cliff==3.9.0
1395
+ clldutils==3.5.4
1396
+ cloudpickle==2.2.1
1397
+ cmaes==0.8.2
1398
+ cmake==3.18.4.post1
1399
+ cmd2==2.2.0
1400
+ colorama==0.4.4
1401
+ colorlog==4.6.2
1402
+ configparser==5.1.0
1403
+ conllu==4.5.3
1404
+ croniter==1.3.15
1405
+ cryptography==38.0.4
1406
+ csrgraph==0.1.28
1407
+ csvw==1.8.1
1408
+ cycler==0.10.0
1409
+ Cython==0.29.21
1410
+ dataclasses==0.6
1411
+ dateutils==0.6.12
1412
+ decorator==4.4.2
1413
+ deepdiff==6.3.0
1414
+ deepspeech==0.9.1
1415
+ defusedxml==0.7.1
1416
+ Deprecated==1.2.14
1417
+ dill==0.3.3
1418
+ Distance==0.1.3
1419
+ distlib==0.3.1
1420
+ Django==3.2.16
1421
+ django-auditlog==2.2.1
1422
+ django-filter==22.1
1423
+ django-js-asset==1.2.2
1424
+ django-mptt==0.14.0
1425
+ djangorestframework==3.14.0
1426
+ docker-pycreds==0.4.0
1427
+ docopt==0.6.2
1428
+ docutils==0.16
1429
+ drf-excel==2.2.0
1430
+ drf-flex-fields==1.0.0
1431
+ drf-renderer-xlsx==0.4.1
1432
+ easyocr==1.2.1
1433
+ editdistance==0.6.0
1434
+ einops==0.3.2
1435
+ emoji==2.2.0
1436
+ entrypoints==0.3
1437
+ et-xmlfile==1.1.0
1438
+ exceptiongroup==1.1.0
1439
+ farasapy==0.0.14
1440
+ fastapi==0.98.0
1441
+ fastjsonschema==2.17.1
1442
+ fasttext==0.9.2
1443
+ ffmpeg-python==0.2.0
1444
+ ffmpy==0.3.0
1445
+ filelock==3.0.12
1446
+ flair==0.12.2
1447
+ flake8==3.7.9
1448
+ flatbuffers==1.12
1449
+ frozendict==2.0.7
1450
+ frozenlist==1.2.0
1451
+ fsspec==2021.11.0
1452
+ ftfy==6.1.1
1453
+ future==0.18.2
1454
+ g2p-en==2.1.0
1455
+ gast==0.3.3
1456
+ gdown==4.4.0
1457
+ gdrive==0.1.5
1458
+ gensim==4.0.1
1459
+ gitdb==4.0.9
1460
+ GitPython==3.1.24
1461
+ google-api-core==2.11.1
1462
+ google-api-python-client==2.43.0
1463
+ google-auth==1.24.0
1464
+ google-auth-httplib2==0.1.0
1465
+ google-auth-oauthlib==0.5.3
1466
+ google-pasta==0.2.0
1467
+ googleapis-common-protos==1.59.1
1468
+ gradio==3.44.4
1469
+ gradio-client==0.5.1
1470
+ greenlet==1.1.2
1471
+ grpcio==1.32.0
1472
+ h11==0.14.0
1473
+ h5features==1.3.2
1474
+ h5py==2.10.0
1475
+ hierarchy==0.4.0
1476
+ hmmlearn==0.2.8
1477
+ htk-io==0.5
1478
+ httpcore==0.16.3
1479
+ httplib2==0.22.0
1480
+ httpx==0.23.3
1481
+ huggingface-hub==0.15.1
1482
+ hydra-colorlog==0.1.4
1483
+ hydra-core==1.3.2
1484
+ hyperopt==0.2.7
1485
+ HyperPyYAML==1.1.0
1486
+ hypothesis==6.61.2
1487
+ identify==1.5.10
1488
+ idna==2.10
1489
+ imageio==2.9.0
1490
+ imagesize==1.2.0
1491
+ importlib-metadata==4.8.1
1492
+ importlib-resources==5.2.2
1493
+ inflect==5.3.0
1494
+ inquirer==3.1.3
1495
+ ipadic==1.0.0
1496
+ ipyevents==2.0.1
1497
+ ipykernel==5.3.4
1498
+ ipython==7.19.0
1499
+ ipython-genutils==0.2.0
1500
+ ipywebrtc==0.6.0
1501
+ ipywidgets==7.6.3
1502
+ iso-639==0.4.5
1503
+ isodate==0.6.0
1504
+ isort==4.3.21
1505
+ itsdangerous==2.1.2
1506
+ Janome==0.5.0
1507
+ jedi==0.17.2
1508
+ jeepney==0.8.0
1509
+ jieba==0.42.1
1510
+ Jinja2==3.0.3
1511
+ jiwer==2.2.0
1512
+ jmespath==0.10.0
1513
+ joblib==0.17.0
1514
+ jsonschema==3.2.0
1515
+ julius==0.2.7
1516
+ jupyter-client==6.1.7
1517
+ jupyter-core==4.7.0
1518
+ jupyterlab-pygments==0.1.2
1519
+ jupyterlab-widgets==1.0.0
1520
+ kaitaistruct==0.9
1521
+ kaldi-io==0.9.4
1522
+ kaldi-python-io==1.2.2
1523
+ kaldiio==2.17.2
1524
+ kenlm @ https://github.com/kpu/kenlm/archive/master.zip
1525
+ Keras-Preprocessing==1.1.2
1526
+ kiwisolver==1.3.1
1527
+ lang-trans==0.6.0
1528
+ langdetect==1.0.9
1529
+ latexcodec==2.0.1
1530
+ ldap3==2.9.1
1531
+ librosa==0.9.0
1532
+ lightning-cloud==0.5.37
1533
+ lightning-utilities==0.8.0
1534
+ linkify-it-py==1.0.3
1535
+ lit==16.0.6
1536
+ llvmlite==0.35.0
1537
+ lxml==4.9.0
1538
+ Mako==1.1.5
1539
+ Markdown==3.3.3
1540
+ markdown-it-py==3.0.0
1541
+ MarkupSafe==2.1.3
1542
+ marshmallow==3.14.0
1543
+ matplotlib==3.3.3
1544
+ mccabe==0.6.1
1545
+ mcd==0.4
1546
+ mdit-py-plugins==0.3.3
1547
+ mdurl==0.1.2
1548
+ mecab-python3==1.0.3
1549
+ megatron-lm==2.2.0
1550
+ metrics==0.3.3
1551
+ mido==1.2.10
1552
+ mistune==0.8.4
1553
+ more-itertools==8.6.0
1554
+ mpld3==0.3
1555
+ mpmath==1.2.1
1556
+ multidict==5.2.0
1557
+ multiprocess==0.70.11.1
1558
+ nbclient==0.5.3
1559
+ nbconvert==5.6.1
1560
+ nbformat==5.9.0
1561
+ NEMO==4.3.2
1562
+ nemo-toolkit==1.4.0
1563
+ nest-asyncio==1.5.1
1564
+ networkx==2.8.8
1565
+ nltk==3.2.4
1566
+ nodeenv==1.5.0
1567
+ normalize==2.0.2
1568
+ notebook==6.3.0
1569
+ numba==0.52.0
1570
+ numpy==1.19.4
1571
+ nvidia-cublas-cu11==11.10.3.66
1572
+ nvidia-cuda-cupti-cu11==11.7.101
1573
+ nvidia-cuda-nvrtc-cu11==11.7.99
1574
+ nvidia-cuda-runtime-cu11==11.7.99
1575
+ nvidia-cudnn-cu11==8.5.0.96
1576
+ nvidia-cufft-cu11==10.9.0.58
1577
+ nvidia-curand-cu11==10.2.10.91
1578
+ nvidia-cusolver-cu11==11.4.0.1
1579
+ nvidia-cusparse-cu11==11.7.4.91
1580
+ nvidia-nccl-cu11==2.14.3
1581
+ nvidia-nvtx-cu11==11.7.91
1582
+ oauthlib==3.1.0
1583
+ omegaconf==2.3.0
1584
+ onnx==1.10.2
1585
+ OpenCC==1.1.2
1586
+ opencv-python==4.4.0.46
1587
+ openpyxl==3.0.9
1588
+ opensmile==2.2.0
1589
+ opt-einsum==3.3.0
1590
+ optuna==2.10.0
1591
+ ordered-set==4.1.0
1592
+ orjson==3.8.4
1593
+ oyaml==1.0
1594
+ packaging==22.0
1595
+ pandas==1.2.5
1596
+ pandocfilters==1.4.3
1597
+ pangu==4.0.6.1
1598
+ parameterized==0.8.1
1599
+ parso==0.7.1
1600
+ pathlib2==2.3.7.post1
1601
+ pathspec==0.5.5
1602
+ pathtools==0.1.2
1603
+ pbr==5.6.0
1604
+ pefile==2019.4.18
1605
+ pescador==2.1.0
1606
+ pesq==0.0.3
1607
+ pexpect==4.8.0
1608
+ phonemizer==2.2.1
1609
+ pickleshare==0.7.5
1610
+ Pillow==9.3.0
1611
+ pip-api==0.0.23
1612
+ pipreqs==0.4.11
1613
+ pluggy==0.13.1
1614
+ pooch==1.3.0
1615
+ portalocker==2.3.2
1616
+ pptree==3.1
1617
+ pre-commit==2.9.0
1618
+ preprocessing==0.1.13
1619
+ pretty-midi==0.2.9
1620
+ prettytable==2.2.1
1621
+ primePy==1.3
1622
+ progressbar2==3.53.1
1623
+ prometheus-client==0.10.1
1624
+ promise==2.3
1625
+ prompt-toolkit==3.0.8
1626
+ protobuf==3.20.3
1627
+ psutil==5.6.6
1628
+ ptyprocess==0.6.0
1629
+ py==1.9.0
1630
+ py-espeak-ng==0.1.8
1631
+ py4j==0.10.9.7
1632
+ pyannote.audio==2.1.1
1633
+ pyannote.core==4.5
1634
+ pyannote.database==4.1.3
1635
+ pyannote.metrics==3.2.1
1636
+ pyannote.pipeline==2.3
1637
+ pyannotebook==0.1.0.dev0
1638
+ PyArabic==0.6.15
1639
+ pyarrow==3.0.0
1640
+ pyasn1==0.4.8
1641
+ pyasn1-modules==0.2.8
1642
+ pybind11==2.8.1
1643
+ pybtex==0.24.0
1644
+ pybtex-docutils==1.0.1
1645
+ pycodestyle==2.5.0
1646
+ pycparser==2.20
1647
+ pycryptodome==3.16.0
1648
+ pyctcdecode==0.4.0
1649
+ pydantic==1.10.4
1650
+ pyDeprecate==0.3.1
1651
+ pydub==0.25.1
1652
+ pyflakes==2.1.1
1653
+ Pygments==2.15.1
1654
+ pygtrie==2.5.0
1655
+ PyJWT==2.7.0
1656
+ pymodbus==2.5.3
1657
+ pyparsing==2.4.7
1658
+ pyperclip==1.8.2
1659
+ pypinyin==0.43.0
1660
+ pyrsistent==0.17.3
1661
+ pyserial==3.5
1662
+ PySocks==1.7.1
1663
+ pystoi==0.3.3
1664
+ pytest==5.4.1
1665
+ pytest-runner==5.3.1
1666
+ python-bidi==0.4.2
1667
+ python-crfsuite==0.9.7
1668
+ python-dateutil==2.8.2
1669
+ python-editor==1.0.4
1670
+ python-Levenshtein==0.12.2
1671
+ python-multipart==0.0.5
1672
+ python-utils==2.4.0
1673
+ pytorch-lightning==1.6.5
1674
+ pytorch-metric-learning==1.7.3
1675
+ pytorch-revgrad==0.2.0
1676
+ pytube==11.0.1
1677
+ pytz==2022.6
1678
+ PyWavelets==1.1.1
1679
+ PyYAML==6.0
1680
+ pyzmq==20.0.0
1681
+ rapidfuzz==1.8.2
1682
+ readchar==4.0.5
1683
+ regex==2020.11.13
1684
+ requests==2.28.1
1685
+ requests-oauthlib==1.3.0
1686
+ resampy==0.2.2
1687
+ rfc3986==1.4.0
1688
+ rich==13.4.2
1689
+ richenum==1.3.1
1690
+ rsa==4.7
1691
+ ruamel.yaml==0.17.21
1692
+ ruamel.yaml.clib==0.2.7
1693
+ s3m==1.1.0
1694
+ s3transfer==0.5.0
1695
+ sacrebleu==2.0.0
1696
+ sacremoses==0.0.44
1697
+ safetensors==0.3.1
1698
+ scikit-image==0.18.1
1699
+ scikit-learn==0.23.2
1700
+ scipy==1.5.4
1701
+ -e git+https://github.com/sanghack81/SDCIT@00d060dde733fde9345154a494f81e97fb395ca7#egg=SDCIT
1702
+ seaborn==0.11.1
1703
+ SecretStorage==3.3.3
1704
+ segments==2.1.3
1705
+ segtok==1.5.11
1706
+ semantic-version==2.10.0
1707
+ semver==2.13.0
1708
+ Send2Trash==1.5.0
1709
+ sentencepiece==0.1.99
1710
+ sentry-sdk==1.4.3
1711
+ shellingham==1.4.0
1712
+ shortuuid==1.0.7
1713
+ SIDEKIT==1.3.8.5.2
1714
+ simplejson==3.17.5
1715
+ singledispatchmethod==1.0
1716
+ six==1.15.0
1717
+ smart-open==5.0.0
1718
+ smmap==5.0.0
1719
+ sniffio==1.3.0
1720
+ snowballstemmer==2.0.0
1721
+ sortedcollections==2.1.0
1722
+ sortedcontainers==2.4.0
1723
+ sounddevice==0.4.5
1724
+ SoundFile==0.10.3.post1
1725
+ soupsieve==2.3
1726
+ sox==1.4.1
1727
+ sparsemax==0.1.9
1728
+ speechbrain==0.5.14
1729
+ sphfile==1.0.3
1730
+ Sphinx==3.3.1
1731
+ sphinx-rtd-theme==0.2.4
1732
+ sphinxcontrib-applehelp==1.0.2
1733
+ sphinxcontrib-bibtex==2.4.1
1734
+ sphinxcontrib-devhelp==1.0.2
1735
+ sphinxcontrib-htmlhelp==1.0.3
1736
+ sphinxcontrib-jsmath==1.0.1
1737
+ sphinxcontrib-qthelp==1.0.3
1738
+ sphinxcontrib-serializinghtml==1.1.4
1739
+ SQLAlchemy==1.4.25
1740
+ sqlitedict==2.1.0
1741
+ sqlparse==0.4.2
1742
+ stanza==1.4.2
1743
+ starlette==0.27.0
1744
+ starsessions==1.3.0
1745
+ stevedore==3.4.0
1746
+ subprocess32==3.5.4
1747
+ sympy==1.9
1748
+ tabulate==0.8.9
1749
+ tensorboard==2.4.0
1750
+ tensorboard-plugin-wit==1.7.0
1751
+ tensorboardX==2.6.1
1752
+ tensorflow==2.4.0
1753
+ tensorflow-estimator==2.4.0
1754
+ termcolor==1.1.0
1755
+ terminado==0.9.4
1756
+ testpath==0.4.4
1757
+ threadpoolctl==2.1.0
1758
+ tifffile==2020.12.8
1759
+ tikzplotlib==0.9.8
1760
+ tinycss2==1.2.1
1761
+ tkseem==0.0.3
1762
+ tokenizers==0.13.3
1763
+ toml==0.10.2
1764
+ toolz==0.12.0
1765
+ torch==1.13.1
1766
+ torch-audiomentations==0.11.0
1767
+ torch-pitch-shift==1.2.4
1768
+ torch-stft==0.1.4
1769
+ torchaudio==0.13.1
1770
+ torchmetrics==0.11.4
1771
+ torchvision==0.14.1
1772
+ tornado==6.1
1773
+ tqdm==4.61.1
1774
+ trackrip==1.2.1
1775
+ traitlets==5.9.0
1776
+ transformer-smaller-training-vocab==0.3.1
1777
+ transformers==4.30.2
1778
+ triton==2.0.0
1779
+ typed-ast==1.4.1
1780
+ typer==0.4.0
1781
+ typing-extensions==4.4.0
1782
+ uc-micro-py==1.0.1
1783
+ Unidecode==1.3.2
1784
+ uritemplate==3.0.1
1785
+ urllib3==1.26.2
1786
+ uvicorn==0.20.0
1787
+ versioneer==0.28
1788
+ virtualenv==20.2.1
1789
+ wandb==0.12.6
1790
+ wcwidth==0.2.5
1791
+ webdataset==0.1.62
1792
+ webencodings==0.5.1
1793
+ websocket-client==1.6.1
1794
+ websockets==10.4
1795
+ Werkzeug==1.0.1
1796
+ wget==3.2
1797
+ widgetsnbextension==3.5.1
1798
+ Wikipedia-API==0.6.0
1799
+ wordninja==2.0.0
1800
+ wrapt==1.12.1
1801
+ xmltodict==0.13.0
1802
+ xxhash==2.0.0
1803
+ yamllint==1.23.0
1804
+ yarg==0.1.9
1805
+ yarl==1.7.2
1806
+ yaspin==2.1.0
1807
+ youtokentome==1.0.6
1808
+ youtube-dl==2021.6.6
1809
+ zipp==3.6.0
1810
+
1811
+
1812
+ 2023-09-25 12:30:08,594 - speechbrain.utils.superpowers - DEBUG - 0fdcdc4
1813
+
1814
+
1815
+ 2023-09-25 12:30:08,630 - speechbrain.pretrained.fetching - INFO - Fetch hyperparams.yaml: Using existing file/symlink in pretrained_models/asr-wav2vec2-commonvoice-fr/hyperparams.yaml.
1816
+ 2023-09-25 12:30:08,631 - speechbrain.pretrained.fetching - INFO - Fetch custom.py: Linking to local file in /home/salah/Code-Switched-Tunisian-SpeechToText/asr-wav2vec2-commonvoice-fr/custom.py.
1817
+ 2023-09-25 12:30:11,413 - speechbrain.lobes.models.huggingface_wav2vec - WARNING - speechbrain.lobes.models.huggingface_wav2vec - wav2vec 2.0 is frozen.
1818
+ 2023-09-25 12:30:11,416 - speechbrain.utils.parameter_transfer - DEBUG - Collecting files (or symlinks) for pretraining in pretrained_models/asr-wav2vec2-commonvoice-fr.
1819
+ 2023-09-25 12:30:11,417 - speechbrain.pretrained.fetching - INFO - Fetch wav2vec2.ckpt: Using existing file/symlink in pretrained_models/asr-wav2vec2-commonvoice-fr/wav2vec2.ckpt.
1820
+ 2023-09-25 12:30:11,417 - speechbrain.pretrained.fetching - INFO - Fetch asr.ckpt: Using existing file/symlink in pretrained_models/asr-wav2vec2-commonvoice-fr/asr.ckpt.
1821
+ 2023-09-25 12:30:11,418 - speechbrain.pretrained.fetching - INFO - Fetch tokenizer.ckpt: Using existing file/symlink in pretrained_models/asr-wav2vec2-commonvoice-fr/tokenizer.ckpt.
1822
+ 2023-09-25 12:30:11,418 - speechbrain.utils.parameter_transfer - INFO - Loading pretrained files for: wav2vec2, asr, tokenizer
1823
+ 2023-09-25 12:30:15,151 - speechbrain.lobes.models.huggingface_wav2vec - WARNING - speechbrain.lobes.models.huggingface_wav2vec - wav2vec 2.0 feature extractor is frozen.
1824
+ 2023-09-25 12:30:15,152 - speechbrain.core - INFO - Info: auto_mix_prec arg from hparam file is used
1825
+ 2023-09-25 12:30:15,152 - speechbrain.core - INFO - Info: ckpt_interval_minutes arg from hparam file is used
1826
+ 2023-09-25 12:30:15,155 - speechbrain.core - INFO - 314.4M trainable parameters in ASRCV
1827
+ 2023-09-25 12:30:15,164 - speechbrain.utils.checkpoints - INFO - Loading a checkpoint from EnglishCV/results/wav2vec2_ctc_en/1234/save/CKPT+2023-09-06+22-56-31+00
1828
+ 2023-09-25 12:30:16,217 - speechbrain.core - INFO - Info: auto_mix_prec arg from hparam file is used
1829
+ 2023-09-25 12:30:16,217 - speechbrain.core - INFO - Info: ckpt_interval_minutes arg from hparam file is used
1830
+ 2023-09-25 12:30:16,221 - speechbrain.core - INFO - 314.4M trainable parameters in ASR
1831
+ 2023-09-25 12:30:16,224 - speechbrain.utils.checkpoints - INFO - Loading a checkpoint from TunisianASR/results/14epoch_tunisian/1234/save/CKPT+2023-08-03+01-38-38+00
1832
+ 2023-09-25 12:30:16,534 - speechbrain.utils.distributed - INFO - distributed_launch flag is disabled, this experiment will be executed without DDP.
app.py CHANGED
@@ -701,6 +701,33 @@ if hparams["language_modelling"]:
701
  beta=1, # tuned on a val set
702
  )
703
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
704
 
705
 
706
  run_opts["device"]="cpu"
@@ -766,6 +793,8 @@ def treat_wav_file(file_mic,file_upload ,asr=mixer, device="cpu") :
766
 
767
  gr.Interface(
768
  fn=treat_wav_file,
 
 
769
  inputs=[gr.Audio(source="microphone", type='filepath', label = "record", optional = True),
770
  gr.Audio(source="upload", type='filepath', label="filein", optional=True)]
771
  ,outputs="text").launch()
 
701
  beta=1, # tuned on a val set
702
  )
703
 
704
+ description = """This is a speechbrain-based Automatic Speech Recognition (ASR) model for Tunisian arabic. It outputs code-switched Tunisian transcriptions written in Arabic and Latin characters. It handles Tunisian Arabic, English and French outputs.
705
+ Code-switching is notoriously hard to handle for speech recognition models; the main errors you may encounter using this model are spelling/language identification errors due to code-switching. We may work on improving this in further models. However, if you do not need code-switching in your transcripts, you would be better off using the non-code-switched model, available in another space from the same author. (https://huggingface.co/spaces/SalahZa/Tunisian-Speech-Recognition)
706
+
707
+ Inference runs on CPU to keep this space free. This leads to quite long running times on long sequences. If, for your project or research, you want to transcribe long sequences, you would be better off using the model directly from its page; some instructions for inference on a test set have been provided there. (https://huggingface.co/SalahZa/Code_Switched_Tunisian_Speech_Recognition). If you need help, feel free to drop an email here : zaiemsalah@gmail.com
708
+
709
+ Authors :
710
+ * [Salah Zaiem](https://fr.linkedin.com/in/salah-zaiem)
711
+ * [Ahmed Amine Ben Abdallah](https://www.linkedin.com/in/aabenz/)
712
+ * [Ata Kaboudi](https://www.linkedin.com/in/ata-kaboudi-63365b1a8)
713
+ * [Amir Kanoun](https://tn.linkedin.com/in/ahmed-amir-kanoun)
714
+
715
+ More in-depth details and insights are available in a released preprint. Please find the paper [here](https://arxiv.org/abs/2309.11327).
716
+ If you use or refer to this model, please cite :
717
+
718
+ ```
719
+ @misc{abdallah2023leveraging,
720
+ title={Leveraging Data Collection and Unsupervised Learning for Code-switched Tunisian Arabic Automatic Speech Recognition},
721
+ author={Ahmed Amine Ben Abdallah and Ata Kabboudi and Amir Kanoun and Salah Zaiem},
722
+ year={2023},
723
+ eprint={2309.11327},
724
+ archivePrefix={arXiv},
725
+ primaryClass={eess.AS}
726
+ }
727
+ ```
728
+
729
+ """
730
+ title = "Code-Switched Tunisian Speech Recognition"
731
 
732
 
733
  run_opts["device"]="cpu"
 
793
 
794
  gr.Interface(
795
  fn=treat_wav_file,
796
+ title = title,
797
+ description = description,
798
  inputs=[gr.Audio(source="microphone", type='filepath', label = "record", optional = True),
799
  gr.Audio(source="upload", type='filepath', label="filein", optional=True)]
800
  ,outputs="text").launch()
results/non_semi_final_stac/app.py CHANGED
@@ -356,7 +356,7 @@ english_asr_model = ASRCV(
356
  )
357
  english_asr_model.modules.to("cpu")
358
  english_asr_model.device="cpu"
359
- english_asr_model.checkpointer.recover_if_possible()
360
  run_opts["device"]="cpu"
361
  print("moving to tunisian model")
362
  asr_brain = ASR(
@@ -366,7 +366,7 @@ asr_brain = ASR(
366
  checkpointer=hparams["checkpointer"],
367
  )
368
  asr_brain.modules.to("cpu")
369
- asr_brain.checkpointer.recover_if_possible()
370
  asr_brain.modules.eval()
371
  english_asr_model.modules.eval()
372
  french_asr_model.mods.eval()
@@ -701,6 +701,33 @@ if hparams["language_modelling"]:
701
  beta=1, # tuned on a val set
702
  )
703
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
704
 
705
 
706
  run_opts["device"]="cpu"
@@ -713,7 +740,7 @@ mixer = Mixer(
713
  )
714
  mixer.tokenizer = label_encoder
715
  mixer.device = "cpu"
716
- mixer.checkpointer.recover_if_possible()
717
  mixer.modules.eval()
718
 
719
 
@@ -766,6 +793,8 @@ def treat_wav_file(file_mic,file_upload ,asr=mixer, device="cpu") :
766
 
767
  gr.Interface(
768
  fn=treat_wav_file,
 
 
769
  inputs=[gr.Audio(source="microphone", type='filepath', label = "record", optional = True),
770
  gr.Audio(source="upload", type='filepath', label="filein", optional=True)]
771
  ,outputs="text").launch()
 
356
  )
357
  english_asr_model.modules.to("cpu")
358
  english_asr_model.device="cpu"
359
+ english_asr_model.checkpointer.recover_if_possible(device="cpu")
360
  run_opts["device"]="cpu"
361
  print("moving to tunisian model")
362
  asr_brain = ASR(
 
366
  checkpointer=hparams["checkpointer"],
367
  )
368
  asr_brain.modules.to("cpu")
369
+ asr_brain.checkpointer.recover_if_possible(device="cpu")
370
  asr_brain.modules.eval()
371
  english_asr_model.modules.eval()
372
  french_asr_model.mods.eval()
 
701
  beta=1, # tuned on a val set
702
  )
703
 
704
+ description = """This is a speechbrain-based Automatic Speech Recognition (ASR) model for Tunisian arabic. It outputs code-switched Tunisian transcriptions written in Arabic and Latin characters. It handles Tunisian Arabic, English and French outputs.
705
+ Code-switching is notoriously hard to handle for speech recognition models; the main errors you may encounter using this model are spelling/language identification errors due to code-switching. We may work on improving this in further models. However, if you do not need code-switching in your transcripts, you would be better off using the non-code-switched model, available in another space from the same author. (https://huggingface.co/spaces/SalahZa/Tunisian-Speech-Recognition)
706
+
707
+ Inference runs on CPU to keep this space free. This leads to quite long running times on long sequences. If, for your project or research, you want to transcribe long sequences, you would be better off using the model directly from its page; some instructions for inference on a test set have been provided there. (https://huggingface.co/SalahZa/Code_Switched_Tunisian_Speech_Recognition). If you need help, feel free to drop an email here : zaiemsalah@gmail.com
708
+
709
+ Authors :
710
+ * [Salah Zaiem](https://fr.linkedin.com/in/salah-zaiem)
711
+ * [Ahmed Amine Ben Abdallah](https://www.linkedin.com/in/aabenz/)
712
+ * [Ata Kaboudi](https://www.linkedin.com/in/ata-kaboudi-63365b1a8)
713
+ * [Amir Kanoun](https://tn.linkedin.com/in/ahmed-amir-kanoun)
714
+
715
+ More in-depth details and insights are available in a released preprint. Please find the paper [here](https://arxiv.org/abs/2309.11327).
716
+ If you use or refer to this model, please cite :
717
+
718
+ ```
719
+ @misc{abdallah2023leveraging,
720
+ title={Leveraging Data Collection and Unsupervised Learning for Code-switched Tunisian Arabic Automatic Speech Recognition},
721
+ author={Ahmed Amine Ben Abdallah and Ata Kabboudi and Amir Kanoun and Salah Zaiem},
722
+ year={2023},
723
+ eprint={2309.11327},
724
+ archivePrefix={arXiv},
725
+ primaryClass={eess.AS}
726
+ }
727
+ ```
728
+
729
+ """
730
+ title = "Code-Switched Tunisian Speech Recognition"
731
 
732
 
733
  run_opts["device"]="cpu"
 
740
  )
741
  mixer.tokenizer = label_encoder
742
  mixer.device = "cpu"
743
+ mixer.checkpointer.recover_if_possible(device="cpu")
744
  mixer.modules.eval()
745
 
746
 
 
793
 
794
  gr.Interface(
795
  fn=treat_wav_file,
796
+ title = title,
797
+ description = description,
798
  inputs=[gr.Audio(source="microphone", type='filepath', label = "record", optional = True),
799
  gr.Audio(source="upload", type='filepath', label="filein", optional=True)]
800
  ,outputs="text").launch()
results/non_semi_final_stac/env.log CHANGED
@@ -473,7 +473,7 @@ youtube-dl==2021.6.6
473
  zipp==3.6.0
474
  ==============================
475
  Git revision:
476
- be9098b
477
  ==============================
478
  CUDA version:
479
  11.7
 
473
  zipp==3.6.0
474
  ==============================
475
  Git revision:
476
+ 0fdcdc4
477
  ==============================
478
  CUDA version:
479
  11.7
results/non_semi_final_stac/log.txt CHANGED
The diff for this file is too large to render. See raw diff