pyf98 committed on
Commit
3b3dddc
1 Parent(s): da6f79c

add model files

Browse files
Files changed (40) hide show
  1. README.md +15 -0
  2. data/token_list/bpe_unigram50000/bpe.model +3 -0
  3. exp/s2t_stats_raw_bpe50000/train/feats_stats.npz +3 -0
  4. exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/config.yaml +0 -0
  5. exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/images/backward_time.png +0 -0
  6. exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/images/cer_ctc.png +0 -0
  7. exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/images/cer_interctc_layer12.png +0 -0
  8. exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/images/cer_interctc_layer15.png +0 -0
  9. exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/images/cer_interctc_layer21.png +0 -0
  10. exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/images/cer_interctc_layer6.png +0 -0
  11. exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/images/clip.png +0 -0
  12. exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/images/forward_time.png +0 -0
  13. exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/images/gpu_max_cached_mem_GB.png +0 -0
  14. exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/images/grad_norm.png +0 -0
  15. exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/images/iter_time.png +0 -0
  16. exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/images/loss.png +0 -0
  17. exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/images/loss_ctc.png +0 -0
  18. exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/images/loss_interctc_layer12.png +0 -0
  19. exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/images/loss_interctc_layer15.png +0 -0
  20. exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/images/loss_interctc_layer21.png +0 -0
  21. exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/images/loss_interctc_layer6.png +0 -0
  22. exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/images/loss_scale.png +0 -0
  23. exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/images/optim0_lr0.png +0 -0
  24. exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/images/optim_step_time.png +0 -0
  25. exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/images/train_time.png +0 -0
  26. exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/train.1.log +0 -0
  27. exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/train.10.log +0 -0
  28. exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/train.13.log +0 -0
  29. exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/train.17.log +0 -0
  30. exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/train.19.log +0 -0
  31. exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/train.2.log +0 -0
  32. exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/train.20.log +0 -0
  33. exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/train.29.log +0 -0
  34. exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/train.3.log +0 -0
  35. exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/train.30.log +0 -0
  36. exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/train.31.log +0 -0
  37. exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/train.4.log +0 -0
  38. exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/train.6.log +0 -0
  39. exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/valid.total_count.ave_5best.till40epoch.pth +3 -0
  40. meta.yaml +8 -0
README.md ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - espnet
4
+ - audio
5
+ - automatic-speech-recognition
6
+ - speech-translation
7
+ - language-identification
8
+ language: multilingual
9
+ datasets:
10
+ - owsm_v3.1_ctc
11
+ license: cc-by-4.0
12
+ ---
13
+
14
+ [OWSM-CTC](https://arxiv.org/abs/2402.12654) is an encoder-only speech foundation model based on multi-task self-conditioned CTC.
15
+ It is trained on 180k hours of public audio data for multilingual speech recognition, any-to-any speech translation, and language identification, following the design of the previous [encoder-decoder OWSM](https://arxiv.org/abs/2401.16658).
data/token_list/bpe_unigram50000/bpe.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff9e37e2ec3b9c6cd1a2b02672b40a17b8bc2e11ad865a44518835a199dfd890
3
+ size 1031801
exp/s2t_stats_raw_bpe50000/train/feats_stats.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ef4b5e465110edf32eec024cf2427eedd677f5733bb87d6b2131e6984a6e13f
3
+ size 1402
exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/config.yaml ADDED
The diff for this file is too large to render. See raw diff
 
exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/images/backward_time.png ADDED
exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/images/cer_ctc.png ADDED
exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/images/cer_interctc_layer12.png ADDED
exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/images/cer_interctc_layer15.png ADDED
exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/images/cer_interctc_layer21.png ADDED
exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/images/cer_interctc_layer6.png ADDED
exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/images/clip.png ADDED
exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/images/forward_time.png ADDED
exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/images/gpu_max_cached_mem_GB.png ADDED
exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/images/grad_norm.png ADDED
exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/images/iter_time.png ADDED
exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/images/loss.png ADDED
exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/images/loss_ctc.png ADDED
exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/images/loss_interctc_layer12.png ADDED
exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/images/loss_interctc_layer15.png ADDED
exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/images/loss_interctc_layer21.png ADDED
exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/images/loss_interctc_layer6.png ADDED
exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/images/loss_scale.png ADDED
exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/images/optim0_lr0.png ADDED
exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/images/optim_step_time.png ADDED
exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/images/train_time.png ADDED
exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/train.1.log ADDED
The diff for this file is too large to render. See raw diff
 
exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/train.10.log ADDED
The diff for this file is too large to render. See raw diff
 
exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/train.13.log ADDED
The diff for this file is too large to render. See raw diff
 
exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/train.17.log ADDED
The diff for this file is too large to render. See raw diff
 
exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/train.19.log ADDED
The diff for this file is too large to render. See raw diff
 
exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/train.2.log ADDED
The diff for this file is too large to render. See raw diff
 
exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/train.20.log ADDED
The diff for this file is too large to render. See raw diff
 
exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/train.29.log ADDED
The diff for this file is too large to render. See raw diff
 
exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/train.3.log ADDED
The diff for this file is too large to render. See raw diff
 
exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/train.30.log ADDED
The diff for this file is too large to render. See raw diff
 
exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/train.31.log ADDED
The diff for this file is too large to render. See raw diff
 
exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/train.4.log ADDED
The diff for this file is too large to render. See raw diff
 
exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/train.6.log ADDED
The diff for this file is too large to render. See raw diff
 
exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/valid.total_count.ave_5best.till40epoch.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e52260a1386c23a97d75d8dafaf6dccf1ac581dd3865ec9b2c0435b8f8102e3
3
+ size 4020771173
meta.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ espnet: '202310'
2
+ files:
3
+ s2t_model_file: exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/valid.total_count.ave_5best.till40epoch.pth
4
+ python: 3.10.13 | packaged by conda-forge | (main, Dec 23 2023, 15:36:39) [GCC 12.3.0]
5
+ timestamp: 1708654076.994769
6
+ torch: 1.13.1
7
+ yaml_files:
8
+ s2t_train_config: exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/config.yaml