HaNguyen committed on
Commit
b444210
1 Parent(s): de99137
Files changed (5)
  1. CKPT.yaml +5 -0
  2. hyperparams.yaml +97 -0
  3. mBART.ckpt +3 -0
  4. model.ckpt +3 -0
  5. wav2vec2.ckpt +3 -0
CKPT.yaml ADDED
@@ -0,0 +1,5 @@
+ # yamllint disable
+ BLEU: 9.623792780691117
+ end-of-epoch: true
+ epoch: 216
+ unixtime: 1697683012.8301373
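CKPT.yaml only stores checkpoint metadata written by SpeechBrain's checkpointer (validation BLEU, epoch, end-of-epoch flag, and a unix timestamp). As a minimal sketch, assuming PyYAML is installed and the file sits in the working directory, the recorded values can be read back like this:

import yaml

# Read the checkpoint metadata saved alongside the model weights.
with open("CKPT.yaml") as f:
    meta = yaml.safe_load(f)

print(meta["BLEU"])   # 9.623792780691117
print(meta["epoch"])  # 216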
hyperparams.yaml ADDED
@@ -0,0 +1,97 @@
+ pretrained_path: HaNguyen/IWSLT-ast-w2v2-mbart
+
+ lang: fr # for the BLEU score detokenization
+ target_lang: fr_XX # for mBART initialization
+ sample_rate: 16000
+
+
+ # URL for the HuggingFace model we want to load (BASE here)
+ wav2vec2_hub: LIA-AvignonUniversity/IWSLT2022-tamasheq-only
+
+ # wav2vec 2.0 specific parameters
+ wav2vec2_frozen: False
+
+ # Feature parameters (W2V2 etc.)
+ features_dim: 768 # base wav2vec output dimension; for large, replace by 1024
+
+ # Projection for w2v
+ enc_dnn_layers: 1
+ enc_dnn_neurons: 1024 # 256
+
+ # Transformer
+ embedding_size: 256
+ d_model: 1024 # 256
+ activation: !name:torch.nn.GELU
+
+ # Outputs
+ blank_index: 1
+ label_smoothing: 0.1
+ pad_index: 1 # pad_index defined by the mBART model
+ bos_index: 250008 # fr_XX bos_index defined by the mBART model
+ eos_index: 2
+
+ # Decoding parameters
+ # Be sure that the bos and eos indices match the BPE ones
+ min_decode_ratio: 0.0
+ max_decode_ratio: 0.25
+ valid_beam_size: 5
+ test_beam_size: 5
+
+
+ ############################## models ################################
+ # wav2vec model
+ wav2vec2: !new:speechbrain.lobes.models.huggingface_transformers.wav2vec.Wav2Vec2
+     source: !ref <wav2vec2_hub>
+     output_norm: True
+     freeze: !ref <wav2vec2_frozen>
+     save_path: wav2vec2_checkpoint
+
+ # Linear projection
+ enc: !new:speechbrain.lobes.models.VanillaNN.VanillaNN
+     input_shape: [null, null, 768]
+     activation: !ref <activation>
+     dnn_blocks: 1
+     dnn_neurons: 1024
+
+ # mBART
+ mbart_path: facebook/mbart-large-50-many-to-many-mmt
+ mbart_frozen: False
+ mBART: &id004 !new:speechbrain.lobes.models.huggingface_transformers.mbart.mBART
+
+     source: facebook/mbart-large-50-many-to-many-mmt
+     freeze: !ref <mbart_frozen>
+     save_path: mbart_checkpoint
+     target_lang: !ref <target_lang>
+
+ log_softmax: !new:speechbrain.nnet.activations.Softmax
+     apply_log: True
+
+ seq_lin: !new:torch.nn.Identity
+
+ modules:
+     wav2vec2: !ref <wav2vec2>
+     enc: !ref <enc>
+     mBART: !ref <mBART>
+ model: !new:torch.nn.ModuleList
+     - [!ref <enc>]
+
+ valid_search: !new:speechbrain.decoders.S2SHFTextBasedBeamSearcher
+     modules: [!ref <mBART>, null, null]
+     vocab_size: 250054
+     bos_index: 250008
+     eos_index: 2
+     min_decode_ratio: 0.0
+     max_decode_ratio: 0.25
+     beam_size: 5
+     using_eos_threshold: True
+     length_normalization: True
+
+ pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
+     loadables:
+         model: !ref <model>
+         wav2vec2: !ref <wav2vec2>
+         mBART: !ref <mBART>
+     paths:
+         wav2vec2: !ref <pretrained_path>/wav2vec2.ckpt
+         model: !ref <pretrained_path>/model.ckpt
+         mBART: !ref <pretrained_path>/mBART.ckpt
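hyperparams.yaml wires the wav2vec 2.0 encoder, the linear projection, and mBART together, and declares a Pretrainer pointing at the three .ckpt files in this repo. A minimal usage sketch, assuming the file is loaded with hyperpyyaml and that the usual Pretrainer.collect_files() / load_collected() call pattern applies (this commit itself ships no inference script):

from hyperpyyaml import load_hyperpyyaml

# Instantiate the modules declared above (downloads the wav2vec 2.0 and
# mBART weights from HuggingFace on first use).
with open("hyperparams.yaml") as f:
    hparams = load_hyperpyyaml(f)

# Fetch wav2vec2.ckpt, model.ckpt and mBART.ckpt from
# HaNguyen/IWSLT-ast-w2v2-mbart and load them into the modules.
pretrainer = hparams["pretrainer"]
pretrainer.collect_files()
pretrainer.load_collected()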
mBART.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:172d2d7bb7d9e034419c6f988a52c2918bbb632daae036d8f90d7c80719e7e4b
+ size 1835782657
model.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:080be2136e146f03c2b471e35d59c1ce331ef3d9240b2312ecb1cec9feb99e6a
+ size 3150879
wav2vec2.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fce0a9c95ce720483661ecde058e8b273b096709c6dc68b28a5893f30b83c06d
+ size 377572016
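Each .ckpt entry in this commit is a git-lfs pointer (spec v1) rather than the weights themselves; the actual payload is addressed by the sha256 oid and size. As a sketch with a hypothetical helper name (not part of this repo), a downloaded blob could be checked against its pointer like this:

import hashlib

def verify_lfs_object(pointer_path: str, blob_path: str) -> bool:
    # Parse the "key value" lines of the pointer file.
    fields = dict(line.split(" ", 1) for line in open(pointer_path).read().splitlines() if line)
    expected_oid = fields["oid"].removeprefix("sha256:")
    expected_size = int(fields["size"])

    # Hash the blob in 1 MiB chunks and compare oid and size.
    sha, size = hashlib.sha256(), 0
    with open(blob_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            sha.update(chunk)
            size += len(chunk)
    return sha.hexdigest() == expected_oid and size == expected_size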