Ethan Sim commited on
Commit
2e7b5b1
1 Parent(s): b0adf36

stage best wang model

Browse files
checkpoint-256000/config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "facebook/m2m100_418M",
3
+ "activation_dropout": 0.0,
4
+ "activation_function": "relu",
5
+ "architectures": [
6
+ "M2M100ForConditionalGeneration"
7
+ ],
8
+ "attention_dropout": 0.1,
9
+ "bos_token_id": 0,
10
+ "d_model": 1024,
11
+ "decoder_attention_heads": 16,
12
+ "decoder_ffn_dim": 4096,
13
+ "decoder_layerdrop": 0.05,
14
+ "decoder_layers": 12,
15
+ "decoder_start_token_id": 2,
16
+ "dropout": 0.1,
17
+ "early_stopping": true,
18
+ "encoder_attention_heads": 16,
19
+ "encoder_ffn_dim": 4096,
20
+ "encoder_layerdrop": 0.05,
21
+ "encoder_layers": 12,
22
+ "eos_token_id": 2,
23
+ "gradient_checkpointing": false,
24
+ "init_std": 0.02,
25
+ "is_encoder_decoder": true,
26
+ "max_length": 200,
27
+ "max_position_embeddings": 1024,
28
+ "model_type": "m2m_100",
29
+ "num_beams": 5,
30
+ "num_hidden_layers": 12,
31
+ "pad_token_id": 1,
32
+ "scale_embedding": true,
33
+ "torch_dtype": "float32",
34
+ "transformers_version": "4.28.0",
35
+ "use_cache": true,
36
+ "vocab_size": 128112
37
+ }
checkpoint-256000/generation_config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 0,
3
+ "decoder_start_token_id": 2,
4
+ "early_stopping": true,
5
+ "eos_token_id": 2,
6
+ "max_length": 200,
7
+ "num_beams": 5,
8
+ "pad_token_id": 1,
9
+ "transformers_version": "4.28.0"
10
+ }
checkpoint-256000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38a1322634b2d217675b81412e30f3374b08abf7623ae73d2e96d9efb795e2fe
3
+ size 3871544599
checkpoint-256000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c353a0da2bdb44caf3bc774a1f09200622a89d1206338b7f9ac554bd3cd9171d
3
+ size 1944201353
checkpoint-256000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8aaf9520cf4149c556ce815dc4ff12f8d7a864f94b792e1a28c43d73bc3e021
3
+ size 14511
checkpoint-256000/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47acc835a3f0fabec0bed149f23e92d29a899f1a62e0cd51b616df2f6058e03f
3
+ size 557
checkpoint-256000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09db2bddec8d85cd09c3ced5c24082ea85f7fc1208e00ae381ead78d58fc37c3
3
+ size 627
checkpoint-256000/sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8f7c76ed2a5e0822be39f0a4f95a55eb19c78f4593ce609e2edbc2aea4d380a
3
+ size 2423393
checkpoint-256000/special_tokens_map.json ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "__af__",
4
+ "__am__",
5
+ "__ar__",
6
+ "__ast__",
7
+ "__az__",
8
+ "__ba__",
9
+ "__be__",
10
+ "__bg__",
11
+ "__bn__",
12
+ "__br__",
13
+ "__bs__",
14
+ "__ca__",
15
+ "__ceb__",
16
+ "__cs__",
17
+ "__cy__",
18
+ "__da__",
19
+ "__de__",
20
+ "__el__",
21
+ "__en__",
22
+ "__es__",
23
+ "__et__",
24
+ "__fa__",
25
+ "__ff__",
26
+ "__fi__",
27
+ "__fr__",
28
+ "__fy__",
29
+ "__ga__",
30
+ "__gd__",
31
+ "__gl__",
32
+ "__gu__",
33
+ "__ha__",
34
+ "__he__",
35
+ "__hi__",
36
+ "__hr__",
37
+ "__ht__",
38
+ "__hu__",
39
+ "__hy__",
40
+ "__id__",
41
+ "__ig__",
42
+ "__ilo__",
43
+ "__is__",
44
+ "__it__",
45
+ "__ja__",
46
+ "__jv__",
47
+ "__ka__",
48
+ "__kk__",
49
+ "__km__",
50
+ "__kn__",
51
+ "__ko__",
52
+ "__lb__",
53
+ "__lg__",
54
+ "__ln__",
55
+ "__lo__",
56
+ "__lt__",
57
+ "__lv__",
58
+ "__mg__",
59
+ "__mk__",
60
+ "__ml__",
61
+ "__mn__",
62
+ "__mr__",
63
+ "__ms__",
64
+ "__my__",
65
+ "__ne__",
66
+ "__nl__",
67
+ "__no__",
68
+ "__ns__",
69
+ "__oc__",
70
+ "__or__",
71
+ "__pa__",
72
+ "__pl__",
73
+ "__ps__",
74
+ "__pt__",
75
+ "__ro__",
76
+ "__ru__",
77
+ "__sd__",
78
+ "__si__",
79
+ "__sk__",
80
+ "__sl__",
81
+ "__so__",
82
+ "__sq__",
83
+ "__sr__",
84
+ "__ss__",
85
+ "__su__",
86
+ "__sv__",
87
+ "__sw__",
88
+ "__ta__",
89
+ "__th__",
90
+ "__tl__",
91
+ "__tn__",
92
+ "__tr__",
93
+ "__uk__",
94
+ "__ur__",
95
+ "__uz__",
96
+ "__vi__",
97
+ "__wo__",
98
+ "__xh__",
99
+ "__yi__",
100
+ "__yo__",
101
+ "__zh__",
102
+ "__zu__"
103
+ ],
104
+ "bos_token": "<s>",
105
+ "eos_token": "</s>",
106
+ "pad_token": "<pad>",
107
+ "sep_token": "</s>",
108
+ "unk_token": "<unk>"
109
+ }
checkpoint-256000/tokenizer_config.json ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "__af__",
4
+ "__am__",
5
+ "__ar__",
6
+ "__ast__",
7
+ "__az__",
8
+ "__ba__",
9
+ "__be__",
10
+ "__bg__",
11
+ "__bn__",
12
+ "__br__",
13
+ "__bs__",
14
+ "__ca__",
15
+ "__ceb__",
16
+ "__cs__",
17
+ "__cy__",
18
+ "__da__",
19
+ "__de__",
20
+ "__el__",
21
+ "__en__",
22
+ "__es__",
23
+ "__et__",
24
+ "__fa__",
25
+ "__ff__",
26
+ "__fi__",
27
+ "__fr__",
28
+ "__fy__",
29
+ "__ga__",
30
+ "__gd__",
31
+ "__gl__",
32
+ "__gu__",
33
+ "__ha__",
34
+ "__he__",
35
+ "__hi__",
36
+ "__hr__",
37
+ "__ht__",
38
+ "__hu__",
39
+ "__hy__",
40
+ "__id__",
41
+ "__ig__",
42
+ "__ilo__",
43
+ "__is__",
44
+ "__it__",
45
+ "__ja__",
46
+ "__jv__",
47
+ "__ka__",
48
+ "__kk__",
49
+ "__km__",
50
+ "__kn__",
51
+ "__ko__",
52
+ "__lb__",
53
+ "__lg__",
54
+ "__ln__",
55
+ "__lo__",
56
+ "__lt__",
57
+ "__lv__",
58
+ "__mg__",
59
+ "__mk__",
60
+ "__ml__",
61
+ "__mn__",
62
+ "__mr__",
63
+ "__ms__",
64
+ "__my__",
65
+ "__ne__",
66
+ "__nl__",
67
+ "__no__",
68
+ "__ns__",
69
+ "__oc__",
70
+ "__or__",
71
+ "__pa__",
72
+ "__pl__",
73
+ "__ps__",
74
+ "__pt__",
75
+ "__ro__",
76
+ "__ru__",
77
+ "__sd__",
78
+ "__si__",
79
+ "__sk__",
80
+ "__sl__",
81
+ "__so__",
82
+ "__sq__",
83
+ "__sr__",
84
+ "__ss__",
85
+ "__su__",
86
+ "__sv__",
87
+ "__sw__",
88
+ "__ta__",
89
+ "__th__",
90
+ "__tl__",
91
+ "__tn__",
92
+ "__tr__",
93
+ "__uk__",
94
+ "__ur__",
95
+ "__uz__",
96
+ "__vi__",
97
+ "__wo__",
98
+ "__xh__",
99
+ "__yi__",
100
+ "__yo__",
101
+ "__zh__",
102
+ "__zu__"
103
+ ],
104
+ "bos_token": "<s>",
105
+ "clean_up_tokenization_spaces": true,
106
+ "eos_token": "</s>",
107
+ "language_codes": "m2m100",
108
+ "model_max_length": 1024,
109
+ "num_madeup_words": 8,
110
+ "pad_token": "<pad>",
111
+ "sep_token": "</s>",
112
+ "sp_model_kwargs": {},
113
+ "src_lang": "en",
114
+ "tgt_lang": "fr",
115
+ "tokenizer_class": "M2M100Tokenizer",
116
+ "tokenizer_file": null,
117
+ "unk_token": "<unk>"
118
+ }
checkpoint-256000/trainer_state.json ADDED
@@ -0,0 +1,272 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 40.6424,
3
+ "best_model_checkpoint": "M2M100_enfr_FT_wang_2022/checkpoint-256000",
4
+ "epoch": 3.130158341994253,
5
+ "global_step": 256000,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.2,
12
+ "learning_rate": 1.975551751543682e-05,
13
+ "loss": 0.2244,
14
+ "step": 16000
15
+ },
16
+ {
17
+ "epoch": 0.2,
18
+ "eval_bleu": 35.6567,
19
+ "eval_gen_len": 44.8111,
20
+ "eval_loss": 0.13247372210025787,
21
+ "eval_runtime": 433.7309,
22
+ "eval_samples_per_second": 2.405,
23
+ "eval_steps_per_second": 0.302,
24
+ "step": 16000
25
+ },
26
+ {
27
+ "epoch": 0.39,
28
+ "learning_rate": 1.951108088280247e-05,
29
+ "loss": 0.1522,
30
+ "step": 32000
31
+ },
32
+ {
33
+ "epoch": 0.39,
34
+ "eval_bleu": 36.512,
35
+ "eval_gen_len": 44.4851,
36
+ "eval_loss": 0.1262633502483368,
37
+ "eval_runtime": 417.8212,
38
+ "eval_samples_per_second": 2.496,
39
+ "eval_steps_per_second": 0.314,
40
+ "step": 32000
41
+ },
42
+ {
43
+ "epoch": 0.59,
44
+ "learning_rate": 1.9266613682215567e-05,
45
+ "loss": 0.1435,
46
+ "step": 48000
47
+ },
48
+ {
49
+ "epoch": 0.59,
50
+ "eval_bleu": 37.957,
51
+ "eval_gen_len": 44.5302,
52
+ "eval_loss": 0.12126067280769348,
53
+ "eval_runtime": 414.4742,
54
+ "eval_samples_per_second": 2.516,
55
+ "eval_steps_per_second": 0.316,
56
+ "step": 48000
57
+ },
58
+ {
59
+ "epoch": 0.78,
60
+ "learning_rate": 1.9022177049581222e-05,
61
+ "loss": 0.1384,
62
+ "step": 64000
63
+ },
64
+ {
65
+ "epoch": 0.78,
66
+ "eval_bleu": 38.0569,
67
+ "eval_gen_len": 44.5034,
68
+ "eval_loss": 0.1191168949007988,
69
+ "eval_runtime": 415.8111,
70
+ "eval_samples_per_second": 2.508,
71
+ "eval_steps_per_second": 0.315,
72
+ "step": 64000
73
+ },
74
+ {
75
+ "epoch": 0.98,
76
+ "learning_rate": 1.8777709848994316e-05,
77
+ "loss": 0.1345,
78
+ "step": 80000
79
+ },
80
+ {
81
+ "epoch": 0.98,
82
+ "eval_bleu": 38.4966,
83
+ "eval_gen_len": 44.8821,
84
+ "eval_loss": 0.11713190376758575,
85
+ "eval_runtime": 423.2079,
86
+ "eval_samples_per_second": 2.465,
87
+ "eval_steps_per_second": 0.31,
88
+ "step": 80000
89
+ },
90
+ {
91
+ "epoch": 1.17,
92
+ "learning_rate": 1.853325793238369e-05,
93
+ "loss": 0.1213,
94
+ "step": 96000
95
+ },
96
+ {
97
+ "epoch": 1.17,
98
+ "eval_bleu": 39.0368,
99
+ "eval_gen_len": 44.6012,
100
+ "eval_loss": 0.1156671866774559,
101
+ "eval_runtime": 417.9213,
102
+ "eval_samples_per_second": 2.496,
103
+ "eval_steps_per_second": 0.313,
104
+ "step": 96000
105
+ },
106
+ {
107
+ "epoch": 1.37,
108
+ "learning_rate": 1.8288806015773065e-05,
109
+ "loss": 0.1199,
110
+ "step": 112000
111
+ },
112
+ {
113
+ "epoch": 1.37,
114
+ "eval_bleu": 39.6906,
115
+ "eval_gen_len": 44.8178,
116
+ "eval_loss": 0.11390843987464905,
117
+ "eval_runtime": 418.2954,
118
+ "eval_samples_per_second": 2.493,
119
+ "eval_steps_per_second": 0.313,
120
+ "step": 112000
121
+ },
122
+ {
123
+ "epoch": 1.57,
124
+ "learning_rate": 1.804436938313872e-05,
125
+ "loss": 0.1195,
126
+ "step": 128000
127
+ },
128
+ {
129
+ "epoch": 1.57,
130
+ "eval_bleu": 39.6284,
131
+ "eval_gen_len": 44.8552,
132
+ "eval_loss": 0.11290750652551651,
133
+ "eval_runtime": 422.1564,
134
+ "eval_samples_per_second": 2.471,
135
+ "eval_steps_per_second": 0.31,
136
+ "step": 128000
137
+ },
138
+ {
139
+ "epoch": 1.76,
140
+ "learning_rate": 1.7799902182551813e-05,
141
+ "loss": 0.1185,
142
+ "step": 144000
143
+ },
144
+ {
145
+ "epoch": 1.76,
146
+ "eval_bleu": 39.0079,
147
+ "eval_gen_len": 44.5618,
148
+ "eval_loss": 0.11247587949037552,
149
+ "eval_runtime": 408.9786,
150
+ "eval_samples_per_second": 2.55,
151
+ "eval_steps_per_second": 0.32,
152
+ "step": 144000
153
+ },
154
+ {
155
+ "epoch": 1.96,
156
+ "learning_rate": 1.7555465549917468e-05,
157
+ "loss": 0.1175,
158
+ "step": 160000
159
+ },
160
+ {
161
+ "epoch": 1.96,
162
+ "eval_bleu": 39.2898,
163
+ "eval_gen_len": 44.745,
164
+ "eval_loss": 0.1103997528553009,
165
+ "eval_runtime": 413.1156,
166
+ "eval_samples_per_second": 2.525,
167
+ "eval_steps_per_second": 0.317,
168
+ "step": 160000
169
+ },
170
+ {
171
+ "epoch": 2.15,
172
+ "learning_rate": 1.7310998349330562e-05,
173
+ "loss": 0.1062,
174
+ "step": 176000
175
+ },
176
+ {
177
+ "epoch": 2.15,
178
+ "eval_bleu": 39.8593,
179
+ "eval_gen_len": 45.1151,
180
+ "eval_loss": 0.1111496165394783,
181
+ "eval_runtime": 415.6075,
182
+ "eval_samples_per_second": 2.51,
183
+ "eval_steps_per_second": 0.315,
184
+ "step": 176000
185
+ },
186
+ {
187
+ "epoch": 2.35,
188
+ "learning_rate": 1.7066546432719936e-05,
189
+ "loss": 0.1047,
190
+ "step": 192000
191
+ },
192
+ {
193
+ "epoch": 2.35,
194
+ "eval_bleu": 39.913,
195
+ "eval_gen_len": 44.8102,
196
+ "eval_loss": 0.11086419969797134,
197
+ "eval_runtime": 418.8388,
198
+ "eval_samples_per_second": 2.49,
199
+ "eval_steps_per_second": 0.313,
200
+ "step": 192000
201
+ },
202
+ {
203
+ "epoch": 2.54,
204
+ "learning_rate": 1.682210980008559e-05,
205
+ "loss": 0.1055,
206
+ "step": 208000
207
+ },
208
+ {
209
+ "epoch": 2.54,
210
+ "eval_bleu": 40.2278,
211
+ "eval_gen_len": 45.2848,
212
+ "eval_loss": 0.11025020480155945,
213
+ "eval_runtime": 421.9598,
214
+ "eval_samples_per_second": 2.472,
215
+ "eval_steps_per_second": 0.31,
216
+ "step": 208000
217
+ },
218
+ {
219
+ "epoch": 2.74,
220
+ "learning_rate": 1.6577657883474966e-05,
221
+ "loss": 0.1059,
222
+ "step": 224000
223
+ },
224
+ {
225
+ "epoch": 2.74,
226
+ "eval_bleu": 40.2198,
227
+ "eval_gen_len": 45.0719,
228
+ "eval_loss": 0.10949720442295074,
229
+ "eval_runtime": 416.4666,
230
+ "eval_samples_per_second": 2.504,
231
+ "eval_steps_per_second": 0.315,
232
+ "step": 224000
233
+ },
234
+ {
235
+ "epoch": 2.93,
236
+ "learning_rate": 1.633320596686434e-05,
237
+ "loss": 0.106,
238
+ "step": 240000
239
+ },
240
+ {
241
+ "epoch": 2.93,
242
+ "eval_bleu": 39.8973,
243
+ "eval_gen_len": 44.954,
244
+ "eval_loss": 0.10881481319665909,
245
+ "eval_runtime": 421.0124,
246
+ "eval_samples_per_second": 2.477,
247
+ "eval_steps_per_second": 0.311,
248
+ "step": 240000
249
+ },
250
+ {
251
+ "epoch": 3.13,
252
+ "learning_rate": 1.6088769334229995e-05,
253
+ "loss": 0.0971,
254
+ "step": 256000
255
+ },
256
+ {
257
+ "epoch": 3.13,
258
+ "eval_bleu": 40.6424,
259
+ "eval_gen_len": 44.9732,
260
+ "eval_loss": 0.11019956320524216,
261
+ "eval_runtime": 418.4973,
262
+ "eval_samples_per_second": 2.492,
263
+ "eval_steps_per_second": 0.313,
264
+ "step": 256000
265
+ }
266
+ ],
267
+ "max_steps": 1308560,
268
+ "num_train_epochs": 16,
269
+ "total_flos": 8.283927408492872e+17,
270
+ "trial_name": null,
271
+ "trial_params": null
272
+ }
checkpoint-256000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98438687991f47832564d8ab84087cb1c5a15d8246397eca599954545732c6e4
3
+ size 3771
checkpoint-256000/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "facebook/m2m100_418M",
3
+ "activation_dropout": 0.0,
4
+ "activation_function": "relu",
5
+ "architectures": [
6
+ "M2M100ForConditionalGeneration"
7
+ ],
8
+ "attention_dropout": 0.1,
9
+ "bos_token_id": 0,
10
+ "d_model": 1024,
11
+ "decoder_attention_heads": 16,
12
+ "decoder_ffn_dim": 4096,
13
+ "decoder_layerdrop": 0.05,
14
+ "decoder_layers": 12,
15
+ "decoder_start_token_id": 2,
16
+ "dropout": 0.1,
17
+ "early_stopping": true,
18
+ "encoder_attention_heads": 16,
19
+ "encoder_ffn_dim": 4096,
20
+ "encoder_layerdrop": 0.05,
21
+ "encoder_layers": 12,
22
+ "eos_token_id": 2,
23
+ "gradient_checkpointing": false,
24
+ "init_std": 0.02,
25
+ "is_encoder_decoder": true,
26
+ "max_length": 200,
27
+ "max_position_embeddings": 1024,
28
+ "model_type": "m2m_100",
29
+ "num_beams": 5,
30
+ "num_hidden_layers": 12,
31
+ "pad_token_id": 1,
32
+ "scale_embedding": true,
33
+ "torch_dtype": "float32",
34
+ "transformers_version": "4.28.0",
35
+ "use_cache": true,
36
+ "vocab_size": 128112
37
+ }
generation_config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 0,
3
+ "decoder_start_token_id": 2,
4
+ "early_stopping": true,
5
+ "eos_token_id": 2,
6
+ "max_length": 200,
7
+ "num_beams": 5,
8
+ "pad_token_id": 1,
9
+ "transformers_version": "4.28.0"
10
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c353a0da2bdb44caf3bc774a1f09200622a89d1206338b7f9ac554bd3cd9171d
3
+ size 1944201353
sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8f7c76ed2a5e0822be39f0a4f95a55eb19c78f4593ce609e2edbc2aea4d380a
3
+ size 2423393
special_tokens_map.json ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "__af__",
4
+ "__am__",
5
+ "__ar__",
6
+ "__ast__",
7
+ "__az__",
8
+ "__ba__",
9
+ "__be__",
10
+ "__bg__",
11
+ "__bn__",
12
+ "__br__",
13
+ "__bs__",
14
+ "__ca__",
15
+ "__ceb__",
16
+ "__cs__",
17
+ "__cy__",
18
+ "__da__",
19
+ "__de__",
20
+ "__el__",
21
+ "__en__",
22
+ "__es__",
23
+ "__et__",
24
+ "__fa__",
25
+ "__ff__",
26
+ "__fi__",
27
+ "__fr__",
28
+ "__fy__",
29
+ "__ga__",
30
+ "__gd__",
31
+ "__gl__",
32
+ "__gu__",
33
+ "__ha__",
34
+ "__he__",
35
+ "__hi__",
36
+ "__hr__",
37
+ "__ht__",
38
+ "__hu__",
39
+ "__hy__",
40
+ "__id__",
41
+ "__ig__",
42
+ "__ilo__",
43
+ "__is__",
44
+ "__it__",
45
+ "__ja__",
46
+ "__jv__",
47
+ "__ka__",
48
+ "__kk__",
49
+ "__km__",
50
+ "__kn__",
51
+ "__ko__",
52
+ "__lb__",
53
+ "__lg__",
54
+ "__ln__",
55
+ "__lo__",
56
+ "__lt__",
57
+ "__lv__",
58
+ "__mg__",
59
+ "__mk__",
60
+ "__ml__",
61
+ "__mn__",
62
+ "__mr__",
63
+ "__ms__",
64
+ "__my__",
65
+ "__ne__",
66
+ "__nl__",
67
+ "__no__",
68
+ "__ns__",
69
+ "__oc__",
70
+ "__or__",
71
+ "__pa__",
72
+ "__pl__",
73
+ "__ps__",
74
+ "__pt__",
75
+ "__ro__",
76
+ "__ru__",
77
+ "__sd__",
78
+ "__si__",
79
+ "__sk__",
80
+ "__sl__",
81
+ "__so__",
82
+ "__sq__",
83
+ "__sr__",
84
+ "__ss__",
85
+ "__su__",
86
+ "__sv__",
87
+ "__sw__",
88
+ "__ta__",
89
+ "__th__",
90
+ "__tl__",
91
+ "__tn__",
92
+ "__tr__",
93
+ "__uk__",
94
+ "__ur__",
95
+ "__uz__",
96
+ "__vi__",
97
+ "__wo__",
98
+ "__xh__",
99
+ "__yi__",
100
+ "__yo__",
101
+ "__zh__",
102
+ "__zu__"
103
+ ],
104
+ "bos_token": "<s>",
105
+ "eos_token": "</s>",
106
+ "pad_token": "<pad>",
107
+ "sep_token": "</s>",
108
+ "unk_token": "<unk>"
109
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "__af__",
4
+ "__am__",
5
+ "__ar__",
6
+ "__ast__",
7
+ "__az__",
8
+ "__ba__",
9
+ "__be__",
10
+ "__bg__",
11
+ "__bn__",
12
+ "__br__",
13
+ "__bs__",
14
+ "__ca__",
15
+ "__ceb__",
16
+ "__cs__",
17
+ "__cy__",
18
+ "__da__",
19
+ "__de__",
20
+ "__el__",
21
+ "__en__",
22
+ "__es__",
23
+ "__et__",
24
+ "__fa__",
25
+ "__ff__",
26
+ "__fi__",
27
+ "__fr__",
28
+ "__fy__",
29
+ "__ga__",
30
+ "__gd__",
31
+ "__gl__",
32
+ "__gu__",
33
+ "__ha__",
34
+ "__he__",
35
+ "__hi__",
36
+ "__hr__",
37
+ "__ht__",
38
+ "__hu__",
39
+ "__hy__",
40
+ "__id__",
41
+ "__ig__",
42
+ "__ilo__",
43
+ "__is__",
44
+ "__it__",
45
+ "__ja__",
46
+ "__jv__",
47
+ "__ka__",
48
+ "__kk__",
49
+ "__km__",
50
+ "__kn__",
51
+ "__ko__",
52
+ "__lb__",
53
+ "__lg__",
54
+ "__ln__",
55
+ "__lo__",
56
+ "__lt__",
57
+ "__lv__",
58
+ "__mg__",
59
+ "__mk__",
60
+ "__ml__",
61
+ "__mn__",
62
+ "__mr__",
63
+ "__ms__",
64
+ "__my__",
65
+ "__ne__",
66
+ "__nl__",
67
+ "__no__",
68
+ "__ns__",
69
+ "__oc__",
70
+ "__or__",
71
+ "__pa__",
72
+ "__pl__",
73
+ "__ps__",
74
+ "__pt__",
75
+ "__ro__",
76
+ "__ru__",
77
+ "__sd__",
78
+ "__si__",
79
+ "__sk__",
80
+ "__sl__",
81
+ "__so__",
82
+ "__sq__",
83
+ "__sr__",
84
+ "__ss__",
85
+ "__su__",
86
+ "__sv__",
87
+ "__sw__",
88
+ "__ta__",
89
+ "__th__",
90
+ "__tl__",
91
+ "__tn__",
92
+ "__tr__",
93
+ "__uk__",
94
+ "__ur__",
95
+ "__uz__",
96
+ "__vi__",
97
+ "__wo__",
98
+ "__xh__",
99
+ "__yi__",
100
+ "__yo__",
101
+ "__zh__",
102
+ "__zu__"
103
+ ],
104
+ "bos_token": "<s>",
105
+ "clean_up_tokenization_spaces": true,
106
+ "eos_token": "</s>",
107
+ "language_codes": "m2m100",
108
+ "model_max_length": 1024,
109
+ "num_madeup_words": 8,
110
+ "pad_token": "<pad>",
111
+ "sep_token": "</s>",
112
+ "sp_model_kwargs": {},
113
+ "src_lang": "en",
114
+ "tgt_lang": "fr",
115
+ "tokenizer_class": "M2M100Tokenizer",
116
+ "tokenizer_file": null,
117
+ "unk_token": "<unk>"
118
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98438687991f47832564d8ab84087cb1c5a15d8246397eca599954545732c6e4
3
+ size 3771
vocab.json ADDED
The diff for this file is too large to render. See raw diff