Emanuela Boros committed on
Commit
8ab3b70
1 Parent(s): 9bbf34b

update model

Browse files
generation_config.json CHANGED
@@ -4,5 +4,5 @@
4
  "eos_token_id": 2,
5
  "forced_eos_token_id": 2,
6
  "pad_token_id": 1,
7
- "transformers_version": "4.31.0"
8
  }
 
4
  "eos_token_id": 2,
5
  "forced_eos_token_id": 2,
6
  "pad_token_id": 1,
7
+ "transformers_version": "4.46.0.dev0"
8
  }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8cfb5bf9aa521336b586ae37eecac31ed7e86327a1be1802d32551472988633
3
+ size 2468961388
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:effe05830fd50a56a6e9872b81d573e07cf6c91261cd48880aca08db611299dd
3
+ size 4936191835
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:517bd588fb9ad506ac1303ef0497609383ce21e9c6ec4f26ce6682deb37888cf
3
+ size 14244
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc00424e1006d8552c992d3bde1acf8d4282909093c4e18a2112a6e6b087b217
3
+ size 1064
special_tokens_map.json CHANGED
@@ -1,7 +1,25 @@
1
  {
2
- "bos_token": "<s>",
3
- "cls_token": "<s>",
4
- "eos_token": "</s>",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  "mask_token": {
6
  "content": "<mask>",
7
  "lstrip": true,
@@ -9,7 +27,25 @@
9
  "rstrip": false,
10
  "single_word": false
11
  },
12
- "pad_token": "<pad>",
13
- "sep_token": "</s>",
14
- "unk_token": "<unk>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  }
 
1
  {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
  "mask_token": {
24
  "content": "<mask>",
25
  "lstrip": true,
 
27
  "rstrip": false,
28
  "single_word": false
29
  },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "<unk>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
  }
tokenizer_config.json CHANGED
@@ -1,16 +1,51 @@
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "bos_token": "<s>",
3
- "clean_up_tokenization_spaces": true,
4
  "cls_token": "<s>",
5
  "eos_token": "</s>",
6
- "mask_token": {
7
- "__type": "AddedToken",
8
- "content": "<mask>",
9
- "lstrip": true,
10
- "normalized": true,
11
- "rstrip": false,
12
- "single_word": false
13
- },
14
  "model_max_length": 512,
15
  "pad_token": "<pad>",
16
  "sep_token": "</s>",
 
1
  {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<pad>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "256001": {
36
+ "content": "<mask>",
37
+ "lstrip": true,
38
+ "normalized": true,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
  "bos_token": "<s>",
45
+ "clean_up_tokenization_spaces": false,
46
  "cls_token": "<s>",
47
  "eos_token": "</s>",
48
+ "mask_token": "<mask>",
 
 
 
 
 
 
 
49
  "model_max_length": 512,
50
  "pad_token": "<pad>",
51
  "sep_token": "</s>",
trainer_state.json ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 10.0,
5
+ "eval_steps": 500,
6
+ "global_step": 6480,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.7716049382716049,
13
+ "grad_norm": 1.0553990602493286,
14
+ "learning_rate": 1.846913580246914e-05,
15
+ "loss": 0.9346,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 1.0,
20
+ "eval_bleu": 0.0,
21
+ "eval_gen_len": 10.0959,
22
+ "eval_loss": 0.15309424698352814,
23
+ "eval_runtime": 7.8759,
24
+ "eval_samples_per_second": 154.903,
25
+ "eval_steps_per_second": 2.539,
26
+ "step": 648
27
+ },
28
+ {
29
+ "epoch": 1.5432098765432098,
30
+ "grad_norm": 0.7297214269638062,
31
+ "learning_rate": 1.6925925925925926e-05,
32
+ "loss": 0.0763,
33
+ "step": 1000
34
+ },
35
+ {
36
+ "epoch": 2.0,
37
+ "eval_bleu": 0.0,
38
+ "eval_gen_len": 10.159,
39
+ "eval_loss": 0.16104426980018616,
40
+ "eval_runtime": 7.6405,
41
+ "eval_samples_per_second": 159.674,
42
+ "eval_steps_per_second": 2.618,
43
+ "step": 1296
44
+ },
45
+ {
46
+ "epoch": 2.314814814814815,
47
+ "grad_norm": 0.44237253069877625,
48
+ "learning_rate": 1.5382716049382717e-05,
49
+ "loss": 0.0446,
50
+ "step": 1500
51
+ },
52
+ {
53
+ "epoch": 3.0,
54
+ "eval_bleu": 0.0,
55
+ "eval_gen_len": 10.0426,
56
+ "eval_loss": 0.17489495873451233,
57
+ "eval_runtime": 7.8385,
58
+ "eval_samples_per_second": 155.642,
59
+ "eval_steps_per_second": 2.552,
60
+ "step": 1944
61
+ },
62
+ {
63
+ "epoch": 3.0864197530864197,
64
+ "grad_norm": 0.3801327049732208,
65
+ "learning_rate": 1.3839506172839507e-05,
66
+ "loss": 0.0275,
67
+ "step": 2000
68
+ },
69
+ {
70
+ "epoch": 3.8580246913580245,
71
+ "grad_norm": 0.29495081305503845,
72
+ "learning_rate": 1.2296296296296298e-05,
73
+ "loss": 0.0162,
74
+ "step": 2500
75
+ },
76
+ {
77
+ "epoch": 4.0,
78
+ "eval_bleu": 0.0,
79
+ "eval_gen_len": 10.1139,
80
+ "eval_loss": 0.1843736320734024,
81
+ "eval_runtime": 7.7649,
82
+ "eval_samples_per_second": 157.118,
83
+ "eval_steps_per_second": 2.576,
84
+ "step": 2592
85
+ },
86
+ {
87
+ "epoch": 4.62962962962963,
88
+ "grad_norm": 0.29735738039016724,
89
+ "learning_rate": 1.0753086419753086e-05,
90
+ "loss": 0.0106,
91
+ "step": 3000
92
+ },
93
+ {
94
+ "epoch": 5.0,
95
+ "eval_bleu": 0.0,
96
+ "eval_gen_len": 9.9508,
97
+ "eval_loss": 0.19341909885406494,
98
+ "eval_runtime": 7.6995,
99
+ "eval_samples_per_second": 158.452,
100
+ "eval_steps_per_second": 2.598,
101
+ "step": 3240
102
+ },
103
+ {
104
+ "epoch": 5.401234567901234,
105
+ "grad_norm": 0.07027166336774826,
106
+ "learning_rate": 9.209876543209877e-06,
107
+ "loss": 0.0076,
108
+ "step": 3500
109
+ },
110
+ {
111
+ "epoch": 6.0,
112
+ "eval_bleu": 0.0,
113
+ "eval_gen_len": 9.9377,
114
+ "eval_loss": 0.20017552375793457,
115
+ "eval_runtime": 7.6996,
116
+ "eval_samples_per_second": 158.45,
117
+ "eval_steps_per_second": 2.598,
118
+ "step": 3888
119
+ },
120
+ {
121
+ "epoch": 6.172839506172839,
122
+ "grad_norm": 0.1504916250705719,
123
+ "learning_rate": 7.666666666666667e-06,
124
+ "loss": 0.0059,
125
+ "step": 4000
126
+ },
127
+ {
128
+ "epoch": 6.944444444444445,
129
+ "grad_norm": 0.24264627695083618,
130
+ "learning_rate": 6.123456790123458e-06,
131
+ "loss": 0.0043,
132
+ "step": 4500
133
+ },
134
+ {
135
+ "epoch": 7.0,
136
+ "eval_bleu": 0.0,
137
+ "eval_gen_len": 10.0279,
138
+ "eval_loss": 0.20386986434459686,
139
+ "eval_runtime": 7.7944,
140
+ "eval_samples_per_second": 156.523,
141
+ "eval_steps_per_second": 2.566,
142
+ "step": 4536
143
+ },
144
+ {
145
+ "epoch": 7.716049382716049,
146
+ "grad_norm": 0.08363181352615356,
147
+ "learning_rate": 4.580246913580247e-06,
148
+ "loss": 0.0035,
149
+ "step": 5000
150
+ },
151
+ {
152
+ "epoch": 8.0,
153
+ "eval_bleu": 0.0,
154
+ "eval_gen_len": 10.1566,
155
+ "eval_loss": 0.20531675219535828,
156
+ "eval_runtime": 7.6989,
157
+ "eval_samples_per_second": 158.465,
158
+ "eval_steps_per_second": 2.598,
159
+ "step": 5184
160
+ },
161
+ {
162
+ "epoch": 8.487654320987655,
163
+ "grad_norm": 0.13225023448467255,
164
+ "learning_rate": 3.0370370370370372e-06,
165
+ "loss": 0.0029,
166
+ "step": 5500
167
+ },
168
+ {
169
+ "epoch": 9.0,
170
+ "eval_bleu": 0.0,
171
+ "eval_gen_len": 10.0689,
172
+ "eval_loss": 0.20702147483825684,
173
+ "eval_runtime": 7.6619,
174
+ "eval_samples_per_second": 159.23,
175
+ "eval_steps_per_second": 2.61,
176
+ "step": 5832
177
+ },
178
+ {
179
+ "epoch": 9.25925925925926,
180
+ "grad_norm": 0.022540247067809105,
181
+ "learning_rate": 1.4938271604938272e-06,
182
+ "loss": 0.003,
183
+ "step": 6000
184
+ }
185
+ ],
186
+ "logging_steps": 500,
187
+ "max_steps": 6480,
188
+ "num_input_tokens_seen": 0,
189
+ "num_train_epochs": 10,
190
+ "save_steps": 1000,
191
+ "stateful_callbacks": {
192
+ "TrainerControl": {
193
+ "args": {
194
+ "should_epoch_stop": false,
195
+ "should_evaluate": false,
196
+ "should_log": false,
197
+ "should_save": true,
198
+ "should_training_stop": true
199
+ },
200
+ "attributes": {}
201
+ }
202
+ },
203
+ "total_flos": 4.288315707563704e+17,
204
+ "train_batch_size": 64,
205
+ "trial_name": null,
206
+ "trial_params": null
207
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4cac52e28b14106cbb32ed28da554d23ebe13705f9b1a96081743a9266949f0e
3
+ size 5496