Cyanbox committed on
Commit
4a9ad21
1 Parent(s): 475c661

add finetuned flan-t5

Browse files
flant5-large-finetuned/checkpoint-40000/config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/home1/wangyongqi/assets/t5",
3
+ "architectures": [
4
+ "T5ForConditionalGeneration"
5
+ ],
6
+ "classifier_dropout": 0.0,
7
+ "d_ff": 2816,
8
+ "d_kv": 64,
9
+ "d_model": 1024,
10
+ "decoder_start_token_id": 0,
11
+ "dense_act_fn": "gelu_new",
12
+ "dropout_rate": 0.1,
13
+ "eos_token_id": 1,
14
+ "feed_forward_proj": "gated-gelu",
15
+ "initializer_factor": 1.0,
16
+ "is_encoder_decoder": true,
17
+ "is_gated_act": true,
18
+ "layer_norm_epsilon": 1e-06,
19
+ "model_type": "t5",
20
+ "n_positions": 512,
21
+ "num_decoder_layers": 24,
22
+ "num_heads": 16,
23
+ "num_layers": 24,
24
+ "output_past": true,
25
+ "pad_token_id": 0,
26
+ "relative_attention_max_distance": 128,
27
+ "relative_attention_num_buckets": 32,
28
+ "tie_word_embeddings": false,
29
+ "torch_dtype": "float32",
30
+ "transformers_version": "4.32.0.dev0",
31
+ "use_cache": true,
32
+ "vocab_size": 32128
33
+ }
flant5-large-finetuned/checkpoint-40000/generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "decoder_start_token_id": 0,
4
+ "eos_token_id": 1,
5
+ "pad_token_id": 0,
6
+ "transformers_version": "4.32.0.dev0"
7
+ }
flant5-large-finetuned/checkpoint-40000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3bfdee4fd607dbf15bc2c88ccf40cabf53b1ba99592314b0105b19e6165963f
3
+ size 3132793669
flant5-large-finetuned/checkpoint-40000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e03c76ff5293d09d4bca544b8f4ebcb048f61f791b31ac924d98725d5177b791
3
+ size 14575
flant5-large-finetuned/checkpoint-40000/special_tokens_map.json ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>",
99
+ "<extra_id_96>",
100
+ "<extra_id_97>",
101
+ "<extra_id_98>",
102
+ "<extra_id_99>"
103
+ ],
104
+ "eos_token": "</s>",
105
+ "pad_token": "<pad>",
106
+ "unk_token": "<unk>"
107
+ }
flant5-large-finetuned/checkpoint-40000/spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
3
+ size 791656
flant5-large-finetuned/checkpoint-40000/tokenizer_config.json ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>",
99
+ "<extra_id_96>",
100
+ "<extra_id_97>",
101
+ "<extra_id_98>",
102
+ "<extra_id_99>"
103
+ ],
104
+ "clean_up_tokenization_spaces": true,
105
+ "eos_token": "</s>",
106
+ "extra_ids": 100,
107
+ "legacy": true,
108
+ "model_max_length": 512,
109
+ "pad_token": "<pad>",
110
+ "sp_model_kwargs": {},
111
+ "tokenizer_class": "T5Tokenizer",
112
+ "unk_token": "<unk>"
113
+ }
flant5-large-finetuned/checkpoint-40000/trainer_state.json ADDED
@@ -0,0 +1,515 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.0,
3
+ "best_model_checkpoint": "/home1/wangyongqi/codes/text_encoder_finetuning/t5_finetuningg/flant5_large_t2t/checkpoint-20000",
4
+ "epoch": 22.321428571428573,
5
+ "eval_steps": 20000,
6
+ "global_step": 40000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.28,
13
+ "learning_rate": 0.00019825613839285716,
14
+ "loss": 0.0216,
15
+ "step": 500
16
+ },
17
+ {
18
+ "epoch": 0.56,
19
+ "learning_rate": 0.00019651227678571428,
20
+ "loss": 0.0002,
21
+ "step": 1000
22
+ },
23
+ {
24
+ "epoch": 0.84,
25
+ "learning_rate": 0.00019476841517857143,
26
+ "loss": 0.0,
27
+ "step": 1500
28
+ },
29
+ {
30
+ "epoch": 1.12,
31
+ "learning_rate": 0.00019302455357142858,
32
+ "loss": 0.0018,
33
+ "step": 2000
34
+ },
35
+ {
36
+ "epoch": 1.4,
37
+ "learning_rate": 0.00019128069196428573,
38
+ "loss": 0.0,
39
+ "step": 2500
40
+ },
41
+ {
42
+ "epoch": 1.67,
43
+ "learning_rate": 0.00018953683035714288,
44
+ "loss": 0.0,
45
+ "step": 3000
46
+ },
47
+ {
48
+ "epoch": 1.95,
49
+ "learning_rate": 0.00018779296875,
50
+ "loss": 0.0,
51
+ "step": 3500
52
+ },
53
+ {
54
+ "epoch": 2.23,
55
+ "learning_rate": 0.00018604910714285715,
56
+ "loss": 0.0001,
57
+ "step": 4000
58
+ },
59
+ {
60
+ "epoch": 2.51,
61
+ "learning_rate": 0.0001843052455357143,
62
+ "loss": 0.0,
63
+ "step": 4500
64
+ },
65
+ {
66
+ "epoch": 2.79,
67
+ "learning_rate": 0.00018256138392857142,
68
+ "loss": 0.0011,
69
+ "step": 5000
70
+ },
71
+ {
72
+ "epoch": 3.07,
73
+ "learning_rate": 0.0001808175223214286,
74
+ "loss": 0.0,
75
+ "step": 5500
76
+ },
77
+ {
78
+ "epoch": 3.35,
79
+ "learning_rate": 0.00017907366071428572,
80
+ "loss": 0.0,
81
+ "step": 6000
82
+ },
83
+ {
84
+ "epoch": 3.63,
85
+ "learning_rate": 0.00017732979910714287,
86
+ "loss": 0.0,
87
+ "step": 6500
88
+ },
89
+ {
90
+ "epoch": 3.91,
91
+ "learning_rate": 0.00017558593750000002,
92
+ "loss": 0.0,
93
+ "step": 7000
94
+ },
95
+ {
96
+ "epoch": 4.19,
97
+ "learning_rate": 0.00017384207589285714,
98
+ "loss": 0.0002,
99
+ "step": 7500
100
+ },
101
+ {
102
+ "epoch": 4.46,
103
+ "learning_rate": 0.00017209821428571429,
104
+ "loss": 0.0001,
105
+ "step": 8000
106
+ },
107
+ {
108
+ "epoch": 4.74,
109
+ "learning_rate": 0.00017035435267857144,
110
+ "loss": 0.0,
111
+ "step": 8500
112
+ },
113
+ {
114
+ "epoch": 5.02,
115
+ "learning_rate": 0.00016861049107142858,
116
+ "loss": 0.0,
117
+ "step": 9000
118
+ },
119
+ {
120
+ "epoch": 5.3,
121
+ "learning_rate": 0.00016686662946428573,
122
+ "loss": 0.0,
123
+ "step": 9500
124
+ },
125
+ {
126
+ "epoch": 5.58,
127
+ "learning_rate": 0.00016512276785714286,
128
+ "loss": 0.0,
129
+ "step": 10000
130
+ },
131
+ {
132
+ "epoch": 5.86,
133
+ "learning_rate": 0.00016337890625,
134
+ "loss": 0.0,
135
+ "step": 10500
136
+ },
137
+ {
138
+ "epoch": 6.14,
139
+ "learning_rate": 0.00016163504464285715,
140
+ "loss": 0.0,
141
+ "step": 11000
142
+ },
143
+ {
144
+ "epoch": 6.42,
145
+ "learning_rate": 0.00015989118303571428,
146
+ "loss": 0.0,
147
+ "step": 11500
148
+ },
149
+ {
150
+ "epoch": 6.7,
151
+ "learning_rate": 0.00015814732142857142,
152
+ "loss": 0.0,
153
+ "step": 12000
154
+ },
155
+ {
156
+ "epoch": 6.98,
157
+ "learning_rate": 0.0001564034598214286,
158
+ "loss": 0.0,
159
+ "step": 12500
160
+ },
161
+ {
162
+ "epoch": 7.25,
163
+ "learning_rate": 0.00015465959821428572,
164
+ "loss": 0.0,
165
+ "step": 13000
166
+ },
167
+ {
168
+ "epoch": 7.53,
169
+ "learning_rate": 0.00015291573660714287,
170
+ "loss": 0.0,
171
+ "step": 13500
172
+ },
173
+ {
174
+ "epoch": 7.81,
175
+ "learning_rate": 0.00015117187500000002,
176
+ "loss": 0.0,
177
+ "step": 14000
178
+ },
179
+ {
180
+ "epoch": 8.09,
181
+ "learning_rate": 0.00014942801339285714,
182
+ "loss": 0.0,
183
+ "step": 14500
184
+ },
185
+ {
186
+ "epoch": 8.37,
187
+ "learning_rate": 0.0001476841517857143,
188
+ "loss": 0.0,
189
+ "step": 15000
190
+ },
191
+ {
192
+ "epoch": 8.65,
193
+ "learning_rate": 0.0001459402901785714,
194
+ "loss": 0.0,
195
+ "step": 15500
196
+ },
197
+ {
198
+ "epoch": 8.93,
199
+ "learning_rate": 0.0001441964285714286,
200
+ "loss": 0.0,
201
+ "step": 16000
202
+ },
203
+ {
204
+ "epoch": 9.21,
205
+ "learning_rate": 0.00014245256696428574,
206
+ "loss": 0.0,
207
+ "step": 16500
208
+ },
209
+ {
210
+ "epoch": 9.49,
211
+ "learning_rate": 0.00014070870535714286,
212
+ "loss": 0.0,
213
+ "step": 17000
214
+ },
215
+ {
216
+ "epoch": 9.77,
217
+ "learning_rate": 0.00013896484375,
218
+ "loss": 0.0,
219
+ "step": 17500
220
+ },
221
+ {
222
+ "epoch": 10.04,
223
+ "learning_rate": 0.00013722098214285716,
224
+ "loss": 0.0,
225
+ "step": 18000
226
+ },
227
+ {
228
+ "epoch": 10.32,
229
+ "learning_rate": 0.00013547712053571428,
230
+ "loss": 0.0,
231
+ "step": 18500
232
+ },
233
+ {
234
+ "epoch": 10.6,
235
+ "learning_rate": 0.00013373325892857143,
236
+ "loss": 0.0,
237
+ "step": 19000
238
+ },
239
+ {
240
+ "epoch": 10.88,
241
+ "learning_rate": 0.00013198939732142858,
242
+ "loss": 0.0,
243
+ "step": 19500
244
+ },
245
+ {
246
+ "epoch": 11.16,
247
+ "learning_rate": 0.00013024553571428573,
248
+ "loss": 0.0,
249
+ "step": 20000
250
+ },
251
+ {
252
+ "epoch": 11.16,
253
+ "eval_loss": 0.0,
254
+ "eval_runtime": 18.1,
255
+ "eval_samples_per_second": 276.243,
256
+ "eval_steps_per_second": 2.21,
257
+ "step": 20000
258
+ },
259
+ {
260
+ "epoch": 11.44,
261
+ "learning_rate": 0.00012850167410714288,
262
+ "loss": 0.0,
263
+ "step": 20500
264
+ },
265
+ {
266
+ "epoch": 11.72,
267
+ "learning_rate": 0.0001267578125,
268
+ "loss": 0.0,
269
+ "step": 21000
270
+ },
271
+ {
272
+ "epoch": 12.0,
273
+ "learning_rate": 0.00012501395089285715,
274
+ "loss": 0.0,
275
+ "step": 21500
276
+ },
277
+ {
278
+ "epoch": 12.28,
279
+ "learning_rate": 0.0001232700892857143,
280
+ "loss": 0.0,
281
+ "step": 22000
282
+ },
283
+ {
284
+ "epoch": 12.56,
285
+ "learning_rate": 0.00012152622767857142,
286
+ "loss": 0.0,
287
+ "step": 22500
288
+ },
289
+ {
290
+ "epoch": 12.83,
291
+ "learning_rate": 0.00011978236607142858,
292
+ "loss": 0.0,
293
+ "step": 23000
294
+ },
295
+ {
296
+ "epoch": 13.11,
297
+ "learning_rate": 0.00011803850446428573,
298
+ "loss": 0.0,
299
+ "step": 23500
300
+ },
301
+ {
302
+ "epoch": 13.39,
303
+ "learning_rate": 0.00011629464285714287,
304
+ "loss": 0.0,
305
+ "step": 24000
306
+ },
307
+ {
308
+ "epoch": 13.67,
309
+ "learning_rate": 0.00011455078125,
310
+ "loss": 0.0,
311
+ "step": 24500
312
+ },
313
+ {
314
+ "epoch": 13.95,
315
+ "learning_rate": 0.00011280691964285715,
316
+ "loss": 0.0,
317
+ "step": 25000
318
+ },
319
+ {
320
+ "epoch": 14.23,
321
+ "learning_rate": 0.00011106305803571429,
322
+ "loss": 0.0,
323
+ "step": 25500
324
+ },
325
+ {
326
+ "epoch": 14.51,
327
+ "learning_rate": 0.00010931919642857142,
328
+ "loss": 0.0,
329
+ "step": 26000
330
+ },
331
+ {
332
+ "epoch": 14.79,
333
+ "learning_rate": 0.00010757533482142858,
334
+ "loss": 0.0,
335
+ "step": 26500
336
+ },
337
+ {
338
+ "epoch": 15.07,
339
+ "learning_rate": 0.00010583147321428572,
340
+ "loss": 0.0004,
341
+ "step": 27000
342
+ },
343
+ {
344
+ "epoch": 15.35,
345
+ "learning_rate": 0.00010408761160714287,
346
+ "loss": 0.0,
347
+ "step": 27500
348
+ },
349
+ {
350
+ "epoch": 15.62,
351
+ "learning_rate": 0.00010234375,
352
+ "loss": 0.0,
353
+ "step": 28000
354
+ },
355
+ {
356
+ "epoch": 15.9,
357
+ "learning_rate": 0.00010059988839285714,
358
+ "loss": 0.0,
359
+ "step": 28500
360
+ },
361
+ {
362
+ "epoch": 16.18,
363
+ "learning_rate": 9.885602678571429e-05,
364
+ "loss": 0.0,
365
+ "step": 29000
366
+ },
367
+ {
368
+ "epoch": 16.46,
369
+ "learning_rate": 9.711216517857144e-05,
370
+ "loss": 0.0,
371
+ "step": 29500
372
+ },
373
+ {
374
+ "epoch": 16.74,
375
+ "learning_rate": 9.536830357142857e-05,
376
+ "loss": 0.0,
377
+ "step": 30000
378
+ },
379
+ {
380
+ "epoch": 17.02,
381
+ "learning_rate": 9.362444196428571e-05,
382
+ "loss": 0.0,
383
+ "step": 30500
384
+ },
385
+ {
386
+ "epoch": 17.3,
387
+ "learning_rate": 9.188058035714287e-05,
388
+ "loss": 0.0,
389
+ "step": 31000
390
+ },
391
+ {
392
+ "epoch": 17.58,
393
+ "learning_rate": 9.013671875000001e-05,
394
+ "loss": 0.0,
395
+ "step": 31500
396
+ },
397
+ {
398
+ "epoch": 17.86,
399
+ "learning_rate": 8.839285714285714e-05,
400
+ "loss": 0.0,
401
+ "step": 32000
402
+ },
403
+ {
404
+ "epoch": 18.14,
405
+ "learning_rate": 8.664899553571429e-05,
406
+ "loss": 0.0,
407
+ "step": 32500
408
+ },
409
+ {
410
+ "epoch": 18.42,
411
+ "learning_rate": 8.490513392857144e-05,
412
+ "loss": 0.0,
413
+ "step": 33000
414
+ },
415
+ {
416
+ "epoch": 18.69,
417
+ "learning_rate": 8.316127232142858e-05,
418
+ "loss": 0.0,
419
+ "step": 33500
420
+ },
421
+ {
422
+ "epoch": 18.97,
423
+ "learning_rate": 8.141741071428571e-05,
424
+ "loss": 0.0,
425
+ "step": 34000
426
+ },
427
+ {
428
+ "epoch": 19.25,
429
+ "learning_rate": 7.967354910714286e-05,
430
+ "loss": 0.0,
431
+ "step": 34500
432
+ },
433
+ {
434
+ "epoch": 19.53,
435
+ "learning_rate": 7.792968750000001e-05,
436
+ "loss": 0.0,
437
+ "step": 35000
438
+ },
439
+ {
440
+ "epoch": 19.81,
441
+ "learning_rate": 7.618582589285715e-05,
442
+ "loss": 0.0,
443
+ "step": 35500
444
+ },
445
+ {
446
+ "epoch": 20.09,
447
+ "learning_rate": 7.44419642857143e-05,
448
+ "loss": 0.0,
449
+ "step": 36000
450
+ },
451
+ {
452
+ "epoch": 20.37,
453
+ "learning_rate": 7.269810267857143e-05,
454
+ "loss": 0.0,
455
+ "step": 36500
456
+ },
457
+ {
458
+ "epoch": 20.65,
459
+ "learning_rate": 7.095424107142858e-05,
460
+ "loss": 0.0,
461
+ "step": 37000
462
+ },
463
+ {
464
+ "epoch": 20.93,
465
+ "learning_rate": 6.921037946428571e-05,
466
+ "loss": 0.0,
467
+ "step": 37500
468
+ },
469
+ {
470
+ "epoch": 21.21,
471
+ "learning_rate": 6.746651785714286e-05,
472
+ "loss": 0.0,
473
+ "step": 38000
474
+ },
475
+ {
476
+ "epoch": 21.48,
477
+ "learning_rate": 6.572265625e-05,
478
+ "loss": 0.0,
479
+ "step": 38500
480
+ },
481
+ {
482
+ "epoch": 21.76,
483
+ "learning_rate": 6.397879464285715e-05,
484
+ "loss": 0.0,
485
+ "step": 39000
486
+ },
487
+ {
488
+ "epoch": 22.04,
489
+ "learning_rate": 6.22349330357143e-05,
490
+ "loss": 0.0,
491
+ "step": 39500
492
+ },
493
+ {
494
+ "epoch": 22.32,
495
+ "learning_rate": 6.049107142857143e-05,
496
+ "loss": 0.0,
497
+ "step": 40000
498
+ },
499
+ {
500
+ "epoch": 22.32,
501
+ "eval_loss": 0.0,
502
+ "eval_runtime": 18.0032,
503
+ "eval_samples_per_second": 277.729,
504
+ "eval_steps_per_second": 2.222,
505
+ "step": 40000
506
+ }
507
+ ],
508
+ "logging_steps": 500,
509
+ "max_steps": 57344,
510
+ "num_train_epochs": 32,
511
+ "save_steps": 20000,
512
+ "total_flos": 1.7738935169875476e+18,
513
+ "trial_name": null,
514
+ "trial_params": null
515
+ }