hllj commited on
Commit
006bb98
·
1 Parent(s): fbd6737

Model save

Browse files
README.md CHANGED
@@ -15,7 +15,7 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  This model is a fine-tuned version of [HuggingFaceH4/zephyr-7b-beta](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta) on the None dataset.
17
  It achieves the following results on the evaluation set:
18
- - Loss: 0.5488
19
 
20
  ## Model description
21
 
@@ -42,17 +42,14 @@ The following hyperparameters were used during training:
42
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
43
  - lr_scheduler_type: cosine
44
  - lr_scheduler_warmup_ratio: 0.05
45
- - num_epochs: 2
46
 
47
  ### Training results
48
 
49
  | Training Loss | Epoch | Step | Validation Loss |
50
  |:-------------:|:-----:|:----:|:---------------:|
51
- | 0.6891 | 0.34 | 500 | 0.6660 |
52
- | 0.6423 | 0.68 | 1000 | 0.6075 |
53
- | 0.5553 | 1.03 | 1500 | 0.5688 |
54
- | 0.5175 | 1.37 | 2000 | 0.5533 |
55
- | 0.5614 | 1.71 | 2500 | 0.5487 |
56
 
57
 
58
  ### Framework versions
 
15
 
16
  This model is a fine-tuned version of [HuggingFaceH4/zephyr-7b-beta](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta) on the None dataset.
17
  It achieves the following results on the evaluation set:
18
+ - Loss: 1.1997
19
 
20
  ## Model description
21
 
 
42
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
43
  - lr_scheduler_type: cosine
44
  - lr_scheduler_warmup_ratio: 0.05
45
+ - training_steps: 50
46
 
47
  ### Training results
48
 
49
  | Training Loss | Epoch | Step | Validation Loss |
50
  |:-------------:|:-----:|:----:|:---------------:|
51
+ | 1.5219 | 0.02 | 25 | 1.2539 |
52
+ | 1.3156 | 0.03 | 50 | 1.1997 |
 
 
 
53
 
54
 
55
  ### Framework versions
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 2.0,
3
- "eval_loss": 0.5487806797027588,
4
- "eval_runtime": 112.8332,
5
  "eval_samples": 650,
6
- "eval_samples_per_second": 5.761,
7
- "eval_steps_per_second": 1.445,
8
- "train_loss": 0.628653114618257,
9
- "train_runtime": 6305.9173,
10
  "train_samples": 5845,
11
- "train_samples_per_second": 1.854,
12
- "train_steps_per_second": 0.464
13
  }
 
1
  {
2
+ "epoch": 0.03,
3
+ "eval_loss": 1.1997405290603638,
4
+ "eval_runtime": 112.9062,
5
  "eval_samples": 650,
6
+ "eval_samples_per_second": 5.757,
7
+ "eval_steps_per_second": 1.444,
8
+ "train_loss": 1.4715181255340577,
9
+ "train_runtime": 335.1031,
10
  "train_samples": 5845,
11
+ "train_samples_per_second": 0.597,
12
+ "train_steps_per_second": 0.149
13
  }
config_argument.yaml ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ !!python/tuple
2
+ - !!python/object:__main__.ModelArguments
3
+ bnb_4bit_quant_type: nf4
4
+ cache_dir: ./cache
5
+ device_map: auto
6
+ load_in_4bit: true
7
+ load_in_8bit: false
8
+ model_name_or_path: HuggingFaceH4/zephyr-7b-beta
9
+ model_revision: main
10
+ model_type: auto
11
+ neft_alpha: 0
12
+ rope_scaling: null
13
+ shift_attn: false
14
+ tokenizer_name_or_path: null
15
+ torch_dtype: float16
16
+ trust_remote_code: true
17
+ use_bnb_nested_quant: false
18
+ use_fast_tokenizer: false
19
+ use_flash_attention_2: false
20
+ - !!python/object:__main__.DataArguments
21
+ dataset_config_name: null
22
+ dataset_name: null
23
+ ignore_pad_token_for_loss: true
24
+ max_eval_samples: null
25
+ max_train_samples: null
26
+ overwrite_cache: false
27
+ preprocessing_num_workers: 4
28
+ template_name: vicuna
29
+ train_file_dir: datasets/finetune
30
+ validation_file_dir: null
31
+ validation_split_percentage: 10
32
+ - !!python/object:__main__.SFTConfig
33
+ __cached__setup_devices: !!python/object/apply:torch.device
34
+ - cuda
35
+ - 0
36
+ _n_gpu: 1
37
+ adafactor: false
38
+ adam_beta1: 0.9
39
+ adam_beta2: 0.999
40
+ adam_epsilon: 1.0e-08
41
+ auto_find_batch_size: false
42
+ bf16: false
43
+ bf16_full_eval: false
44
+ data_seed: null
45
+ dataloader_drop_last: false
46
+ dataloader_num_workers: 0
47
+ dataloader_pin_memory: true
48
+ ddp_backend: null
49
+ ddp_broadcast_buffers: null
50
+ ddp_bucket_cap_mb: null
51
+ ddp_find_unused_parameters: false
52
+ ddp_timeout: 30000
53
+ debug: []
54
+ deepspeed: null
55
+ deepspeed_plugin: null
56
+ disable_tqdm: false
57
+ dispatch_batches: null
58
+ distributed_state: !!python/object:accelerate.state.PartialState
59
+ _cpu: false
60
+ backend: null
61
+ debug: false
62
+ device: !!python/object/apply:torch.device
63
+ - cuda
64
+ - 0
65
+ distributed_type: !!python/object/apply:accelerate.utils.dataclasses.DistributedType
66
+ - MULTI_GPU
67
+ fork_launched: false
68
+ local_process_index: 0
69
+ num_processes: 1
70
+ process_index: 0
71
+ do_eval: true
72
+ do_predict: false
73
+ do_train: true
74
+ eval_accumulation_steps: null
75
+ eval_delay: 0
76
+ eval_steps: 25
77
+ evaluation_strategy: !!python/object/apply:transformers.trainer_utils.IntervalStrategy
78
+ - steps
79
+ fp16: false
80
+ fp16_backend: auto
81
+ fp16_full_eval: false
82
+ fp16_opt_level: O1
83
+ fsdp: []
84
+ fsdp_config:
85
+ min_num_params: 0
86
+ xla: false
87
+ xla_fsdp_grad_ckpt: false
88
+ fsdp_min_num_params: 0
89
+ fsdp_transformer_layer_cls_to_wrap: null
90
+ full_determinism: false
91
+ gradient_accumulation_steps: 1
92
+ gradient_checkpointing: true
93
+ gradient_checkpointing_kwargs:
94
+ use_reentrant: false
95
+ greater_is_better: null
96
+ group_by_length: false
97
+ half_precision_backend: auto
98
+ hub_always_push: false
99
+ hub_model_id: hllj/non-qa-sft-zephyr-7b-beta-v1
100
+ hub_private_repo: false
101
+ hub_strategy: !!python/object/apply:transformers.trainer_utils.HubStrategy
102
+ - every_save
103
+ hub_token: null
104
+ ignore_data_skip: false
105
+ include_inputs_for_metrics: false
106
+ include_tokens_per_second: false
107
+ jit_mode_eval: false
108
+ label_names: null
109
+ label_smoothing_factor: 0.0
110
+ learning_rate: 3.0e-05
111
+ length_column_name: length
112
+ load_best_model_at_end: false
113
+ local_rank: 0
114
+ log_level: info
115
+ log_level_replica: warning
116
+ log_on_each_node: true
117
+ logging_dir: outputs-sft-zephyr-beta-v1/runs/Nov22_05-52-29_a72e59c0abac
118
+ logging_first_step: true
119
+ logging_nan_inf_filter: true
120
+ logging_steps: 10
121
+ logging_strategy: !!python/object/apply:transformers.trainer_utils.IntervalStrategy
122
+ - steps
123
+ lr_scheduler_type: !!python/object/apply:transformers.trainer_utils.SchedulerType
124
+ - cosine
125
+ max_grad_norm: 1.0
126
+ max_seq_length: 512
127
+ max_steps: 50
128
+ metric_for_best_model: null
129
+ mp_parameters: ''
130
+ neftune_noise_alpha: null
131
+ no_cuda: false
132
+ num_train_epochs: 3.0
133
+ optim: !!python/object/apply:transformers.training_args.OptimizerNames
134
+ - adamw_torch
135
+ optim_args: null
136
+ output_dir: outputs-sft-zephyr-beta-v1
137
+ overwrite_output_dir: true
138
+ past_index: -1
139
+ per_device_eval_batch_size: 4
140
+ per_device_train_batch_size: 4
141
+ per_gpu_eval_batch_size: null
142
+ per_gpu_train_batch_size: null
143
+ prediction_loss_only: false
144
+ push_to_hub: true
145
+ push_to_hub_model_id: null
146
+ push_to_hub_organization: null
147
+ push_to_hub_token: null
148
+ ray_scope: last
149
+ remove_unused_columns: true
150
+ report_to:
151
+ - wandb
152
+ resume_from_checkpoint: null
153
+ run_name: sft-zephyr-7b-beta-v1
154
+ save_on_each_node: false
155
+ save_safetensors: true
156
+ save_steps: 25
157
+ save_strategy: !!python/object/apply:transformers.trainer_utils.IntervalStrategy
158
+ - steps
159
+ save_total_limit: 13
160
+ seed: 42
161
+ skip_memory_metrics: true
162
+ split_batches: false
163
+ tf32: null
164
+ torch_compile: false
165
+ torch_compile_backend: null
166
+ torch_compile_mode: null
167
+ torchdynamo: null
168
+ tpu_metrics_debug: false
169
+ tpu_num_cores: null
170
+ use_cpu: false
171
+ use_ipex: false
172
+ use_legacy_prediction_loop: false
173
+ use_mps_device: false
174
+ warmup_ratio: 0.05
175
+ warmup_steps: 0
176
+ weight_decay: 0.05
177
+ - !!python/object:__main__.ScriptArguments
178
+ lora_alpha: 16
179
+ lora_dropout: 0.1
180
+ lora_modules_to_save: null
181
+ lora_r: 64
182
+ lora_target_modules:
183
+ - q_proj
184
+ - k_proj
185
+ - v_proj
186
+ - o_proj
187
+ peft_path: null
188
+ use_peft: true
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 2.0,
3
- "eval_loss": 0.5487806797027588,
4
- "eval_runtime": 112.8332,
5
  "eval_samples": 650,
6
- "eval_samples_per_second": 5.761,
7
- "eval_steps_per_second": 1.445
8
  }
 
1
  {
2
+ "epoch": 0.03,
3
+ "eval_loss": 1.1997405290603638,
4
+ "eval_runtime": 112.9062,
5
  "eval_samples": 650,
6
+ "eval_samples_per_second": 5.757,
7
+ "eval_steps_per_second": 1.444
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 2.0,
3
- "train_loss": 0.628653114618257,
4
- "train_runtime": 6305.9173,
5
  "train_samples": 5845,
6
- "train_samples_per_second": 1.854,
7
- "train_steps_per_second": 0.464
8
  }
 
1
  {
2
+ "epoch": 0.03,
3
+ "train_loss": 1.4715181255340577,
4
+ "train_runtime": 335.1031,
5
  "train_samples": 5845,
6
+ "train_samples_per_second": 0.597,
7
+ "train_steps_per_second": 0.149
8
  }
trainer_state.json CHANGED
@@ -1,1826 +1,80 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.0,
5
- "eval_steps": 500,
6
- "global_step": 2924,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.0,
13
- "learning_rate": 2.0408163265306121e-07,
14
- "loss": 1.4003,
15
  "step": 1
16
  },
17
  {
18
  "epoch": 0.01,
19
- "learning_rate": 2.0408163265306125e-06,
20
- "loss": 1.6942,
21
  "step": 10
22
  },
23
  {
24
  "epoch": 0.01,
25
- "learning_rate": 4.081632653061225e-06,
26
- "loss": 1.6045,
27
  "step": 20
28
  },
29
  {
30
  "epoch": 0.02,
31
- "learning_rate": 6.122448979591837e-06,
32
- "loss": 1.5842,
 
 
 
 
 
 
 
 
33
  "step": 30
34
  },
35
  {
36
  "epoch": 0.03,
37
- "learning_rate": 8.16326530612245e-06,
38
- "loss": 1.6188,
39
  "step": 40
40
  },
41
  {
42
  "epoch": 0.03,
43
- "learning_rate": 1.0204081632653061e-05,
44
- "loss": 1.5321,
45
  "step": 50
46
  },
47
  {
48
- "epoch": 0.04,
49
- "learning_rate": 1.2244897959183674e-05,
50
- "loss": 1.6182,
51
- "step": 60
52
- },
53
- {
54
- "epoch": 0.05,
55
- "learning_rate": 1.4285714285714285e-05,
56
- "loss": 1.3685,
57
- "step": 70
58
- },
59
- {
60
- "epoch": 0.05,
61
- "learning_rate": 1.63265306122449e-05,
62
- "loss": 1.3332,
63
- "step": 80
64
- },
65
- {
66
- "epoch": 0.06,
67
- "learning_rate": 1.836734693877551e-05,
68
- "loss": 1.1891,
69
- "step": 90
70
- },
71
- {
72
- "epoch": 0.07,
73
- "learning_rate": 2.0408163265306123e-05,
74
- "loss": 0.9755,
75
- "step": 100
76
- },
77
- {
78
- "epoch": 0.08,
79
- "learning_rate": 2.2448979591836737e-05,
80
- "loss": 0.9277,
81
- "step": 110
82
- },
83
- {
84
- "epoch": 0.08,
85
- "learning_rate": 2.448979591836735e-05,
86
- "loss": 0.8792,
87
- "step": 120
88
- },
89
- {
90
- "epoch": 0.09,
91
- "learning_rate": 2.653061224489796e-05,
92
- "loss": 0.8798,
93
- "step": 130
94
- },
95
- {
96
- "epoch": 0.1,
97
- "learning_rate": 2.857142857142857e-05,
98
- "loss": 0.8967,
99
- "step": 140
100
- },
101
- {
102
- "epoch": 0.1,
103
- "learning_rate": 2.9999913612413297e-05,
104
- "loss": 0.8726,
105
- "step": 150
106
- },
107
- {
108
- "epoch": 0.11,
109
- "learning_rate": 2.9998377860774987e-05,
110
- "loss": 0.8242,
111
- "step": 160
112
- },
113
- {
114
- "epoch": 0.12,
115
- "learning_rate": 2.9994922611221096e-05,
116
- "loss": 0.762,
117
- "step": 170
118
- },
119
- {
120
- "epoch": 0.12,
121
- "learning_rate": 2.998954830595574e-05,
122
- "loss": 0.854,
123
- "step": 180
124
- },
125
- {
126
- "epoch": 0.13,
127
- "learning_rate": 2.9982255632784477e-05,
128
- "loss": 0.7793,
129
- "step": 190
130
- },
131
- {
132
- "epoch": 0.14,
133
- "learning_rate": 2.9973045525026307e-05,
134
- "loss": 0.7864,
135
- "step": 200
136
- },
137
- {
138
- "epoch": 0.14,
139
- "learning_rate": 2.9961919161394204e-05,
140
- "loss": 0.7602,
141
- "step": 210
142
- },
143
- {
144
- "epoch": 0.15,
145
- "learning_rate": 2.9948877965844274e-05,
146
- "loss": 0.7455,
147
- "step": 220
148
- },
149
- {
150
- "epoch": 0.16,
151
- "learning_rate": 2.9933923607393534e-05,
152
- "loss": 0.7094,
153
- "step": 230
154
- },
155
- {
156
- "epoch": 0.16,
157
- "learning_rate": 2.991705799990627e-05,
158
- "loss": 0.847,
159
- "step": 240
160
- },
161
- {
162
- "epoch": 0.17,
163
- "learning_rate": 2.9898283301849137e-05,
164
- "loss": 0.7693,
165
- "step": 250
166
- },
167
- {
168
- "epoch": 0.18,
169
- "learning_rate": 2.9877601916014893e-05,
170
- "loss": 0.7415,
171
- "step": 260
172
- },
173
- {
174
- "epoch": 0.18,
175
- "learning_rate": 2.9855016489214902e-05,
176
- "loss": 0.7504,
177
- "step": 270
178
- },
179
- {
180
- "epoch": 0.19,
181
- "learning_rate": 2.983052991194041e-05,
182
- "loss": 0.7186,
183
- "step": 280
184
- },
185
- {
186
- "epoch": 0.2,
187
- "learning_rate": 2.980414531799256e-05,
188
- "loss": 0.7272,
189
- "step": 290
190
- },
191
- {
192
- "epoch": 0.21,
193
- "learning_rate": 2.977586608408141e-05,
194
- "loss": 0.7452,
195
- "step": 300
196
- },
197
- {
198
- "epoch": 0.21,
199
- "learning_rate": 2.974569582939371e-05,
200
- "loss": 0.7392,
201
- "step": 310
202
- },
203
- {
204
- "epoch": 0.22,
205
- "learning_rate": 2.9713638415129753e-05,
206
- "loss": 0.7574,
207
- "step": 320
208
- },
209
- {
210
- "epoch": 0.23,
211
- "learning_rate": 2.967969794400921e-05,
212
- "loss": 0.7113,
213
- "step": 330
214
- },
215
- {
216
- "epoch": 0.23,
217
- "learning_rate": 2.9643878759746055e-05,
218
- "loss": 0.7494,
219
- "step": 340
220
- },
221
- {
222
- "epoch": 0.24,
223
- "learning_rate": 2.9606185446492676e-05,
224
- "loss": 0.7889,
225
- "step": 350
226
- },
227
- {
228
- "epoch": 0.25,
229
- "learning_rate": 2.9566622828253155e-05,
230
- "loss": 0.6575,
231
- "step": 360
232
- },
233
- {
234
- "epoch": 0.25,
235
- "learning_rate": 2.9525195968265937e-05,
236
- "loss": 0.7284,
237
- "step": 370
238
- },
239
- {
240
- "epoch": 0.26,
241
- "learning_rate": 2.9481910168355798e-05,
242
- "loss": 0.6623,
243
- "step": 380
244
- },
245
- {
246
- "epoch": 0.27,
247
- "learning_rate": 2.943677096825533e-05,
248
- "loss": 0.7226,
249
- "step": 390
250
- },
251
- {
252
- "epoch": 0.27,
253
- "learning_rate": 2.938978414489597e-05,
254
- "loss": 0.678,
255
- "step": 400
256
- },
257
- {
258
- "epoch": 0.28,
259
- "learning_rate": 2.9340955711668652e-05,
260
- "loss": 0.6915,
261
- "step": 410
262
- },
263
- {
264
- "epoch": 0.29,
265
- "learning_rate": 2.9290291917654223e-05,
266
- "loss": 0.6835,
267
- "step": 420
268
- },
269
- {
270
- "epoch": 0.29,
271
- "learning_rate": 2.9237799246823677e-05,
272
- "loss": 0.6486,
273
- "step": 430
274
- },
275
- {
276
- "epoch": 0.3,
277
- "learning_rate": 2.918348441720833e-05,
278
- "loss": 0.6618,
279
- "step": 440
280
- },
281
- {
282
- "epoch": 0.31,
283
- "learning_rate": 2.9127354380040067e-05,
284
- "loss": 0.7055,
285
- "step": 450
286
- },
287
- {
288
- "epoch": 0.31,
289
- "learning_rate": 2.906941631886168e-05,
290
- "loss": 0.6836,
291
- "step": 460
292
- },
293
- {
294
- "epoch": 0.32,
295
- "learning_rate": 2.9009677648607556e-05,
296
- "loss": 0.6514,
297
- "step": 470
298
- },
299
- {
300
- "epoch": 0.33,
301
- "learning_rate": 2.894814601465469e-05,
302
- "loss": 0.716,
303
- "step": 480
304
- },
305
- {
306
- "epoch": 0.34,
307
- "learning_rate": 2.8884829291844227e-05,
308
- "loss": 0.6487,
309
- "step": 490
310
- },
311
- {
312
- "epoch": 0.34,
313
- "learning_rate": 2.8819735583473635e-05,
314
- "loss": 0.6891,
315
- "step": 500
316
- },
317
- {
318
- "epoch": 0.34,
319
- "eval_loss": 0.665991485118866,
320
- "eval_runtime": 112.8957,
321
- "eval_samples_per_second": 5.758,
322
- "eval_steps_per_second": 1.444,
323
- "step": 500
324
- },
325
- {
326
- "epoch": 0.35,
327
- "learning_rate": 2.8752873220259666e-05,
328
- "loss": 0.7041,
329
- "step": 510
330
- },
331
- {
332
- "epoch": 0.36,
333
- "learning_rate": 2.8684250759272157e-05,
334
- "loss": 0.7158,
335
- "step": 520
336
- },
337
- {
338
- "epoch": 0.36,
339
- "learning_rate": 2.8613876982838903e-05,
340
- "loss": 0.6794,
341
- "step": 530
342
- },
343
- {
344
- "epoch": 0.37,
345
- "learning_rate": 2.85417608974217e-05,
346
- "loss": 0.6874,
347
- "step": 540
348
- },
349
- {
350
- "epoch": 0.38,
351
- "learning_rate": 2.846791173246368e-05,
352
- "loss": 0.6694,
353
- "step": 550
354
- },
355
- {
356
- "epoch": 0.38,
357
- "learning_rate": 2.839233893920815e-05,
358
- "loss": 0.6643,
359
- "step": 560
360
- },
361
- {
362
- "epoch": 0.39,
363
- "learning_rate": 2.8315052189488983e-05,
364
- "loss": 0.6691,
365
- "step": 570
366
- },
367
- {
368
- "epoch": 0.4,
369
- "learning_rate": 2.8236061374492838e-05,
370
- "loss": 0.6586,
371
- "step": 580
372
- },
373
- {
374
- "epoch": 0.4,
375
- "learning_rate": 2.8155376603493274e-05,
376
- "loss": 0.646,
377
- "step": 590
378
- },
379
- {
380
- "epoch": 0.41,
381
- "learning_rate": 2.8073008202556964e-05,
382
- "loss": 0.6588,
383
- "step": 600
384
- },
385
- {
386
- "epoch": 0.42,
387
- "learning_rate": 2.7988966713222155e-05,
388
- "loss": 0.6293,
389
- "step": 610
390
- },
391
- {
392
- "epoch": 0.42,
393
- "learning_rate": 2.7903262891149568e-05,
394
- "loss": 0.6398,
395
- "step": 620
396
- },
397
- {
398
- "epoch": 0.43,
399
- "learning_rate": 2.7815907704745874e-05,
400
- "loss": 0.6031,
401
- "step": 630
402
- },
403
- {
404
- "epoch": 0.44,
405
- "learning_rate": 2.7726912333759963e-05,
406
- "loss": 0.6452,
407
- "step": 640
408
- },
409
- {
410
- "epoch": 0.44,
411
- "learning_rate": 2.763628816785215e-05,
412
- "loss": 0.6369,
413
- "step": 650
414
- },
415
- {
416
- "epoch": 0.45,
417
- "learning_rate": 2.7544046805136524e-05,
418
- "loss": 0.7018,
419
- "step": 660
420
- },
421
- {
422
- "epoch": 0.46,
423
- "learning_rate": 2.7450200050696616e-05,
424
- "loss": 0.6782,
425
- "step": 670
426
- },
427
- {
428
- "epoch": 0.47,
429
- "learning_rate": 2.7354759915074574e-05,
430
- "loss": 0.6427,
431
- "step": 680
432
- },
433
- {
434
- "epoch": 0.47,
435
- "learning_rate": 2.7257738612734065e-05,
436
- "loss": 0.6457,
437
- "step": 690
438
- },
439
- {
440
- "epoch": 0.48,
441
- "learning_rate": 2.715914856049705e-05,
442
- "loss": 0.6016,
443
- "step": 700
444
- },
445
- {
446
- "epoch": 0.49,
447
- "learning_rate": 2.7059002375954684e-05,
448
- "loss": 0.6606,
449
- "step": 710
450
- },
451
- {
452
- "epoch": 0.49,
453
- "learning_rate": 2.695731287585249e-05,
454
- "loss": 0.6334,
455
- "step": 720
456
- },
457
- {
458
- "epoch": 0.5,
459
- "learning_rate": 2.685409307445011e-05,
460
- "loss": 0.6806,
461
- "step": 730
462
- },
463
- {
464
- "epoch": 0.51,
465
- "learning_rate": 2.6749356181855685e-05,
466
- "loss": 0.5794,
467
- "step": 740
468
- },
469
- {
470
- "epoch": 0.51,
471
- "learning_rate": 2.6643115602335263e-05,
472
- "loss": 0.5806,
473
- "step": 750
474
- },
475
- {
476
- "epoch": 0.52,
477
- "learning_rate": 2.6535384932597288e-05,
478
- "loss": 0.6126,
479
- "step": 760
480
- },
481
- {
482
- "epoch": 0.53,
483
- "learning_rate": 2.6426177960052494e-05,
484
- "loss": 0.6405,
485
- "step": 770
486
- },
487
- {
488
- "epoch": 0.53,
489
- "learning_rate": 2.631550866104941e-05,
490
- "loss": 0.644,
491
- "step": 780
492
- },
493
- {
494
- "epoch": 0.54,
495
- "learning_rate": 2.6203391199085617e-05,
496
- "loss": 0.6072,
497
- "step": 790
498
- },
499
- {
500
- "epoch": 0.55,
501
- "learning_rate": 2.6089839922995145e-05,
502
- "loss": 0.6152,
503
- "step": 800
504
- },
505
- {
506
- "epoch": 0.55,
507
- "learning_rate": 2.5974869365112084e-05,
508
- "loss": 0.6294,
509
- "step": 810
510
- },
511
- {
512
- "epoch": 0.56,
513
- "learning_rate": 2.5858494239410705e-05,
514
- "loss": 0.6486,
515
- "step": 820
516
- },
517
- {
518
- "epoch": 0.57,
519
- "learning_rate": 2.5740729439622414e-05,
520
- "loss": 0.6507,
521
- "step": 830
522
- },
523
- {
524
- "epoch": 0.57,
525
- "learning_rate": 2.5621590037329604e-05,
526
- "loss": 0.6327,
527
- "step": 840
528
- },
529
- {
530
- "epoch": 0.58,
531
- "learning_rate": 2.550109128003681e-05,
532
- "loss": 0.5947,
533
- "step": 850
534
- },
535
- {
536
- "epoch": 0.59,
537
- "learning_rate": 2.5379248589219307e-05,
538
- "loss": 0.6366,
539
- "step": 860
540
- },
541
- {
542
- "epoch": 0.6,
543
- "learning_rate": 2.525607755834948e-05,
544
- "loss": 0.6608,
545
- "step": 870
546
- },
547
- {
548
- "epoch": 0.6,
549
- "learning_rate": 2.513159395090117e-05,
550
- "loss": 0.6861,
551
- "step": 880
552
- },
553
- {
554
- "epoch": 0.61,
555
- "learning_rate": 2.5005813698332257e-05,
556
- "loss": 0.6377,
557
- "step": 890
558
- },
559
- {
560
- "epoch": 0.62,
561
- "learning_rate": 2.4878752898045725e-05,
562
- "loss": 0.601,
563
- "step": 900
564
- },
565
- {
566
- "epoch": 0.62,
567
- "learning_rate": 2.475042781132953e-05,
568
- "loss": 0.6185,
569
- "step": 910
570
- },
571
- {
572
- "epoch": 0.63,
573
- "learning_rate": 2.462085486127547e-05,
574
- "loss": 0.6199,
575
- "step": 920
576
- },
577
- {
578
- "epoch": 0.64,
579
- "learning_rate": 2.449005063067736e-05,
580
- "loss": 0.6137,
581
- "step": 930
582
- },
583
- {
584
- "epoch": 0.64,
585
- "learning_rate": 2.4358031859908747e-05,
586
- "loss": 0.6004,
587
- "step": 940
588
- },
589
- {
590
- "epoch": 0.65,
591
- "learning_rate": 2.4224815444780473e-05,
592
- "loss": 0.6245,
593
- "step": 950
594
- },
595
- {
596
- "epoch": 0.66,
597
- "learning_rate": 2.4090418434378348e-05,
598
- "loss": 0.6535,
599
- "step": 960
600
- },
601
- {
602
- "epoch": 0.66,
603
- "learning_rate": 2.3954858028881206e-05,
604
- "loss": 0.594,
605
- "step": 970
606
- },
607
- {
608
- "epoch": 0.67,
609
- "learning_rate": 2.3818151577359593e-05,
610
- "loss": 0.605,
611
- "step": 980
612
- },
613
- {
614
- "epoch": 0.68,
615
- "learning_rate": 2.3680316575555474e-05,
616
- "loss": 0.6185,
617
- "step": 990
618
- },
619
- {
620
- "epoch": 0.68,
621
- "learning_rate": 2.3541370663643073e-05,
622
- "loss": 0.6423,
623
- "step": 1000
624
- },
625
- {
626
- "epoch": 0.68,
627
- "eval_loss": 0.6074596047401428,
628
- "eval_runtime": 112.8513,
629
- "eval_samples_per_second": 5.76,
630
- "eval_steps_per_second": 1.444,
631
- "step": 1000
632
- },
633
- {
634
- "epoch": 0.69,
635
- "learning_rate": 2.3401331623971307e-05,
636
- "loss": 0.6404,
637
- "step": 1010
638
- },
639
- {
640
- "epoch": 0.7,
641
- "learning_rate": 2.3260217378787985e-05,
642
- "loss": 0.5879,
643
- "step": 1020
644
- },
645
- {
646
- "epoch": 0.7,
647
- "learning_rate": 2.3118045987946115e-05,
648
- "loss": 0.6582,
649
- "step": 1030
650
- },
651
- {
652
- "epoch": 0.71,
653
- "learning_rate": 2.297483564659259e-05,
654
- "loss": 0.6132,
655
- "step": 1040
656
- },
657
- {
658
- "epoch": 0.72,
659
- "learning_rate": 2.2830604682839577e-05,
660
- "loss": 0.5921,
661
- "step": 1050
662
- },
663
- {
664
- "epoch": 0.73,
665
- "learning_rate": 2.2685371555418865e-05,
666
- "loss": 0.5874,
667
- "step": 1060
668
- },
669
- {
670
- "epoch": 0.73,
671
- "learning_rate": 2.2539154851319534e-05,
672
- "loss": 0.6708,
673
- "step": 1070
674
- },
675
- {
676
- "epoch": 0.74,
677
- "learning_rate": 2.2391973283409148e-05,
678
- "loss": 0.6474,
679
- "step": 1080
680
- },
681
- {
682
- "epoch": 0.75,
683
- "learning_rate": 2.2243845688038912e-05,
684
- "loss": 0.5853,
685
- "step": 1090
686
- },
687
- {
688
- "epoch": 0.75,
689
- "learning_rate": 2.2094791022632964e-05,
690
- "loss": 0.602,
691
- "step": 1100
692
- },
693
- {
694
- "epoch": 0.76,
695
- "learning_rate": 2.194482836326222e-05,
696
- "loss": 0.6012,
697
- "step": 1110
698
- },
699
- {
700
- "epoch": 0.77,
701
- "learning_rate": 2.179397690220298e-05,
702
- "loss": 0.5837,
703
- "step": 1120
704
- },
705
- {
706
- "epoch": 0.77,
707
- "learning_rate": 2.164225594548072e-05,
708
- "loss": 0.6277,
709
- "step": 1130
710
- },
711
- {
712
- "epoch": 0.78,
713
- "learning_rate": 2.1489684910399287e-05,
714
- "loss": 0.548,
715
- "step": 1140
716
- },
717
- {
718
- "epoch": 0.79,
719
- "learning_rate": 2.1336283323055877e-05,
720
- "loss": 0.6275,
721
- "step": 1150
722
- },
723
- {
724
- "epoch": 0.79,
725
- "learning_rate": 2.118207081584207e-05,
726
- "loss": 0.6045,
727
- "step": 1160
728
- },
729
- {
730
- "epoch": 0.8,
731
- "learning_rate": 2.102706712493125e-05,
732
- "loss": 0.579,
733
- "step": 1170
734
- },
735
- {
736
- "epoch": 0.81,
737
- "learning_rate": 2.0871292087752813e-05,
738
- "loss": 0.6307,
739
- "step": 1180
740
- },
741
- {
742
- "epoch": 0.81,
743
- "learning_rate": 2.0714765640453295e-05,
744
- "loss": 0.6708,
745
- "step": 1190
746
- },
747
- {
748
- "epoch": 0.82,
749
- "learning_rate": 2.055750781534502e-05,
750
- "loss": 0.5567,
751
- "step": 1200
752
- },
753
- {
754
- "epoch": 0.83,
755
- "learning_rate": 2.0399538738342276e-05,
756
- "loss": 0.6077,
757
- "step": 1210
758
- },
759
- {
760
- "epoch": 0.83,
761
- "learning_rate": 2.0240878626385656e-05,
762
- "loss": 0.5914,
763
- "step": 1220
764
- },
765
- {
766
- "epoch": 0.84,
767
- "learning_rate": 2.0081547784854636e-05,
768
- "loss": 0.5752,
769
- "step": 1230
770
- },
771
- {
772
- "epoch": 0.85,
773
- "learning_rate": 1.9921566604968935e-05,
774
- "loss": 0.5845,
775
- "step": 1240
776
- },
777
- {
778
- "epoch": 0.85,
779
- "learning_rate": 1.9760955561178792e-05,
780
- "loss": 0.5957,
781
- "step": 1250
782
- },
783
- {
784
- "epoch": 0.86,
785
- "learning_rate": 1.9599735208544676e-05,
786
- "loss": 0.6019,
787
- "step": 1260
788
- },
789
- {
790
- "epoch": 0.87,
791
- "learning_rate": 1.943792618010662e-05,
792
- "loss": 0.6482,
793
- "step": 1270
794
- },
795
- {
796
- "epoch": 0.88,
797
- "learning_rate": 1.9275549184243597e-05,
798
- "loss": 0.5898,
799
- "step": 1280
800
- },
801
- {
802
- "epoch": 0.88,
803
- "learning_rate": 1.9112625002023276e-05,
804
- "loss": 0.5559,
805
- "step": 1290
806
  },
807
  {
808
- "epoch": 0.89,
809
- "learning_rate": 1.8949174484542413e-05,
810
- "loss": 0.5938,
811
- "step": 1300
812
- },
813
- {
814
- "epoch": 0.9,
815
- "learning_rate": 1.8785218550258373e-05,
816
- "loss": 0.6049,
817
- "step": 1310
818
- },
819
- {
820
- "epoch": 0.9,
821
- "learning_rate": 1.862077818231193e-05,
822
- "loss": 0.5744,
823
- "step": 1320
824
- },
825
- {
826
- "epoch": 0.91,
827
- "learning_rate": 1.845587442584185e-05,
828
- "loss": 0.6184,
829
- "step": 1330
830
- },
831
- {
832
- "epoch": 0.92,
833
- "learning_rate": 1.8290528385291558e-05,
834
- "loss": 0.5656,
835
- "step": 1340
836
- },
837
- {
838
- "epoch": 0.92,
839
- "learning_rate": 1.8124761221708136e-05,
840
- "loss": 0.5297,
841
- "step": 1350
842
- },
843
- {
844
- "epoch": 0.93,
845
- "learning_rate": 1.7958594150034165e-05,
846
- "loss": 0.5681,
847
- "step": 1360
848
- },
849
- {
850
- "epoch": 0.94,
851
- "learning_rate": 1.7792048436392584e-05,
852
- "loss": 0.6133,
853
- "step": 1370
854
- },
855
- {
856
- "epoch": 0.94,
857
- "learning_rate": 1.7625145395365098e-05,
858
- "loss": 0.5877,
859
- "step": 1380
860
- },
861
- {
862
- "epoch": 0.95,
863
- "learning_rate": 1.7457906387264288e-05,
864
- "loss": 0.5887,
865
- "step": 1390
866
- },
867
- {
868
- "epoch": 0.96,
869
- "learning_rate": 1.729035281539993e-05,
870
- "loss": 0.5648,
871
- "step": 1400
872
- },
873
- {
874
- "epoch": 0.96,
875
- "learning_rate": 1.7122506123339775e-05,
876
- "loss": 0.551,
877
- "step": 1410
878
- },
879
- {
880
- "epoch": 0.97,
881
- "learning_rate": 1.6954387792165222e-05,
882
- "loss": 0.5537,
883
- "step": 1420
884
- },
885
- {
886
- "epoch": 0.98,
887
- "learning_rate": 1.6786019337722136e-05,
888
- "loss": 0.542,
889
- "step": 1430
890
- },
891
- {
892
- "epoch": 0.98,
893
- "learning_rate": 1.6617422307867238e-05,
894
- "loss": 0.5466,
895
- "step": 1440
896
- },
897
- {
898
- "epoch": 0.99,
899
- "learning_rate": 1.6448618279710438e-05,
900
- "loss": 0.5977,
901
- "step": 1450
902
- },
903
- {
904
- "epoch": 1.0,
905
- "learning_rate": 1.6279628856853338e-05,
906
- "loss": 0.5897,
907
- "step": 1460
908
- },
909
- {
910
- "epoch": 1.01,
911
- "learning_rate": 1.611047566662445e-05,
912
- "loss": 0.5539,
913
- "step": 1470
914
- },
915
- {
916
- "epoch": 1.01,
917
- "learning_rate": 1.5941180357311268e-05,
918
- "loss": 0.5411,
919
- "step": 1480
920
- },
921
- {
922
- "epoch": 1.02,
923
- "learning_rate": 1.5771764595389764e-05,
924
- "loss": 0.5479,
925
- "step": 1490
926
- },
927
- {
928
- "epoch": 1.03,
929
- "learning_rate": 1.560225006275145e-05,
930
- "loss": 0.5553,
931
- "step": 1500
932
- },
933
- {
934
- "epoch": 1.03,
935
- "eval_loss": 0.5688494443893433,
936
- "eval_runtime": 112.9699,
937
- "eval_samples_per_second": 5.754,
938
- "eval_steps_per_second": 1.443,
939
- "step": 1500
940
- },
941
- {
942
- "epoch": 1.03,
943
- "learning_rate": 1.543265845392858e-05,
944
- "loss": 0.535,
945
- "step": 1510
946
- },
947
- {
948
- "epoch": 1.04,
949
- "learning_rate": 1.5263011473317623e-05,
950
- "loss": 0.531,
951
- "step": 1520
952
- },
953
- {
954
- "epoch": 1.05,
955
- "learning_rate": 1.509333083240157e-05,
956
- "loss": 0.568,
957
- "step": 1530
958
- },
959
- {
960
- "epoch": 1.05,
961
- "learning_rate": 1.4923638246971261e-05,
962
- "loss": 0.5375,
963
- "step": 1540
964
- },
965
- {
966
- "epoch": 1.06,
967
- "learning_rate": 1.4753955434346188e-05,
968
- "loss": 0.5868,
969
- "step": 1550
970
- },
971
- {
972
- "epoch": 1.07,
973
- "learning_rate": 1.458430411059513e-05,
974
- "loss": 0.5549,
975
- "step": 1560
976
- },
977
- {
978
- "epoch": 1.07,
979
- "learning_rate": 1.4414705987756907e-05,
980
- "loss": 0.5595,
981
- "step": 1570
982
- },
983
- {
984
- "epoch": 1.08,
985
- "learning_rate": 1.4245182771061667e-05,
986
- "loss": 0.5578,
987
- "step": 1580
988
- },
989
- {
990
- "epoch": 1.09,
991
- "learning_rate": 1.407575615615303e-05,
992
- "loss": 0.538,
993
- "step": 1590
994
- },
995
- {
996
- "epoch": 1.09,
997
- "learning_rate": 1.3906447826311502e-05,
998
- "loss": 0.5574,
999
- "step": 1600
1000
- },
1001
- {
1002
- "epoch": 1.1,
1003
- "learning_rate": 1.3737279449679414e-05,
1004
- "loss": 0.5667,
1005
- "step": 1610
1006
- },
1007
- {
1008
- "epoch": 1.11,
1009
- "learning_rate": 1.3568272676487834e-05,
1010
- "loss": 0.5564,
1011
- "step": 1620
1012
- },
1013
- {
1014
- "epoch": 1.11,
1015
- "learning_rate": 1.3399449136285751e-05,
1016
- "loss": 0.5589,
1017
- "step": 1630
1018
- },
1019
- {
1020
- "epoch": 1.12,
1021
- "learning_rate": 1.3230830435171951e-05,
1022
- "loss": 0.5636,
1023
- "step": 1640
1024
- },
1025
- {
1026
- "epoch": 1.13,
1027
- "learning_rate": 1.3062438153029829e-05,
1028
- "loss": 0.5677,
1029
- "step": 1650
1030
- },
1031
- {
1032
- "epoch": 1.14,
1033
- "learning_rate": 1.2894293840765583e-05,
1034
- "loss": 0.5321,
1035
- "step": 1660
1036
- },
1037
- {
1038
- "epoch": 1.14,
1039
- "learning_rate": 1.272641901755015e-05,
1040
- "loss": 0.5664,
1041
- "step": 1670
1042
- },
1043
- {
1044
- "epoch": 1.15,
1045
- "learning_rate": 1.2558835168065162e-05,
1046
- "loss": 0.6372,
1047
- "step": 1680
1048
- },
1049
- {
1050
- "epoch": 1.16,
1051
- "learning_rate": 1.2391563739753316e-05,
1052
- "loss": 0.5377,
1053
- "step": 1690
1054
- },
1055
- {
1056
- "epoch": 1.16,
1057
- "learning_rate": 1.2224626140073533e-05,
1058
- "loss": 0.5974,
1059
- "step": 1700
1060
- },
1061
- {
1062
- "epoch": 1.17,
1063
- "learning_rate": 1.2058043733761228e-05,
1064
- "loss": 0.5924,
1065
- "step": 1710
1066
- },
1067
- {
1068
- "epoch": 1.18,
1069
- "learning_rate": 1.1891837840094038e-05,
1070
- "loss": 0.5149,
1071
- "step": 1720
1072
- },
1073
- {
1074
- "epoch": 1.18,
1075
- "learning_rate": 1.1726029730163372e-05,
1076
- "loss": 0.5486,
1077
- "step": 1730
1078
- },
1079
- {
1080
- "epoch": 1.19,
1081
- "learning_rate": 1.156064062415212e-05,
1082
- "loss": 0.5161,
1083
- "step": 1740
1084
- },
1085
- {
1086
- "epoch": 1.2,
1087
- "learning_rate": 1.1395691688618916e-05,
1088
- "loss": 0.5317,
1089
- "step": 1750
1090
- },
1091
- {
1092
- "epoch": 1.2,
1093
- "learning_rate": 1.1231204033789202e-05,
1094
- "loss": 0.5456,
1095
- "step": 1760
1096
- },
1097
- {
1098
- "epoch": 1.21,
1099
- "learning_rate": 1.1067198710853537e-05,
1100
- "loss": 0.4997,
1101
- "step": 1770
1102
- },
1103
- {
1104
- "epoch": 1.22,
1105
- "learning_rate": 1.0903696709273497e-05,
1106
- "loss": 0.5274,
1107
- "step": 1780
1108
- },
1109
- {
1110
- "epoch": 1.22,
1111
- "learning_rate": 1.074071895409537e-05,
1112
- "loss": 0.5395,
1113
- "step": 1790
1114
- },
1115
- {
1116
- "epoch": 1.23,
1117
- "learning_rate": 1.0578286303272224e-05,
1118
- "loss": 0.5754,
1119
- "step": 1800
1120
- },
1121
- {
1122
- "epoch": 1.24,
1123
- "learning_rate": 1.0416419544994449e-05,
1124
- "loss": 0.5593,
1125
- "step": 1810
1126
- },
1127
- {
1128
- "epoch": 1.24,
1129
- "learning_rate": 1.0255139395029315e-05,
1130
- "loss": 0.5635,
1131
- "step": 1820
1132
- },
1133
- {
1134
- "epoch": 1.25,
1135
- "learning_rate": 1.009446649406974e-05,
1136
- "loss": 0.5796,
1137
- "step": 1830
1138
- },
1139
- {
1140
- "epoch": 1.26,
1141
- "learning_rate": 9.934421405092689e-06,
1142
- "loss": 0.5489,
1143
- "step": 1840
1144
- },
1145
- {
1146
- "epoch": 1.27,
1147
- "learning_rate": 9.775024610727527e-06,
1148
- "loss": 0.5869,
1149
- "step": 1850
1150
- },
1151
- {
1152
- "epoch": 1.27,
1153
- "learning_rate": 9.616296510634619e-06,
1154
- "loss": 0.5463,
1155
- "step": 1860
1156
- },
1157
- {
1158
- "epoch": 1.28,
1159
- "learning_rate": 9.458257418894607e-06,
1160
- "loss": 0.5725,
1161
- "step": 1870
1162
- },
1163
- {
1164
- "epoch": 1.29,
1165
- "learning_rate": 9.300927561408574e-06,
1166
- "loss": 0.5694,
1167
- "step": 1880
1168
- },
1169
- {
1170
- "epoch": 1.29,
1171
- "learning_rate": 9.14432707330957e-06,
1172
- "loss": 0.5721,
1173
- "step": 1890
1174
- },
1175
- {
1176
- "epoch": 1.3,
1177
- "learning_rate": 8.988475996385653e-06,
1178
- "loss": 0.5423,
1179
- "step": 1900
1180
- },
1181
- {
1182
- "epoch": 1.31,
1183
- "learning_rate": 8.833394276514976e-06,
1184
- "loss": 0.5376,
1185
- "step": 1910
1186
- },
1187
- {
1188
- "epoch": 1.31,
1189
- "learning_rate": 8.679101761113099e-06,
1190
- "loss": 0.5102,
1191
- "step": 1920
1192
- },
1193
- {
1194
- "epoch": 1.32,
1195
- "learning_rate": 8.525618196592897e-06,
1196
- "loss": 0.5698,
1197
- "step": 1930
1198
- },
1199
- {
1200
- "epoch": 1.33,
1201
- "learning_rate": 8.372963225837405e-06,
1202
- "loss": 0.4899,
1203
- "step": 1940
1204
- },
1205
- {
1206
- "epoch": 1.33,
1207
- "learning_rate": 8.221156385685939e-06,
1208
- "loss": 0.5326,
1209
- "step": 1950
1210
- },
1211
- {
1212
- "epoch": 1.34,
1213
- "learning_rate": 8.070217104433746e-06,
1214
- "loss": 0.5562,
1215
- "step": 1960
1216
- },
1217
- {
1218
- "epoch": 1.35,
1219
- "learning_rate": 7.920164699345559e-06,
1220
- "loss": 0.542,
1221
- "step": 1970
1222
- },
1223
- {
1224
- "epoch": 1.35,
1225
- "learning_rate": 7.771018374183367e-06,
1226
- "loss": 0.5584,
1227
- "step": 1980
1228
- },
1229
- {
1230
- "epoch": 1.36,
1231
- "learning_rate": 7.622797216748732e-06,
1232
- "loss": 0.559,
1233
- "step": 1990
1234
- },
1235
- {
1236
- "epoch": 1.37,
1237
- "learning_rate": 7.475520196439913e-06,
1238
- "loss": 0.5175,
1239
- "step": 2000
1240
- },
1241
- {
1242
- "epoch": 1.37,
1243
- "eval_loss": 0.5532566905021667,
1244
- "eval_runtime": 112.9998,
1245
- "eval_samples_per_second": 5.752,
1246
- "eval_steps_per_second": 1.442,
1247
- "step": 2000
1248
- },
1249
- {
1250
- "epoch": 1.37,
1251
- "learning_rate": 7.329206161824133e-06,
1252
- "loss": 0.5066,
1253
- "step": 2010
1254
- },
1255
- {
1256
- "epoch": 1.38,
1257
- "learning_rate": 7.183873838225359e-06,
1258
- "loss": 0.5446,
1259
- "step": 2020
1260
- },
1261
- {
1262
- "epoch": 1.39,
1263
- "learning_rate": 7.039541825327827e-06,
1264
- "loss": 0.5875,
1265
- "step": 2030
1266
- },
1267
- {
1268
- "epoch": 1.4,
1269
- "learning_rate": 6.896228594795646e-06,
1270
- "loss": 0.5211,
1271
- "step": 2040
1272
- },
1273
- {
1274
- "epoch": 1.4,
1275
- "learning_rate": 6.753952487908767e-06,
1276
- "loss": 0.5785,
1277
- "step": 2050
1278
- },
1279
- {
1280
- "epoch": 1.41,
1281
- "learning_rate": 6.612731713215683e-06,
1282
- "loss": 0.5338,
1283
- "step": 2060
1284
- },
1285
- {
1286
- "epoch": 1.42,
1287
- "learning_rate": 6.472584344203087e-06,
1288
- "loss": 0.5539,
1289
- "step": 2070
1290
- },
1291
- {
1292
- "epoch": 1.42,
1293
- "learning_rate": 6.3335283169828236e-06,
1294
- "loss": 0.5763,
1295
- "step": 2080
1296
- },
1297
- {
1298
- "epoch": 1.43,
1299
- "learning_rate": 6.195581427996395e-06,
1300
- "loss": 0.5044,
1301
- "step": 2090
1302
- },
1303
- {
1304
- "epoch": 1.44,
1305
- "learning_rate": 6.058761331737406e-06,
1306
- "loss": 0.5509,
1307
- "step": 2100
1308
- },
1309
- {
1310
- "epoch": 1.44,
1311
- "learning_rate": 5.923085538492093e-06,
1312
- "loss": 0.5212,
1313
- "step": 2110
1314
- },
1315
- {
1316
- "epoch": 1.45,
1317
- "learning_rate": 5.788571412098394e-06,
1318
- "loss": 0.5772,
1319
- "step": 2120
1320
- },
1321
- {
1322
- "epoch": 1.46,
1323
- "learning_rate": 5.655236167723671e-06,
1324
- "loss": 0.5368,
1325
- "step": 2130
1326
- },
1327
- {
1328
- "epoch": 1.46,
1329
- "learning_rate": 5.5230968696615565e-06,
1330
- "loss": 0.5327,
1331
- "step": 2140
1332
- },
1333
- {
1334
- "epoch": 1.47,
1335
- "learning_rate": 5.392170429148018e-06,
1336
- "loss": 0.5411,
1337
- "step": 2150
1338
- },
1339
- {
1340
- "epoch": 1.48,
1341
- "learning_rate": 5.26247360219706e-06,
1342
- "loss": 0.5354,
1343
- "step": 2160
1344
- },
1345
- {
1346
- "epoch": 1.48,
1347
- "learning_rate": 5.134022987456326e-06,
1348
- "loss": 0.5276,
1349
- "step": 2170
1350
- },
1351
- {
1352
- "epoch": 1.49,
1353
- "learning_rate": 5.006835024082726e-06,
1354
- "loss": 0.5055,
1355
- "step": 2180
1356
- },
1357
- {
1358
- "epoch": 1.5,
1359
- "learning_rate": 4.880925989638598e-06,
1360
- "loss": 0.5838,
1361
- "step": 2190
1362
- },
1363
- {
1364
- "epoch": 1.5,
1365
- "learning_rate": 4.75631199800848e-06,
1366
- "loss": 0.5915,
1367
- "step": 2200
1368
- },
1369
- {
1370
- "epoch": 1.51,
1371
- "learning_rate": 4.633008997336863e-06,
1372
- "loss": 0.4826,
1373
- "step": 2210
1374
- },
1375
- {
1376
- "epoch": 1.52,
1377
- "learning_rate": 4.5110327679871335e-06,
1378
- "loss": 0.562,
1379
- "step": 2220
1380
- },
1381
- {
1382
- "epoch": 1.53,
1383
- "learning_rate": 4.3903989205219805e-06,
1384
- "loss": 0.5399,
1385
- "step": 2230
1386
- },
1387
- {
1388
- "epoch": 1.53,
1389
- "learning_rate": 4.2711228937055865e-06,
1390
- "loss": 0.5079,
1391
- "step": 2240
1392
- },
1393
- {
1394
- "epoch": 1.54,
1395
- "learning_rate": 4.1532199525277285e-06,
1396
- "loss": 0.541,
1397
- "step": 2250
1398
- },
1399
- {
1400
- "epoch": 1.55,
1401
- "learning_rate": 4.0367051862501955e-06,
1402
- "loss": 0.5536,
1403
- "step": 2260
1404
- },
1405
- {
1406
- "epoch": 1.55,
1407
- "learning_rate": 3.921593506475625e-06,
1408
- "loss": 0.5456,
1409
- "step": 2270
1410
- },
1411
- {
1412
- "epoch": 1.56,
1413
- "learning_rate": 3.807899645239149e-06,
1414
- "loss": 0.5251,
1415
- "step": 2280
1416
- },
1417
- {
1418
- "epoch": 1.57,
1419
- "learning_rate": 3.695638153122936e-06,
1420
- "loss": 0.5232,
1421
- "step": 2290
1422
- },
1423
- {
1424
- "epoch": 1.57,
1425
- "learning_rate": 3.5848233973940525e-06,
1426
- "loss": 0.5597,
1427
- "step": 2300
1428
- },
1429
- {
1430
- "epoch": 1.58,
1431
- "learning_rate": 3.4754695601656887e-06,
1432
- "loss": 0.5475,
1433
- "step": 2310
1434
- },
1435
- {
1436
- "epoch": 1.59,
1437
- "learning_rate": 3.367590636582165e-06,
1438
- "loss": 0.557,
1439
- "step": 2320
1440
- },
1441
- {
1442
- "epoch": 1.59,
1443
- "learning_rate": 3.2612004330277988e-06,
1444
- "loss": 0.4989,
1445
- "step": 2330
1446
- },
1447
- {
1448
- "epoch": 1.6,
1449
- "learning_rate": 3.156312565359975e-06,
1450
- "loss": 0.5602,
1451
- "step": 2340
1452
- },
1453
- {
1454
- "epoch": 1.61,
1455
- "learning_rate": 3.0529404571665865e-06,
1456
- "loss": 0.5619,
1457
- "step": 2350
1458
- },
1459
- {
1460
- "epoch": 1.61,
1461
- "learning_rate": 2.9510973380480584e-06,
1462
- "loss": 0.537,
1463
- "step": 2360
1464
- },
1465
- {
1466
- "epoch": 1.62,
1467
- "learning_rate": 2.850796241924237e-06,
1468
- "loss": 0.5252,
1469
- "step": 2370
1470
- },
1471
- {
1472
- "epoch": 1.63,
1473
- "learning_rate": 2.7520500053662927e-06,
1474
- "loss": 0.5549,
1475
- "step": 2380
1476
- },
1477
- {
1478
- "epoch": 1.63,
1479
- "learning_rate": 2.654871265953916e-06,
1480
- "loss": 0.57,
1481
- "step": 2390
1482
- },
1483
- {
1484
- "epoch": 1.64,
1485
- "learning_rate": 2.559272460657915e-06,
1486
- "loss": 0.5821,
1487
- "step": 2400
1488
- },
1489
- {
1490
- "epoch": 1.65,
1491
- "learning_rate": 2.4652658242485547e-06,
1492
- "loss": 0.5291,
1493
- "step": 2410
1494
- },
1495
- {
1496
- "epoch": 1.66,
1497
- "learning_rate": 2.372863387729749e-06,
1498
- "loss": 0.5418,
1499
- "step": 2420
1500
- },
1501
- {
1502
- "epoch": 1.66,
1503
- "learning_rate": 2.282076976799303e-06,
1504
- "loss": 0.6195,
1505
- "step": 2430
1506
- },
1507
- {
1508
- "epoch": 1.67,
1509
- "learning_rate": 2.1929182103354907e-06,
1510
- "loss": 0.4792,
1511
- "step": 2440
1512
- },
1513
- {
1514
- "epoch": 1.68,
1515
- "learning_rate": 2.105398498910033e-06,
1516
- "loss": 0.5733,
1517
- "step": 2450
1518
- },
1519
- {
1520
- "epoch": 1.68,
1521
- "learning_rate": 2.0195290433277987e-06,
1522
- "loss": 0.5846,
1523
- "step": 2460
1524
- },
1525
- {
1526
- "epoch": 1.69,
1527
- "learning_rate": 1.935320833193291e-06,
1528
- "loss": 0.5232,
1529
- "step": 2470
1530
- },
1531
- {
1532
- "epoch": 1.7,
1533
- "learning_rate": 1.852784645504223e-06,
1534
- "loss": 0.537,
1535
- "step": 2480
1536
- },
1537
- {
1538
- "epoch": 1.7,
1539
- "learning_rate": 1.771931043272257e-06,
1540
- "loss": 0.5164,
1541
- "step": 2490
1542
- },
1543
- {
1544
- "epoch": 1.71,
1545
- "learning_rate": 1.6927703741711437e-06,
1546
- "loss": 0.5614,
1547
- "step": 2500
1548
- },
1549
- {
1550
- "epoch": 1.71,
1551
- "eval_loss": 0.5487334728240967,
1552
- "eval_runtime": 112.8609,
1553
- "eval_samples_per_second": 5.759,
1554
- "eval_steps_per_second": 1.444,
1555
- "step": 2500
1556
- },
1557
- {
1558
- "epoch": 1.72,
1559
- "learning_rate": 1.6153127692124298e-06,
1560
- "loss": 0.6014,
1561
- "step": 2510
1562
- },
1563
- {
1564
- "epoch": 1.72,
1565
- "learning_rate": 1.5395681414488938e-06,
1566
- "loss": 0.5534,
1567
- "step": 2520
1568
- },
1569
- {
1570
- "epoch": 1.73,
1571
- "learning_rate": 1.4655461847058633e-06,
1572
- "loss": 0.5652,
1573
- "step": 2530
1574
- },
1575
- {
1576
- "epoch": 1.74,
1577
- "learning_rate": 1.3932563723405833e-06,
1578
- "loss": 0.6083,
1579
- "step": 2540
1580
- },
1581
- {
1582
- "epoch": 1.74,
1583
- "learning_rate": 1.3227079560298305e-06,
1584
- "loss": 0.5197,
1585
- "step": 2550
1586
- },
1587
- {
1588
- "epoch": 1.75,
1589
- "learning_rate": 1.2539099645858776e-06,
1590
- "loss": 0.5637,
1591
- "step": 2560
1592
- },
1593
- {
1594
- "epoch": 1.76,
1595
- "learning_rate": 1.1868712028009782e-06,
1596
- "loss": 0.4674,
1597
- "step": 2570
1598
- },
1599
- {
1600
- "epoch": 1.76,
1601
- "learning_rate": 1.1216002503205213e-06,
1602
- "loss": 0.5429,
1603
- "step": 2580
1604
- },
1605
- {
1606
- "epoch": 1.77,
1607
- "learning_rate": 1.0581054605450153e-06,
1608
- "loss": 0.5204,
1609
- "step": 2590
1610
- },
1611
- {
1612
- "epoch": 1.78,
1613
- "learning_rate": 9.963949595610117e-07,
1614
- "loss": 0.5476,
1615
- "step": 2600
1616
- },
1617
- {
1618
- "epoch": 1.79,
1619
- "learning_rate": 9.364766451011236e-07,
1620
- "loss": 0.5862,
1621
- "step": 2610
1622
- },
1623
- {
1624
- "epoch": 1.79,
1625
- "learning_rate": 8.783581855332817e-07,
1626
- "loss": 0.5291,
1627
- "step": 2620
1628
- },
1629
- {
1630
- "epoch": 1.8,
1631
- "learning_rate": 8.220470188793128e-07,
1632
- "loss": 0.5299,
1633
- "step": 2630
1634
- },
1635
- {
1636
- "epoch": 1.81,
1637
- "learning_rate": 7.675503518630428e-07,
1638
- "loss": 0.5563,
1639
- "step": 2640
1640
- },
1641
- {
1642
- "epoch": 1.81,
1643
- "learning_rate": 7.148751589879599e-07,
1644
- "loss": 0.5179,
1645
- "step": 2650
1646
- },
1647
- {
1648
- "epoch": 1.82,
1649
- "learning_rate": 6.640281816446248e-07,
1650
- "loss": 0.547,
1651
- "step": 2660
1652
- },
1653
- {
1654
- "epoch": 1.83,
1655
- "learning_rate": 6.150159272479044e-07,
1656
- "loss": 0.5231,
1657
- "step": 2670
1658
- },
1659
- {
1660
- "epoch": 1.83,
1661
- "learning_rate": 5.678446684041416e-07,
1662
- "loss": 0.5496,
1663
- "step": 2680
1664
- },
1665
- {
1666
- "epoch": 1.84,
1667
- "learning_rate": 5.225204421083934e-07,
1668
- "loss": 0.5732,
1669
- "step": 2690
1670
- },
1671
- {
1672
- "epoch": 1.85,
1673
- "learning_rate": 4.790490489718147e-07,
1674
- "loss": 0.536,
1675
- "step": 2700
1676
- },
1677
- {
1678
- "epoch": 1.85,
1679
- "learning_rate": 4.3743605247929307e-07,
1680
- "loss": 0.5526,
1681
- "step": 2710
1682
- },
1683
- {
1684
- "epoch": 1.86,
1685
- "learning_rate": 3.9768677827742073e-07,
1686
- "loss": 0.5145,
1687
- "step": 2720
1688
- },
1689
- {
1690
- "epoch": 1.87,
1691
- "learning_rate": 3.598063134929341e-07,
1692
- "loss": 0.5583,
1693
- "step": 2730
1694
- },
1695
- {
1696
- "epoch": 1.87,
1697
- "learning_rate": 3.2379950608164845e-07,
1698
- "loss": 0.5408,
1699
- "step": 2740
1700
- },
1701
- {
1702
- "epoch": 1.88,
1703
- "learning_rate": 2.8967096420802364e-07,
1704
- "loss": 0.5287,
1705
- "step": 2750
1706
- },
1707
- {
1708
- "epoch": 1.89,
1709
- "learning_rate": 2.5742505565539974e-07,
1710
- "loss": 0.5565,
1711
- "step": 2760
1712
- },
1713
- {
1714
- "epoch": 1.89,
1715
- "learning_rate": 2.270659072670156e-07,
1716
- "loss": 0.5068,
1717
- "step": 2770
1718
- },
1719
- {
1720
- "epoch": 1.9,
1721
- "learning_rate": 1.9859740441784958e-07,
1722
- "loss": 0.5354,
1723
- "step": 2780
1724
- },
1725
- {
1726
- "epoch": 1.91,
1727
- "learning_rate": 1.720231905173675e-07,
1728
- "loss": 0.5375,
1729
- "step": 2790
1730
- },
1731
- {
1732
- "epoch": 1.92,
1733
- "learning_rate": 1.4734666654324046e-07,
1734
- "loss": 0.5401,
1735
- "step": 2800
1736
- },
1737
- {
1738
- "epoch": 1.92,
1739
- "learning_rate": 1.2457099060608156e-07,
1740
- "loss": 0.5922,
1741
- "step": 2810
1742
- },
1743
- {
1744
- "epoch": 1.93,
1745
- "learning_rate": 1.036990775452773e-07,
1746
- "loss": 0.5462,
1747
- "step": 2820
1748
- },
1749
- {
1750
- "epoch": 1.94,
1751
- "learning_rate": 8.473359855593609e-08,
1752
- "loss": 0.5514,
1753
- "step": 2830
1754
- },
1755
- {
1756
- "epoch": 1.94,
1757
- "learning_rate": 6.767698084703677e-08,
1758
- "loss": 0.5318,
1759
- "step": 2840
1760
- },
1761
- {
1762
- "epoch": 1.95,
1763
- "learning_rate": 5.253140733078865e-08,
1764
- "loss": 0.5045,
1765
- "step": 2850
1766
- },
1767
- {
1768
- "epoch": 1.96,
1769
- "learning_rate": 3.929881634326005e-08,
1770
- "loss": 0.5767,
1771
- "step": 2860
1772
- },
1773
- {
1774
- "epoch": 1.96,
1775
- "learning_rate": 2.7980901396313374e-08,
1776
- "loss": 0.5566,
1777
- "step": 2870
1778
- },
1779
- {
1780
- "epoch": 1.97,
1781
- "learning_rate": 1.8579110960865175e-08,
1782
- "loss": 0.5318,
1783
- "step": 2880
1784
- },
1785
- {
1786
- "epoch": 1.98,
1787
- "learning_rate": 1.1094648281516073e-08,
1788
- "loss": 0.5267,
1789
- "step": 2890
1790
- },
1791
- {
1792
- "epoch": 1.98,
1793
- "learning_rate": 5.528471222552289e-09,
1794
- "loss": 0.5307,
1795
- "step": 2900
1796
- },
1797
- {
1798
- "epoch": 1.99,
1799
- "learning_rate": 1.8812921453603693e-09,
1800
- "loss": 0.5309,
1801
- "step": 2910
1802
- },
1803
- {
1804
- "epoch": 2.0,
1805
- "learning_rate": 1.5357781726010878e-10,
1806
- "loss": 0.5018,
1807
- "step": 2920
1808
- },
1809
- {
1810
- "epoch": 2.0,
1811
- "step": 2924,
1812
- "total_flos": 2.288755460150395e+17,
1813
- "train_loss": 0.628653114618257,
1814
- "train_runtime": 6305.9173,
1815
- "train_samples_per_second": 1.854,
1816
- "train_steps_per_second": 0.464
1817
  }
1818
  ],
1819
  "logging_steps": 10,
1820
- "max_steps": 2924,
1821
- "num_train_epochs": 2,
1822
- "save_steps": 500,
1823
- "total_flos": 2.288755460150395e+17,
1824
  "trial_name": null,
1825
  "trial_params": null
1826
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.03419972640218878,
5
+ "eval_steps": 25,
6
+ "global_step": 50,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.0,
13
+ "learning_rate": 9.999999999999999e-06,
14
+ "loss": 1.4953,
15
  "step": 1
16
  },
17
  {
18
  "epoch": 0.01,
19
+ "learning_rate": 2.838778253789822e-05,
20
+ "loss": 1.7137,
21
  "step": 10
22
  },
23
  {
24
  "epoch": 0.01,
25
+ "learning_rate": 2.1314021436425026e-05,
26
+ "loss": 1.5219,
27
  "step": 20
28
  },
29
  {
30
  "epoch": 0.02,
31
+ "eval_loss": 1.2538621425628662,
32
+ "eval_runtime": 112.7562,
33
+ "eval_samples_per_second": 5.765,
34
+ "eval_steps_per_second": 1.446,
35
+ "step": 25
36
+ },
37
+ {
38
+ "epoch": 0.02,
39
+ "learning_rate": 1.1522697745987076e-05,
40
+ "loss": 1.4272,
41
  "step": 30
42
  },
43
  {
44
  "epoch": 0.03,
45
+ "learning_rate": 3.2280092208200853e-06,
46
+ "loss": 1.401,
47
  "step": 40
48
  },
49
  {
50
  "epoch": 0.03,
51
+ "learning_rate": 0.0,
52
+ "loss": 1.3156,
53
  "step": 50
54
  },
55
  {
56
+ "epoch": 0.03,
57
+ "eval_loss": 1.1997405290603638,
58
+ "eval_runtime": 113.4502,
59
+ "eval_samples_per_second": 5.729,
60
+ "eval_steps_per_second": 1.437,
61
+ "step": 50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  },
63
  {
64
+ "epoch": 0.03,
65
+ "step": 50,
66
+ "total_flos": 3919242130882560.0,
67
+ "train_loss": 1.4715181255340577,
68
+ "train_runtime": 335.1031,
69
+ "train_samples_per_second": 0.597,
70
+ "train_steps_per_second": 0.149
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  }
72
  ],
73
  "logging_steps": 10,
74
+ "max_steps": 50,
75
+ "num_train_epochs": 1,
76
+ "save_steps": 25,
77
+ "total_flos": 3919242130882560.0,
78
  "trial_name": null,
79
  "trial_params": null
80
  }