Commit
•
c1a75b8
1
Parent(s):
85b6d28
Model save
Browse files- README.md +9 -2
- all_results.json +7 -7
- runs/May25_13-55-16_br1t43-s3-25/events.out.tfevents.1716645331.br1t43-s3-25.187086.0 +2 -2
- train_results.json +7 -7
- trainer_state.json +839 -12
README.md
CHANGED
@@ -17,6 +17,8 @@ should probably proofread and complete it, then remove this comment. -->
|
|
17 |
# asset-generation-sft-qlora
|
18 |
|
19 |
This model was trained from scratch on the generator dataset.
|
|
|
|
|
20 |
|
21 |
## Model description
|
22 |
|
@@ -40,15 +42,20 @@ The following hyperparameters were used during training:
|
|
40 |
- eval_batch_size: 32
|
41 |
- seed: 42
|
42 |
- distributed_type: multi-GPU
|
|
|
43 |
- gradient_accumulation_steps: 2
|
44 |
-
- total_train_batch_size:
|
|
|
45 |
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
46 |
- lr_scheduler_type: cosine
|
47 |
- lr_scheduler_warmup_ratio: 0.1
|
48 |
-
- num_epochs:
|
49 |
|
50 |
### Training results
|
51 |
|
|
|
|
|
|
|
52 |
|
53 |
|
54 |
### Framework versions
|
|
|
17 |
# asset-generation-sft-qlora
|
18 |
|
19 |
This model was trained from scratch on the generator dataset.
|
20 |
+
It achieves the following results on the evaluation set:
|
21 |
+
- Loss: 0.7983
|
22 |
|
23 |
## Model description
|
24 |
|
|
|
42 |
- eval_batch_size: 32
|
43 |
- seed: 42
|
44 |
- distributed_type: multi-GPU
|
45 |
+
- num_devices: 2
|
46 |
- gradient_accumulation_steps: 2
|
47 |
+
- total_train_batch_size: 64
|
48 |
+
- total_eval_batch_size: 64
|
49 |
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
50 |
- lr_scheduler_type: cosine
|
51 |
- lr_scheduler_warmup_ratio: 0.1
|
52 |
+
- num_epochs: 1
|
53 |
|
54 |
### Training results
|
55 |
|
56 |
+
| Training Loss | Epoch | Step | Validation Loss |
|
57 |
+
|:-------------:|:-----:|:----:|:---------------:|
|
58 |
+
| 0.8288 | 1.0 | 5088 | 0.7983 |
|
59 |
|
60 |
|
61 |
### Framework versions
|
all_results.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"epoch": 0
|
3 |
-
"total_flos": 1.
|
4 |
-
"train_loss": 0.
|
5 |
-
"train_runtime":
|
6 |
-
"train_samples":
|
7 |
-
"train_samples_per_second":
|
8 |
-
"train_steps_per_second": 0.
|
9 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 1.0,
|
3 |
+
"total_flos": 1.5751056572484157e+19,
|
4 |
+
"train_loss": 0.09284130807192821,
|
5 |
+
"train_runtime": 20560.1048,
|
6 |
+
"train_samples": 1055292,
|
7 |
+
"train_samples_per_second": 15.837,
|
8 |
+
"train_steps_per_second": 0.247
|
9 |
}
|
runs/May25_13-55-16_br1t43-s3-25/events.out.tfevents.1716645331.br1t43-s3-25.187086.0
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:346fafda02c3a931a24d9615d6f5811e351bbaabbefbf31491855a578e6ea4f9
|
3 |
+
size 30765
|
train_results.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"epoch": 0
|
3 |
-
"total_flos": 1.
|
4 |
-
"train_loss": 0.
|
5 |
-
"train_runtime":
|
6 |
-
"train_samples":
|
7 |
-
"train_samples_per_second":
|
8 |
-
"train_steps_per_second": 0.
|
9 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 1.0,
|
3 |
+
"total_flos": 1.5751056572484157e+19,
|
4 |
+
"train_loss": 0.09284130807192821,
|
5 |
+
"train_runtime": 20560.1048,
|
6 |
+
"train_samples": 1055292,
|
7 |
+
"train_samples_per_second": 15.837,
|
8 |
+
"train_steps_per_second": 0.247
|
9 |
}
|
trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -6316,19 +6316,846 @@
|
|
6316 |
"step": 4500
|
6317 |
},
|
6318 |
{
|
6319 |
-
"epoch": 0.
|
6320 |
-
"
|
6321 |
-
"
|
6322 |
-
"
|
6323 |
-
"
|
6324 |
-
|
6325 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6326 |
}
|
6327 |
],
|
6328 |
"logging_steps": 5,
|
6329 |
-
"max_steps":
|
6330 |
"num_input_tokens_seen": 0,
|
6331 |
-
"num_train_epochs":
|
6332 |
"save_steps": 500,
|
6333 |
"stateful_callbacks": {
|
6334 |
"TrainerControl": {
|
@@ -6342,7 +7169,7 @@
|
|
6342 |
"attributes": {}
|
6343 |
}
|
6344 |
},
|
6345 |
-
"total_flos": 1.
|
6346 |
"train_batch_size": 16,
|
6347 |
"trial_name": null,
|
6348 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.0,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 5088,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
6316 |
"step": 4500
|
6317 |
},
|
6318 |
{
|
6319 |
+
"epoch": 0.8854166666666666,
|
6320 |
+
"grad_norm": 0.43359375,
|
6321 |
+
"learning_rate": 7.893462771773996e-06,
|
6322 |
+
"loss": 0.782,
|
6323 |
+
"step": 4505
|
6324 |
+
},
|
6325 |
+
{
|
6326 |
+
"epoch": 0.8863993710691824,
|
6327 |
+
"grad_norm": 0.337890625,
|
6328 |
+
"learning_rate": 7.760421092313152e-06,
|
6329 |
+
"loss": 0.7891,
|
6330 |
+
"step": 4510
|
6331 |
+
},
|
6332 |
+
{
|
6333 |
+
"epoch": 0.8873820754716981,
|
6334 |
+
"grad_norm": 0.326171875,
|
6335 |
+
"learning_rate": 7.628464876673202e-06,
|
6336 |
+
"loss": 0.8201,
|
6337 |
+
"step": 4515
|
6338 |
+
},
|
6339 |
+
{
|
6340 |
+
"epoch": 0.8883647798742138,
|
6341 |
+
"grad_norm": 0.3203125,
|
6342 |
+
"learning_rate": 7.497595677698388e-06,
|
6343 |
+
"loss": 0.8031,
|
6344 |
+
"step": 4520
|
6345 |
+
},
|
6346 |
+
{
|
6347 |
+
"epoch": 0.8893474842767296,
|
6348 |
+
"grad_norm": 0.32421875,
|
6349 |
+
"learning_rate": 7.3678150354410615e-06,
|
6350 |
+
"loss": 0.8013,
|
6351 |
+
"step": 4525
|
6352 |
+
},
|
6353 |
+
{
|
6354 |
+
"epoch": 0.8903301886792453,
|
6355 |
+
"grad_norm": 0.326171875,
|
6356 |
+
"learning_rate": 7.239124477143578e-06,
|
6357 |
+
"loss": 0.8075,
|
6358 |
+
"step": 4530
|
6359 |
+
},
|
6360 |
+
{
|
6361 |
+
"epoch": 0.891312893081761,
|
6362 |
+
"grad_norm": 0.310546875,
|
6363 |
+
"learning_rate": 7.111525517220308e-06,
|
6364 |
+
"loss": 0.7919,
|
6365 |
+
"step": 4535
|
6366 |
+
},
|
6367 |
+
{
|
6368 |
+
"epoch": 0.8922955974842768,
|
6369 |
+
"grad_norm": 0.298828125,
|
6370 |
+
"learning_rate": 6.985019657239867e-06,
|
6371 |
+
"loss": 0.8074,
|
6372 |
+
"step": 4540
|
6373 |
+
},
|
6374 |
+
{
|
6375 |
+
"epoch": 0.8932783018867925,
|
6376 |
+
"grad_norm": 0.314453125,
|
6377 |
+
"learning_rate": 6.859608385907379e-06,
|
6378 |
+
"loss": 0.8009,
|
6379 |
+
"step": 4545
|
6380 |
+
},
|
6381 |
+
{
|
6382 |
+
"epoch": 0.8942610062893082,
|
6383 |
+
"grad_norm": 0.310546875,
|
6384 |
+
"learning_rate": 6.735293179046975e-06,
|
6385 |
+
"loss": 0.8081,
|
6386 |
+
"step": 4550
|
6387 |
+
},
|
6388 |
+
{
|
6389 |
+
"epoch": 0.8952437106918238,
|
6390 |
+
"grad_norm": 0.3046875,
|
6391 |
+
"learning_rate": 6.612075499584458e-06,
|
6392 |
+
"loss": 0.8067,
|
6393 |
+
"step": 4555
|
6394 |
+
},
|
6395 |
+
{
|
6396 |
+
"epoch": 0.8962264150943396,
|
6397 |
+
"grad_norm": 0.3125,
|
6398 |
+
"learning_rate": 6.489956797530084e-06,
|
6399 |
+
"loss": 0.811,
|
6400 |
+
"step": 4560
|
6401 |
+
},
|
6402 |
+
{
|
6403 |
+
"epoch": 0.8972091194968553,
|
6404 |
+
"grad_norm": 0.30078125,
|
6405 |
+
"learning_rate": 6.368938509961398e-06,
|
6406 |
+
"loss": 0.7966,
|
6407 |
+
"step": 4565
|
6408 |
+
},
|
6409 |
+
{
|
6410 |
+
"epoch": 0.898191823899371,
|
6411 |
+
"grad_norm": 0.328125,
|
6412 |
+
"learning_rate": 6.2490220610065155e-06,
|
6413 |
+
"loss": 0.8123,
|
6414 |
+
"step": 4570
|
6415 |
+
},
|
6416 |
+
{
|
6417 |
+
"epoch": 0.8991745283018868,
|
6418 |
+
"grad_norm": 0.30859375,
|
6419 |
+
"learning_rate": 6.130208861827202e-06,
|
6420 |
+
"loss": 0.8045,
|
6421 |
+
"step": 4575
|
6422 |
+
},
|
6423 |
+
{
|
6424 |
+
"epoch": 0.9001572327044025,
|
6425 |
+
"grad_norm": 0.302734375,
|
6426 |
+
"learning_rate": 6.012500310602254e-06,
|
6427 |
+
"loss": 0.7923,
|
6428 |
+
"step": 4580
|
6429 |
+
},
|
6430 |
+
{
|
6431 |
+
"epoch": 0.9011399371069182,
|
6432 |
+
"grad_norm": 0.30859375,
|
6433 |
+
"learning_rate": 5.8958977925112405e-06,
|
6434 |
+
"loss": 0.7986,
|
6435 |
+
"step": 4585
|
6436 |
+
},
|
6437 |
+
{
|
6438 |
+
"epoch": 0.902122641509434,
|
6439 |
+
"grad_norm": 0.322265625,
|
6440 |
+
"learning_rate": 5.780402679717989e-06,
|
6441 |
+
"loss": 0.8166,
|
6442 |
+
"step": 4590
|
6443 |
+
},
|
6444 |
+
{
|
6445 |
+
"epoch": 0.9031053459119497,
|
6446 |
+
"grad_norm": 0.298828125,
|
6447 |
+
"learning_rate": 5.666016331354485e-06,
|
6448 |
+
"loss": 0.7845,
|
6449 |
+
"step": 4595
|
6450 |
+
},
|
6451 |
+
{
|
6452 |
+
"epoch": 0.9040880503144654,
|
6453 |
+
"grad_norm": 0.330078125,
|
6454 |
+
"learning_rate": 5.552740093505015e-06,
|
6455 |
+
"loss": 0.7865,
|
6456 |
+
"step": 4600
|
6457 |
+
},
|
6458 |
+
{
|
6459 |
+
"epoch": 0.9050707547169812,
|
6460 |
+
"grad_norm": 0.30859375,
|
6461 |
+
"learning_rate": 5.440575299190165e-06,
|
6462 |
+
"loss": 0.8243,
|
6463 |
+
"step": 4605
|
6464 |
+
},
|
6465 |
+
{
|
6466 |
+
"epoch": 0.9060534591194969,
|
6467 |
+
"grad_norm": 0.31640625,
|
6468 |
+
"learning_rate": 5.329523268351155e-06,
|
6469 |
+
"loss": 0.8041,
|
6470 |
+
"step": 4610
|
6471 |
+
},
|
6472 |
+
{
|
6473 |
+
"epoch": 0.9070361635220126,
|
6474 |
+
"grad_norm": 0.310546875,
|
6475 |
+
"learning_rate": 5.219585307834407e-06,
|
6476 |
+
"loss": 0.8057,
|
6477 |
+
"step": 4615
|
6478 |
+
},
|
6479 |
+
{
|
6480 |
+
"epoch": 0.9080188679245284,
|
6481 |
+
"grad_norm": 0.294921875,
|
6482 |
+
"learning_rate": 5.110762711376116e-06,
|
6483 |
+
"loss": 0.7987,
|
6484 |
+
"step": 4620
|
6485 |
+
},
|
6486 |
+
{
|
6487 |
+
"epoch": 0.909001572327044,
|
6488 |
+
"grad_norm": 0.326171875,
|
6489 |
+
"learning_rate": 5.003056759586944e-06,
|
6490 |
+
"loss": 0.7983,
|
6491 |
+
"step": 4625
|
6492 |
+
},
|
6493 |
+
{
|
6494 |
+
"epoch": 0.9099842767295597,
|
6495 |
+
"grad_norm": 0.3203125,
|
6496 |
+
"learning_rate": 4.89646871993703e-06,
|
6497 |
+
"loss": 0.7872,
|
6498 |
+
"step": 4630
|
6499 |
+
},
|
6500 |
+
{
|
6501 |
+
"epoch": 0.9109669811320755,
|
6502 |
+
"grad_norm": 0.31640625,
|
6503 |
+
"learning_rate": 4.79099984674114e-06,
|
6504 |
+
"loss": 0.8203,
|
6505 |
+
"step": 4635
|
6506 |
+
},
|
6507 |
+
{
|
6508 |
+
"epoch": 0.9119496855345912,
|
6509 |
+
"grad_norm": 0.318359375,
|
6510 |
+
"learning_rate": 4.6866513811437475e-06,
|
6511 |
+
"loss": 0.7816,
|
6512 |
+
"step": 4640
|
6513 |
+
},
|
6514 |
+
{
|
6515 |
+
"epoch": 0.9129323899371069,
|
6516 |
+
"grad_norm": 0.30078125,
|
6517 |
+
"learning_rate": 4.58342455110452e-06,
|
6518 |
+
"loss": 0.8151,
|
6519 |
+
"step": 4645
|
6520 |
+
},
|
6521 |
+
{
|
6522 |
+
"epoch": 0.9139150943396226,
|
6523 |
+
"grad_norm": 0.29296875,
|
6524 |
+
"learning_rate": 4.481320571383907e-06,
|
6525 |
+
"loss": 0.8052,
|
6526 |
+
"step": 4650
|
6527 |
+
},
|
6528 |
+
{
|
6529 |
+
"epoch": 0.9148977987421384,
|
6530 |
+
"grad_norm": 0.31640625,
|
6531 |
+
"learning_rate": 4.380340643528735e-06,
|
6532 |
+
"loss": 0.8069,
|
6533 |
+
"step": 4655
|
6534 |
+
},
|
6535 |
+
{
|
6536 |
+
"epoch": 0.9158805031446541,
|
6537 |
+
"grad_norm": 0.328125,
|
6538 |
+
"learning_rate": 4.280485955858171e-06,
|
6539 |
+
"loss": 0.7986,
|
6540 |
+
"step": 4660
|
6541 |
+
},
|
6542 |
+
{
|
6543 |
+
"epoch": 0.9168632075471698,
|
6544 |
+
"grad_norm": 0.310546875,
|
6545 |
+
"learning_rate": 4.181757683449694e-06,
|
6546 |
+
"loss": 0.8219,
|
6547 |
+
"step": 4665
|
6548 |
+
},
|
6549 |
+
{
|
6550 |
+
"epoch": 0.9178459119496856,
|
6551 |
+
"grad_norm": 0.30859375,
|
6552 |
+
"learning_rate": 4.084156988125231e-06,
|
6553 |
+
"loss": 0.8162,
|
6554 |
+
"step": 4670
|
6555 |
+
},
|
6556 |
+
{
|
6557 |
+
"epoch": 0.9188286163522013,
|
6558 |
+
"grad_norm": 0.314453125,
|
6559 |
+
"learning_rate": 3.987685018437581e-06,
|
6560 |
+
"loss": 0.7972,
|
6561 |
+
"step": 4675
|
6562 |
+
},
|
6563 |
+
{
|
6564 |
+
"epoch": 0.9198113207547169,
|
6565 |
+
"grad_norm": 0.30859375,
|
6566 |
+
"learning_rate": 3.892342909656776e-06,
|
6567 |
+
"loss": 0.8163,
|
6568 |
+
"step": 4680
|
6569 |
+
},
|
6570 |
+
{
|
6571 |
+
"epoch": 0.9207940251572327,
|
6572 |
+
"grad_norm": 0.310546875,
|
6573 |
+
"learning_rate": 3.798131783756853e-06,
|
6574 |
+
"loss": 0.8151,
|
6575 |
+
"step": 4685
|
6576 |
+
},
|
6577 |
+
{
|
6578 |
+
"epoch": 0.9217767295597484,
|
6579 |
+
"grad_norm": 0.310546875,
|
6580 |
+
"learning_rate": 3.7050527494025265e-06,
|
6581 |
+
"loss": 0.8023,
|
6582 |
+
"step": 4690
|
6583 |
+
},
|
6584 |
+
{
|
6585 |
+
"epoch": 0.9227594339622641,
|
6586 |
+
"grad_norm": 0.322265625,
|
6587 |
+
"learning_rate": 3.6131069019362362e-06,
|
6588 |
+
"loss": 0.8229,
|
6589 |
+
"step": 4695
|
6590 |
+
},
|
6591 |
+
{
|
6592 |
+
"epoch": 0.9237421383647799,
|
6593 |
+
"grad_norm": 0.302734375,
|
6594 |
+
"learning_rate": 3.52229532336521e-06,
|
6595 |
+
"loss": 0.7951,
|
6596 |
+
"step": 4700
|
6597 |
+
},
|
6598 |
+
{
|
6599 |
+
"epoch": 0.9247248427672956,
|
6600 |
+
"grad_norm": 0.314453125,
|
6601 |
+
"learning_rate": 3.4326190823487315e-06,
|
6602 |
+
"loss": 0.8034,
|
6603 |
+
"step": 4705
|
6604 |
+
},
|
6605 |
+
{
|
6606 |
+
"epoch": 0.9257075471698113,
|
6607 |
+
"grad_norm": 0.30859375,
|
6608 |
+
"learning_rate": 3.344079234185604e-06,
|
6609 |
+
"loss": 0.807,
|
6610 |
+
"step": 4710
|
6611 |
+
},
|
6612 |
+
{
|
6613 |
+
"epoch": 0.9266902515723271,
|
6614 |
+
"grad_norm": 0.306640625,
|
6615 |
+
"learning_rate": 3.2566768208016297e-06,
|
6616 |
+
"loss": 0.8122,
|
6617 |
+
"step": 4715
|
6618 |
+
},
|
6619 |
+
{
|
6620 |
+
"epoch": 0.9276729559748428,
|
6621 |
+
"grad_norm": 0.30859375,
|
6622 |
+
"learning_rate": 3.170412870737516e-06,
|
6623 |
+
"loss": 0.8023,
|
6624 |
+
"step": 4720
|
6625 |
+
},
|
6626 |
+
{
|
6627 |
+
"epoch": 0.9286556603773585,
|
6628 |
+
"grad_norm": 0.3046875,
|
6629 |
+
"learning_rate": 3.0852883991366322e-06,
|
6630 |
+
"loss": 0.7757,
|
6631 |
+
"step": 4725
|
6632 |
+
},
|
6633 |
+
{
|
6634 |
+
"epoch": 0.9296383647798742,
|
6635 |
+
"grad_norm": 0.306640625,
|
6636 |
+
"learning_rate": 3.0013044077330744e-06,
|
6637 |
+
"loss": 0.7709,
|
6638 |
+
"step": 4730
|
6639 |
+
},
|
6640 |
+
{
|
6641 |
+
"epoch": 0.93062106918239,
|
6642 |
+
"grad_norm": 0.322265625,
|
6643 |
+
"learning_rate": 2.9184618848399627e-06,
|
6644 |
+
"loss": 0.8331,
|
6645 |
+
"step": 4735
|
6646 |
+
},
|
6647 |
+
{
|
6648 |
+
"epoch": 0.9316037735849056,
|
6649 |
+
"grad_norm": 0.3125,
|
6650 |
+
"learning_rate": 2.836761805337762e-06,
|
6651 |
+
"loss": 0.7819,
|
6652 |
+
"step": 4740
|
6653 |
+
},
|
6654 |
+
{
|
6655 |
+
"epoch": 0.9325864779874213,
|
6656 |
+
"grad_norm": 0.33984375,
|
6657 |
+
"learning_rate": 2.756205130662737e-06,
|
6658 |
+
"loss": 0.7949,
|
6659 |
+
"step": 4745
|
6660 |
+
},
|
6661 |
+
{
|
6662 |
+
"epoch": 0.9335691823899371,
|
6663 |
+
"grad_norm": 0.31640625,
|
6664 |
+
"learning_rate": 2.6767928087957693e-06,
|
6665 |
+
"loss": 0.8147,
|
6666 |
+
"step": 4750
|
6667 |
+
},
|
6668 |
+
{
|
6669 |
+
"epoch": 0.9345518867924528,
|
6670 |
+
"grad_norm": 0.30078125,
|
6671 |
+
"learning_rate": 2.598525774251159e-06,
|
6672 |
+
"loss": 0.7786,
|
6673 |
+
"step": 4755
|
6674 |
+
},
|
6675 |
+
{
|
6676 |
+
"epoch": 0.9355345911949685,
|
6677 |
+
"grad_norm": 0.302734375,
|
6678 |
+
"learning_rate": 2.52140494806552e-06,
|
6679 |
+
"loss": 0.7954,
|
6680 |
+
"step": 4760
|
6681 |
+
},
|
6682 |
+
{
|
6683 |
+
"epoch": 0.9365172955974843,
|
6684 |
+
"grad_norm": 0.30859375,
|
6685 |
+
"learning_rate": 2.44543123778711e-06,
|
6686 |
+
"loss": 0.7851,
|
6687 |
+
"step": 4765
|
6688 |
+
},
|
6689 |
+
{
|
6690 |
+
"epoch": 0.9375,
|
6691 |
+
"grad_norm": 0.3046875,
|
6692 |
+
"learning_rate": 2.370605537465065e-06,
|
6693 |
+
"loss": 0.81,
|
6694 |
+
"step": 4770
|
6695 |
+
},
|
6696 |
+
{
|
6697 |
+
"epoch": 0.9384827044025157,
|
6698 |
+
"grad_norm": 0.302734375,
|
6699 |
+
"learning_rate": 2.296928727638814e-06,
|
6700 |
+
"loss": 0.8305,
|
6701 |
+
"step": 4775
|
6702 |
+
},
|
6703 |
+
{
|
6704 |
+
"epoch": 0.9394654088050315,
|
6705 |
+
"grad_norm": 0.3046875,
|
6706 |
+
"learning_rate": 2.2244016753278586e-06,
|
6707 |
+
"loss": 0.7896,
|
6708 |
+
"step": 4780
|
6709 |
+
},
|
6710 |
+
{
|
6711 |
+
"epoch": 0.9404481132075472,
|
6712 |
+
"grad_norm": 0.3046875,
|
6713 |
+
"learning_rate": 2.1530252340214996e-06,
|
6714 |
+
"loss": 0.8101,
|
6715 |
+
"step": 4785
|
6716 |
+
},
|
6717 |
+
{
|
6718 |
+
"epoch": 0.9414308176100629,
|
6719 |
+
"grad_norm": 0.31640625,
|
6720 |
+
"learning_rate": 2.0828002436687257e-06,
|
6721 |
+
"loss": 0.805,
|
6722 |
+
"step": 4790
|
6723 |
+
},
|
6724 |
+
{
|
6725 |
+
"epoch": 0.9424135220125787,
|
6726 |
+
"grad_norm": 0.310546875,
|
6727 |
+
"learning_rate": 2.013727530668452e-06,
|
6728 |
+
"loss": 0.804,
|
6729 |
+
"step": 4795
|
6730 |
+
},
|
6731 |
+
{
|
6732 |
+
"epoch": 0.9433962264150944,
|
6733 |
+
"grad_norm": 0.314453125,
|
6734 |
+
"learning_rate": 1.9458079078597203e-06,
|
6735 |
+
"loss": 0.825,
|
6736 |
+
"step": 4800
|
6737 |
+
},
|
6738 |
+
{
|
6739 |
+
"epoch": 0.94437893081761,
|
6740 |
+
"grad_norm": 0.3046875,
|
6741 |
+
"learning_rate": 1.8790421745121356e-06,
|
6742 |
+
"loss": 0.821,
|
6743 |
+
"step": 4805
|
6744 |
+
},
|
6745 |
+
{
|
6746 |
+
"epoch": 0.9453616352201258,
|
6747 |
+
"grad_norm": 0.310546875,
|
6748 |
+
"learning_rate": 1.813431116316522e-06,
|
6749 |
+
"loss": 0.8101,
|
6750 |
+
"step": 4810
|
6751 |
+
},
|
6752 |
+
{
|
6753 |
+
"epoch": 0.9463443396226415,
|
6754 |
+
"grad_norm": 0.30859375,
|
6755 |
+
"learning_rate": 1.748975505375583e-06,
|
6756 |
+
"loss": 0.8016,
|
6757 |
+
"step": 4815
|
6758 |
+
},
|
6759 |
+
{
|
6760 |
+
"epoch": 0.9473270440251572,
|
6761 |
+
"grad_norm": 0.296875,
|
6762 |
+
"learning_rate": 1.6856761001948772e-06,
|
6763 |
+
"loss": 0.7847,
|
6764 |
+
"step": 4820
|
6765 |
+
},
|
6766 |
+
{
|
6767 |
+
"epoch": 0.9483097484276729,
|
6768 |
+
"grad_norm": 0.3203125,
|
6769 |
+
"learning_rate": 1.6235336456739026e-06,
|
6770 |
+
"loss": 0.8007,
|
6771 |
+
"step": 4825
|
6772 |
+
},
|
6773 |
+
{
|
6774 |
+
"epoch": 0.9492924528301887,
|
6775 |
+
"grad_norm": 0.310546875,
|
6776 |
+
"learning_rate": 1.5625488730972693e-06,
|
6777 |
+
"loss": 0.7891,
|
6778 |
+
"step": 4830
|
6779 |
+
},
|
6780 |
+
{
|
6781 |
+
"epoch": 0.9502751572327044,
|
6782 |
+
"grad_norm": 0.30859375,
|
6783 |
+
"learning_rate": 1.5027225001261525e-06,
|
6784 |
+
"loss": 0.8244,
|
6785 |
+
"step": 4835
|
6786 |
+
},
|
6787 |
+
{
|
6788 |
+
"epoch": 0.9512578616352201,
|
6789 |
+
"grad_norm": 0.298828125,
|
6790 |
+
"learning_rate": 1.4440552307898202e-06,
|
6791 |
+
"loss": 0.7962,
|
6792 |
+
"step": 4840
|
6793 |
+
},
|
6794 |
+
{
|
6795 |
+
"epoch": 0.9522405660377359,
|
6796 |
+
"grad_norm": 0.306640625,
|
6797 |
+
"learning_rate": 1.386547755477363e-06,
|
6798 |
+
"loss": 0.7982,
|
6799 |
+
"step": 4845
|
6800 |
+
},
|
6801 |
+
{
|
6802 |
+
"epoch": 0.9532232704402516,
|
6803 |
+
"grad_norm": 0.318359375,
|
6804 |
+
"learning_rate": 1.3302007509295445e-06,
|
6805 |
+
"loss": 0.7896,
|
6806 |
+
"step": 4850
|
6807 |
+
},
|
6808 |
+
{
|
6809 |
+
"epoch": 0.9542059748427673,
|
6810 |
+
"grad_norm": 0.310546875,
|
6811 |
+
"learning_rate": 1.2750148802308737e-06,
|
6812 |
+
"loss": 0.8158,
|
6813 |
+
"step": 4855
|
6814 |
+
},
|
6815 |
+
{
|
6816 |
+
"epoch": 0.9551886792452831,
|
6817 |
+
"grad_norm": 0.3125,
|
6818 |
+
"learning_rate": 1.2209907928017795e-06,
|
6819 |
+
"loss": 0.8012,
|
6820 |
+
"step": 4860
|
6821 |
+
},
|
6822 |
+
{
|
6823 |
+
"epoch": 0.9561713836477987,
|
6824 |
+
"grad_norm": 0.310546875,
|
6825 |
+
"learning_rate": 1.1681291243909153e-06,
|
6826 |
+
"loss": 0.8146,
|
6827 |
+
"step": 4865
|
6828 |
+
},
|
6829 |
+
{
|
6830 |
+
"epoch": 0.9571540880503144,
|
6831 |
+
"grad_norm": 0.330078125,
|
6832 |
+
"learning_rate": 1.116430497067833e-06,
|
6833 |
+
"loss": 0.8175,
|
6834 |
+
"step": 4870
|
6835 |
+
},
|
6836 |
+
{
|
6837 |
+
"epoch": 0.9581367924528302,
|
6838 |
+
"grad_norm": 0.3125,
|
6839 |
+
"learning_rate": 1.0658955192154763e-06,
|
6840 |
+
"loss": 0.7937,
|
6841 |
+
"step": 4875
|
6842 |
+
},
|
6843 |
+
{
|
6844 |
+
"epoch": 0.9591194968553459,
|
6845 |
+
"grad_norm": 0.3125,
|
6846 |
+
"learning_rate": 1.0165247855231542e-06,
|
6847 |
+
"loss": 0.8,
|
6848 |
+
"step": 4880
|
6849 |
+
},
|
6850 |
+
{
|
6851 |
+
"epoch": 0.9601022012578616,
|
6852 |
+
"grad_norm": 0.314453125,
|
6853 |
+
"learning_rate": 9.683188769794792e-07,
|
6854 |
+
"loss": 0.8042,
|
6855 |
+
"step": 4885
|
6856 |
+
},
|
6857 |
+
{
|
6858 |
+
"epoch": 0.9610849056603774,
|
6859 |
+
"grad_norm": 0.298828125,
|
6860 |
+
"learning_rate": 9.212783608655518e-07,
|
6861 |
+
"loss": 0.8078,
|
6862 |
+
"step": 4890
|
6863 |
+
},
|
6864 |
+
{
|
6865 |
+
"epoch": 0.9620676100628931,
|
6866 |
+
"grad_norm": 0.31640625,
|
6867 |
+
"learning_rate": 8.754037907482748e-07,
|
6868 |
+
"loss": 0.7992,
|
6869 |
+
"step": 4895
|
6870 |
+
},
|
6871 |
+
{
|
6872 |
+
"epoch": 0.9630503144654088,
|
6873 |
+
"grad_norm": 0.306640625,
|
6874 |
+
"learning_rate": 8.306957064738385e-07,
|
6875 |
+
"loss": 0.806,
|
6876 |
+
"step": 4900
|
6877 |
+
},
|
6878 |
+
{
|
6879 |
+
"epoch": 0.9640330188679245,
|
6880 |
+
"grad_norm": 0.31640625,
|
6881 |
+
"learning_rate": 7.871546341614023e-07,
|
6882 |
+
"loss": 0.7803,
|
6883 |
+
"step": 4905
|
6884 |
+
},
|
6885 |
+
{
|
6886 |
+
"epoch": 0.9650157232704403,
|
6887 |
+
"grad_norm": 0.3046875,
|
6888 |
+
"learning_rate": 7.447810861968552e-07,
|
6889 |
+
"loss": 0.7864,
|
6890 |
+
"step": 4910
|
6891 |
+
},
|
6892 |
+
{
|
6893 |
+
"epoch": 0.965998427672956,
|
6894 |
+
"grad_norm": 0.30859375,
|
6895 |
+
"learning_rate": 7.03575561226788e-07,
|
6896 |
+
"loss": 0.7837,
|
6897 |
+
"step": 4915
|
6898 |
+
},
|
6899 |
+
{
|
6900 |
+
"epoch": 0.9669811320754716,
|
6901 |
+
"grad_norm": 0.302734375,
|
6902 |
+
"learning_rate": 6.635385441526754e-07,
|
6903 |
+
"loss": 0.7935,
|
6904 |
+
"step": 4920
|
6905 |
+
},
|
6906 |
+
{
|
6907 |
+
"epoch": 0.9679638364779874,
|
6908 |
+
"grad_norm": 0.314453125,
|
6909 |
+
"learning_rate": 6.246705061251245e-07,
|
6910 |
+
"loss": 0.8074,
|
6911 |
+
"step": 4925
|
6912 |
+
},
|
6913 |
+
{
|
6914 |
+
"epoch": 0.9689465408805031,
|
6915 |
+
"grad_norm": 0.298828125,
|
6916 |
+
"learning_rate": 5.86971904538347e-07,
|
6917 |
+
"loss": 0.8082,
|
6918 |
+
"step": 4930
|
6919 |
+
},
|
6920 |
+
{
|
6921 |
+
"epoch": 0.9699292452830188,
|
6922 |
+
"grad_norm": 0.3125,
|
6923 |
+
"learning_rate": 5.504431830247514e-07,
|
6924 |
+
"loss": 0.7889,
|
6925 |
+
"step": 4935
|
6926 |
+
},
|
6927 |
+
{
|
6928 |
+
"epoch": 0.9709119496855346,
|
6929 |
+
"grad_norm": 0.306640625,
|
6930 |
+
"learning_rate": 5.150847714497697e-07,
|
6931 |
+
"loss": 0.7924,
|
6932 |
+
"step": 4940
|
6933 |
+
},
|
6934 |
+
{
|
6935 |
+
"epoch": 0.9718946540880503,
|
6936 |
+
"grad_norm": 0.296875,
|
6937 |
+
"learning_rate": 4.80897085906773e-07,
|
6938 |
+
"loss": 0.81,
|
6939 |
+
"step": 4945
|
6940 |
+
},
|
6941 |
+
{
|
6942 |
+
"epoch": 0.972877358490566,
|
6943 |
+
"grad_norm": 0.294921875,
|
6944 |
+
"learning_rate": 4.4788052871215234e-07,
|
6945 |
+
"loss": 0.805,
|
6946 |
+
"step": 4950
|
6947 |
+
},
|
6948 |
+
{
|
6949 |
+
"epoch": 0.9738600628930818,
|
6950 |
+
"grad_norm": 0.30078125,
|
6951 |
+
"learning_rate": 4.1603548840062345e-07,
|
6952 |
+
"loss": 0.8101,
|
6953 |
+
"step": 4955
|
6954 |
+
},
|
6955 |
+
{
|
6956 |
+
"epoch": 0.9748427672955975,
|
6957 |
+
"grad_norm": 0.3046875,
|
6958 |
+
"learning_rate": 3.853623397206407e-07,
|
6959 |
+
"loss": 0.7909,
|
6960 |
+
"step": 4960
|
6961 |
+
},
|
6962 |
+
{
|
6963 |
+
"epoch": 0.9758254716981132,
|
6964 |
+
"grad_norm": 0.302734375,
|
6965 |
+
"learning_rate": 3.5586144362997896e-07,
|
6966 |
+
"loss": 0.7972,
|
6967 |
+
"step": 4965
|
6968 |
+
},
|
6969 |
+
{
|
6970 |
+
"epoch": 0.976808176100629,
|
6971 |
+
"grad_norm": 0.314453125,
|
6972 |
+
"learning_rate": 3.275331472914922e-07,
|
6973 |
+
"loss": 0.8101,
|
6974 |
+
"step": 4970
|
6975 |
+
},
|
6976 |
+
{
|
6977 |
+
"epoch": 0.9777908805031447,
|
6978 |
+
"grad_norm": 0.3125,
|
6979 |
+
"learning_rate": 3.0037778406902805e-07,
|
6980 |
+
"loss": 0.8184,
|
6981 |
+
"step": 4975
|
6982 |
+
},
|
6983 |
+
{
|
6984 |
+
"epoch": 0.9787735849056604,
|
6985 |
+
"grad_norm": 0.3125,
|
6986 |
+
"learning_rate": 2.743956735234865e-07,
|
6987 |
+
"loss": 0.782,
|
6988 |
+
"step": 4980
|
6989 |
+
},
|
6990 |
+
{
|
6991 |
+
"epoch": 0.9797562893081762,
|
6992 |
+
"grad_norm": 0.322265625,
|
6993 |
+
"learning_rate": 2.4958712140911166e-07,
|
6994 |
+
"loss": 0.7905,
|
6995 |
+
"step": 4985
|
6996 |
+
},
|
6997 |
+
{
|
6998 |
+
"epoch": 0.9807389937106918,
|
6999 |
+
"grad_norm": 0.310546875,
|
7000 |
+
"learning_rate": 2.2595241966982817e-07,
|
7001 |
+
"loss": 0.8163,
|
7002 |
+
"step": 4990
|
7003 |
+
},
|
7004 |
+
{
|
7005 |
+
"epoch": 0.9817216981132075,
|
7006 |
+
"grad_norm": 0.3125,
|
7007 |
+
"learning_rate": 2.0349184643586595e-07,
|
7008 |
+
"loss": 0.8266,
|
7009 |
+
"step": 4995
|
7010 |
+
},
|
7011 |
+
{
|
7012 |
+
"epoch": 0.9827044025157232,
|
7013 |
+
"grad_norm": 0.30859375,
|
7014 |
+
"learning_rate": 1.8220566602040745e-07,
|
7015 |
+
"loss": 0.8174,
|
7016 |
+
"step": 5000
|
7017 |
+
},
|
7018 |
+
{
|
7019 |
+
"epoch": 0.983687106918239,
|
7020 |
+
"grad_norm": 0.302734375,
|
7021 |
+
"learning_rate": 1.6209412891659003e-07,
|
7022 |
+
"loss": 0.8052,
|
7023 |
+
"step": 5005
|
7024 |
+
},
|
7025 |
+
{
|
7026 |
+
"epoch": 0.9846698113207547,
|
7027 |
+
"grad_norm": 0.302734375,
|
7028 |
+
"learning_rate": 1.4315747179446392e-07,
|
7029 |
+
"loss": 0.7871,
|
7030 |
+
"step": 5010
|
7031 |
+
},
|
7032 |
+
{
|
7033 |
+
"epoch": 0.9856525157232704,
|
7034 |
+
"grad_norm": 0.31640625,
|
7035 |
+
"learning_rate": 1.2539591749821666e-07,
|
7036 |
+
"loss": 0.7973,
|
7037 |
+
"step": 5015
|
7038 |
+
},
|
7039 |
+
{
|
7040 |
+
"epoch": 0.9866352201257862,
|
7041 |
+
"grad_norm": 0.33203125,
|
7042 |
+
"learning_rate": 1.088096750436085e-07,
|
7043 |
+
"loss": 0.7972,
|
7044 |
+
"step": 5020
|
7045 |
+
},
|
7046 |
+
{
|
7047 |
+
"epoch": 0.9876179245283019,
|
7048 |
+
"grad_norm": 0.31640625,
|
7049 |
+
"learning_rate": 9.339893961548551e-08,
|
7050 |
+
"loss": 0.8152,
|
7051 |
+
"step": 5025
|
7052 |
+
},
|
7053 |
+
{
|
7054 |
+
"epoch": 0.9886006289308176,
|
7055 |
+
"grad_norm": 0.310546875,
|
7056 |
+
"learning_rate": 7.916389256541479e-08,
|
7057 |
+
"loss": 0.8147,
|
7058 |
+
"step": 5030
|
7059 |
+
},
|
7060 |
+
{
|
7061 |
+
"epoch": 0.9895833333333334,
|
7062 |
+
"grad_norm": 0.302734375,
|
7063 |
+
"learning_rate": 6.610470140967495e-08,
|
7064 |
+
"loss": 0.81,
|
7065 |
+
"step": 5035
|
7066 |
+
},
|
7067 |
+
{
|
7068 |
+
"epoch": 0.9905660377358491,
|
7069 |
+
"grad_norm": 0.310546875,
|
7070 |
+
"learning_rate": 5.422151982719115e-08,
|
7071 |
+
"loss": 0.8167,
|
7072 |
+
"step": 5040
|
7073 |
+
},
|
7074 |
+
{
|
7075 |
+
"epoch": 0.9915487421383647,
|
7076 |
+
"grad_norm": 0.330078125,
|
7077 |
+
"learning_rate": 4.351448765775867e-08,
|
7078 |
+
"loss": 0.8175,
|
7079 |
+
"step": 5045
|
7080 |
+
},
|
7081 |
+
{
|
7082 |
+
"epoch": 0.9925314465408805,
|
7083 |
+
"grad_norm": 0.310546875,
|
7084 |
+
"learning_rate": 3.3983730900377655e-08,
|
7085 |
+
"loss": 0.8009,
|
7086 |
+
"step": 5050
|
7087 |
+
},
|
7088 |
+
{
|
7089 |
+
"epoch": 0.9935141509433962,
|
7090 |
+
"grad_norm": 0.30078125,
|
7091 |
+
"learning_rate": 2.5629361711809742e-08,
|
7092 |
+
"loss": 0.8025,
|
7093 |
+
"step": 5055
|
7094 |
+
},
|
7095 |
+
{
|
7096 |
+
"epoch": 0.9944968553459119,
|
7097 |
+
"grad_norm": 0.30078125,
|
7098 |
+
"learning_rate": 1.8451478405223653e-08,
|
7099 |
+
"loss": 0.7953,
|
7100 |
+
"step": 5060
|
7101 |
+
},
|
7102 |
+
{
|
7103 |
+
"epoch": 0.9954795597484277,
|
7104 |
+
"grad_norm": 0.314453125,
|
7105 |
+
"learning_rate": 1.2450165449062744e-08,
|
7106 |
+
"loss": 0.7893,
|
7107 |
+
"step": 5065
|
7108 |
+
},
|
7109 |
+
{
|
7110 |
+
"epoch": 0.9964622641509434,
|
7111 |
+
"grad_norm": 0.3046875,
|
7112 |
+
"learning_rate": 7.62549346601249e-09,
|
7113 |
+
"loss": 0.8113,
|
7114 |
+
"step": 5070
|
7115 |
+
},
|
7116 |
+
{
|
7117 |
+
"epoch": 0.9974449685534591,
|
7118 |
+
"grad_norm": 0.302734375,
|
7119 |
+
"learning_rate": 3.977519232223337e-09,
|
7120 |
+
"loss": 0.8174,
|
7121 |
+
"step": 5075
|
7122 |
+
},
|
7123 |
+
{
|
7124 |
+
"epoch": 0.9984276729559748,
|
7125 |
+
"grad_norm": 0.302734375,
|
7126 |
+
"learning_rate": 1.5062856765779565e-09,
|
7127 |
+
"loss": 0.8089,
|
7128 |
+
"step": 5080
|
7129 |
+
},
|
7130 |
+
{
|
7131 |
+
"epoch": 0.9994103773584906,
|
7132 |
+
"grad_norm": 0.3125,
|
7133 |
+
"learning_rate": 2.118218802582561e-10,
|
7134 |
+
"loss": 0.8288,
|
7135 |
+
"step": 5085
|
7136 |
+
},
|
7137 |
+
{
|
7138 |
+
"epoch": 1.0,
|
7139 |
+
"eval_loss": 0.7983009815216064,
|
7140 |
+
"eval_runtime": 7962.7938,
|
7141 |
+
"eval_samples_per_second": 10.22,
|
7142 |
+
"eval_steps_per_second": 0.16,
|
7143 |
+
"step": 5088
|
7144 |
+
},
|
7145 |
+
{
|
7146 |
+
"epoch": 1.0,
|
7147 |
+
"step": 5088,
|
7148 |
+
"total_flos": 1.5751056572484157e+19,
|
7149 |
+
"train_loss": 0.09284130807192821,
|
7150 |
+
"train_runtime": 20560.1048,
|
7151 |
+
"train_samples_per_second": 15.837,
|
7152 |
+
"train_steps_per_second": 0.247
|
7153 |
}
|
7154 |
],
|
7155 |
"logging_steps": 5,
|
7156 |
+
"max_steps": 5088,
|
7157 |
"num_input_tokens_seen": 0,
|
7158 |
+
"num_train_epochs": 1,
|
7159 |
"save_steps": 500,
|
7160 |
"stateful_callbacks": {
|
7161 |
"TrainerControl": {
|
|
|
7169 |
"attributes": {}
|
7170 |
}
|
7171 |
},
|
7172 |
+
"total_flos": 1.5751056572484157e+19,
|
7173 |
"train_batch_size": 16,
|
7174 |
"trial_name": null,
|
7175 |
"trial_params": null
|