File size: 2,438 Bytes
017ab6b 0df2eb9 017ab6b 0df2eb9 017ab6b 0df2eb9 017ab6b 0df2eb9 017ab6b 0df2eb9 017ab6b 0df2eb9 017ab6b 0df2eb9 017ab6b 0df2eb9 017ab6b 0df2eb9 017ab6b 0df2eb9 017ab6b 0df2eb9 017ab6b 0df2eb9 017ab6b 0df2eb9 017ab6b 0df2eb9 017ab6b 0df2eb9 017ab6b 0df2eb9 017ab6b 0df2eb9 017ab6b 0df2eb9 017ab6b 0df2eb9 017ab6b 0df2eb9 017ab6b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 |
{
"best_metric": 1.1008135080337524,
"best_model_checkpoint": "/kaggle/output/checkpoint-5000",
"epoch": 0.20371577574967406,
"eval_steps": 1000,
"global_step": 5000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 2.7777777777777777e-11,
"loss": 1.1383,
"step": 1
},
{
"epoch": 0.04,
"learning_rate": 2.7750000000000004e-08,
"loss": 1.1424,
"step": 1000
},
{
"epoch": 0.04,
"eval_accuracy": 0.32375249500998005,
"eval_loss": 1.1077626943588257,
"eval_runtime": 54.8633,
"eval_samples_per_second": 91.318,
"eval_steps_per_second": 11.428,
"step": 1000
},
{
"epoch": 0.08,
"learning_rate": 5.5527777777777784e-08,
"loss": 1.1244,
"step": 2000
},
{
"epoch": 0.08,
"eval_accuracy": 0.33652694610778444,
"eval_loss": 1.1080161333084106,
"eval_runtime": 54.7384,
"eval_samples_per_second": 91.526,
"eval_steps_per_second": 11.454,
"step": 2000
},
{
"epoch": 0.12,
"learning_rate": 8.327777777777778e-08,
"loss": 1.1228,
"step": 3000
},
{
"epoch": 0.12,
"eval_accuracy": 0.34331337325349304,
"eval_loss": 1.1084064245224,
"eval_runtime": 54.7948,
"eval_samples_per_second": 91.432,
"eval_steps_per_second": 11.443,
"step": 3000
},
{
"epoch": 0.16,
"learning_rate": 1.1105555555555557e-07,
"loss": 1.1216,
"step": 4000
},
{
"epoch": 0.16,
"eval_accuracy": 0.3385229540918164,
"eval_loss": 1.1014840602874756,
"eval_runtime": 54.8508,
"eval_samples_per_second": 91.339,
"eval_steps_per_second": 11.431,
"step": 4000
},
{
"epoch": 0.2,
"learning_rate": 1.3880555555555558e-07,
"loss": 1.1181,
"step": 5000
},
{
"epoch": 0.2,
"eval_accuracy": 0.33073852295409184,
"eval_loss": 1.1008135080337524,
"eval_runtime": 54.8304,
"eval_samples_per_second": 91.373,
"eval_steps_per_second": 11.435,
"step": 5000
}
],
"logging_steps": 1000,
"max_steps": 10000000,
"num_train_epochs": 408,
"save_steps": 1000,
"total_flos": 1.045177565184e+16,
"trial_name": null,
"trial_params": null
}
|