File size: 1,544 Bytes
9754a88 65e2720 9754a88 65e2720 9754a88 d0b8347 65e2720 d0b8347 9754a88 d0b8347 65e2720 9754a88 d0b8347 65e2720 d0b8347 65e2720 9754a88 d0b8347 65e2720 9754a88 d0b8347 65e2720 9754a88 65e2720 9754a88 65e2720 9754a88 65e2720 9754a88 65e2720 9754a88 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.92,
"eval_steps": 500,
"global_step": 18,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.11,
"learning_rate": 0.0001,
"loss": 2.1556,
"step": 1
},
{
"epoch": 0.53,
"learning_rate": 0.00018314696123025454,
"loss": 1.6966,
"step": 5
},
{
"epoch": 0.96,
"eval_loss": 0.4002748727798462,
"eval_runtime": 6.1602,
"eval_samples_per_second": 16.233,
"eval_steps_per_second": 2.11,
"step": 9
},
{
"epoch": 1.07,
"learning_rate": 0.0001,
"loss": 0.5695,
"step": 10
},
{
"epoch": 1.6,
"learning_rate": 1.6853038769745467e-05,
"loss": 0.3419,
"step": 15
},
{
"epoch": 1.92,
"eval_loss": 0.3410404324531555,
"eval_runtime": 7.1037,
"eval_samples_per_second": 14.077,
"eval_steps_per_second": 1.83,
"step": 18
},
{
"epoch": 1.92,
"step": 18,
"total_flos": 27578625622016.0,
"train_loss": 0.8035296764638689,
"train_runtime": 392.0592,
"train_samples_per_second": 1.53,
"train_steps_per_second": 0.046
}
],
"logging_steps": 5,
"max_steps": 18,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 500,
"total_flos": 27578625622016.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}
|