revan_f / trainer_state.json
revands's picture
Upload 11 files
26035e4 verified
raw
history blame contribute delete
No virus
8.43 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.984,
"global_step": 62,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03,
"learning_rate": 2.5e-05,
"loss": 2.4811,
"step": 1
},
{
"epoch": 0.06,
"learning_rate": 5e-05,
"loss": 2.7677,
"step": 2
},
{
"epoch": 0.1,
"learning_rate": 7.500000000000001e-05,
"loss": 2.8184,
"step": 3
},
{
"epoch": 0.13,
"learning_rate": 0.0001,
"loss": 2.7164,
"step": 4
},
{
"epoch": 0.16,
"learning_rate": 9.992667069255619e-05,
"loss": 2.6905,
"step": 5
},
{
"epoch": 0.19,
"learning_rate": 9.970689785771798e-05,
"loss": 2.4758,
"step": 6
},
{
"epoch": 0.22,
"learning_rate": 9.934132612707632e-05,
"loss": 2.1961,
"step": 7
},
{
"epoch": 0.26,
"learning_rate": 9.883102778550434e-05,
"loss": 2.1084,
"step": 8
},
{
"epoch": 0.29,
"learning_rate": 9.817749962596115e-05,
"loss": 1.9172,
"step": 9
},
{
"epoch": 0.32,
"learning_rate": 9.738265855914013e-05,
"loss": 1.7626,
"step": 10
},
{
"epoch": 0.35,
"learning_rate": 9.644883599083958e-05,
"loss": 1.6495,
"step": 11
},
{
"epoch": 0.38,
"learning_rate": 9.537877098354786e-05,
"loss": 1.3566,
"step": 12
},
{
"epoch": 0.42,
"learning_rate": 9.417560222230115e-05,
"loss": 1.2721,
"step": 13
},
{
"epoch": 0.42,
"eval_loss": 1.381595492362976,
"eval_runtime": 1.4441,
"eval_samples_per_second": 34.623,
"eval_steps_per_second": 4.847,
"step": 13
},
{
"epoch": 0.45,
"learning_rate": 9.284285880837946e-05,
"loss": 1.1247,
"step": 14
},
{
"epoch": 0.48,
"learning_rate": 9.138444990784453e-05,
"loss": 1.3827,
"step": 15
},
{
"epoch": 0.51,
"learning_rate": 8.980465328528219e-05,
"loss": 1.4877,
"step": 16
},
{
"epoch": 0.54,
"learning_rate": 8.810810275638183e-05,
"loss": 1.4821,
"step": 17
},
{
"epoch": 0.58,
"learning_rate": 8.629977459615655e-05,
"loss": 1.3786,
"step": 18
},
{
"epoch": 0.61,
"learning_rate": 8.438497294267117e-05,
"loss": 1.2494,
"step": 19
},
{
"epoch": 0.64,
"learning_rate": 8.236931423909138e-05,
"loss": 1.1829,
"step": 20
},
{
"epoch": 0.67,
"learning_rate": 8.025871075968828e-05,
"loss": 1.0193,
"step": 21
},
{
"epoch": 0.7,
"learning_rate": 7.805935326811912e-05,
"loss": 1.4785,
"step": 22
},
{
"epoch": 0.74,
"learning_rate": 7.577769285885109e-05,
"loss": 1.4004,
"step": 23
},
{
"epoch": 0.77,
"learning_rate": 7.342042203498951e-05,
"loss": 1.3328,
"step": 24
},
{
"epoch": 0.8,
"learning_rate": 7.099445507801323e-05,
"loss": 1.2686,
"step": 25
},
{
"epoch": 0.83,
"learning_rate": 6.850690776699573e-05,
"loss": 1.0142,
"step": 26
},
{
"epoch": 0.83,
"eval_loss": 1.1247260570526123,
"eval_runtime": 1.4412,
"eval_samples_per_second": 34.693,
"eval_steps_per_second": 4.857,
"step": 26
},
{
"epoch": 0.86,
"learning_rate": 6.5965076506799e-05,
"loss": 0.9446,
"step": 27
},
{
"epoch": 0.9,
"learning_rate": 6.337641692646106e-05,
"loss": 0.8311,
"step": 28
},
{
"epoch": 0.93,
"learning_rate": 6.0748522010551215e-05,
"loss": 1.3538,
"step": 29
},
{
"epoch": 0.96,
"learning_rate": 5.808909982763825e-05,
"loss": 1.1263,
"step": 30
},
{
"epoch": 0.99,
"learning_rate": 5.540595092119709e-05,
"loss": 0.9153,
"step": 31
},
{
"epoch": 1.02,
"learning_rate": 5.270694542927088e-05,
"loss": 0.9691,
"step": 32
},
{
"epoch": 1.06,
"learning_rate": 5e-05,
"loss": 1.377,
"step": 33
},
{
"epoch": 1.09,
"learning_rate": 4.729305457072913e-05,
"loss": 1.242,
"step": 34
},
{
"epoch": 1.12,
"learning_rate": 4.4594049078802925e-05,
"loss": 1.2525,
"step": 35
},
{
"epoch": 1.15,
"learning_rate": 4.1910900172361764e-05,
"loss": 1.0716,
"step": 36
},
{
"epoch": 1.18,
"learning_rate": 3.92514779894488e-05,
"loss": 0.9491,
"step": 37
},
{
"epoch": 1.22,
"learning_rate": 3.6623583073538966e-05,
"loss": 0.7949,
"step": 38
},
{
"epoch": 1.25,
"learning_rate": 3.403492349320101e-05,
"loss": 1.1087,
"step": 39
},
{
"epoch": 1.25,
"eval_loss": 1.0872222185134888,
"eval_runtime": 1.4388,
"eval_samples_per_second": 34.752,
"eval_steps_per_second": 4.865,
"step": 39
},
{
"epoch": 1.28,
"learning_rate": 3.149309223300428e-05,
"loss": 1.3412,
"step": 40
},
{
"epoch": 1.31,
"learning_rate": 2.900554492198677e-05,
"loss": 1.1742,
"step": 41
},
{
"epoch": 1.34,
"learning_rate": 2.65795779650105e-05,
"loss": 1.0373,
"step": 42
},
{
"epoch": 1.38,
"learning_rate": 2.422230714114891e-05,
"loss": 1.0752,
"step": 43
},
{
"epoch": 1.41,
"learning_rate": 2.194064673188089e-05,
"loss": 0.8705,
"step": 44
},
{
"epoch": 1.44,
"learning_rate": 1.9741289240311755e-05,
"loss": 0.8475,
"step": 45
},
{
"epoch": 1.47,
"learning_rate": 1.7630685760908622e-05,
"loss": 1.2784,
"step": 46
},
{
"epoch": 1.5,
"learning_rate": 1.561502705732883e-05,
"loss": 1.4262,
"step": 47
},
{
"epoch": 1.54,
"learning_rate": 1.3700225403843469e-05,
"loss": 1.1989,
"step": 48
},
{
"epoch": 1.57,
"learning_rate": 1.1891897243618182e-05,
"loss": 1.0853,
"step": 49
},
{
"epoch": 1.6,
"learning_rate": 1.0195346714717813e-05,
"loss": 0.8383,
"step": 50
},
{
"epoch": 1.63,
"learning_rate": 8.615550092155478e-06,
"loss": 0.9405,
"step": 51
},
{
"epoch": 1.66,
"learning_rate": 7.157141191620548e-06,
"loss": 0.837,
"step": 52
},
{
"epoch": 1.66,
"eval_loss": 1.077386736869812,
"eval_runtime": 1.4402,
"eval_samples_per_second": 34.716,
"eval_steps_per_second": 4.86,
"step": 52
},
{
"epoch": 1.7,
"learning_rate": 5.824397777698859e-06,
"loss": 1.1777,
"step": 53
},
{
"epoch": 1.73,
"learning_rate": 4.621229016452156e-06,
"loss": 1.2908,
"step": 54
},
{
"epoch": 1.76,
"learning_rate": 3.551164009160429e-06,
"loss": 1.2032,
"step": 55
},
{
"epoch": 1.79,
"learning_rate": 2.6173414408598827e-06,
"loss": 1.0568,
"step": 56
},
{
"epoch": 1.82,
"learning_rate": 1.8225003740388547e-06,
"loss": 1.029,
"step": 57
},
{
"epoch": 1.86,
"learning_rate": 1.1689722144956671e-06,
"loss": 1.0957,
"step": 58
},
{
"epoch": 1.89,
"learning_rate": 6.58673872923693e-07,
"loss": 0.7948,
"step": 59
},
{
"epoch": 1.92,
"learning_rate": 2.9310214228202013e-07,
"loss": 1.1412,
"step": 60
},
{
"epoch": 1.95,
"learning_rate": 7.332930744380906e-08,
"loss": 1.3215,
"step": 61
},
{
"epoch": 1.98,
"learning_rate": 0.0,
"loss": 1.0566,
"step": 62
}
],
"max_steps": 62,
"num_train_epochs": 2,
"total_flos": 1589370366787584.0,
"trial_name": null,
"trial_params": null
}