newsdiscourse-model / trainer_state.json
alex2awesome's picture
Training in progress, step 2500
836ec68
raw
history blame
9.78 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"global_step": 3510,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.14,
"eval_f1": 0.3722687284743791,
"eval_loss": 1.3361328840255737,
"eval_runtime": 3.2985,
"eval_samples_per_second": 29.71,
"eval_steps_per_second": 29.71,
"step": 100
},
{
"epoch": 0.28,
"eval_f1": 0.4344699929794222,
"eval_loss": 1.1836130619049072,
"eval_runtime": 3.274,
"eval_samples_per_second": 29.933,
"eval_steps_per_second": 29.933,
"step": 200
},
{
"epoch": 0.43,
"eval_f1": 0.3996935122704078,
"eval_loss": 1.1635534763336182,
"eval_runtime": 3.2772,
"eval_samples_per_second": 29.904,
"eval_steps_per_second": 29.904,
"step": 300
},
{
"epoch": 0.57,
"eval_f1": 0.5028054395723479,
"eval_loss": 1.353499174118042,
"eval_runtime": 3.2926,
"eval_samples_per_second": 29.764,
"eval_steps_per_second": 29.764,
"step": 400
},
{
"epoch": 0.71,
"learning_rate": 2.572649572649573e-05,
"loss": 1.2064,
"step": 500
},
{
"epoch": 0.71,
"eval_f1": 0.47071964928887344,
"eval_loss": 1.2940737009048462,
"eval_runtime": 3.3744,
"eval_samples_per_second": 29.042,
"eval_steps_per_second": 29.042,
"step": 500
},
{
"epoch": 0.85,
"eval_f1": 0.49370416278560797,
"eval_loss": 1.2891041040420532,
"eval_runtime": 3.3094,
"eval_samples_per_second": 29.613,
"eval_steps_per_second": 29.613,
"step": 600
},
{
"epoch": 1.0,
"eval_f1": 0.47736617527595926,
"eval_loss": 1.2047343254089355,
"eval_runtime": 3.3145,
"eval_samples_per_second": 29.567,
"eval_steps_per_second": 29.567,
"step": 700
},
{
"epoch": 1.14,
"eval_f1": 0.4943830125990876,
"eval_loss": 1.2190661430358887,
"eval_runtime": 3.3377,
"eval_samples_per_second": 29.362,
"eval_steps_per_second": 29.362,
"step": 800
},
{
"epoch": 1.28,
"eval_f1": 0.4777863203104454,
"eval_loss": 1.174961805343628,
"eval_runtime": 3.3391,
"eval_samples_per_second": 29.349,
"eval_steps_per_second": 29.349,
"step": 900
},
{
"epoch": 1.42,
"learning_rate": 2.1452991452991456e-05,
"loss": 0.9207,
"step": 1000
},
{
"epoch": 1.42,
"eval_f1": 0.49086129207075757,
"eval_loss": 1.3087153434753418,
"eval_runtime": 3.4003,
"eval_samples_per_second": 28.821,
"eval_steps_per_second": 28.821,
"step": 1000
},
{
"epoch": 1.57,
"eval_f1": 0.49757882395260544,
"eval_loss": 1.2435556650161743,
"eval_runtime": 3.4366,
"eval_samples_per_second": 28.517,
"eval_steps_per_second": 28.517,
"step": 1100
},
{
"epoch": 1.71,
"eval_f1": 0.503327058221218,
"eval_loss": 1.1465363502502441,
"eval_runtime": 3.2944,
"eval_samples_per_second": 29.748,
"eval_steps_per_second": 29.748,
"step": 1200
},
{
"epoch": 1.85,
"eval_f1": 0.5141589868888157,
"eval_loss": 1.113364577293396,
"eval_runtime": 3.3642,
"eval_samples_per_second": 29.131,
"eval_steps_per_second": 29.131,
"step": 1300
},
{
"epoch": 1.99,
"eval_f1": 0.5383469405673188,
"eval_loss": 1.1939647197723389,
"eval_runtime": 3.3033,
"eval_samples_per_second": 29.668,
"eval_steps_per_second": 29.668,
"step": 1400
},
{
"epoch": 2.14,
"learning_rate": 1.7179487179487178e-05,
"loss": 0.8149,
"step": 1500
},
{
"epoch": 2.14,
"eval_f1": 0.5291030100787034,
"eval_loss": 1.2552497386932373,
"eval_runtime": 3.7541,
"eval_samples_per_second": 26.105,
"eval_steps_per_second": 26.105,
"step": 1500
},
{
"epoch": 2.28,
"eval_f1": 0.5259736412492381,
"eval_loss": 1.3746747970581055,
"eval_runtime": 4.9995,
"eval_samples_per_second": 19.602,
"eval_steps_per_second": 19.602,
"step": 1600
},
{
"epoch": 2.42,
"eval_f1": 0.5329388682083431,
"eval_loss": 1.3680145740509033,
"eval_runtime": 5.1597,
"eval_samples_per_second": 18.993,
"eval_steps_per_second": 18.993,
"step": 1700
},
{
"epoch": 2.56,
"eval_f1": 0.5189920532535025,
"eval_loss": 1.27865469455719,
"eval_runtime": 5.0223,
"eval_samples_per_second": 19.513,
"eval_steps_per_second": 19.513,
"step": 1800
},
{
"epoch": 2.71,
"eval_f1": 0.5409205239275264,
"eval_loss": 1.3888845443725586,
"eval_runtime": 3.3132,
"eval_samples_per_second": 29.579,
"eval_steps_per_second": 29.579,
"step": 1900
},
{
"epoch": 2.85,
"learning_rate": 1.2905982905982905e-05,
"loss": 0.6152,
"step": 2000
},
{
"epoch": 2.85,
"eval_f1": 0.543504294934508,
"eval_loss": 1.3602004051208496,
"eval_runtime": 3.3336,
"eval_samples_per_second": 29.398,
"eval_steps_per_second": 29.398,
"step": 2000
},
{
"epoch": 2.99,
"eval_f1": 0.5467811408362643,
"eval_loss": 1.3174574375152588,
"eval_runtime": 5.0569,
"eval_samples_per_second": 19.379,
"eval_steps_per_second": 19.379,
"step": 2100
},
{
"epoch": 3.13,
"eval_f1": 0.5365057187973831,
"eval_loss": 1.5886870622634888,
"eval_runtime": 4.5058,
"eval_samples_per_second": 21.75,
"eval_steps_per_second": 21.75,
"step": 2200
},
{
"epoch": 3.28,
"eval_f1": 0.5563382534701277,
"eval_loss": 1.517231822013855,
"eval_runtime": 4.5269,
"eval_samples_per_second": 21.648,
"eval_steps_per_second": 21.648,
"step": 2300
},
{
"epoch": 3.42,
"eval_f1": 0.5661390498930103,
"eval_loss": 1.5470443964004517,
"eval_runtime": 3.6905,
"eval_samples_per_second": 26.555,
"eval_steps_per_second": 26.555,
"step": 2400
},
{
"epoch": 3.56,
"learning_rate": 8.632478632478633e-06,
"loss": 0.4719,
"step": 2500
},
{
"epoch": 3.56,
"eval_f1": 0.521216772952552,
"eval_loss": 1.4928430318832397,
"eval_runtime": 3.3155,
"eval_samples_per_second": 29.558,
"eval_steps_per_second": 29.558,
"step": 2500
},
{
"epoch": 3.7,
"eval_f1": 0.5356457612585566,
"eval_loss": 1.6497721672058105,
"eval_runtime": 4.8518,
"eval_samples_per_second": 20.199,
"eval_steps_per_second": 20.199,
"step": 2600
},
{
"epoch": 3.85,
"eval_f1": 0.5596834952223371,
"eval_loss": 1.4976708889007568,
"eval_runtime": 4.6972,
"eval_samples_per_second": 20.863,
"eval_steps_per_second": 20.863,
"step": 2700
},
{
"epoch": 3.99,
"eval_f1": 0.5470066167039311,
"eval_loss": 1.471981167793274,
"eval_runtime": 4.5019,
"eval_samples_per_second": 21.769,
"eval_steps_per_second": 21.769,
"step": 2800
},
{
"epoch": 4.13,
"eval_f1": 0.5492964393504802,
"eval_loss": 1.5796676874160767,
"eval_runtime": 4.7041,
"eval_samples_per_second": 20.833,
"eval_steps_per_second": 20.833,
"step": 2900
},
{
"epoch": 4.27,
"learning_rate": 4.358974358974359e-06,
"loss": 0.372,
"step": 3000
},
{
"epoch": 4.27,
"eval_f1": 0.5445354826532323,
"eval_loss": 1.6874395608901978,
"eval_runtime": 3.9793,
"eval_samples_per_second": 24.628,
"eval_steps_per_second": 24.628,
"step": 3000
},
{
"epoch": 4.42,
"eval_f1": 0.5544723066439012,
"eval_loss": 1.6702477931976318,
"eval_runtime": 4.7443,
"eval_samples_per_second": 20.656,
"eval_steps_per_second": 20.656,
"step": 3100
},
{
"epoch": 4.56,
"eval_f1": 0.5469058666319371,
"eval_loss": 1.7671833038330078,
"eval_runtime": 4.6665,
"eval_samples_per_second": 21.001,
"eval_steps_per_second": 21.001,
"step": 3200
},
{
"epoch": 4.7,
"eval_f1": 0.5485370297299399,
"eval_loss": 1.7351080179214478,
"eval_runtime": 4.8851,
"eval_samples_per_second": 20.061,
"eval_steps_per_second": 20.061,
"step": 3300
},
{
"epoch": 4.84,
"eval_f1": 0.5497797755164764,
"eval_loss": 1.7282612323760986,
"eval_runtime": 5.1791,
"eval_samples_per_second": 18.922,
"eval_steps_per_second": 18.922,
"step": 3400
},
{
"epoch": 4.99,
"learning_rate": 8.547008547008547e-08,
"loss": 0.2944,
"step": 3500
},
{
"epoch": 4.99,
"eval_f1": 0.5503861750639598,
"eval_loss": 1.698703408241272,
"eval_runtime": 5.6701,
"eval_samples_per_second": 17.284,
"eval_steps_per_second": 17.284,
"step": 3500
},
{
"epoch": 5.0,
"step": 3510,
"total_flos": 2890172619430200.0,
"train_loss": 0.6695007115008145,
"train_runtime": 916.4977,
"train_samples_per_second": 3.83,
"train_steps_per_second": 3.83
}
],
"max_steps": 3510,
"num_train_epochs": 5,
"total_flos": 2890172619430200.0,
"trial_name": null,
"trial_params": null
}