opsgenius-gptlarge_8bit / trainer_state.json
YALCINKAYA's picture
initial fine-tuning commit
6a23a66 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 90.0,
"eval_steps": 500,
"global_step": 360,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 2.5,
"grad_norm": 0.5093985795974731,
"learning_rate": 0.0009980973490458728,
"loss": 2.5467,
"step": 10
},
{
"epoch": 5.0,
"grad_norm": 0.27049773931503296,
"learning_rate": 0.000992403876506104,
"loss": 1.618,
"step": 20
},
{
"epoch": 7.5,
"grad_norm": 0.22319507598876953,
"learning_rate": 0.0009829629131445341,
"loss": 1.3933,
"step": 30
},
{
"epoch": 10.0,
"grad_norm": 0.2564426362514496,
"learning_rate": 0.0009698463103929542,
"loss": 1.2537,
"step": 40
},
{
"epoch": 12.5,
"grad_norm": 0.29759547114372253,
"learning_rate": 0.0009531538935183251,
"loss": 1.1368,
"step": 50
},
{
"epoch": 15.0,
"grad_norm": 0.39310407638549805,
"learning_rate": 0.0009330127018922195,
"loss": 1.0137,
"step": 60
},
{
"epoch": 17.5,
"grad_norm": 0.4282013475894928,
"learning_rate": 0.0009095760221444959,
"loss": 0.9095,
"step": 70
},
{
"epoch": 20.0,
"grad_norm": 0.5433480143547058,
"learning_rate": 0.000883022221559489,
"loss": 0.8259,
"step": 80
},
{
"epoch": 22.5,
"grad_norm": 0.5130556225776672,
"learning_rate": 0.0008535533905932737,
"loss": 0.7432,
"step": 90
},
{
"epoch": 25.0,
"grad_norm": 0.5033867359161377,
"learning_rate": 0.0008213938048432696,
"loss": 0.6756,
"step": 100
},
{
"epoch": 27.5,
"grad_norm": 0.5322891473770142,
"learning_rate": 0.0007867882181755231,
"loss": 0.6123,
"step": 110
},
{
"epoch": 30.0,
"grad_norm": 0.5658751726150513,
"learning_rate": 0.00075,
"loss": 0.567,
"step": 120
},
{
"epoch": 32.5,
"grad_norm": 0.5688820481300354,
"learning_rate": 0.0007113091308703497,
"loss": 0.5171,
"step": 130
},
{
"epoch": 35.0,
"grad_norm": 0.48720425367355347,
"learning_rate": 0.0006710100716628344,
"loss": 0.4825,
"step": 140
},
{
"epoch": 37.5,
"grad_norm": 0.5325213074684143,
"learning_rate": 0.0006294095225512603,
"loss": 0.4498,
"step": 150
},
{
"epoch": 40.0,
"grad_norm": 0.527807891368866,
"learning_rate": 0.0005868240888334653,
"loss": 0.4184,
"step": 160
},
{
"epoch": 42.5,
"grad_norm": 0.4882418215274811,
"learning_rate": 0.0005435778713738292,
"loss": 0.3946,
"step": 170
},
{
"epoch": 45.0,
"grad_norm": 0.49641069769859314,
"learning_rate": 0.0005,
"loss": 0.3706,
"step": 180
},
{
"epoch": 47.5,
"grad_norm": 0.5216576457023621,
"learning_rate": 0.00045642212862617086,
"loss": 0.3529,
"step": 190
},
{
"epoch": 50.0,
"grad_norm": 0.5289739966392517,
"learning_rate": 0.00041317591116653486,
"loss": 0.3334,
"step": 200
},
{
"epoch": 52.5,
"grad_norm": 0.49065396189689636,
"learning_rate": 0.0003705904774487396,
"loss": 0.3166,
"step": 210
},
{
"epoch": 55.0,
"grad_norm": 0.4922383725643158,
"learning_rate": 0.0003289899283371657,
"loss": 0.3086,
"step": 220
},
{
"epoch": 57.5,
"grad_norm": 0.42886731028556824,
"learning_rate": 0.0002886908691296504,
"loss": 0.2894,
"step": 230
},
{
"epoch": 60.0,
"grad_norm": 0.42780551314353943,
"learning_rate": 0.0002500000000000001,
"loss": 0.286,
"step": 240
},
{
"epoch": 62.5,
"grad_norm": 0.4010085463523865,
"learning_rate": 0.00021321178182447708,
"loss": 0.2775,
"step": 250
},
{
"epoch": 65.0,
"grad_norm": 0.4402850866317749,
"learning_rate": 0.0001786061951567303,
"loss": 0.2714,
"step": 260
},
{
"epoch": 67.5,
"grad_norm": 0.4067270755767822,
"learning_rate": 0.00014644660940672628,
"loss": 0.2664,
"step": 270
},
{
"epoch": 70.0,
"grad_norm": 0.43572983145713806,
"learning_rate": 0.00011697777844051105,
"loss": 0.2582,
"step": 280
},
{
"epoch": 72.5,
"grad_norm": 0.3881024420261383,
"learning_rate": 9.042397785550405e-05,
"loss": 0.2549,
"step": 290
},
{
"epoch": 75.0,
"grad_norm": 0.3768531382083893,
"learning_rate": 6.698729810778065e-05,
"loss": 0.251,
"step": 300
},
{
"epoch": 77.5,
"grad_norm": 0.3921433389186859,
"learning_rate": 4.684610648167503e-05,
"loss": 0.2509,
"step": 310
},
{
"epoch": 80.0,
"grad_norm": 0.38551065325737,
"learning_rate": 3.0153689607045842e-05,
"loss": 0.2466,
"step": 320
},
{
"epoch": 82.5,
"grad_norm": 0.3801429867744446,
"learning_rate": 1.70370868554659e-05,
"loss": 0.2462,
"step": 330
},
{
"epoch": 85.0,
"grad_norm": 0.39348262548446655,
"learning_rate": 7.59612349389599e-06,
"loss": 0.2468,
"step": 340
},
{
"epoch": 87.5,
"grad_norm": 0.3822017014026642,
"learning_rate": 1.9026509541272275e-06,
"loss": 0.2477,
"step": 350
},
{
"epoch": 90.0,
"grad_norm": 0.3794040381908417,
"learning_rate": 0.0,
"loss": 0.2449,
"step": 360
}
],
"logging_steps": 10,
"max_steps": 360,
"num_input_tokens_seen": 0,
"num_train_epochs": 90,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.43272328822784e+16,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}