{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.976501305483029,
"eval_steps": 500,
"global_step": 285,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05221932114882506,
"grad_norm": 4.916470527648926,
"learning_rate": 4.996203791083291e-05,
"loss": 3.0294,
"step": 5
},
{
"epoch": 0.10443864229765012,
"grad_norm": 2.873619318008423,
"learning_rate": 4.984826693294874e-05,
"loss": 2.7312,
"step": 10
},
{
"epoch": 0.1566579634464752,
"grad_norm": 1.968766689300537,
"learning_rate": 4.965903258506806e-05,
"loss": 2.6282,
"step": 15
},
{
"epoch": 0.20887728459530025,
"grad_norm": 2.165731430053711,
"learning_rate": 4.9394909565685894e-05,
"loss": 2.5533,
"step": 20
},
{
"epoch": 0.26109660574412535,
"grad_norm": 2.0350794792175293,
"learning_rate": 4.905670000773126e-05,
"loss": 2.5412,
"step": 25
},
{
"epoch": 0.3133159268929504,
"grad_norm": 1.8538161516189575,
"learning_rate": 4.864543104251587e-05,
"loss": 2.5643,
"step": 30
},
{
"epoch": 0.36553524804177545,
"grad_norm": 1.6806731224060059,
"learning_rate": 4.8162351680370044e-05,
"loss": 2.5158,
"step": 35
},
{
"epoch": 0.4177545691906005,
"grad_norm": 1.7877177000045776,
"learning_rate": 4.760892901743944e-05,
"loss": 2.5183,
"step": 40
},
{
"epoch": 0.4699738903394256,
"grad_norm": 1.8654683828353882,
"learning_rate": 4.698684378016222e-05,
"loss": 2.496,
"step": 45
},
{
"epoch": 0.5221932114882507,
"grad_norm": 1.7663869857788086,
"learning_rate": 4.629798522095818e-05,
"loss": 2.5069,
"step": 50
},
{
"epoch": 0.5744125326370757,
"grad_norm": 1.914778470993042,
"learning_rate": 4.554444538063113e-05,
"loss": 2.4605,
"step": 55
},
{
"epoch": 0.6266318537859008,
"grad_norm": 1.7610474824905396,
"learning_rate": 4.4728512734909844e-05,
"loss": 2.4223,
"step": 60
},
{
"epoch": 0.6788511749347258,
"grad_norm": 1.6278265714645386,
"learning_rate": 4.385266524442241e-05,
"loss": 2.4468,
"step": 65
},
{
"epoch": 0.7310704960835509,
"grad_norm": 1.709999442100525,
"learning_rate": 4.2919562829211283e-05,
"loss": 2.3933,
"step": 70
},
{
"epoch": 0.783289817232376,
"grad_norm": 1.6477118730545044,
"learning_rate": 4.193203929064353e-05,
"loss": 2.454,
"step": 75
},
{
"epoch": 0.835509138381201,
"grad_norm": 1.584774136543274,
"learning_rate": 4.089309370524921e-05,
"loss": 2.4139,
"step": 80
},
{
"epoch": 0.8877284595300261,
"grad_norm": 1.5124833583831787,
"learning_rate": 3.9805881316624506e-05,
"loss": 2.3528,
"step": 85
},
{
"epoch": 0.9399477806788512,
"grad_norm": 1.6704648733139038,
"learning_rate": 3.867370395306068e-05,
"loss": 2.3643,
"step": 90
},
{
"epoch": 0.9921671018276762,
"grad_norm": 1.619065761566162,
"learning_rate": 3.7500000000000003e-05,
"loss": 2.3584,
"step": 95
},
{
"epoch": 1.0443864229765012,
"grad_norm": 1.49176824092865,
"learning_rate": 3.628833395777224e-05,
"loss": 2.1535,
"step": 100
},
{
"epoch": 1.0966057441253263,
"grad_norm": 1.5439894199371338,
"learning_rate": 3.504238561632424e-05,
"loss": 2.0786,
"step": 105
},
{
"epoch": 1.1488250652741514,
"grad_norm": 1.5870767831802368,
"learning_rate": 3.376593887981887e-05,
"loss": 2.0251,
"step": 110
},
{
"epoch": 1.2010443864229765,
"grad_norm": 1.5849578380584717,
"learning_rate": 3.246287027504237e-05,
"loss": 2.0486,
"step": 115
},
{
"epoch": 1.2532637075718016,
"grad_norm": 1.5910005569458008,
"learning_rate": 3.1137137178519985e-05,
"loss": 2.003,
"step": 120
},
{
"epoch": 1.3054830287206267,
"grad_norm": 1.5487288236618042,
"learning_rate": 2.9792765798093465e-05,
"loss": 2.0196,
"step": 125
},
{
"epoch": 1.3577023498694518,
"grad_norm": 1.572863221168518,
"learning_rate": 2.8433838945460205e-05,
"loss": 1.9855,
"step": 130
},
{
"epoch": 1.4099216710182767,
"grad_norm": 1.5365219116210938,
"learning_rate": 2.7064483636808313e-05,
"loss": 2.0136,
"step": 135
},
{
"epoch": 1.4621409921671018,
"grad_norm": 1.619452714920044,
"learning_rate": 2.5688858559204053e-05,
"loss": 1.9636,
"step": 140
},
{
"epoch": 1.514360313315927,
"grad_norm": 1.5345263481140137,
"learning_rate": 2.4311141440795953e-05,
"loss": 1.9941,
"step": 145
},
{
"epoch": 1.566579634464752,
"grad_norm": 1.59757661819458,
"learning_rate": 2.2935516363191693e-05,
"loss": 1.9606,
"step": 150
},
{
"epoch": 1.6187989556135771,
"grad_norm": 1.493499994277954,
"learning_rate": 2.1566161054539798e-05,
"loss": 2.0351,
"step": 155
},
{
"epoch": 1.671018276762402,
"grad_norm": 1.5033268928527832,
"learning_rate": 2.0207234201906547e-05,
"loss": 1.9508,
"step": 160
},
{
"epoch": 1.723237597911227,
"grad_norm": 1.487410545349121,
"learning_rate": 1.8862862821480025e-05,
"loss": 1.9504,
"step": 165
},
{
"epoch": 1.7754569190600522,
"grad_norm": 1.6940875053405762,
"learning_rate": 1.7537129724957642e-05,
"loss": 1.9508,
"step": 170
},
{
"epoch": 1.8276762402088773,
"grad_norm": 1.555407166481018,
"learning_rate": 1.6234061120181142e-05,
"loss": 1.8806,
"step": 175
},
{
"epoch": 1.8798955613577024,
"grad_norm": 1.6240324974060059,
"learning_rate": 1.495761438367577e-05,
"loss": 1.9759,
"step": 180
},
{
"epoch": 1.9321148825065273,
"grad_norm": 1.5653289556503296,
"learning_rate": 1.3711666042227772e-05,
"loss": 1.9918,
"step": 185
},
{
"epoch": 1.9843342036553526,
"grad_norm": 1.5873417854309082,
"learning_rate": 1.2500000000000006e-05,
"loss": 1.8922,
"step": 190
},
{
"epoch": 2.0365535248041775,
"grad_norm": 1.4565200805664062,
"learning_rate": 1.1326296046939333e-05,
"loss": 1.7482,
"step": 195
},
{
"epoch": 2.0887728459530024,
"grad_norm": 1.5391967296600342,
"learning_rate": 1.0194118683375503e-05,
"loss": 1.6307,
"step": 200
},
{
"epoch": 2.1409921671018277,
"grad_norm": 1.497523307800293,
"learning_rate": 9.106906294750805e-06,
"loss": 1.6288,
"step": 205
},
{
"epoch": 2.1932114882506526,
"grad_norm": 1.5081855058670044,
"learning_rate": 8.067960709356478e-06,
"loss": 1.6239,
"step": 210
},
{
"epoch": 2.245430809399478,
"grad_norm": 1.4794273376464844,
"learning_rate": 7.080437170788723e-06,
"loss": 1.6385,
"step": 215
},
{
"epoch": 2.297650130548303,
"grad_norm": 1.590654730796814,
"learning_rate": 6.147334755577596e-06,
"loss": 1.6511,
"step": 220
},
{
"epoch": 2.349869451697128,
"grad_norm": 1.516768217086792,
"learning_rate": 5.271487265090163e-06,
"loss": 1.6356,
"step": 225
},
{
"epoch": 2.402088772845953,
"grad_norm": 1.5770176649093628,
"learning_rate": 4.4555546193688735e-06,
"loss": 1.6163,
"step": 230
},
{
"epoch": 2.454308093994778,
"grad_norm": 1.5710504055023193,
"learning_rate": 3.7020147790418263e-06,
"loss": 1.6386,
"step": 235
},
{
"epoch": 2.506527415143603,
"grad_norm": 1.5108474493026733,
"learning_rate": 3.013156219837776e-06,
"loss": 1.6722,
"step": 240
},
{
"epoch": 2.558746736292428,
"grad_norm": 1.5271434783935547,
"learning_rate": 2.391070982560564e-06,
"loss": 1.6134,
"step": 245
},
{
"epoch": 2.6109660574412534,
"grad_norm": 1.5538915395736694,
"learning_rate": 1.837648319629956e-06,
"loss": 1.6621,
"step": 250
},
{
"epoch": 2.6631853785900783,
"grad_norm": 1.4325404167175293,
"learning_rate": 1.3545689574841342e-06,
"loss": 1.5881,
"step": 255
},
{
"epoch": 2.7154046997389036,
"grad_norm": 1.505527138710022,
"learning_rate": 9.432999922687396e-07,
"loss": 1.6241,
"step": 260
},
{
"epoch": 2.7676240208877285,
"grad_norm": 1.491540789604187,
"learning_rate": 6.050904343141095e-07,
"loss": 1.5779,
"step": 265
},
{
"epoch": 2.8198433420365534,
"grad_norm": 1.508405327796936,
"learning_rate": 3.4096741493194197e-07,
"loss": 1.5785,
"step": 270
},
{
"epoch": 2.8720626631853787,
"grad_norm": 1.4897537231445312,
"learning_rate": 1.517330670512629e-07,
"loss": 1.6248,
"step": 275
},
{
"epoch": 2.9242819843342036,
"grad_norm": 1.4672536849975586,
"learning_rate": 3.796208916709565e-08,
"loss": 1.5898,
"step": 280
},
{
"epoch": 2.976501305483029,
"grad_norm": 1.4578866958618164,
"learning_rate": 0.0,
"loss": 1.5727,
"step": 285
},
{
"epoch": 2.976501305483029,
"step": 285,
"total_flos": 1.0513138638127104e+17,
"train_loss": 2.0445492710983544,
"train_runtime": 467.486,
"train_samples_per_second": 4.916,
"train_steps_per_second": 0.61
}
],
"logging_steps": 5,
"max_steps": 285,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.0513138638127104e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}