{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 342,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.011695906432748537,
"grad_norm": 0.333984375,
"learning_rate": 0.0003,
"loss": 0.3604,
"step": 4
},
{
"epoch": 0.023391812865497075,
"grad_norm": 0.359375,
"learning_rate": 0.00029989634325549745,
"loss": 0.3609,
"step": 8
},
{
"epoch": 0.03508771929824561,
"grad_norm": 0.365234375,
"learning_rate": 0.00029958551628493234,
"loss": 0.359,
"step": 12
},
{
"epoch": 0.04678362573099415,
"grad_norm": 0.40625,
"learning_rate": 0.00029906794867912953,
"loss": 0.3622,
"step": 16
},
{
"epoch": 0.05847953216374269,
"grad_norm": 0.369140625,
"learning_rate": 0.0002983443557630634,
"loss": 0.3654,
"step": 20
},
{
"epoch": 0.07017543859649122,
"grad_norm": 0.34765625,
"learning_rate": 0.0002974157376072144,
"loss": 0.4022,
"step": 24
},
{
"epoch": 0.08187134502923976,
"grad_norm": 0.365234375,
"learning_rate": 0.0002962833776453813,
"loss": 0.3845,
"step": 28
},
{
"epoch": 0.0935672514619883,
"grad_norm": 0.38671875,
"learning_rate": 0.00029494884090086083,
"loss": 0.4164,
"step": 32
},
{
"epoch": 0.10526315789473684,
"grad_norm": 0.373046875,
"learning_rate": 0.00029341397182344444,
"loss": 0.3409,
"step": 36
},
{
"epoch": 0.11695906432748537,
"grad_norm": 0.376953125,
"learning_rate": 0.0002916808917402228,
"loss": 0.3849,
"step": 40
},
{
"epoch": 0.1286549707602339,
"grad_norm": 0.3984375,
"learning_rate": 0.0002897519959237211,
"loss": 0.3758,
"step": 44
},
{
"epoch": 0.14035087719298245,
"grad_norm": 0.380859375,
"learning_rate": 0.00028762995028141694,
"loss": 0.4021,
"step": 48
},
{
"epoch": 0.15204678362573099,
"grad_norm": 0.388671875,
"learning_rate": 0.00028531768767121657,
"loss": 0.4045,
"step": 52
},
{
"epoch": 0.16374269005847952,
"grad_norm": 0.37109375,
"learning_rate": 0.0002828184038479814,
"loss": 0.3909,
"step": 56
},
{
"epoch": 0.17543859649122806,
"grad_norm": 0.365234375,
"learning_rate": 0.00028013555304670765,
"loss": 0.3994,
"step": 60
},
{
"epoch": 0.1871345029239766,
"grad_norm": 0.3828125,
"learning_rate": 0.00027727284320846243,
"loss": 0.351,
"step": 64
},
{
"epoch": 0.19883040935672514,
"grad_norm": 0.400390625,
"learning_rate": 0.0002742342308556763,
"loss": 0.3811,
"step": 68
},
{
"epoch": 0.21052631578947367,
"grad_norm": 0.3828125,
"learning_rate": 0.00027102391562387317,
"loss": 0.4118,
"step": 72
},
{
"epoch": 0.2222222222222222,
"grad_norm": 0.353515625,
"learning_rate": 0.0002676463344573965,
"loss": 0.4378,
"step": 76
},
{
"epoch": 0.23391812865497075,
"grad_norm": 0.37109375,
"learning_rate": 0.00026410615547715297,
"loss": 0.4012,
"step": 80
},
{
"epoch": 0.24561403508771928,
"grad_norm": 0.390625,
"learning_rate": 0.0002604082715288501,
"loss": 0.3868,
"step": 84
},
{
"epoch": 0.2573099415204678,
"grad_norm": 0.359375,
"learning_rate": 0.00025655779342064275,
"loss": 0.3576,
"step": 88
},
{
"epoch": 0.26900584795321636,
"grad_norm": 0.37890625,
"learning_rate": 0.00025256004285953735,
"loss": 0.3907,
"step": 92
},
{
"epoch": 0.2807017543859649,
"grad_norm": 0.37109375,
"learning_rate": 0.0002484205450963138,
"loss": 0.4188,
"step": 96
},
{
"epoch": 0.29239766081871343,
"grad_norm": 0.373046875,
"learning_rate": 0.00024414502128913227,
"loss": 0.3973,
"step": 100
},
{
"epoch": 0.30409356725146197,
"grad_norm": 0.375,
"learning_rate": 0.0002397393805963781,
"loss": 0.3653,
"step": 104
},
{
"epoch": 0.3157894736842105,
"grad_norm": 0.3984375,
"learning_rate": 0.00023520971200967334,
"loss": 0.426,
"step": 108
},
{
"epoch": 0.32748538011695905,
"grad_norm": 0.33203125,
"learning_rate": 0.00023056227593834302,
"loss": 0.3664,
"step": 112
},
{
"epoch": 0.3391812865497076,
"grad_norm": 0.34765625,
"learning_rate": 0.0002258034955569662,
"loss": 0.4021,
"step": 116
},
{
"epoch": 0.3508771929824561,
"grad_norm": 0.376953125,
"learning_rate": 0.00022093994792797152,
"loss": 0.3933,
"step": 120
},
{
"epoch": 0.36257309941520466,
"grad_norm": 0.390625,
"learning_rate": 0.00021597835491154492,
"loss": 0.3885,
"step": 124
},
{
"epoch": 0.3742690058479532,
"grad_norm": 0.361328125,
"learning_rate": 0.00021092557387541476,
"loss": 0.4028,
"step": 128
},
{
"epoch": 0.38596491228070173,
"grad_norm": 0.388671875,
"learning_rate": 0.00020578858821735302,
"loss": 0.3869,
"step": 132
},
{
"epoch": 0.39766081871345027,
"grad_norm": 0.38671875,
"learning_rate": 0.0002005744977134912,
"loss": 0.3927,
"step": 136
},
{
"epoch": 0.4093567251461988,
"grad_norm": 0.390625,
"learning_rate": 0.0001952905087057917,
"loss": 0.4099,
"step": 140
},
{
"epoch": 0.42105263157894735,
"grad_norm": 0.36328125,
"learning_rate": 0.00018994392414223475,
"loss": 0.352,
"step": 144
},
{
"epoch": 0.4327485380116959,
"grad_norm": 0.373046875,
"learning_rate": 0.00018454213348348796,
"loss": 0.417,
"step": 148
},
{
"epoch": 0.4444444444444444,
"grad_norm": 0.375,
"learning_rate": 0.0001790926024900069,
"loss": 0.3728,
"step": 152
},
{
"epoch": 0.45614035087719296,
"grad_norm": 0.40234375,
"learning_rate": 0.0001736028629036829,
"loss": 0.4106,
"step": 156
},
{
"epoch": 0.4678362573099415,
"grad_norm": 0.365234375,
"learning_rate": 0.00016808050203829842,
"loss": 0.45,
"step": 160
},
{
"epoch": 0.47953216374269003,
"grad_norm": 0.376953125,
"learning_rate": 0.0001625331522931772,
"loss": 0.3749,
"step": 164
},
{
"epoch": 0.49122807017543857,
"grad_norm": 0.380859375,
"learning_rate": 0.0001569684806045217,
"loss": 0.402,
"step": 168
},
{
"epoch": 0.5029239766081871,
"grad_norm": 0.369140625,
"learning_rate": 0.00015139417784901834,
"loss": 0.3631,
"step": 172
},
{
"epoch": 0.5146198830409356,
"grad_norm": 0.36328125,
"learning_rate": 0.00014581794821435376,
"loss": 0.3859,
"step": 176
},
{
"epoch": 0.5263157894736842,
"grad_norm": 0.3828125,
"learning_rate": 0.0001402474985513351,
"loss": 0.3781,
"step": 180
},
{
"epoch": 0.5380116959064327,
"grad_norm": 0.345703125,
"learning_rate": 0.00013469052772232873,
"loss": 0.3859,
"step": 184
},
{
"epoch": 0.5497076023391813,
"grad_norm": 0.353515625,
"learning_rate": 0.0001291547159607405,
"loss": 0.3901,
"step": 188
},
{
"epoch": 0.5614035087719298,
"grad_norm": 0.359375,
"learning_rate": 0.0001236477142562421,
"loss": 0.3818,
"step": 192
},
{
"epoch": 0.5730994152046783,
"grad_norm": 0.357421875,
"learning_rate": 0.00011817713378041565,
"loss": 0.376,
"step": 196
},
{
"epoch": 0.5847953216374269,
"grad_norm": 0.365234375,
"learning_rate": 0.00011275053536743006,
"loss": 0.3491,
"step": 200
},
{
"epoch": 0.5964912280701754,
"grad_norm": 0.3828125,
"learning_rate": 0.0001073754190642881,
"loss": 0.4016,
"step": 204
},
{
"epoch": 0.6081871345029239,
"grad_norm": 0.337890625,
"learning_rate": 0.0001020592137650872,
"loss": 0.3907,
"step": 208
},
{
"epoch": 0.6198830409356725,
"grad_norm": 0.3515625,
"learning_rate": 9.680926694361964e-05,
"loss": 0.3731,
"step": 212
},
{
"epoch": 0.631578947368421,
"grad_norm": 0.380859375,
"learning_rate": 9.163283449850317e-05,
"loss": 0.3877,
"step": 216
},
{
"epoch": 0.6432748538011696,
"grad_norm": 0.35546875,
"learning_rate": 8.653707072487629e-05,
"loss": 0.3481,
"step": 220
},
{
"epoch": 0.6549707602339181,
"grad_norm": 0.32421875,
"learning_rate": 8.152901842651953e-05,
"loss": 0.3408,
"step": 224
},
{
"epoch": 0.6666666666666666,
"grad_norm": 0.326171875,
"learning_rate": 7.661559918206663e-05,
"loss": 0.3984,
"step": 228
},
{
"epoch": 0.6783625730994152,
"grad_norm": 0.337890625,
"learning_rate": 7.180360377876123e-05,
"loss": 0.3573,
"step": 232
},
{
"epoch": 0.6900584795321637,
"grad_norm": 0.3671875,
"learning_rate": 6.709968282697749e-05,
"loss": 0.3781,
"step": 236
},
{
"epoch": 0.7017543859649122,
"grad_norm": 0.341796875,
"learning_rate": 6.251033756847875e-05,
"loss": 0.3328,
"step": 240
},
{
"epoch": 0.7134502923976608,
"grad_norm": 0.357421875,
"learning_rate": 5.804191089111711e-05,
"loss": 0.369,
"step": 244
},
{
"epoch": 0.7251461988304093,
"grad_norm": 0.33984375,
"learning_rate": 5.3700578562391386e-05,
"loss": 0.341,
"step": 248
},
{
"epoch": 0.7368421052631579,
"grad_norm": 0.37109375,
"learning_rate": 4.9492340693981646e-05,
"loss": 0.4122,
"step": 252
},
{
"epoch": 0.7485380116959064,
"grad_norm": 0.34765625,
"learning_rate": 4.542301344905496e-05,
"loss": 0.3331,
"step": 256
},
{
"epoch": 0.7602339181286549,
"grad_norm": 0.34765625,
"learning_rate": 4.149822100380507e-05,
"loss": 0.3633,
"step": 260
},
{
"epoch": 0.7719298245614035,
"grad_norm": 0.359375,
"learning_rate": 3.7723387774334816e-05,
"loss": 0.32,
"step": 264
},
{
"epoch": 0.783625730994152,
"grad_norm": 0.3671875,
"learning_rate": 3.410373091962575e-05,
"loss": 0.3594,
"step": 268
},
{
"epoch": 0.7953216374269005,
"grad_norm": 0.345703125,
"learning_rate": 3.064425313095474e-05,
"loss": 0.3852,
"step": 272
},
{
"epoch": 0.8070175438596491,
"grad_norm": 0.34765625,
"learning_rate": 2.734973571772527e-05,
"loss": 0.3965,
"step": 276
},
{
"epoch": 0.8187134502923976,
"grad_norm": 0.345703125,
"learning_rate": 2.422473199926742e-05,
"loss": 0.3503,
"step": 280
},
{
"epoch": 0.8304093567251462,
"grad_norm": 0.349609375,
"learning_rate": 2.1273561011741404e-05,
"loss": 0.367,
"step": 284
},
{
"epoch": 0.8421052631578947,
"grad_norm": 0.345703125,
"learning_rate": 1.8500301538841072e-05,
"loss": 0.354,
"step": 288
},
{
"epoch": 0.8538011695906432,
"grad_norm": 0.35546875,
"learning_rate": 1.5908786474548e-05,
"loss": 0.3526,
"step": 292
},
{
"epoch": 0.8654970760233918,
"grad_norm": 0.33984375,
"learning_rate": 1.3502597525727504e-05,
"loss": 0.3669,
"step": 296
},
{
"epoch": 0.8771929824561403,
"grad_norm": 0.36328125,
"learning_rate": 1.1285060261887419e-05,
"loss": 0.3918,
"step": 300
},
{
"epoch": 0.8888888888888888,
"grad_norm": 0.330078125,
"learning_rate": 9.259239518942219e-06,
"loss": 0.3425,
"step": 304
},
{
"epoch": 0.9005847953216374,
"grad_norm": 0.36328125,
"learning_rate": 7.427935163333998e-06,
"loss": 0.36,
"step": 308
},
{
"epoch": 0.9122807017543859,
"grad_norm": 0.3203125,
"learning_rate": 5.793678222365433e-06,
"loss": 0.376,
"step": 312
},
{
"epoch": 0.9239766081871345,
"grad_norm": 0.349609375,
"learning_rate": 4.358727386092198e-06,
"loss": 0.3917,
"step": 316
},
{
"epoch": 0.935672514619883,
"grad_norm": 0.357421875,
"learning_rate": 3.125065885610456e-06,
"loss": 0.385,
"step": 320
},
{
"epoch": 0.9473684210526315,
"grad_norm": 0.32421875,
"learning_rate": 2.0943987520529725e-06,
"loss": 0.4026,
"step": 324
},
{
"epoch": 0.9590643274853801,
"grad_norm": 0.33203125,
"learning_rate": 1.268150460082823e-06,
"loss": 0.3496,
"step": 328
},
{
"epoch": 0.9707602339181286,
"grad_norm": 0.34765625,
"learning_rate": 6.47462959141265e-07,
"loss": 0.3549,
"step": 332
},
{
"epoch": 0.9824561403508771,
"grad_norm": 0.341796875,
"learning_rate": 2.3319409517102984e-07,
"loss": 0.3478,
"step": 336
},
{
"epoch": 0.9941520467836257,
"grad_norm": 0.322265625,
"learning_rate": 2.5916424995919837e-08,
"loss": 0.3478,
"step": 340
},
{
"epoch": 1.0,
"step": 342,
"total_flos": 2.2327861511297434e+17,
"train_loss": 0.37826008022877206,
"train_runtime": 1375.4554,
"train_samples_per_second": 7.937,
"train_steps_per_second": 0.249
}
],
"logging_steps": 4,
"max_steps": 342,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 2.2327861511297434e+17,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}