sft_06221544_policy2 / trainer_state.json
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.9528795811518327,
"eval_steps": 500,
"global_step": 141,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.041884816753926704,
"grad_norm": 0.21733999252319336,
"learning_rate": 1.3333333333333333e-05,
"loss": 1.0015,
"step": 2
},
{
"epoch": 0.08376963350785341,
"grad_norm": 0.21422453224658966,
"learning_rate": 2.6666666666666667e-05,
"loss": 0.9283,
"step": 4
},
{
"epoch": 0.1256544502617801,
"grad_norm": 0.18293505907058716,
"learning_rate": 4e-05,
"loss": 0.8643,
"step": 6
},
{
"epoch": 0.16753926701570682,
"grad_norm": 0.2287076711654663,
"learning_rate": 5.333333333333333e-05,
"loss": 1.0013,
"step": 8
},
{
"epoch": 0.2094240837696335,
"grad_norm": 0.2509159445762634,
"learning_rate": 6.666666666666667e-05,
"loss": 0.9341,
"step": 10
},
{
"epoch": 0.2513089005235602,
"grad_norm": 0.21400025486946106,
"learning_rate": 8e-05,
"loss": 0.9068,
"step": 12
},
{
"epoch": 0.2931937172774869,
"grad_norm": 0.20343424379825592,
"learning_rate": 9.333333333333334e-05,
"loss": 0.8083,
"step": 14
},
{
"epoch": 0.33507853403141363,
"grad_norm": 0.25129085779190063,
"learning_rate": 9.998445910004082e-05,
"loss": 0.8985,
"step": 16
},
{
"epoch": 0.3769633507853403,
"grad_norm": 0.28799620270729065,
"learning_rate": 9.986018985905901e-05,
"loss": 0.9362,
"step": 18
},
{
"epoch": 0.418848167539267,
"grad_norm": 0.29551103711128235,
"learning_rate": 9.961196033000861e-05,
"loss": 0.9174,
"step": 20
},
{
"epoch": 0.4607329842931937,
"grad_norm": 0.31457847356796265,
"learning_rate": 9.924038765061042e-05,
"loss": 0.8732,
"step": 22
},
{
"epoch": 0.5026178010471204,
"grad_norm": 0.2715758979320526,
"learning_rate": 9.874639560909117e-05,
"loss": 0.8653,
"step": 24
},
{
"epoch": 0.5445026178010471,
"grad_norm": 0.25824904441833496,
"learning_rate": 9.81312123475006e-05,
"loss": 0.8592,
"step": 26
},
{
"epoch": 0.5863874345549738,
"grad_norm": 0.22251802682876587,
"learning_rate": 9.73963673083566e-05,
"loss": 0.805,
"step": 28
},
{
"epoch": 0.6282722513089005,
"grad_norm": 0.22290602326393127,
"learning_rate": 9.654368743221022e-05,
"loss": 0.7464,
"step": 30
},
{
"epoch": 0.6701570680628273,
"grad_norm": 0.2132934331893921,
"learning_rate": 9.557529261558367e-05,
"loss": 0.7796,
"step": 32
},
{
"epoch": 0.7120418848167539,
"grad_norm": 0.19897978007793427,
"learning_rate": 9.449359044057345e-05,
"loss": 0.7768,
"step": 34
},
{
"epoch": 0.7539267015706806,
"grad_norm": 0.21434754133224487,
"learning_rate": 9.330127018922194e-05,
"loss": 0.8751,
"step": 36
},
{
"epoch": 0.7958115183246073,
"grad_norm": 0.2214685082435608,
"learning_rate": 9.200129615753859e-05,
"loss": 0.7966,
"step": 38
},
{
"epoch": 0.837696335078534,
"grad_norm": 0.20432740449905396,
"learning_rate": 9.059690028579284e-05,
"loss": 0.7963,
"step": 40
},
{
"epoch": 0.8795811518324608,
"grad_norm": 0.23610389232635498,
"learning_rate": 8.90915741234015e-05,
"loss": 0.8332,
"step": 42
},
{
"epoch": 0.9214659685863874,
"grad_norm": 0.23847244679927826,
"learning_rate": 8.748906014838672e-05,
"loss": 0.7869,
"step": 44
},
{
"epoch": 0.9633507853403142,
"grad_norm": 0.23536360263824463,
"learning_rate": 8.579334246298593e-05,
"loss": 0.8517,
"step": 46
},
{
"epoch": 1.0052356020942408,
"grad_norm": 0.24705368280410767,
"learning_rate": 8.400863688854597e-05,
"loss": 0.8694,
"step": 48
},
{
"epoch": 1.0471204188481675,
"grad_norm": 0.22067400813102722,
"learning_rate": 8.213938048432697e-05,
"loss": 0.8002,
"step": 50
},
{
"epoch": 1.0890052356020943,
"grad_norm": 0.21238695085048676,
"learning_rate": 8.019022051627388e-05,
"loss": 0.7798,
"step": 52
},
{
"epoch": 1.130890052356021,
"grad_norm": 0.2280968427658081,
"learning_rate": 7.81660029031811e-05,
"loss": 0.8114,
"step": 54
},
{
"epoch": 1.1727748691099475,
"grad_norm": 0.2327934205532074,
"learning_rate": 7.60717601689749e-05,
"loss": 0.8206,
"step": 56
},
{
"epoch": 1.2146596858638743,
"grad_norm": 0.2437446266412735,
"learning_rate": 7.391269893106592e-05,
"loss": 0.8477,
"step": 58
},
{
"epoch": 1.256544502617801,
"grad_norm": 0.2429206371307373,
"learning_rate": 7.169418695587791e-05,
"loss": 0.8213,
"step": 60
},
{
"epoch": 1.2984293193717278,
"grad_norm": 0.23616257309913635,
"learning_rate": 6.942173981373474e-05,
"loss": 0.8287,
"step": 62
},
{
"epoch": 1.3403141361256545,
"grad_norm": 0.2597900629043579,
"learning_rate": 6.710100716628344e-05,
"loss": 0.7643,
"step": 64
},
{
"epoch": 1.3821989528795813,
"grad_norm": 0.2668094038963318,
"learning_rate": 6.473775872054521e-05,
"loss": 0.7315,
"step": 66
},
{
"epoch": 1.4240837696335078,
"grad_norm": 0.2583785355091095,
"learning_rate": 6.233786988451468e-05,
"loss": 0.7811,
"step": 68
},
{
"epoch": 1.4659685863874345,
"grad_norm": 0.2756960391998291,
"learning_rate": 5.9907307159969884e-05,
"loss": 0.754,
"step": 70
},
{
"epoch": 1.5078534031413613,
"grad_norm": 0.28554120659828186,
"learning_rate": 5.745211330880872e-05,
"loss": 0.7809,
"step": 72
},
{
"epoch": 1.5497382198952878,
"grad_norm": 0.3146248459815979,
"learning_rate": 5.497839232979084e-05,
"loss": 0.8319,
"step": 74
},
{
"epoch": 1.5916230366492146,
"grad_norm": 0.2715282142162323,
"learning_rate": 5.249229428303486e-05,
"loss": 0.7745,
"step": 76
},
{
"epoch": 1.6335078534031413,
"grad_norm": 0.28730642795562744,
"learning_rate": 5e-05,
"loss": 0.8002,
"step": 78
},
{
"epoch": 1.675392670157068,
"grad_norm": 0.3208574652671814,
"learning_rate": 4.750770571696514e-05,
"loss": 0.7718,
"step": 80
},
{
"epoch": 1.7172774869109948,
"grad_norm": 0.307959645986557,
"learning_rate": 4.502160767020918e-05,
"loss": 0.742,
"step": 82
},
{
"epoch": 1.7591623036649215,
"grad_norm": 0.34215685725212097,
"learning_rate": 4.254788669119127e-05,
"loss": 0.7687,
"step": 84
},
{
"epoch": 1.8010471204188483,
"grad_norm": 0.28537240624427795,
"learning_rate": 4.0092692840030134e-05,
"loss": 0.7066,
"step": 86
},
{
"epoch": 1.8429319371727748,
"grad_norm": 0.31295880675315857,
"learning_rate": 3.7662130115485314e-05,
"loss": 0.7098,
"step": 88
},
{
"epoch": 1.8848167539267016,
"grad_norm": 0.2994559705257416,
"learning_rate": 3.5262241279454785e-05,
"loss": 0.6961,
"step": 90
},
{
"epoch": 1.9267015706806283,
"grad_norm": 0.3853859603404999,
"learning_rate": 3.289899283371657e-05,
"loss": 0.7948,
"step": 92
},
{
"epoch": 1.9685863874345548,
"grad_norm": 0.3592422306537628,
"learning_rate": 3.0578260186265265e-05,
"loss": 0.7264,
"step": 94
},
{
"epoch": 2.0104712041884816,
"grad_norm": 0.42988094687461853,
"learning_rate": 2.8305813044122097e-05,
"loss": 0.6696,
"step": 96
},
{
"epoch": 2.0523560209424083,
"grad_norm": 0.3221912086009979,
"learning_rate": 2.6087301068934106e-05,
"loss": 0.7135,
"step": 98
},
{
"epoch": 2.094240837696335,
"grad_norm": 0.3392656743526459,
"learning_rate": 2.39282398310251e-05,
"loss": 0.7865,
"step": 100
},
{
"epoch": 2.136125654450262,
"grad_norm": 0.32586315274238586,
"learning_rate": 2.1833997096818898e-05,
"loss": 0.7411,
"step": 102
},
{
"epoch": 2.1780104712041886,
"grad_norm": 0.36464083194732666,
"learning_rate": 1.980977948372612e-05,
"loss": 0.772,
"step": 104
},
{
"epoch": 2.2198952879581153,
"grad_norm": 0.3270440697669983,
"learning_rate": 1.7860619515673033e-05,
"loss": 0.7096,
"step": 106
},
{
"epoch": 2.261780104712042,
"grad_norm": 0.3391498923301697,
"learning_rate": 1.599136311145402e-05,
"loss": 0.7378,
"step": 108
},
{
"epoch": 2.303664921465969,
"grad_norm": 0.3948444128036499,
"learning_rate": 1.4206657537014079e-05,
"loss": 0.6471,
"step": 110
},
{
"epoch": 2.345549738219895,
"grad_norm": 0.38464757800102234,
"learning_rate": 1.2510939851613285e-05,
"loss": 0.7359,
"step": 112
},
{
"epoch": 2.387434554973822,
"grad_norm": 0.38830411434173584,
"learning_rate": 1.090842587659851e-05,
"loss": 0.7572,
"step": 114
},
{
"epoch": 2.4293193717277486,
"grad_norm": 0.37749606370925903,
"learning_rate": 9.403099714207175e-06,
"loss": 0.7411,
"step": 116
},
{
"epoch": 2.4712041884816753,
"grad_norm": 0.3769814968109131,
"learning_rate": 7.998703842461431e-06,
"loss": 0.6692,
"step": 118
},
{
"epoch": 2.513089005235602,
"grad_norm": 0.4309244453907013,
"learning_rate": 6.698729810778065e-06,
"loss": 0.7619,
"step": 120
},
{
"epoch": 2.554973821989529,
"grad_norm": 0.37709513306617737,
"learning_rate": 5.506409559426573e-06,
"loss": 0.708,
"step": 122
},
{
"epoch": 2.5968586387434556,
"grad_norm": 0.40647101402282715,
"learning_rate": 4.424707384416344e-06,
"loss": 0.7279,
"step": 124
},
{
"epoch": 2.6387434554973823,
"grad_norm": 0.3731394112110138,
"learning_rate": 3.4563125677897932e-06,
"loss": 0.7629,
"step": 126
},
{
"epoch": 2.680628272251309,
"grad_norm": 0.40869101881980896,
"learning_rate": 2.603632691643415e-06,
"loss": 0.7334,
"step": 128
},
{
"epoch": 2.7225130890052354,
"grad_norm": 0.33611902594566345,
"learning_rate": 1.8687876524993987e-06,
"loss": 0.6643,
"step": 130
},
{
"epoch": 2.7643979057591626,
"grad_norm": 0.38377344608306885,
"learning_rate": 1.2536043909088191e-06,
"loss": 0.7363,
"step": 132
},
{
"epoch": 2.806282722513089,
"grad_norm": 0.38260236382484436,
"learning_rate": 7.596123493895991e-07,
"loss": 0.7018,
"step": 134
},
{
"epoch": 2.8481675392670156,
"grad_norm": 0.3747893273830414,
"learning_rate": 3.8803966999139684e-07,
"loss": 0.8282,
"step": 136
},
{
"epoch": 2.8900523560209423,
"grad_norm": 0.40073326230049133,
"learning_rate": 1.3981014094099353e-07,
"loss": 0.6387,
"step": 138
},
{
"epoch": 2.931937172774869,
"grad_norm": 0.4021676480770111,
"learning_rate": 1.5540899959187727e-08,
"loss": 0.7076,
"step": 140
},
{
"epoch": 2.9528795811518327,
"step": 141,
"total_flos": 5.744674604829082e+16,
"train_loss": 0.7884446775659602,
"train_runtime": 510.788,
"train_samples_per_second": 8.957,
"train_steps_per_second": 0.276
}
],
"logging_steps": 2,
"max_steps": 141,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 5.744674604829082e+16,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}
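
This is the trainer_state.json written by the Hugging Face Transformers Trainer at the end of a supervised fine-tuning run: 141 optimizer steps over roughly 3 epochs, with loss, gradient norm, and learning rate logged every 2 steps, and a run summary (train_loss ≈ 0.788, ~511 s runtime) appended as the final log_history entry. Below is a minimal sketch of how the file can be inspected; the local file path, the matplotlib dependency, and the output filename are assumptions for illustration, not part of the original upload.

```python
# Minimal sketch (assumes trainer_state.json is in the current directory
# and matplotlib is installed): read the Trainer log and plot the curves.
import json

import matplotlib.pyplot as plt

with open("trainer_state.json") as f:
    state = json.load(f)

# Per-step entries carry "loss", "grad_norm" and "learning_rate";
# the last entry instead holds the run summary (train_loss, runtime, FLOs).
logs = [e for e in state["log_history"] if "loss" in e]
summary = state["log_history"][-1]

steps = [e["step"] for e in logs]
losses = [e["loss"] for e in logs]
lrs = [e["learning_rate"] for e in logs]

print(f"entries logged: {len(logs)} (every {state['logging_steps']} steps)")
print(f"final train_loss: {summary.get('train_loss')}")
print(f"train_runtime (s): {summary.get('train_runtime')}")

fig, (ax_loss, ax_lr) = plt.subplots(2, 1, sharex=True, figsize=(6, 6))
ax_loss.plot(steps, losses)
ax_loss.set_ylabel("training loss")
ax_lr.plot(steps, lrs)
ax_lr.set_ylabel("learning rate")
ax_lr.set_xlabel("global step")
fig.tight_layout()
fig.savefig("training_curves.png")  # hypothetical output name
```

Plotted this way, the logged learning rate warms up to about 1e-4 over the first ~16 steps and then follows what looks like a cosine decay toward zero, while the loss drops from roughly 1.0 to the 0.7 range over the run.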