general_reasoner-step_rft_fixed / trainer_state.json
Renjie-Ranger's picture
Upload folder using huggingface_hub
1fe9217 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 500,
"global_step": 532,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.037638202775817454,
"grad_norm": 13.610795098502107,
"learning_rate": 8.333333333333333e-07,
"loss": 0.5914,
"step": 10
},
{
"epoch": 0.07527640555163491,
"grad_norm": 0.9699645754094162,
"learning_rate": 1.7592592592592594e-06,
"loss": 0.5843,
"step": 20
},
{
"epoch": 0.11291460832745237,
"grad_norm": 0.5845916009861534,
"learning_rate": 2.6851851851851856e-06,
"loss": 0.5781,
"step": 30
},
{
"epoch": 0.15055281110326982,
"grad_norm": 0.5917279916406908,
"learning_rate": 3.6111111111111115e-06,
"loss": 0.5653,
"step": 40
},
{
"epoch": 0.18819101387908727,
"grad_norm": 0.5696097079287838,
"learning_rate": 4.537037037037038e-06,
"loss": 0.5927,
"step": 50
},
{
"epoch": 0.22582921665490474,
"grad_norm": 0.5761389794818529,
"learning_rate": 4.998650245168965e-06,
"loss": 0.5859,
"step": 60
},
{
"epoch": 0.26346741943072216,
"grad_norm": 0.576477237978447,
"learning_rate": 4.987860949769804e-06,
"loss": 0.582,
"step": 70
},
{
"epoch": 0.30110562220653964,
"grad_norm": 0.556511371295206,
"learning_rate": 4.9663289476829e-06,
"loss": 0.5712,
"step": 80
},
{
"epoch": 0.3387438249823571,
"grad_norm": 0.47938070366971103,
"learning_rate": 4.934147215158732e-06,
"loss": 0.5614,
"step": 90
},
{
"epoch": 0.37638202775817453,
"grad_norm": 0.41700685090674505,
"learning_rate": 4.891454714510784e-06,
"loss": 0.6062,
"step": 100
},
{
"epoch": 0.414020230533992,
"grad_norm": 0.4084825379730698,
"learning_rate": 4.838435794069406e-06,
"loss": 0.5703,
"step": 110
},
{
"epoch": 0.4516584333098095,
"grad_norm": 0.4167412677659701,
"learning_rate": 4.775319392156593e-06,
"loss": 0.5933,
"step": 120
},
{
"epoch": 0.4892966360856269,
"grad_norm": 0.4066934770436394,
"learning_rate": 4.70237804851899e-06,
"loss": 0.5457,
"step": 130
},
{
"epoch": 0.5269348388614443,
"grad_norm": 0.39475972226137096,
"learning_rate": 4.619926727487774e-06,
"loss": 0.5797,
"step": 140
},
{
"epoch": 0.5645730416372619,
"grad_norm": 0.42220410240509404,
"learning_rate": 4.528321457947091e-06,
"loss": 0.5977,
"step": 150
},
{
"epoch": 0.6022112444130793,
"grad_norm": 0.3914884231722468,
"learning_rate": 4.427957795983715e-06,
"loss": 0.5789,
"step": 160
},
{
"epoch": 0.6398494471888967,
"grad_norm": 0.40266440109112717,
"learning_rate": 4.319269116856291e-06,
"loss": 0.5716,
"step": 170
},
{
"epoch": 0.6774876499647142,
"grad_norm": 0.41129740802214726,
"learning_rate": 4.2027247436595245e-06,
"loss": 0.5597,
"step": 180
},
{
"epoch": 0.7151258527405316,
"grad_norm": 0.4686973204175566,
"learning_rate": 4.078827920763835e-06,
"loss": 0.5803,
"step": 190
},
{
"epoch": 0.7527640555163491,
"grad_norm": 0.38540729078595337,
"learning_rate": 3.948113640781265e-06,
"loss": 0.5723,
"step": 200
},
{
"epoch": 0.7904022582921666,
"grad_norm": 0.40445228822396667,
"learning_rate": 3.8111463344409026e-06,
"loss": 0.561,
"step": 210
},
{
"epoch": 0.828040461067984,
"grad_norm": 0.4653487942714477,
"learning_rate": 3.668517433349069e-06,
"loss": 0.5807,
"step": 220
},
{
"epoch": 0.8656786638438014,
"grad_norm": 0.4241292760945627,
"learning_rate": 3.520842816158374e-06,
"loss": 0.5889,
"step": 230
},
{
"epoch": 0.903316866619619,
"grad_norm": 0.4124860362346966,
"learning_rate": 3.368760149173219e-06,
"loss": 0.5976,
"step": 240
},
{
"epoch": 0.9409550693954364,
"grad_norm": 0.40795355323908694,
"learning_rate": 3.212926132875141e-06,
"loss": 0.5658,
"step": 250
},
{
"epoch": 0.9785932721712538,
"grad_norm": 0.41045446321734286,
"learning_rate": 3.054013666257638e-06,
"loss": 0.5814,
"step": 260
},
{
"epoch": 1.015055281110327,
"grad_norm": 0.4409995582342298,
"learning_rate": 2.8927089412150176e-06,
"loss": 0.5907,
"step": 270
},
{
"epoch": 1.0526934838861444,
"grad_norm": 0.4043201145148639,
"learning_rate": 2.729708479531844e-06,
"loss": 0.5346,
"step": 280
},
{
"epoch": 1.090331686661962,
"grad_norm": 0.3985913687368217,
"learning_rate": 2.5657161252674047e-06,
"loss": 0.535,
"step": 290
},
{
"epoch": 1.1279698894377794,
"grad_norm": 0.3946559086316167,
"learning_rate": 2.4014400055222337e-06,
"loss": 0.5177,
"step": 300
},
{
"epoch": 1.1656080922135967,
"grad_norm": 0.42945485549653484,
"learning_rate": 2.2375894727102552e-06,
"loss": 0.553,
"step": 310
},
{
"epoch": 1.2032462949894143,
"grad_norm": 0.39437996434435546,
"learning_rate": 2.0748720415399542e-06,
"loss": 0.5312,
"step": 320
},
{
"epoch": 1.2408844977652318,
"grad_norm": 0.3716214397572245,
"learning_rate": 1.913990333930858e-06,
"loss": 0.5286,
"step": 330
},
{
"epoch": 1.278522700541049,
"grad_norm": 0.40068573317589684,
"learning_rate": 1.7556390450573213e-06,
"loss": 0.5362,
"step": 340
},
{
"epoch": 1.3161609033168666,
"grad_norm": 0.3616596747761561,
"learning_rate": 1.600501943620384e-06,
"loss": 0.5286,
"step": 350
},
{
"epoch": 1.353799106092684,
"grad_norm": 0.38779033566892823,
"learning_rate": 1.4492489193006884e-06,
"loss": 0.5263,
"step": 360
},
{
"epoch": 1.3914373088685015,
"grad_norm": 0.3757853739580612,
"learning_rate": 1.302533090141689e-06,
"loss": 0.516,
"step": 370
},
{
"epoch": 1.429075511644319,
"grad_norm": 0.38727201501707054,
"learning_rate": 1.1609879823536233e-06,
"loss": 0.5416,
"step": 380
},
{
"epoch": 1.4667137144201363,
"grad_norm": 0.41256807796835565,
"learning_rate": 1.0252247947159846e-06,
"loss": 0.5489,
"step": 390
},
{
"epoch": 1.5043519171959538,
"grad_norm": 0.3710868452573983,
"learning_rate": 8.95829759390954e-07,
"loss": 0.545,
"step": 400
},
{
"epoch": 1.5419901199717714,
"grad_norm": 0.3694905808253818,
"learning_rate": 7.733616105439077e-07,
"loss": 0.5208,
"step": 410
},
{
"epoch": 1.5796283227475887,
"grad_norm": 0.3615477041656024,
"learning_rate": 6.58349171701651e-07,
"loss": 0.5237,
"step": 420
},
{
"epoch": 1.6172665255234062,
"grad_norm": 0.3804185781850594,
"learning_rate": 5.51289072266255e-07,
"loss": 0.5608,
"step": 430
},
{
"epoch": 1.6549047282992237,
"grad_norm": 0.40468195026925746,
"learning_rate": 4.5264360304473065e-07,
"loss": 0.5576,
"step": 440
},
{
"epoch": 1.692542931075041,
"grad_norm": 0.367137564529641,
"learning_rate": 3.6283872005444087e-07,
"loss": 0.5312,
"step": 450
},
{
"epoch": 1.7301811338508586,
"grad_norm": 0.3963215645004468,
"learning_rate": 2.8226220522394735e-07,
"loss": 0.5362,
"step": 460
},
{
"epoch": 1.7678193366266761,
"grad_norm": 0.38136903383145776,
"learning_rate": 2.1126199193144904e-07,
"loss": 0.558,
"step": 470
},
{
"epoch": 1.8054575394024934,
"grad_norm": 0.38938465164870284,
"learning_rate": 1.5014466261124128e-07,
"loss": 0.5114,
"step": 480
},
{
"epoch": 1.843095742178311,
"grad_norm": 0.37548505603471355,
"learning_rate": 9.917412491559337e-08,
"loss": 0.5402,
"step": 490
},
{
"epoch": 1.8807339449541285,
"grad_norm": 0.3575809194576506,
"learning_rate": 5.8570472148445633e-08,
"loss": 0.5349,
"step": 500
},
{
"epoch": 1.9183721477299458,
"grad_norm": 0.3899130339561275,
"learning_rate": 2.8509032891635146e-08,
"loss": 0.5419,
"step": 510
},
{
"epoch": 1.9560103505057633,
"grad_norm": 0.4032737933879565,
"learning_rate": 9.119613927399684e-09,
"loss": 0.5312,
"step": 520
},
{
"epoch": 1.9936485532815809,
"grad_norm": 0.3608459597802774,
"learning_rate": 4.859397262726995e-10,
"loss": 0.5583,
"step": 530
},
{
"epoch": 2.0,
"step": 532,
"total_flos": 8.119097669378376e+17,
"train_loss": 0.558736775154458,
"train_runtime": 23106.9266,
"train_samples_per_second": 11.774,
"train_steps_per_second": 0.023
}
],
"logging_steps": 10,
"max_steps": 532,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 8.119097669378376e+17,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}