{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.7836990595611285,
  "eval_steps": 500,
  "global_step": 500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.001567398119122257,
      "grad_norm": 0.5552594661712646,
      "learning_rate": 3.448275862068966e-06,
      "loss": 3.3921,
      "step": 1
    },
    {
      "epoch": 0.01567398119122257,
      "grad_norm": 0.660531222820282,
      "learning_rate": 3.4482758620689657e-05,
      "loss": 3.5922,
      "step": 10
    },
    {
      "epoch": 0.03134796238244514,
      "grad_norm": 0.5598097443580627,
      "learning_rate": 6.896551724137931e-05,
      "loss": 3.619,
      "step": 20
    },
    {
      "epoch": 0.047021943573667714,
      "grad_norm": 0.5996779799461365,
      "learning_rate": 0.00010344827586206898,
      "loss": 3.4824,
      "step": 30
    },
    {
      "epoch": 0.06269592476489028,
      "grad_norm": 0.8172075152397156,
      "learning_rate": 0.00013793103448275863,
      "loss": 3.3997,
      "step": 40
    },
    {
      "epoch": 0.07836990595611286,
      "grad_norm": 1.6439019441604614,
      "learning_rate": 0.00017241379310344826,
      "loss": 3.2485,
      "step": 50
    },
    {
      "epoch": 0.09404388714733543,
      "grad_norm": 0.28180772066116333,
      "learning_rate": 0.00019999942697524717,
      "loss": 2.9975,
      "step": 60
    },
    {
      "epoch": 0.109717868338558,
      "grad_norm": 0.3870689868927002,
      "learning_rate": 0.00019997937179843937,
      "loss": 3.0446,
      "step": 70
    },
    {
      "epoch": 0.12539184952978055,
      "grad_norm": 0.575548529624939,
      "learning_rate": 0.00019993067195079803,
      "loss": 3.0178,
      "step": 80
    },
    {
      "epoch": 0.14106583072100312,
      "grad_norm": 0.781446635723114,
      "learning_rate": 0.00019985334138511237,
      "loss": 3.0394,
      "step": 90
    },
    {
      "epoch": 0.15673981191222572,
      "grad_norm": 1.725696325302124,
      "learning_rate": 0.00019974740225703878,
      "loss": 3.0751,
      "step": 100
    },
    {
      "epoch": 0.1724137931034483,
      "grad_norm": 0.2845414876937866,
      "learning_rate": 0.00019961288491875278,
      "loss": 2.9291,
      "step": 110
    },
    {
      "epoch": 0.18808777429467086,
      "grad_norm": 0.36810651421546936,
      "learning_rate": 0.00019944982791025333,
      "loss": 2.9491,
      "step": 120
    },
    {
      "epoch": 0.20376175548589343,
      "grad_norm": 0.5454439520835876,
      "learning_rate": 0.00019925827794832056,
      "loss": 3.0337,
      "step": 130
    },
    {
      "epoch": 0.219435736677116,
      "grad_norm": 0.6503669619560242,
      "learning_rate": 0.00019903828991313138,
      "loss": 3.0246,
      "step": 140
    },
    {
      "epoch": 0.23510971786833856,
      "grad_norm": 1.4392451047897339,
      "learning_rate": 0.00019878992683253582,
      "loss": 3.0232,
      "step": 150
    },
    {
      "epoch": 0.2507836990595611,
      "grad_norm": 0.2859440743923187,
      "learning_rate": 0.00019851325986399934,
      "loss": 2.8955,
      "step": 160
    },
    {
      "epoch": 0.2664576802507837,
      "grad_norm": 0.44268104434013367,
      "learning_rate": 0.0001982083682742156,
      "loss": 2.9338,
      "step": 170
    },
    {
      "epoch": 0.28213166144200624,
      "grad_norm": 0.5128395557403564,
      "learning_rate": 0.00019787533941639638,
      "loss": 3.0089,
      "step": 180
    },
    {
      "epoch": 0.29780564263322884,
      "grad_norm": 0.7328920364379883,
      "learning_rate": 0.00019751426870524407,
      "loss": 3.0157,
      "step": 190
    },
    {
      "epoch": 0.31347962382445144,
      "grad_norm": 1.5265012979507446,
      "learning_rate": 0.000197125259589615,
      "loss": 2.9007,
      "step": 200
    },
    {
      "epoch": 0.329153605015674,
      "grad_norm": 0.2766813635826111,
      "learning_rate": 0.0001967084235228807,
      "loss": 2.8275,
      "step": 210
    },
    {
      "epoch": 0.3448275862068966,
      "grad_norm": 0.36695396900177,
      "learning_rate": 0.00019626387993099579,
      "loss": 2.9158,
      "step": 220
    },
    {
      "epoch": 0.3605015673981191,
      "grad_norm": 0.5359162092208862,
      "learning_rate": 0.00019579175617828187,
      "loss": 2.9465,
      "step": 230
    },
    {
      "epoch": 0.3761755485893417,
      "grad_norm": 0.6529833674430847,
      "learning_rate": 0.0001952921875309368,
      "loss": 2.981,
      "step": 240
    },
    {
      "epoch": 0.39184952978056425,
      "grad_norm": 1.5314627885818481,
      "learning_rate": 0.00019476531711828027,
      "loss": 2.9737,
      "step": 250
    },
    {
      "epoch": 0.40752351097178685,
      "grad_norm": 0.2949506342411041,
      "learning_rate": 0.00019421129589174618,
      "loss": 2.8208,
      "step": 260
    },
    {
      "epoch": 0.4231974921630094,
      "grad_norm": 0.39567869901657104,
      "learning_rate": 0.00019363028258163447,
      "loss": 2.8557,
      "step": 270
    },
    {
      "epoch": 0.438871473354232,
      "grad_norm": 0.5587254166603088,
      "learning_rate": 0.00019302244365163376,
      "loss": 2.9494,
      "step": 280
    },
    {
      "epoch": 0.45454545454545453,
      "grad_norm": 0.7218978404998779,
      "learning_rate": 0.0001923879532511287,
      "loss": 2.9742,
      "step": 290
    },
    {
      "epoch": 0.4702194357366771,
      "grad_norm": 1.4482598304748535,
      "learning_rate": 0.0001917269931653049,
      "loss": 2.8646,
      "step": 300
    },
    {
      "epoch": 0.48589341692789967,
      "grad_norm": 0.2901701033115387,
      "learning_rate": 0.00019103975276306678,
      "loss": 2.7788,
      "step": 310
    },
    {
      "epoch": 0.5015673981191222,
      "grad_norm": 0.4310539960861206,
      "learning_rate": 0.00019032642894278192,
      "loss": 2.8655,
      "step": 320
    },
    {
      "epoch": 0.5172413793103449,
      "grad_norm": 0.5589954853057861,
      "learning_rate": 0.0001895872260758688,
      "loss": 2.914,
      "step": 330
    },
    {
      "epoch": 0.5329153605015674,
      "grad_norm": 0.7243526577949524,
      "learning_rate": 0.00018882235594824308,
      "loss": 2.9191,
      "step": 340
    },
    {
      "epoch": 0.54858934169279,
      "grad_norm": 1.4200222492218018,
      "learning_rate": 0.00018803203769963967,
      "loss": 2.8128,
      "step": 350
    },
    {
      "epoch": 0.5642633228840125,
      "grad_norm": 0.26594147086143494,
      "learning_rate": 0.000187216497760828,
      "loss": 2.762,
      "step": 360
    },
    {
      "epoch": 0.5799373040752351,
      "grad_norm": 0.3894258439540863,
      "learning_rate": 0.00018637596978873835,
      "loss": 2.9077,
      "step": 370
    },
    {
      "epoch": 0.5956112852664577,
      "grad_norm": 0.5348561406135559,
      "learning_rate": 0.00018551069459951758,
      "loss": 2.9292,
      "step": 380
    },
    {
      "epoch": 0.6112852664576802,
      "grad_norm": 0.746507465839386,
      "learning_rate": 0.00018462092009953408,
      "loss": 2.8795,
      "step": 390
    },
    {
      "epoch": 0.6269592476489029,
      "grad_norm": 1.5225753784179688,
      "learning_rate": 0.0001837069012143511,
      "loss": 2.8263,
      "step": 400
    },
    {
      "epoch": 0.6426332288401254,
      "grad_norm": 0.26905450224876404,
      "learning_rate": 0.00018276889981568906,
      "loss": 2.7218,
      "step": 410
    },
    {
      "epoch": 0.658307210031348,
      "grad_norm": 0.3912515342235565,
      "learning_rate": 0.00018180718464639787,
      "loss": 2.819,
      "step": 420
    },
    {
      "epoch": 0.6739811912225705,
      "grad_norm": 0.5661373138427734,
      "learning_rate": 0.00018082203124346045,
      "loss": 2.8772,
      "step": 430
    },
    {
      "epoch": 0.6896551724137931,
      "grad_norm": 0.805776059627533,
      "learning_rate": 0.0001798137218590498,
      "loss": 2.9562,
      "step": 440
    },
    {
      "epoch": 0.7053291536050157,
      "grad_norm": 1.4879732131958008,
      "learning_rate": 0.00017878254537966216,
      "loss": 2.7925,
      "step": 450
    },
    {
      "epoch": 0.7210031347962382,
      "grad_norm": 0.28762391209602356,
      "learning_rate": 0.00017772879724334937,
      "loss": 2.8006,
      "step": 460
    },
    {
      "epoch": 0.7366771159874608,
      "grad_norm": 0.41769474744796753,
      "learning_rate": 0.00017665277935507398,
      "loss": 2.8148,
      "step": 470
    },
    {
      "epoch": 0.7523510971786834,
      "grad_norm": 0.633671760559082,
      "learning_rate": 0.00017555480000021198,
      "loss": 2.8461,
      "step": 480
    },
    {
      "epoch": 0.768025078369906,
      "grad_norm": 0.816681444644928,
      "learning_rate": 0.00017443517375622704,
      "loss": 2.8826,
      "step": 490
    },
    {
      "epoch": 0.7836990595611285,
      "grad_norm": 1.3913438320159912,
      "learning_rate": 0.00017329422140254235,
      "loss": 2.7449,
      "step": 500
    }
  ],
  "logging_steps": 10,
  "max_steps": 1914,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 9.158266300583117e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}