|
{ |
|
"best_metric": 0.8218151926994324, |
|
"best_model_checkpoint": "saves/BLOOM-7B/lora/train_1/checkpoint-210", |
|
"epoch": 0.3390757451462948, |
|
"eval_steps": 10, |
|
"global_step": 310, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.010937927262783703, |
|
"grad_norm": 1.1790575981140137, |
|
"learning_rate": 0.00029999015487222375, |
|
"loss": 1.6811, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.010937927262783703, |
|
"eval_loss": 0.9062243103981018, |
|
"eval_runtime": 210.246, |
|
"eval_samples_per_second": 22.412, |
|
"eval_steps_per_second": 0.704, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.021875854525567406, |
|
"grad_norm": 0.5708920359611511, |
|
"learning_rate": 0.00029996062078124905, |
|
"loss": 0.6248, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.021875854525567406, |
|
"eval_loss": 0.8811991810798645, |
|
"eval_runtime": 210.0954, |
|
"eval_samples_per_second": 22.428, |
|
"eval_steps_per_second": 0.704, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03281378178835111, |
|
"grad_norm": 0.5245503783226013, |
|
"learning_rate": 0.0002999114016039678, |
|
"loss": 0.5466, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.03281378178835111, |
|
"eval_loss": 0.8906806707382202, |
|
"eval_runtime": 210.0232, |
|
"eval_samples_per_second": 22.436, |
|
"eval_steps_per_second": 0.705, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04375170905113481, |
|
"grad_norm": 0.31997227668762207, |
|
"learning_rate": 0.00029984250380130117, |
|
"loss": 0.5591, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.04375170905113481, |
|
"eval_loss": 0.8906031250953674, |
|
"eval_runtime": 209.6861, |
|
"eval_samples_per_second": 22.472, |
|
"eval_steps_per_second": 0.706, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.05468963631391851, |
|
"grad_norm": 0.2958827614784241, |
|
"learning_rate": 0.0002997539364173515, |
|
"loss": 0.5318, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.05468963631391851, |
|
"eval_loss": 0.8625577688217163, |
|
"eval_runtime": 210.182, |
|
"eval_samples_per_second": 22.419, |
|
"eval_steps_per_second": 0.704, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06562756357670221, |
|
"grad_norm": 0.396158367395401, |
|
"learning_rate": 0.00029964571107821494, |
|
"loss": 0.496, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.06562756357670221, |
|
"eval_loss": 0.8501101136207581, |
|
"eval_runtime": 210.0313, |
|
"eval_samples_per_second": 22.435, |
|
"eval_steps_per_second": 0.705, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07656549083948591, |
|
"grad_norm": 0.4420228600502014, |
|
"learning_rate": 0.00029951784199045534, |
|
"loss": 0.4855, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.07656549083948591, |
|
"eval_loss": 0.8276830911636353, |
|
"eval_runtime": 210.1084, |
|
"eval_samples_per_second": 22.427, |
|
"eval_steps_per_second": 0.704, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.08750341810226962, |
|
"grad_norm": 0.4005095958709717, |
|
"learning_rate": 0.0002993703459392396, |
|
"loss": 0.4746, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.08750341810226962, |
|
"eval_loss": 0.8740801215171814, |
|
"eval_runtime": 210.097, |
|
"eval_samples_per_second": 22.428, |
|
"eval_steps_per_second": 0.704, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.09844134536505332, |
|
"grad_norm": 0.4457601308822632, |
|
"learning_rate": 0.00029920324228613376, |
|
"loss": 0.4846, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.09844134536505332, |
|
"eval_loss": 0.8443948030471802, |
|
"eval_runtime": 209.5315, |
|
"eval_samples_per_second": 22.488, |
|
"eval_steps_per_second": 0.706, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.10937927262783702, |
|
"grad_norm": 0.3862694799900055, |
|
"learning_rate": 0.0002990165529665622, |
|
"loss": 0.4424, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.10937927262783702, |
|
"eval_loss": 0.843928873538971, |
|
"eval_runtime": 210.1339, |
|
"eval_samples_per_second": 22.424, |
|
"eval_steps_per_second": 0.704, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.12031719989062073, |
|
"grad_norm": 0.32919445633888245, |
|
"learning_rate": 0.0002988103024869277, |
|
"loss": 0.4514, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.12031719989062073, |
|
"eval_loss": 0.8323902487754822, |
|
"eval_runtime": 210.1443, |
|
"eval_samples_per_second": 22.423, |
|
"eval_steps_per_second": 0.704, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.13125512715340443, |
|
"grad_norm": 0.3766002953052521, |
|
"learning_rate": 0.00029858451792139453, |
|
"loss": 0.4485, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.13125512715340443, |
|
"eval_loss": 0.8639554381370544, |
|
"eval_runtime": 210.1293, |
|
"eval_samples_per_second": 22.424, |
|
"eval_steps_per_second": 0.704, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.14219305441618812, |
|
"grad_norm": 0.3994982838630676, |
|
"learning_rate": 0.0002983392289083346, |
|
"loss": 0.443, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.14219305441618812, |
|
"eval_loss": 0.853752076625824, |
|
"eval_runtime": 210.1289, |
|
"eval_samples_per_second": 22.424, |
|
"eval_steps_per_second": 0.704, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.15313098167897182, |
|
"grad_norm": 0.2923962473869324, |
|
"learning_rate": 0.0002980744676464371, |
|
"loss": 0.4316, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.15313098167897182, |
|
"eval_loss": 0.826151430606842, |
|
"eval_runtime": 209.9359, |
|
"eval_samples_per_second": 22.445, |
|
"eval_steps_per_second": 0.705, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.16406890894175555, |
|
"grad_norm": 0.33790677785873413, |
|
"learning_rate": 0.0002977902688904813, |
|
"loss": 0.4435, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.16406890894175555, |
|
"eval_loss": 0.855368971824646, |
|
"eval_runtime": 210.0755, |
|
"eval_samples_per_second": 22.43, |
|
"eval_steps_per_second": 0.705, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.17500683620453925, |
|
"grad_norm": 0.3131038248538971, |
|
"learning_rate": 0.00029748666994677467, |
|
"loss": 0.4269, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.17500683620453925, |
|
"eval_loss": 0.8361687660217285, |
|
"eval_runtime": 209.5838, |
|
"eval_samples_per_second": 22.483, |
|
"eval_steps_per_second": 0.706, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.18594476346732294, |
|
"grad_norm": 0.35536178946495056, |
|
"learning_rate": 0.00029716371066825593, |
|
"loss": 0.4322, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.18594476346732294, |
|
"eval_loss": 0.8331068158149719, |
|
"eval_runtime": 210.059, |
|
"eval_samples_per_second": 22.432, |
|
"eval_steps_per_second": 0.705, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.19688269073010664, |
|
"grad_norm": 0.3734683692455292, |
|
"learning_rate": 0.0002968214334492632, |
|
"loss": 0.4136, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.19688269073010664, |
|
"eval_loss": 0.8500174283981323, |
|
"eval_runtime": 209.7577, |
|
"eval_samples_per_second": 22.464, |
|
"eval_steps_per_second": 0.706, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.20782061799289034, |
|
"grad_norm": 0.3501605987548828, |
|
"learning_rate": 0.00029645988321996917, |
|
"loss": 0.4262, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.20782061799289034, |
|
"eval_loss": 0.8315255641937256, |
|
"eval_runtime": 209.8869, |
|
"eval_samples_per_second": 22.45, |
|
"eval_steps_per_second": 0.705, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.21875854525567404, |
|
"grad_norm": 0.2926044464111328, |
|
"learning_rate": 0.00029607910744048336, |
|
"loss": 0.4283, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.21875854525567404, |
|
"eval_loss": 0.8357769846916199, |
|
"eval_runtime": 210.1683, |
|
"eval_samples_per_second": 22.42, |
|
"eval_steps_per_second": 0.704, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22969647251845776, |
|
"grad_norm": 0.4317471385002136, |
|
"learning_rate": 0.00029567915609462174, |
|
"loss": 0.3983, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.22969647251845776, |
|
"eval_loss": 0.8218151926994324, |
|
"eval_runtime": 210.2221, |
|
"eval_samples_per_second": 22.414, |
|
"eval_steps_per_second": 0.704, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.24063439978124146, |
|
"grad_norm": 0.4090133309364319, |
|
"learning_rate": 0.00029526008168334573, |
|
"loss": 0.409, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.24063439978124146, |
|
"eval_loss": 0.8271812200546265, |
|
"eval_runtime": 209.5484, |
|
"eval_samples_per_second": 22.486, |
|
"eval_steps_per_second": 0.706, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.25157232704402516, |
|
"grad_norm": 0.46969935297966003, |
|
"learning_rate": 0.0002948219392178703, |
|
"loss": 0.4173, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.25157232704402516, |
|
"eval_loss": 0.8557965159416199, |
|
"eval_runtime": 210.1229, |
|
"eval_samples_per_second": 22.425, |
|
"eval_steps_per_second": 0.704, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.26251025430680885, |
|
"grad_norm": 0.3677867650985718, |
|
"learning_rate": 0.0002943647862124429, |
|
"loss": 0.3847, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.26251025430680885, |
|
"eval_loss": 0.8389946818351746, |
|
"eval_runtime": 210.1338, |
|
"eval_samples_per_second": 22.424, |
|
"eval_steps_per_second": 0.704, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.27344818156959255, |
|
"grad_norm": 0.43112123012542725, |
|
"learning_rate": 0.0002938886826767936, |
|
"loss": 0.3923, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.27344818156959255, |
|
"eval_loss": 0.8553555011749268, |
|
"eval_runtime": 209.3302, |
|
"eval_samples_per_second": 22.51, |
|
"eval_steps_per_second": 0.707, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.28438610883237625, |
|
"grad_norm": 0.4160371422767639, |
|
"learning_rate": 0.00029339369110825756, |
|
"loss": 0.4014, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.28438610883237625, |
|
"eval_loss": 0.8533877730369568, |
|
"eval_runtime": 209.8359, |
|
"eval_samples_per_second": 22.456, |
|
"eval_steps_per_second": 0.705, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.29532403609515995, |
|
"grad_norm": 0.426897794008255, |
|
"learning_rate": 0.00029287987648357134, |
|
"loss": 0.3967, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.29532403609515995, |
|
"eval_loss": 0.8450255990028381, |
|
"eval_runtime": 209.7931, |
|
"eval_samples_per_second": 22.46, |
|
"eval_steps_per_second": 0.705, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.30626196335794365, |
|
"grad_norm": 0.42360490560531616, |
|
"learning_rate": 0.00029234730625034343, |
|
"loss": 0.3936, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.30626196335794365, |
|
"eval_loss": 0.8424352407455444, |
|
"eval_runtime": 209.6462, |
|
"eval_samples_per_second": 22.476, |
|
"eval_steps_per_second": 0.706, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.3171998906207274, |
|
"grad_norm": 0.3254799246788025, |
|
"learning_rate": 0.00029179605031820044, |
|
"loss": 0.3816, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.3171998906207274, |
|
"eval_loss": 0.8733253479003906, |
|
"eval_runtime": 210.002, |
|
"eval_samples_per_second": 22.438, |
|
"eval_steps_per_second": 0.705, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.3281378178835111, |
|
"grad_norm": 0.44433578848838806, |
|
"learning_rate": 0.0002912261810496102, |
|
"loss": 0.3852, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.3281378178835111, |
|
"eval_loss": 0.8514276146888733, |
|
"eval_runtime": 209.6715, |
|
"eval_samples_per_second": 22.473, |
|
"eval_steps_per_second": 0.706, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.3390757451462948, |
|
"grad_norm": 0.3501994013786316, |
|
"learning_rate": 0.0002906377732503829, |
|
"loss": 0.379, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.3390757451462948, |
|
"eval_loss": 0.854888379573822, |
|
"eval_runtime": 210.2495, |
|
"eval_samples_per_second": 22.411, |
|
"eval_steps_per_second": 0.704, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.3390757451462948, |
|
"step": 310, |
|
"total_flos": 4.228376200709407e+17, |
|
"train_loss": 0.4842093152384604, |
|
"train_runtime": 19786.6932, |
|
"train_samples_per_second": 35.484, |
|
"train_steps_per_second": 0.139 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2742, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 10, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 10, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.228376200709407e+17, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|