| { |
| "best_metric": 3.141986846923828, |
| "best_model_checkpoint": "contract1/checkpoint-1455", |
| "epoch": 5.0, |
| "eval_steps": 500, |
| "global_step": 1455, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0859106529209622, |
| "grad_norm": 32.766380310058594, |
| "learning_rate": 6.849315068493151e-06, |
| "loss": 7.5498, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.1718213058419244, |
| "grad_norm": 55.16777420043945, |
| "learning_rate": 1.5068493150684931e-05, |
| "loss": 7.7409, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.25773195876288657, |
| "grad_norm": 19.554790496826172, |
| "learning_rate": 2.363013698630137e-05, |
| "loss": 6.8118, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.3436426116838488, |
| "grad_norm": 11.124310493469238, |
| "learning_rate": 3.219178082191781e-05, |
| "loss": 5.94, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.42955326460481097, |
| "grad_norm": 9.250848770141602, |
| "learning_rate": 4.075342465753425e-05, |
| "loss": 5.2277, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.5154639175257731, |
| "grad_norm": 3.131469964981079, |
| "learning_rate": 4.9315068493150684e-05, |
| "loss": 4.6817, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.6013745704467354, |
| "grad_norm": 4.209794998168945, |
| "learning_rate": 4.912146676852559e-05, |
| "loss": 4.3776, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.6872852233676976, |
| "grad_norm": 3.0055902004241943, |
| "learning_rate": 4.816653934300993e-05, |
| "loss": 4.3974, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.7731958762886598, |
| "grad_norm": 2.923142433166504, |
| "learning_rate": 4.7211611917494275e-05, |
| "loss": 4.2811, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.8591065292096219, |
| "grad_norm": 3.110403060913086, |
| "learning_rate": 4.625668449197861e-05, |
| "loss": 4.1323, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.9450171821305842, |
| "grad_norm": 2.941375970840454, |
| "learning_rate": 4.530175706646295e-05, |
| "loss": 4.1902, |
| "step": 275 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_gen_len": 15.1115, |
| "eval_loss": 3.686694622039795, |
| "eval_rouge1": 17.9543, |
| "eval_rouge2": 4.0352, |
| "eval_rougeL": 16.3506, |
| "eval_rougeLsum": 16.4818, |
| "eval_runtime": 11.3626, |
| "eval_samples_per_second": 25.61, |
| "eval_steps_per_second": 3.256, |
| "step": 291 |
| }, |
| { |
| "epoch": 1.0309278350515463, |
| "grad_norm": 3.0971412658691406, |
| "learning_rate": 4.434682964094729e-05, |
| "loss": 4.0055, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.1168384879725086, |
| "grad_norm": 3.2513363361358643, |
| "learning_rate": 4.339190221543163e-05, |
| "loss": 3.9592, |
| "step": 325 |
| }, |
| { |
| "epoch": 1.2027491408934707, |
| "grad_norm": 4.395771026611328, |
| "learning_rate": 4.2436974789915967e-05, |
| "loss": 3.8709, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.2886597938144329, |
| "grad_norm": 2.7217512130737305, |
| "learning_rate": 4.1482047364400305e-05, |
| "loss": 3.7554, |
| "step": 375 |
| }, |
| { |
| "epoch": 1.3745704467353952, |
| "grad_norm": 3.703568696975708, |
| "learning_rate": 4.052711993888464e-05, |
| "loss": 3.7343, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.4604810996563573, |
| "grad_norm": 2.7263598442077637, |
| "learning_rate": 3.957219251336899e-05, |
| "loss": 3.7497, |
| "step": 425 |
| }, |
| { |
| "epoch": 1.5463917525773194, |
| "grad_norm": 2.4919683933258057, |
| "learning_rate": 3.861726508785333e-05, |
| "loss": 3.7073, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.6323024054982818, |
| "grad_norm": 2.5988521575927734, |
| "learning_rate": 3.7662337662337665e-05, |
| "loss": 3.6325, |
| "step": 475 |
| }, |
| { |
| "epoch": 1.718213058419244, |
| "grad_norm": 3.717288017272949, |
| "learning_rate": 3.6707410236822004e-05, |
| "loss": 3.687, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.8041237113402062, |
| "grad_norm": 3.393786668777466, |
| "learning_rate": 3.575248281130634e-05, |
| "loss": 3.7349, |
| "step": 525 |
| }, |
| { |
| "epoch": 1.8900343642611683, |
| "grad_norm": 2.5332796573638916, |
| "learning_rate": 3.479755538579068e-05, |
| "loss": 3.7308, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.9759450171821307, |
| "grad_norm": 3.4967894554138184, |
| "learning_rate": 3.384262796027502e-05, |
| "loss": 3.6033, |
| "step": 575 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_gen_len": 14.7061, |
| "eval_loss": 3.3814778327941895, |
| "eval_rouge1": 20.6781, |
| "eval_rouge2": 5.109, |
| "eval_rougeL": 17.5025, |
| "eval_rougeLsum": 17.5956, |
| "eval_runtime": 11.6963, |
| "eval_samples_per_second": 24.88, |
| "eval_steps_per_second": 3.163, |
| "step": 582 |
| }, |
| { |
| "epoch": 2.0618556701030926, |
| "grad_norm": 3.7303099632263184, |
| "learning_rate": 3.288770053475936e-05, |
| "loss": 3.4857, |
| "step": 600 |
| }, |
| { |
| "epoch": 2.147766323024055, |
| "grad_norm": 2.58085036277771, |
| "learning_rate": 3.1932773109243696e-05, |
| "loss": 3.7377, |
| "step": 625 |
| }, |
| { |
| "epoch": 2.2336769759450172, |
| "grad_norm": 2.9038166999816895, |
| "learning_rate": 3.097784568372804e-05, |
| "loss": 3.4969, |
| "step": 650 |
| }, |
| { |
| "epoch": 2.319587628865979, |
| "grad_norm": 1.8798184394836426, |
| "learning_rate": 3.002291825821238e-05, |
| "loss": 3.3667, |
| "step": 675 |
| }, |
| { |
| "epoch": 2.4054982817869415, |
| "grad_norm": 2.5839955806732178, |
| "learning_rate": 2.9067990832696718e-05, |
| "loss": 3.5371, |
| "step": 700 |
| }, |
| { |
| "epoch": 2.491408934707904, |
| "grad_norm": 14.803485870361328, |
| "learning_rate": 2.8113063407181056e-05, |
| "loss": 3.4758, |
| "step": 725 |
| }, |
| { |
| "epoch": 2.5773195876288657, |
| "grad_norm": 2.901104688644409, |
| "learning_rate": 2.7158135981665394e-05, |
| "loss": 3.4274, |
| "step": 750 |
| }, |
| { |
| "epoch": 2.663230240549828, |
| "grad_norm": 3.5598862171173096, |
| "learning_rate": 2.6203208556149733e-05, |
| "loss": 3.5939, |
| "step": 775 |
| }, |
| { |
| "epoch": 2.7491408934707904, |
| "grad_norm": 2.656578540802002, |
| "learning_rate": 2.524828113063407e-05, |
| "loss": 3.5227, |
| "step": 800 |
| }, |
| { |
| "epoch": 2.8350515463917527, |
| "grad_norm": 2.2073974609375, |
| "learning_rate": 2.4293353705118413e-05, |
| "loss": 3.5447, |
| "step": 825 |
| }, |
| { |
| "epoch": 2.9209621993127146, |
| "grad_norm": 3.0660665035247803, |
| "learning_rate": 2.333842627960275e-05, |
| "loss": 3.4734, |
| "step": 850 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_gen_len": 16.5439, |
| "eval_loss": 3.232574462890625, |
| "eval_rouge1": 20.2411, |
| "eval_rouge2": 5.2598, |
| "eval_rougeL": 17.2676, |
| "eval_rougeLsum": 17.4831, |
| "eval_runtime": 12.7924, |
| "eval_samples_per_second": 22.748, |
| "eval_steps_per_second": 2.892, |
| "step": 873 |
| }, |
| { |
| "epoch": 3.006872852233677, |
| "grad_norm": 2.2971296310424805, |
| "learning_rate": 2.238349885408709e-05, |
| "loss": 3.4626, |
| "step": 875 |
| }, |
| { |
| "epoch": 3.0927835051546393, |
| "grad_norm": 5.520618438720703, |
| "learning_rate": 2.1428571428571428e-05, |
| "loss": 3.4557, |
| "step": 900 |
| }, |
| { |
| "epoch": 3.178694158075601, |
| "grad_norm": 2.2981772422790527, |
| "learning_rate": 2.047364400305577e-05, |
| "loss": 3.2812, |
| "step": 925 |
| }, |
| { |
| "epoch": 3.2646048109965635, |
| "grad_norm": 6.0153069496154785, |
| "learning_rate": 1.951871657754011e-05, |
| "loss": 3.4321, |
| "step": 950 |
| }, |
| { |
| "epoch": 3.350515463917526, |
| "grad_norm": 2.2888569831848145, |
| "learning_rate": 1.8563789152024447e-05, |
| "loss": 3.392, |
| "step": 975 |
| }, |
| { |
| "epoch": 3.436426116838488, |
| "grad_norm": 5.259116172790527, |
| "learning_rate": 1.7608861726508785e-05, |
| "loss": 3.4009, |
| "step": 1000 |
| }, |
| { |
| "epoch": 3.52233676975945, |
| "grad_norm": 2.115800380706787, |
| "learning_rate": 1.6653934300993127e-05, |
| "loss": 3.3249, |
| "step": 1025 |
| }, |
| { |
| "epoch": 3.6082474226804124, |
| "grad_norm": 2.3146419525146484, |
| "learning_rate": 1.5699006875477465e-05, |
| "loss": 3.2829, |
| "step": 1050 |
| }, |
| { |
| "epoch": 3.6941580756013748, |
| "grad_norm": 2.9118130207061768, |
| "learning_rate": 1.4744079449961804e-05, |
| "loss": 3.4347, |
| "step": 1075 |
| }, |
| { |
| "epoch": 3.7800687285223367, |
| "grad_norm": 2.7317888736724854, |
| "learning_rate": 1.3789152024446142e-05, |
| "loss": 3.2167, |
| "step": 1100 |
| }, |
| { |
| "epoch": 3.865979381443299, |
| "grad_norm": 4.284421920776367, |
| "learning_rate": 1.2834224598930484e-05, |
| "loss": 3.431, |
| "step": 1125 |
| }, |
| { |
| "epoch": 3.9518900343642613, |
| "grad_norm": 3.761094808578491, |
| "learning_rate": 1.1879297173414822e-05, |
| "loss": 3.4635, |
| "step": 1150 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_gen_len": 15.6284, |
| "eval_loss": 3.164484739303589, |
| "eval_rouge1": 20.158, |
| "eval_rouge2": 4.9421, |
| "eval_rougeL": 17.0338, |
| "eval_rougeLsum": 17.2585, |
| "eval_runtime": 11.6665, |
| "eval_samples_per_second": 24.943, |
| "eval_steps_per_second": 3.171, |
| "step": 1164 |
| }, |
| { |
| "epoch": 4.037800687285223, |
| "grad_norm": 2.3253726959228516, |
| "learning_rate": 1.092436974789916e-05, |
| "loss": 3.3823, |
| "step": 1175 |
| }, |
| { |
| "epoch": 4.123711340206185, |
| "grad_norm": 5.085910797119141, |
| "learning_rate": 9.969442322383499e-06, |
| "loss": 3.2498, |
| "step": 1200 |
| }, |
| { |
| "epoch": 4.209621993127148, |
| "grad_norm": 2.912647008895874, |
| "learning_rate": 9.014514896867839e-06, |
| "loss": 3.3191, |
| "step": 1225 |
| }, |
| { |
| "epoch": 4.29553264604811, |
| "grad_norm": 5.910384178161621, |
| "learning_rate": 8.059587471352178e-06, |
| "loss": 3.4222, |
| "step": 1250 |
| }, |
| { |
| "epoch": 4.381443298969073, |
| "grad_norm": 10.643930435180664, |
| "learning_rate": 7.104660045836517e-06, |
| "loss": 3.4691, |
| "step": 1275 |
| }, |
| { |
| "epoch": 4.4673539518900345, |
| "grad_norm": 2.9152700901031494, |
| "learning_rate": 6.149732620320856e-06, |
| "loss": 3.2257, |
| "step": 1300 |
| }, |
| { |
| "epoch": 4.553264604810996, |
| "grad_norm": 2.8727643489837646, |
| "learning_rate": 5.194805194805195e-06, |
| "loss": 3.3841, |
| "step": 1325 |
| }, |
| { |
| "epoch": 4.639175257731958, |
| "grad_norm": 8.290576934814453, |
| "learning_rate": 4.239877769289534e-06, |
| "loss": 3.1381, |
| "step": 1350 |
| }, |
| { |
| "epoch": 4.725085910652921, |
| "grad_norm": 2.3321030139923096, |
| "learning_rate": 3.2849503437738733e-06, |
| "loss": 3.3243, |
| "step": 1375 |
| }, |
| { |
| "epoch": 4.810996563573883, |
| "grad_norm": 3.101409912109375, |
| "learning_rate": 2.3300229182582125e-06, |
| "loss": 3.1536, |
| "step": 1400 |
| }, |
| { |
| "epoch": 4.896907216494846, |
| "grad_norm": 4.4823174476623535, |
| "learning_rate": 1.3750954927425516e-06, |
| "loss": 3.3531, |
| "step": 1425 |
| }, |
| { |
| "epoch": 4.982817869415808, |
| "grad_norm": 2.517242193222046, |
| "learning_rate": 4.2016806722689076e-07, |
| "loss": 3.4086, |
| "step": 1450 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_gen_len": 15.5, |
| "eval_loss": 3.141986846923828, |
| "eval_rouge1": 19.8864, |
| "eval_rouge2": 4.9499, |
| "eval_rougeL": 16.8946, |
| "eval_rougeLsum": 17.1002, |
| "eval_runtime": 12.3635, |
| "eval_samples_per_second": 23.537, |
| "eval_steps_per_second": 2.993, |
| "step": 1455 |
| } |
| ], |
| "logging_steps": 25, |
| "max_steps": 1455, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 5, |
| "early_stopping_threshold": 0.01 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 0 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 9422115569664.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|