|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.07948599059415778, |
|
"eval_steps": 500, |
|
"global_step": 4800, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.001655958137378287, |
|
"grad_norm": 0.19597935676574707, |
|
"learning_rate": 0.00019966890756553004, |
|
"loss": 2.1972, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.003311916274756574, |
|
"grad_norm": 0.25808241963386536, |
|
"learning_rate": 0.00019933771577352244, |
|
"loss": 1.9677, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.0049678744121348616, |
|
"grad_norm": 0.23811133205890656, |
|
"learning_rate": 0.00019900652398151486, |
|
"loss": 1.9341, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.006623832549513148, |
|
"grad_norm": 0.26714324951171875, |
|
"learning_rate": 0.00019867533218950728, |
|
"loss": 1.915, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.008279790686891435, |
|
"grad_norm": 0.23645658791065216, |
|
"learning_rate": 0.0001983441403974997, |
|
"loss": 1.8916, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.009935748824269723, |
|
"grad_norm": 0.2878512740135193, |
|
"learning_rate": 0.00019801294860549213, |
|
"loss": 1.9003, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.01159170696164801, |
|
"grad_norm": 0.2687942087650299, |
|
"learning_rate": 0.00019768175681348456, |
|
"loss": 1.876, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.013247665099026296, |
|
"grad_norm": 0.2722982168197632, |
|
"learning_rate": 0.00019735056502147698, |
|
"loss": 1.9004, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.014903623236404583, |
|
"grad_norm": 0.25342944264411926, |
|
"learning_rate": 0.0001970193732294694, |
|
"loss": 1.8947, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.01655958137378287, |
|
"grad_norm": 0.2900806963443756, |
|
"learning_rate": 0.0001966881814374618, |
|
"loss": 1.8795, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.018215539511161158, |
|
"grad_norm": 0.24855603277683258, |
|
"learning_rate": 0.00019635698964545422, |
|
"loss": 1.8657, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.019871497648539446, |
|
"grad_norm": 0.25272709131240845, |
|
"learning_rate": 0.00019602579785344665, |
|
"loss": 1.8687, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.02152745578591773, |
|
"grad_norm": 0.31408464908599854, |
|
"learning_rate": 0.00019569460606143904, |
|
"loss": 1.8332, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.02318341392329602, |
|
"grad_norm": 0.26880863308906555, |
|
"learning_rate": 0.00019536341426943147, |
|
"loss": 1.8603, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.024839372060674308, |
|
"grad_norm": 0.2371913194656372, |
|
"learning_rate": 0.0001950322224774239, |
|
"loss": 1.8273, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.026495330198052593, |
|
"grad_norm": 0.2510370910167694, |
|
"learning_rate": 0.00019470103068541632, |
|
"loss": 1.8524, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.02815128833543088, |
|
"grad_norm": 0.26143962144851685, |
|
"learning_rate": 0.00019436983889340874, |
|
"loss": 1.8543, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.029807246472809166, |
|
"grad_norm": 0.2438499480485916, |
|
"learning_rate": 0.00019403864710140116, |
|
"loss": 1.8411, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.03146320461018746, |
|
"grad_norm": 0.2666601836681366, |
|
"learning_rate": 0.0001937074553093936, |
|
"loss": 1.8548, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.03311916274756574, |
|
"grad_norm": 0.2752065062522888, |
|
"learning_rate": 0.000193376263517386, |
|
"loss": 1.8534, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.03477512088494403, |
|
"grad_norm": 0.24849963188171387, |
|
"learning_rate": 0.00019304507172537844, |
|
"loss": 1.8476, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.036431079022322316, |
|
"grad_norm": 0.2809307277202606, |
|
"learning_rate": 0.00019271387993337083, |
|
"loss": 1.8505, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.038087037159700604, |
|
"grad_norm": 0.23209506273269653, |
|
"learning_rate": 0.00019238268814136326, |
|
"loss": 1.864, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.03974299529707889, |
|
"grad_norm": 0.25108611583709717, |
|
"learning_rate": 0.00019205149634935568, |
|
"loss": 1.8411, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.041398953434457174, |
|
"grad_norm": 0.2639986276626587, |
|
"learning_rate": 0.0001917203045573481, |
|
"loss": 1.8456, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.04305491157183546, |
|
"grad_norm": 0.2927249073982239, |
|
"learning_rate": 0.00019138911276534053, |
|
"loss": 1.8361, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.04471086970921375, |
|
"grad_norm": 0.2660035192966461, |
|
"learning_rate": 0.00019105792097333295, |
|
"loss": 1.8352, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.04636682784659204, |
|
"grad_norm": 0.23683211207389832, |
|
"learning_rate": 0.00019072672918132538, |
|
"loss": 1.824, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.04802278598397033, |
|
"grad_norm": 0.7001804709434509, |
|
"learning_rate": 0.00019039553738931777, |
|
"loss": 1.82, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.049678744121348616, |
|
"grad_norm": 0.2500315010547638, |
|
"learning_rate": 0.0001900643455973102, |
|
"loss": 1.862, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.0513347022587269, |
|
"grad_norm": 0.2476750761270523, |
|
"learning_rate": 0.00018973315380530262, |
|
"loss": 1.8247, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.052990660396105185, |
|
"grad_norm": 0.23064357042312622, |
|
"learning_rate": 0.00018940196201329502, |
|
"loss": 1.8685, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.054646618533483474, |
|
"grad_norm": 0.2495209276676178, |
|
"learning_rate": 0.00018907077022128744, |
|
"loss": 1.8214, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.05630257667086176, |
|
"grad_norm": 0.25310614705085754, |
|
"learning_rate": 0.00018873957842927987, |
|
"loss": 1.84, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.05795853480824005, |
|
"grad_norm": 0.24329665303230286, |
|
"learning_rate": 0.0001884083866372723, |
|
"loss": 1.7982, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.05961449294561833, |
|
"grad_norm": 0.25951218605041504, |
|
"learning_rate": 0.00018807719484526471, |
|
"loss": 1.8078, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.06127045108299662, |
|
"grad_norm": 0.24307559430599213, |
|
"learning_rate": 0.00018774600305325714, |
|
"loss": 1.8181, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.06292640922037492, |
|
"grad_norm": 0.27577558159828186, |
|
"learning_rate": 0.00018741481126124956, |
|
"loss": 1.8158, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.06458236735775319, |
|
"grad_norm": 0.21584127843379974, |
|
"learning_rate": 0.00018708361946924199, |
|
"loss": 1.8362, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.06623832549513148, |
|
"grad_norm": 0.2558760344982147, |
|
"learning_rate": 0.0001867524276772344, |
|
"loss": 1.8425, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.06789428363250977, |
|
"grad_norm": 0.2206682711839676, |
|
"learning_rate": 0.0001864212358852268, |
|
"loss": 1.8155, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.06955024176988805, |
|
"grad_norm": 0.23946842551231384, |
|
"learning_rate": 0.00018609004409321923, |
|
"loss": 1.8349, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.07120619990726634, |
|
"grad_norm": 0.22356823086738586, |
|
"learning_rate": 0.00018575885230121165, |
|
"loss": 1.8013, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.07286215804464463, |
|
"grad_norm": 0.28327444195747375, |
|
"learning_rate": 0.00018542766050920408, |
|
"loss": 1.8193, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.07451811618202292, |
|
"grad_norm": 0.259748637676239, |
|
"learning_rate": 0.0001850964687171965, |
|
"loss": 1.8401, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.07617407431940121, |
|
"grad_norm": 0.24509303271770477, |
|
"learning_rate": 0.00018476527692518893, |
|
"loss": 1.8313, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.0778300324567795, |
|
"grad_norm": 0.2799519896507263, |
|
"learning_rate": 0.00018443408513318132, |
|
"loss": 1.8437, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.07948599059415778, |
|
"grad_norm": 0.25356101989746094, |
|
"learning_rate": 0.00018410289334117375, |
|
"loss": 1.7989, |
|
"step": 4800 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 60388, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 800, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.318952218329088e+18, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|