|
{ |
|
"best_metric": 0.9388719201087952, |
|
"best_model_checkpoint": "/home/datta0/models/lora_final/Qwen2-7B_magiccoder_ortho/checkpoint-12", |
|
"epoch": 0.99836867862969, |
|
"eval_steps": 4, |
|
"global_step": 153, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0065252854812398045, |
|
"grad_norm": 4.3915696144104, |
|
"learning_rate": 7.5e-05, |
|
"loss": 0.8061, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.013050570962479609, |
|
"grad_norm": 3.7475483417510986, |
|
"learning_rate": 0.00015, |
|
"loss": 0.931, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.026101141924959218, |
|
"grad_norm": 5.367969989776611, |
|
"learning_rate": 0.0003, |
|
"loss": 0.8262, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.026101141924959218, |
|
"eval_loss": 1.0883674621582031, |
|
"eval_runtime": 24.5353, |
|
"eval_samples_per_second": 19.931, |
|
"eval_steps_per_second": 2.527, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.03915171288743882, |
|
"grad_norm": 11.019035339355469, |
|
"learning_rate": 0.00029986665273697545, |
|
"loss": 1.0003, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.052202283849918436, |
|
"grad_norm": 2.1015965938568115, |
|
"learning_rate": 0.0002994668480344693, |
|
"loss": 0.9776, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.052202283849918436, |
|
"eval_loss": 0.9662861227989197, |
|
"eval_runtime": 24.6827, |
|
"eval_samples_per_second": 19.811, |
|
"eval_steps_per_second": 2.512, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.06525285481239804, |
|
"grad_norm": 1.750746726989746, |
|
"learning_rate": 0.0002988012967306524, |
|
"loss": 0.9319, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.07830342577487764, |
|
"grad_norm": 2.2127320766448975, |
|
"learning_rate": 0.000297871182151455, |
|
"loss": 0.9345, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.07830342577487764, |
|
"eval_loss": 0.9388719201087952, |
|
"eval_runtime": 24.6723, |
|
"eval_samples_per_second": 19.82, |
|
"eval_steps_per_second": 2.513, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.09135399673735727, |
|
"grad_norm": 1.6279808282852173, |
|
"learning_rate": 0.00029667815800665635, |
|
"loss": 0.9489, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.10440456769983687, |
|
"grad_norm": 1.5321425199508667, |
|
"learning_rate": 0.0002952243454496488, |
|
"loss": 0.9026, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.10440456769983687, |
|
"eval_loss": 0.9481999278068542, |
|
"eval_runtime": 24.6231, |
|
"eval_samples_per_second": 19.859, |
|
"eval_steps_per_second": 2.518, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.11745513866231648, |
|
"grad_norm": 1.355367660522461, |
|
"learning_rate": 0.0002935123293061047, |
|
"loss": 0.9004, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.13050570962479607, |
|
"grad_norm": 1.5347142219543457, |
|
"learning_rate": 0.0002915451534782506, |
|
"loss": 0.9618, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.13050570962479607, |
|
"eval_loss": 0.957125186920166, |
|
"eval_runtime": 24.5628, |
|
"eval_samples_per_second": 19.908, |
|
"eval_steps_per_second": 2.524, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.14355628058727568, |
|
"grad_norm": 1.5545670986175537, |
|
"learning_rate": 0.0002893263155329204, |
|
"loss": 0.9457, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.1566068515497553, |
|
"grad_norm": 1.4520829916000366, |
|
"learning_rate": 0.00028685976048300875, |
|
"loss": 0.8685, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.1566068515497553, |
|
"eval_loss": 0.9719114899635315, |
|
"eval_runtime": 24.4752, |
|
"eval_samples_per_second": 19.979, |
|
"eval_steps_per_second": 2.533, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.16965742251223492, |
|
"grad_norm": 1.3440358638763428, |
|
"learning_rate": 0.00028414987377338235, |
|
"loss": 1.0084, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.18270799347471453, |
|
"grad_norm": 1.43779718875885, |
|
"learning_rate": 0.0002812014734837191, |
|
"loss": 0.8834, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.18270799347471453, |
|
"eval_loss": 0.9751714468002319, |
|
"eval_runtime": 56.0537, |
|
"eval_samples_per_second": 8.724, |
|
"eval_steps_per_second": 1.106, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.19575856443719414, |
|
"grad_norm": 1.4336111545562744, |
|
"learning_rate": 0.0002780198017621379, |
|
"loss": 0.9617, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.20880913539967375, |
|
"grad_norm": 1.291685700416565, |
|
"learning_rate": 0.00027461051550485116, |
|
"loss": 1.0185, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.20880913539967375, |
|
"eval_loss": 0.987638533115387, |
|
"eval_runtime": 59.7835, |
|
"eval_samples_per_second": 8.18, |
|
"eval_steps_per_second": 1.037, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.22185970636215335, |
|
"grad_norm": 1.3673038482666016, |
|
"learning_rate": 0.00027097967629840906, |
|
"loss": 0.9289, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.23491027732463296, |
|
"grad_norm": 1.321115255355835, |
|
"learning_rate": 0.0002671337396424204, |
|
"loss": 0.9354, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.23491027732463296, |
|
"eval_loss": 0.9922739863395691, |
|
"eval_runtime": 58.5264, |
|
"eval_samples_per_second": 8.355, |
|
"eval_steps_per_second": 1.059, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.24796084828711257, |
|
"grad_norm": 1.3209092617034912, |
|
"learning_rate": 0.00026307954347190983, |
|
"loss": 0.9003, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.26101141924959215, |
|
"grad_norm": 1.2740062475204468, |
|
"learning_rate": 0.00025882429599971866, |
|
"loss": 0.9734, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.26101141924959215, |
|
"eval_loss": 0.9982444047927856, |
|
"eval_runtime": 56.4488, |
|
"eval_samples_per_second": 8.663, |
|
"eval_steps_per_second": 1.098, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.2740619902120718, |
|
"grad_norm": 1.3060563802719116, |
|
"learning_rate": 0.0002543755629005657, |
|
"loss": 0.9583, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.28711256117455136, |
|
"grad_norm": 1.2693545818328857, |
|
"learning_rate": 0.0002497412538595537, |
|
"loss": 1.034, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.28711256117455136, |
|
"eval_loss": 1.0034517049789429, |
|
"eval_runtime": 57.2987, |
|
"eval_samples_per_second": 8.534, |
|
"eval_steps_per_second": 1.082, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.300163132137031, |
|
"grad_norm": 1.3035016059875488, |
|
"learning_rate": 0.00024492960850903755, |
|
"loss": 0.9648, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.3132137030995106, |
|
"grad_norm": 1.4393730163574219, |
|
"learning_rate": 0.00023994918177885902, |
|
"loss": 1.0067, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.3132137030995106, |
|
"eval_loss": 1.0048160552978516, |
|
"eval_runtime": 56.3331, |
|
"eval_samples_per_second": 8.681, |
|
"eval_steps_per_second": 1.101, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.3262642740619902, |
|
"grad_norm": 1.5344454050064087, |
|
"learning_rate": 0.0002348088286859938, |
|
"loss": 1.0498, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.33931484502446985, |
|
"grad_norm": 1.2956377267837524, |
|
"learning_rate": 0.00022951768859065402, |
|
"loss": 0.932, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.33931484502446985, |
|
"eval_loss": 1.00808584690094, |
|
"eval_runtime": 56.5841, |
|
"eval_samples_per_second": 8.642, |
|
"eval_steps_per_second": 1.096, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.3523654159869494, |
|
"grad_norm": 1.5058661699295044, |
|
"learning_rate": 0.0002240851689468395, |
|
"loss": 0.9455, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.36541598694942906, |
|
"grad_norm": 1.3148020505905151, |
|
"learning_rate": 0.00021852092857622808, |
|
"loss": 0.9407, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.36541598694942906, |
|
"eval_loss": 1.006118655204773, |
|
"eval_runtime": 57.5921, |
|
"eval_samples_per_second": 8.491, |
|
"eval_steps_per_second": 1.077, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.37846655791190864, |
|
"grad_norm": 1.1989065408706665, |
|
"learning_rate": 0.00021283486049514277, |
|
"loss": 1.023, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.3915171288743883, |
|
"grad_norm": 1.3775067329406738, |
|
"learning_rate": 0.00020703707432513004, |
|
"loss": 0.9682, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.3915171288743883, |
|
"eval_loss": 1.0053811073303223, |
|
"eval_runtime": 57.2201, |
|
"eval_samples_per_second": 8.546, |
|
"eval_steps_per_second": 1.084, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.40456769983686786, |
|
"grad_norm": 1.320212960243225, |
|
"learning_rate": 0.00020113787831842152, |
|
"loss": 0.8986, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.4176182707993475, |
|
"grad_norm": 1.325500726699829, |
|
"learning_rate": 0.0001951477610302378, |
|
"loss": 1.0224, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.4176182707993475, |
|
"eval_loss": 1.0092753171920776, |
|
"eval_runtime": 56.0548, |
|
"eval_samples_per_second": 8.724, |
|
"eval_steps_per_second": 1.106, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.43066884176182707, |
|
"grad_norm": 1.2880396842956543, |
|
"learning_rate": 0.0001890773726705198, |
|
"loss": 0.9943, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.4437194127243067, |
|
"grad_norm": 1.237645149230957, |
|
"learning_rate": 0.00018293750616824443, |
|
"loss": 1.0145, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.4437194127243067, |
|
"eval_loss": 1.009407639503479, |
|
"eval_runtime": 24.7188, |
|
"eval_samples_per_second": 19.783, |
|
"eval_steps_per_second": 2.508, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.4567699836867863, |
|
"grad_norm": 1.1990931034088135, |
|
"learning_rate": 0.00017673907798199052, |
|
"loss": 1.0333, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.4698205546492659, |
|
"grad_norm": 1.2862218618392944, |
|
"learning_rate": 0.000170493108690874, |
|
"loss": 0.9756, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.4698205546492659, |
|
"eval_loss": 1.010068416595459, |
|
"eval_runtime": 24.7249, |
|
"eval_samples_per_second": 19.778, |
|
"eval_steps_per_second": 2.508, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.4828711256117455, |
|
"grad_norm": 1.2775288820266724, |
|
"learning_rate": 0.00016421070340036023, |
|
"loss": 1.0124, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.49592169657422513, |
|
"grad_norm": 1.3676966428756714, |
|
"learning_rate": 0.00015790303199779193, |
|
"loss": 0.9968, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.49592169657422513, |
|
"eval_loss": 1.0086660385131836, |
|
"eval_runtime": 24.6773, |
|
"eval_samples_per_second": 19.816, |
|
"eval_steps_per_second": 2.512, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.5089722675367048, |
|
"grad_norm": 1.2739876508712769, |
|
"learning_rate": 0.00015158130929273695, |
|
"loss": 0.9405, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.5220228384991843, |
|
"grad_norm": 1.3879481554031372, |
|
"learning_rate": 0.00014525677507746615, |
|
"loss": 0.9566, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.5220228384991843, |
|
"eval_loss": 1.0094032287597656, |
|
"eval_runtime": 24.6744, |
|
"eval_samples_per_second": 19.818, |
|
"eval_steps_per_second": 2.513, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.5350734094616639, |
|
"grad_norm": 1.246418833732605, |
|
"learning_rate": 0.00013894067414301314, |
|
"loss": 1.0481, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.5481239804241436, |
|
"grad_norm": 1.3928742408752441, |
|
"learning_rate": 0.0001326442362863458, |
|
"loss": 1.0394, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.5481239804241436, |
|
"eval_loss": 1.008681297302246, |
|
"eval_runtime": 24.6024, |
|
"eval_samples_per_second": 19.876, |
|
"eval_steps_per_second": 2.52, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.5611745513866232, |
|
"grad_norm": 1.2170292139053345, |
|
"learning_rate": 0.00012637865634419735, |
|
"loss": 0.9979, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.5742251223491027, |
|
"grad_norm": 1.3591171503067017, |
|
"learning_rate": 0.00012015507428905507, |
|
"loss": 0.9546, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.5742251223491027, |
|
"eval_loss": 1.0074015855789185, |
|
"eval_runtime": 24.5002, |
|
"eval_samples_per_second": 19.959, |
|
"eval_steps_per_second": 2.531, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.5872756933115824, |
|
"grad_norm": 1.336329460144043, |
|
"learning_rate": 0.00011398455542269575, |
|
"loss": 0.9125, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.600326264274062, |
|
"grad_norm": 1.2378321886062622, |
|
"learning_rate": 0.00010787807070248305, |
|
"loss": 1.0347, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.600326264274062, |
|
"eval_loss": 1.0086424350738525, |
|
"eval_runtime": 24.4221, |
|
"eval_samples_per_second": 20.023, |
|
"eval_steps_per_second": 2.539, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.6133768352365416, |
|
"grad_norm": 1.3458659648895264, |
|
"learning_rate": 0.00010184647723540557, |
|
"loss": 0.9567, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.6264274061990212, |
|
"grad_norm": 1.251621961593628, |
|
"learning_rate": 9.590049897453668e-05, |
|
"loss": 0.9639, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.6264274061990212, |
|
"eval_loss": 1.004166841506958, |
|
"eval_runtime": 56.7542, |
|
"eval_samples_per_second": 8.616, |
|
"eval_steps_per_second": 1.092, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.6394779771615008, |
|
"grad_norm": 1.15924870967865, |
|
"learning_rate": 9.005070765223768e-05, |
|
"loss": 1.0447, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.6525285481239804, |
|
"grad_norm": 1.4097235202789307, |
|
"learning_rate": 8.430750398400308e-05, |
|
"loss": 1.0543, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.6525285481239804, |
|
"eval_loss": 1.002665638923645, |
|
"eval_runtime": 55.6845, |
|
"eval_samples_per_second": 8.782, |
|
"eval_steps_per_second": 1.113, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.6655791190864601, |
|
"grad_norm": 1.3108314275741577, |
|
"learning_rate": 7.868109917636821e-05, |
|
"loss": 0.9645, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.6786296900489397, |
|
"grad_norm": 1.2921593189239502, |
|
"learning_rate": 7.318149677175675e-05, |
|
"loss": 0.9346, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.6786296900489397, |
|
"eval_loss": 1.003048300743103, |
|
"eval_runtime": 57.4498, |
|
"eval_samples_per_second": 8.512, |
|
"eval_steps_per_second": 1.079, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.6916802610114192, |
|
"grad_norm": 1.2615606784820557, |
|
"learning_rate": 6.781847486254697e-05, |
|
"loss": 0.9565, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.7047308319738989, |
|
"grad_norm": 1.3441969156265259, |
|
"learning_rate": 6.260156870598071e-05, |
|
"loss": 0.9744, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.7047308319738989, |
|
"eval_loss": 1.0019466876983643, |
|
"eval_runtime": 56.5017, |
|
"eval_samples_per_second": 8.655, |
|
"eval_steps_per_second": 1.097, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.7177814029363785, |
|
"grad_norm": 1.1984766721725464, |
|
"learning_rate": 5.7540053770823644e-05, |
|
"loss": 0.9558, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.7308319738988581, |
|
"grad_norm": 1.259084701538086, |
|
"learning_rate": 5.264292924592073e-05, |
|
"loss": 0.9546, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.7308319738988581, |
|
"eval_loss": 0.9984883069992065, |
|
"eval_runtime": 56.938, |
|
"eval_samples_per_second": 8.588, |
|
"eval_steps_per_second": 1.089, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.7438825448613376, |
|
"grad_norm": 1.2619880437850952, |
|
"learning_rate": 4.791890203996634e-05, |
|
"loss": 0.9784, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.7569331158238173, |
|
"grad_norm": 1.1600760221481323, |
|
"learning_rate": 4.3376371300938786e-05, |
|
"loss": 0.9138, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.7569331158238173, |
|
"eval_loss": 0.9968593716621399, |
|
"eval_runtime": 56.3466, |
|
"eval_samples_per_second": 8.678, |
|
"eval_steps_per_second": 1.1, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.7699836867862969, |
|
"grad_norm": 1.2669286727905273, |
|
"learning_rate": 3.9023413482721426e-05, |
|
"loss": 0.9714, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.7830342577487766, |
|
"grad_norm": 1.2815442085266113, |
|
"learning_rate": 3.4867767985462507e-05, |
|
"loss": 0.9026, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.7830342577487766, |
|
"eval_loss": 0.9961332082748413, |
|
"eval_runtime": 56.6715, |
|
"eval_samples_per_second": 8.629, |
|
"eval_steps_per_second": 1.094, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.7960848287112561, |
|
"grad_norm": 1.2086176872253418, |
|
"learning_rate": 3.09168233952042e-05, |
|
"loss": 1.0291, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.8091353996737357, |
|
"grad_norm": 1.2728592157363892, |
|
"learning_rate": 2.717760434724613e-05, |
|
"loss": 0.9746, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.8091353996737357, |
|
"eval_loss": 0.9953013062477112, |
|
"eval_runtime": 56.695, |
|
"eval_samples_per_second": 8.625, |
|
"eval_steps_per_second": 1.094, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.8221859706362153, |
|
"grad_norm": 1.163971185684204, |
|
"learning_rate": 2.3656759036600187e-05, |
|
"loss": 0.9733, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.835236541598695, |
|
"grad_norm": 1.2905343770980835, |
|
"learning_rate": 2.0360547397742523e-05, |
|
"loss": 0.9453, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.835236541598695, |
|
"eval_loss": 0.9950230717658997, |
|
"eval_runtime": 57.4352, |
|
"eval_samples_per_second": 8.514, |
|
"eval_steps_per_second": 1.079, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.8482871125611745, |
|
"grad_norm": 1.2126384973526, |
|
"learning_rate": 1.7294829974678338e-05, |
|
"loss": 0.922, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.8613376835236541, |
|
"grad_norm": 1.3399946689605713, |
|
"learning_rate": 1.4465057501108546e-05, |
|
"loss": 1.0311, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.8613376835236541, |
|
"eval_loss": 0.9933781027793884, |
|
"eval_runtime": 56.8077, |
|
"eval_samples_per_second": 8.608, |
|
"eval_steps_per_second": 1.091, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.8743882544861338, |
|
"grad_norm": 1.2741433382034302, |
|
"learning_rate": 1.1876261209224314e-05, |
|
"loss": 0.9365, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.8874388254486134, |
|
"grad_norm": 1.1750285625457764, |
|
"learning_rate": 9.533043884359615e-06, |
|
"loss": 0.971, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.8874388254486134, |
|
"eval_loss": 0.992695152759552, |
|
"eval_runtime": 24.7252, |
|
"eval_samples_per_second": 19.777, |
|
"eval_steps_per_second": 2.508, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.9004893964110929, |
|
"grad_norm": 1.1639913320541382, |
|
"learning_rate": 7.439571681407053e-06, |
|
"loss": 1.0128, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.9135399673735726, |
|
"grad_norm": 1.2708672285079956, |
|
"learning_rate": 5.59956671754635e-06, |
|
"loss": 0.9957, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.9135399673735726, |
|
"eval_loss": 0.9919000864028931, |
|
"eval_runtime": 24.7098, |
|
"eval_samples_per_second": 19.79, |
|
"eval_steps_per_second": 2.509, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.9265905383360522, |
|
"grad_norm": 1.3160277605056763, |
|
"learning_rate": 4.016300454455945e-06, |
|
"loss": 1.0054, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.9396411092985318, |
|
"grad_norm": 1.325445532798767, |
|
"learning_rate": 2.692587881773478e-06, |
|
"loss": 0.9502, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.9396411092985318, |
|
"eval_loss": 0.9917099475860596, |
|
"eval_runtime": 24.7029, |
|
"eval_samples_per_second": 19.795, |
|
"eval_steps_per_second": 2.51, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.9526916802610114, |
|
"grad_norm": 1.1836706399917603, |
|
"learning_rate": 1.6307825121469164e-06, |
|
"loss": 0.991, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.965742251223491, |
|
"grad_norm": 1.2473053932189941, |
|
"learning_rate": 8.327721967749779e-07, |
|
"loss": 1.0133, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.965742251223491, |
|
"eval_loss": 0.9915127158164978, |
|
"eval_runtime": 24.6188, |
|
"eval_samples_per_second": 19.863, |
|
"eval_steps_per_second": 2.518, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.9787928221859706, |
|
"grad_norm": 1.237483024597168, |
|
"learning_rate": 2.9997576887660913e-07, |
|
"loss": 0.9316, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.9918433931484503, |
|
"grad_norm": 1.279980182647705, |
|
"learning_rate": 3.334052105728458e-08, |
|
"loss": 0.9684, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.9918433931484503, |
|
"eval_loss": 0.9916173219680786, |
|
"eval_runtime": 24.5672, |
|
"eval_samples_per_second": 19.905, |
|
"eval_steps_per_second": 2.524, |
|
"step": 152 |
|
} |
|
], |
|
"logging_steps": 2, |
|
"max_steps": 153, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 4, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.85963932651946e+17, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|