{ "best_metric": 0.9216057062149048, "best_model_checkpoint": "/home/datta0/models/lora_final/Qwen2-7B_magiccoder_reverse/checkpoint-4", "epoch": 0.99836867862969, "eval_steps": 4, "global_step": 153, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0065252854812398045, "grad_norm": 6.942051887512207, "learning_rate": 7.5e-05, "loss": 0.8051, "step": 1 }, { "epoch": 0.013050570962479609, "grad_norm": 5.598752498626709, "learning_rate": 0.00015, "loss": 0.9302, "step": 2 }, { "epoch": 0.026101141924959218, "grad_norm": 4.465769290924072, "learning_rate": 0.0003, "loss": 0.8213, "step": 4 }, { "epoch": 0.026101141924959218, "eval_loss": 0.9216057062149048, "eval_runtime": 24.7492, "eval_samples_per_second": 19.758, "eval_steps_per_second": 2.505, "step": 4 }, { "epoch": 0.03915171288743882, "grad_norm": 3.274362564086914, "learning_rate": 0.00029986665273697545, "loss": 0.8625, "step": 6 }, { "epoch": 0.052202283849918436, "grad_norm": 5.245540618896484, "learning_rate": 0.0002994668480344693, "loss": 0.9106, "step": 8 }, { "epoch": 0.052202283849918436, "eval_loss": 0.9644020795822144, "eval_runtime": 24.7265, "eval_samples_per_second": 19.776, "eval_steps_per_second": 2.507, "step": 8 }, { "epoch": 0.06525285481239804, "grad_norm": 2.9444775581359863, "learning_rate": 0.0002988012967306524, "loss": 0.928, "step": 10 }, { "epoch": 0.07830342577487764, "grad_norm": 3.0126848220825195, "learning_rate": 0.000297871182151455, "loss": 0.9506, "step": 12 }, { "epoch": 0.07830342577487764, "eval_loss": 0.9630553722381592, "eval_runtime": 24.7024, "eval_samples_per_second": 19.796, "eval_steps_per_second": 2.51, "step": 12 }, { "epoch": 0.09135399673735727, "grad_norm": 2.676522731781006, "learning_rate": 0.00029667815800665635, "loss": 0.9756, "step": 14 }, { "epoch": 0.10440456769983687, "grad_norm": 2.626117467880249, "learning_rate": 0.0002952243454496488, "loss": 0.9339, "step": 16 }, { "epoch": 0.10440456769983687, "eval_loss": 0.9845271706581116, "eval_runtime": 24.6457, "eval_samples_per_second": 19.841, "eval_steps_per_second": 2.516, "step": 16 }, { "epoch": 0.11745513866231648, "grad_norm": 2.454320192337036, "learning_rate": 0.0002935123293061047, "loss": 0.9374, "step": 18 }, { "epoch": 0.13050570962479607, "grad_norm": 2.6138737201690674, "learning_rate": 0.0002915451534782506, "loss": 1.0039, "step": 20 }, { "epoch": 0.13050570962479607, "eval_loss": 0.9996753334999084, "eval_runtime": 24.5982, "eval_samples_per_second": 19.879, "eval_steps_per_second": 2.521, "step": 20 }, { "epoch": 0.14355628058727568, "grad_norm": 2.4134886264801025, "learning_rate": 0.0002893263155329204, "loss": 0.9932, "step": 22 }, { "epoch": 0.1566068515497553, "grad_norm": 2.5122950077056885, "learning_rate": 0.00028685976048300875, "loss": 0.9095, "step": 24 }, { "epoch": 0.1566068515497553, "eval_loss": 1.0116466283798218, "eval_runtime": 24.4885, "eval_samples_per_second": 19.969, "eval_steps_per_second": 2.532, "step": 24 }, { "epoch": 0.16965742251223492, "grad_norm": 2.41286563873291, "learning_rate": 0.00028414987377338235, "loss": 1.0434, "step": 26 }, { "epoch": 0.18270799347471453, "grad_norm": 2.3061952590942383, "learning_rate": 0.0002812014734837191, "loss": 0.9241, "step": 28 }, { "epoch": 0.18270799347471453, "eval_loss": 1.0198205709457397, "eval_runtime": 73.4147, "eval_samples_per_second": 6.661, "eval_steps_per_second": 0.845, "step": 28 }, { "epoch": 0.19575856443719414, "grad_norm": 2.5358309745788574, "learning_rate": 0.0002780198017621379, "loss": 1.0064, "step": 30 }, { "epoch": 0.20880913539967375, "grad_norm": 2.349397897720337, "learning_rate": 0.00027461051550485116, "loss": 1.0582, "step": 32 }, { "epoch": 0.20880913539967375, "eval_loss": 1.0290634632110596, "eval_runtime": 57.0475, "eval_samples_per_second": 8.572, "eval_steps_per_second": 1.087, "step": 32 }, { "epoch": 0.22185970636215335, "grad_norm": 2.219332456588745, "learning_rate": 0.00027097967629840906, "loss": 0.9762, "step": 34 }, { "epoch": 0.23491027732463296, "grad_norm": 2.143191337585449, "learning_rate": 0.0002671337396424204, "loss": 0.9677, "step": 36 }, { "epoch": 0.23491027732463296, "eval_loss": 1.0306977033615112, "eval_runtime": 55.6789, "eval_samples_per_second": 8.783, "eval_steps_per_second": 1.114, "step": 36 }, { "epoch": 0.24796084828711257, "grad_norm": 2.179919481277466, "learning_rate": 0.00026307954347190983, "loss": 0.9415, "step": 38 }, { "epoch": 0.26101141924959215, "grad_norm": 2.126628875732422, "learning_rate": 0.00025882429599971866, "loss": 1.0044, "step": 40 }, { "epoch": 0.26101141924959215, "eval_loss": 1.03548002243042, "eval_runtime": 56.2682, "eval_samples_per_second": 8.691, "eval_steps_per_second": 1.102, "step": 40 }, { "epoch": 0.2740619902120718, "grad_norm": 1.9612793922424316, "learning_rate": 0.0002543755629005657, "loss": 0.9929, "step": 42 }, { "epoch": 0.28711256117455136, "grad_norm": 2.20817494392395, "learning_rate": 0.0002497412538595537, "loss": 1.0672, "step": 44 }, { "epoch": 0.28711256117455136, "eval_loss": 1.038764476776123, "eval_runtime": 56.5986, "eval_samples_per_second": 8.64, "eval_steps_per_second": 1.095, "step": 44 }, { "epoch": 0.300163132137031, "grad_norm": 2.2024221420288086, "learning_rate": 0.00024492960850903755, "loss": 1.0003, "step": 46 }, { "epoch": 0.3132137030995106, "grad_norm": 1.9989386796951294, "learning_rate": 0.00023994918177885902, "loss": 1.0368, "step": 48 }, { "epoch": 0.3132137030995106, "eval_loss": 1.0401816368103027, "eval_runtime": 55.8152, "eval_samples_per_second": 8.761, "eval_steps_per_second": 1.111, "step": 48 }, { "epoch": 0.3262642740619902, "grad_norm": 2.487414836883545, "learning_rate": 0.0002348088286859938, "loss": 1.0797, "step": 50 }, { "epoch": 0.33931484502446985, "grad_norm": 2.199925661087036, "learning_rate": 0.00022951768859065402, "loss": 0.9603, "step": 52 }, { "epoch": 0.33931484502446985, "eval_loss": 1.042026400566101, "eval_runtime": 56.1668, "eval_samples_per_second": 8.706, "eval_steps_per_second": 1.104, "step": 52 }, { "epoch": 0.3523654159869494, "grad_norm": 2.082878589630127, "learning_rate": 0.0002240851689468395, "loss": 0.9746, "step": 54 }, { "epoch": 0.36541598694942906, "grad_norm": 2.201341390609741, "learning_rate": 0.00021852092857622808, "loss": 0.9709, "step": 56 }, { "epoch": 0.36541598694942906, "eval_loss": 1.0397862195968628, "eval_runtime": 57.0586, "eval_samples_per_second": 8.57, "eval_steps_per_second": 1.087, "step": 56 }, { "epoch": 0.37846655791190864, "grad_norm": 2.0373122692108154, "learning_rate": 0.00021283486049514277, "loss": 1.0489, "step": 58 }, { "epoch": 0.3915171288743883, "grad_norm": 2.22078537940979, "learning_rate": 0.00020703707432513004, "loss": 1.0019, "step": 60 }, { "epoch": 0.3915171288743883, "eval_loss": 1.0403335094451904, "eval_runtime": 56.1745, "eval_samples_per_second": 8.705, "eval_steps_per_second": 1.104, "step": 60 }, { "epoch": 0.40456769983686786, "grad_norm": 2.051842212677002, "learning_rate": 0.00020113787831842152, "loss": 0.9318, "step": 62 }, { "epoch": 0.4176182707993475, "grad_norm": 2.2714943885803223, "learning_rate": 0.0001951477610302378, "loss": 1.0537, "step": 64 }, { "epoch": 0.4176182707993475, "eval_loss": 1.0384303331375122, "eval_runtime": 55.0642, "eval_samples_per_second": 8.881, "eval_steps_per_second": 1.126, "step": 64 }, { "epoch": 0.43066884176182707, "grad_norm": 2.05861234664917, "learning_rate": 0.0001890773726705198, "loss": 1.0197, "step": 66 }, { "epoch": 0.4437194127243067, "grad_norm": 2.1242425441741943, "learning_rate": 0.00018293750616824443, "loss": 1.0365, "step": 68 }, { "epoch": 0.4437194127243067, "eval_loss": 1.0344992876052856, "eval_runtime": 24.7472, "eval_samples_per_second": 19.76, "eval_steps_per_second": 2.505, "step": 68 }, { "epoch": 0.4567699836867863, "grad_norm": 1.84840989112854, "learning_rate": 0.00017673907798199052, "loss": 1.0531, "step": 70 }, { "epoch": 0.4698205546492659, "grad_norm": 2.0347371101379395, "learning_rate": 0.000170493108690874, "loss": 1.0, "step": 72 }, { "epoch": 0.4698205546492659, "eval_loss": 1.0331792831420898, "eval_runtime": 24.7561, "eval_samples_per_second": 19.753, "eval_steps_per_second": 2.504, "step": 72 }, { "epoch": 0.4828711256117455, "grad_norm": 1.9134975671768188, "learning_rate": 0.00016421070340036023, "loss": 1.0346, "step": 74 }, { "epoch": 0.49592169657422513, "grad_norm": 2.098032236099243, "learning_rate": 0.00015790303199779193, "loss": 1.0165, "step": 76 }, { "epoch": 0.49592169657422513, "eval_loss": 1.0305981636047363, "eval_runtime": 24.7106, "eval_samples_per_second": 19.789, "eval_steps_per_second": 2.509, "step": 76 }, { "epoch": 0.5089722675367048, "grad_norm": 2.1426265239715576, "learning_rate": 0.00015158130929273695, "loss": 0.9569, "step": 78 }, { "epoch": 0.5220228384991843, "grad_norm": 1.9341685771942139, "learning_rate": 0.00014525677507746615, "loss": 0.9778, "step": 80 }, { "epoch": 0.5220228384991843, "eval_loss": 1.0271245241165161, "eval_runtime": 24.6768, "eval_samples_per_second": 19.816, "eval_steps_per_second": 2.512, "step": 80 }, { "epoch": 0.5350734094616639, "grad_norm": 1.9822169542312622, "learning_rate": 0.00013894067414301314, "loss": 1.0639, "step": 82 }, { "epoch": 0.5481239804241436, "grad_norm": 1.9346858263015747, "learning_rate": 0.0001326442362863458, "loss": 1.0497, "step": 84 }, { "epoch": 0.5481239804241436, "eval_loss": 1.0228689908981323, "eval_runtime": 24.5949, "eval_samples_per_second": 19.882, "eval_steps_per_second": 2.521, "step": 84 }, { "epoch": 0.5611745513866232, "grad_norm": 1.9368449449539185, "learning_rate": 0.00012637865634419735, "loss": 1.013, "step": 86 }, { "epoch": 0.5742251223491027, "grad_norm": 1.977944016456604, "learning_rate": 0.00012015507428905507, "loss": 0.9652, "step": 88 }, { "epoch": 0.5742251223491027, "eval_loss": 1.020308256149292, "eval_runtime": 24.5264, "eval_samples_per_second": 19.938, "eval_steps_per_second": 2.528, "step": 88 }, { "epoch": 0.5872756933115824, "grad_norm": 2.2710931301116943, "learning_rate": 0.00011398455542269575, "loss": 0.93, "step": 90 }, { "epoch": 0.600326264274062, "grad_norm": 1.9686428308486938, "learning_rate": 0.00010787807070248305, "loss": 1.0435, "step": 92 }, { "epoch": 0.600326264274062, "eval_loss": 1.0184926986694336, "eval_runtime": 24.4479, "eval_samples_per_second": 20.002, "eval_steps_per_second": 2.536, "step": 92 }, { "epoch": 0.6133768352365416, "grad_norm": 2.019303560256958, "learning_rate": 0.00010184647723540557, "loss": 0.9686, "step": 94 }, { "epoch": 0.6264274061990212, "grad_norm": 1.969067096710205, "learning_rate": 9.590049897453668e-05, "loss": 0.9769, "step": 96 }, { "epoch": 0.6264274061990212, "eval_loss": 1.0141024589538574, "eval_runtime": 55.8487, "eval_samples_per_second": 8.756, "eval_steps_per_second": 1.11, "step": 96 }, { "epoch": 0.6394779771615008, "grad_norm": 1.8334566354751587, "learning_rate": 9.005070765223768e-05, "loss": 1.0576, "step": 98 }, { "epoch": 0.6525285481239804, "grad_norm": 2.123537302017212, "learning_rate": 8.430750398400308e-05, "loss": 1.0648, "step": 100 }, { "epoch": 0.6525285481239804, "eval_loss": 1.0104012489318848, "eval_runtime": 56.3038, "eval_samples_per_second": 8.685, "eval_steps_per_second": 1.101, "step": 100 }, { "epoch": 0.6655791190864601, "grad_norm": 1.8945276737213135, "learning_rate": 7.868109917636821e-05, "loss": 0.9739, "step": 102 }, { "epoch": 0.6786296900489397, "grad_norm": 1.9878089427947998, "learning_rate": 7.318149677175675e-05, "loss": 0.9463, "step": 104 }, { "epoch": 0.6786296900489397, "eval_loss": 1.007932424545288, "eval_runtime": 57.0911, "eval_samples_per_second": 8.565, "eval_steps_per_second": 1.086, "step": 104 }, { "epoch": 0.6916802610114192, "grad_norm": 1.9067178964614868, "learning_rate": 6.781847486254697e-05, "loss": 0.963, "step": 106 }, { "epoch": 0.7047308319738989, "grad_norm": 1.9966986179351807, "learning_rate": 6.260156870598071e-05, "loss": 0.9835, "step": 108 }, { "epoch": 0.7047308319738989, "eval_loss": 1.004884958267212, "eval_runtime": 55.902, "eval_samples_per_second": 8.747, "eval_steps_per_second": 1.109, "step": 108 }, { "epoch": 0.7177814029363785, "grad_norm": 1.834557294845581, "learning_rate": 5.7540053770823644e-05, "loss": 0.9684, "step": 110 }, { "epoch": 0.7308319738988581, "grad_norm": 2.008937120437622, "learning_rate": 5.264292924592073e-05, "loss": 0.9584, "step": 112 }, { "epoch": 0.7308319738988581, "eval_loss": 1.001002550125122, "eval_runtime": 55.5718, "eval_samples_per_second": 8.799, "eval_steps_per_second": 1.116, "step": 112 }, { "epoch": 0.7438825448613376, "grad_norm": 1.9365612268447876, "learning_rate": 4.791890203996634e-05, "loss": 0.9816, "step": 114 }, { "epoch": 0.7569331158238173, "grad_norm": 1.697938084602356, "learning_rate": 4.3376371300938786e-05, "loss": 0.9185, "step": 116 }, { "epoch": 0.7569331158238173, "eval_loss": 0.9972716569900513, "eval_runtime": 56.4675, "eval_samples_per_second": 8.66, "eval_steps_per_second": 1.098, "step": 116 }, { "epoch": 0.7699836867862969, "grad_norm": 1.8656069040298462, "learning_rate": 3.9023413482721426e-05, "loss": 0.9744, "step": 118 }, { "epoch": 0.7830342577487766, "grad_norm": 1.8504821062088013, "learning_rate": 3.4867767985462507e-05, "loss": 0.9021, "step": 120 }, { "epoch": 0.7830342577487766, "eval_loss": 0.994992733001709, "eval_runtime": 56.1712, "eval_samples_per_second": 8.706, "eval_steps_per_second": 1.104, "step": 120 }, { "epoch": 0.7960848287112561, "grad_norm": 1.8380029201507568, "learning_rate": 3.09168233952042e-05, "loss": 1.0255, "step": 122 }, { "epoch": 0.8091353996737357, "grad_norm": 1.8176851272583008, "learning_rate": 2.717760434724613e-05, "loss": 0.9684, "step": 124 }, { "epoch": 0.8091353996737357, "eval_loss": 0.9929932951927185, "eval_runtime": 56.7685, "eval_samples_per_second": 8.614, "eval_steps_per_second": 1.092, "step": 124 }, { "epoch": 0.8221859706362153, "grad_norm": 1.7307217121124268, "learning_rate": 2.3656759036600187e-05, "loss": 0.9727, "step": 126 }, { "epoch": 0.835236541598695, "grad_norm": 1.9544731378555298, "learning_rate": 2.0360547397742523e-05, "loss": 0.9461, "step": 128 }, { "epoch": 0.835236541598695, "eval_loss": 0.9912956953048706, "eval_runtime": 56.6869, "eval_samples_per_second": 8.626, "eval_steps_per_second": 1.094, "step": 128 }, { "epoch": 0.8482871125611745, "grad_norm": 1.797264575958252, "learning_rate": 1.7294829974678338e-05, "loss": 0.9235, "step": 130 }, { "epoch": 0.8613376835236541, "grad_norm": 1.994328498840332, "learning_rate": 1.4465057501108546e-05, "loss": 1.0232, "step": 132 }, { "epoch": 0.8613376835236541, "eval_loss": 0.9894663691520691, "eval_runtime": 56.1708, "eval_samples_per_second": 8.706, "eval_steps_per_second": 1.104, "step": 132 }, { "epoch": 0.8743882544861338, "grad_norm": 1.8575083017349243, "learning_rate": 1.1876261209224314e-05, "loss": 0.9372, "step": 134 }, { "epoch": 0.8874388254486134, "grad_norm": 1.6837760210037231, "learning_rate": 9.533043884359615e-06, "loss": 0.9646, "step": 136 }, { "epoch": 0.8874388254486134, "eval_loss": 0.9884896874427795, "eval_runtime": 24.7353, "eval_samples_per_second": 19.769, "eval_steps_per_second": 2.507, "step": 136 }, { "epoch": 0.9004893964110929, "grad_norm": 1.7492249011993408, "learning_rate": 7.439571681407053e-06, "loss": 1.0043, "step": 138 }, { "epoch": 0.9135399673735726, "grad_norm": 1.8709555864334106, "learning_rate": 5.59956671754635e-06, "loss": 0.9912, "step": 140 }, { "epoch": 0.9135399673735726, "eval_loss": 0.9873647093772888, "eval_runtime": 24.7216, "eval_samples_per_second": 19.78, "eval_steps_per_second": 2.508, "step": 140 }, { "epoch": 0.9265905383360522, "grad_norm": 1.91972017288208, "learning_rate": 4.016300454455945e-06, "loss": 0.9987, "step": 142 }, { "epoch": 0.9396411092985318, "grad_norm": 2.0142247676849365, "learning_rate": 2.692587881773478e-06, "loss": 0.9464, "step": 144 }, { "epoch": 0.9396411092985318, "eval_loss": 0.9870482683181763, "eval_runtime": 24.723, "eval_samples_per_second": 19.779, "eval_steps_per_second": 2.508, "step": 144 }, { "epoch": 0.9526916802610114, "grad_norm": 1.8588862419128418, "learning_rate": 1.6307825121469164e-06, "loss": 0.9875, "step": 146 }, { "epoch": 0.965742251223491, "grad_norm": 1.8777186870574951, "learning_rate": 8.327721967749779e-07, "loss": 1.0104, "step": 148 }, { "epoch": 0.965742251223491, "eval_loss": 0.9868296980857849, "eval_runtime": 24.6717, "eval_samples_per_second": 19.82, "eval_steps_per_second": 2.513, "step": 148 }, { "epoch": 0.9787928221859706, "grad_norm": 1.91819167137146, "learning_rate": 2.9997576887660913e-07, "loss": 0.9288, "step": 150 }, { "epoch": 0.9918433931484503, "grad_norm": 1.8686131238937378, "learning_rate": 3.334052105728458e-08, "loss": 0.9624, "step": 152 }, { "epoch": 0.9918433931484503, "eval_loss": 0.9868960976600647, "eval_runtime": 24.5863, "eval_samples_per_second": 19.889, "eval_steps_per_second": 2.522, "step": 152 } ], "logging_steps": 2, "max_steps": 153, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 4, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.85963932651946e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }