{ "best_metric": 0.17586912065439672, "best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-3403", "epoch": 9.997061416397296, "eval_steps": 500, "global_step": 17010, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.29385836027034967, "grad_norm": 1.9668159484863281, "learning_rate": 4.853027630805409e-05, "loss": 0.4174, "step": 500 }, { "epoch": 0.5877167205406993, "grad_norm": 3.246731758117676, "learning_rate": 4.7060552616108174e-05, "loss": 0.2765, "step": 1000 }, { "epoch": 0.8815750808110491, "grad_norm": 2.936720609664917, "learning_rate": 4.559082892416226e-05, "loss": 0.2596, "step": 1500 }, { "epoch": 0.9997061416397296, "eval_accuracy": 0.7671626010120082, "eval_f1": 0.14622946114658822, "eval_loss": 0.7913026213645935, "eval_precision": 0.0793057825511713, "eval_recall": 0.9365808823529411, "eval_runtime": 16.052, "eval_samples_per_second": 424.06, "eval_steps_per_second": 53.015, "step": 1701 }, { "epoch": 1.1754334410813987, "grad_norm": 2.302980661392212, "learning_rate": 4.4121105232216346e-05, "loss": 0.2135, "step": 2000 }, { "epoch": 1.4692918013517484, "grad_norm": 3.7150967121124268, "learning_rate": 4.265138154027043e-05, "loss": 0.1839, "step": 2500 }, { "epoch": 1.7631501616220981, "grad_norm": 1.2019191980361938, "learning_rate": 4.118165784832452e-05, "loss": 0.1853, "step": 3000 }, { "epoch": 2.0, "eval_accuracy": 0.8099635429897495, "eval_f1": 0.17586912065439672, "eval_loss": 0.6631014347076416, "eval_precision": 0.09691960931630353, "eval_recall": 0.9485294117647058, "eval_runtime": 15.875, "eval_samples_per_second": 428.788, "eval_steps_per_second": 53.606, "step": 3403 }, { "epoch": 2.0570085218924477, "grad_norm": 2.090308904647827, "learning_rate": 3.971193415637861e-05, "loss": 0.1684, "step": 3500 }, { "epoch": 2.3508668821627974, "grad_norm": 0.9995286464691162, "learning_rate": 3.824221046443269e-05, "loss": 0.1222, "step": 4000 }, { "epoch": 2.644725242433147, "grad_norm": 1.3526027202606201, "learning_rate": 3.677248677248677e-05, "loss": 0.1277, "step": 4500 }, { "epoch": 2.938583602703497, "grad_norm": 2.2816500663757324, "learning_rate": 3.530276308054086e-05, "loss": 0.1254, "step": 5000 }, { "epoch": 2.9997061416397295, "eval_accuracy": 0.7754975935626944, "eval_f1": 0.16528259292106748, "eval_loss": 1.072888731956482, "eval_precision": 0.0905877154220062, "eval_recall": 0.9420955882352942, "eval_runtime": 15.8836, "eval_samples_per_second": 428.556, "eval_steps_per_second": 53.577, "step": 5104 }, { "epoch": 3.2324419629738466, "grad_norm": 0.6440290808677673, "learning_rate": 3.3833039388594945e-05, "loss": 0.0929, "step": 5500 }, { "epoch": 3.5263003232441963, "grad_norm": 3.7906153202056885, "learning_rate": 3.2363315696649034e-05, "loss": 0.0896, "step": 6000 }, { "epoch": 3.820158683514546, "grad_norm": 1.0953032970428467, "learning_rate": 3.0893592004703116e-05, "loss": 0.0823, "step": 6500 }, { "epoch": 4.0, "eval_accuracy": 0.7719411469883488, "eval_f1": 0.16243814311523055, "eval_loss": 1.2567578554153442, "eval_precision": 0.08880872627329726, "eval_recall": 0.9503676470588235, "eval_runtime": 15.8976, "eval_samples_per_second": 428.179, "eval_steps_per_second": 53.53, "step": 6806 }, { "epoch": 4.114017043784895, "grad_norm": 1.7823286056518555, "learning_rate": 2.9423868312757202e-05, "loss": 0.0761, "step": 7000 }, { "epoch": 4.407875404055245, "grad_norm": 1.165720820426941, "learning_rate": 2.795414462081129e-05, "loss": 0.0603, 
"step": 7500 }, { "epoch": 4.701733764325595, "grad_norm": 4.1130452156066895, "learning_rate": 2.648442092886537e-05, "loss": 0.0589, "step": 8000 }, { "epoch": 4.9955921245959445, "grad_norm": 0.3957385718822479, "learning_rate": 2.501469723691946e-05, "loss": 0.0597, "step": 8500 }, { "epoch": 4.99970614163973, "eval_accuracy": 0.7836540772119656, "eval_f1": 0.17101181993461315, "eval_loss": 1.1907650232315063, "eval_precision": 0.0940872613227562, "eval_recall": 0.9375, "eval_runtime": 16.019, "eval_samples_per_second": 424.932, "eval_steps_per_second": 53.124, "step": 8507 }, { "epoch": 5.289450484866294, "grad_norm": 1.0087623596191406, "learning_rate": 2.3544973544973546e-05, "loss": 0.0423, "step": 9000 }, { "epoch": 5.583308845136644, "grad_norm": 2.162200450897217, "learning_rate": 2.2075249853027632e-05, "loss": 0.043, "step": 9500 }, { "epoch": 5.877167205406994, "grad_norm": 1.1820895671844482, "learning_rate": 2.0605526161081718e-05, "loss": 0.0446, "step": 10000 }, { "epoch": 6.0, "eval_accuracy": 0.7811686840461102, "eval_f1": 0.1718036055495555, "eval_loss": 1.384422779083252, "eval_precision": 0.09443784820531555, "eval_recall": 0.9503676470588235, "eval_runtime": 15.9006, "eval_samples_per_second": 428.098, "eval_steps_per_second": 53.52, "step": 10209 }, { "epoch": 6.171025565677343, "grad_norm": 2.190476894378662, "learning_rate": 1.91358024691358e-05, "loss": 0.0333, "step": 10500 }, { "epoch": 6.464883925947693, "grad_norm": 1.5180469751358032, "learning_rate": 1.766607877718989e-05, "loss": 0.0333, "step": 11000 }, { "epoch": 6.758742286218043, "grad_norm": 1.4279770851135254, "learning_rate": 1.6196355085243976e-05, "loss": 0.0325, "step": 11500 }, { "epoch": 6.99970614163973, "eval_accuracy": 0.7866406684471785, "eval_f1": 0.1704836709384043, "eval_loss": 1.5515447854995728, "eval_precision": 0.09366766603070772, "eval_recall": 0.9476102941176471, "eval_runtime": 15.8267, "eval_samples_per_second": 430.095, "eval_steps_per_second": 53.77, "step": 11910 }, { "epoch": 7.052600646488393, "grad_norm": 0.6558970212936401, "learning_rate": 1.472663139329806e-05, "loss": 0.0289, "step": 12000 }, { "epoch": 7.346459006758742, "grad_norm": 0.26189878582954407, "learning_rate": 1.3256907701352148e-05, "loss": 0.0224, "step": 12500 }, { "epoch": 7.640317367029092, "grad_norm": 1.370686650276184, "learning_rate": 1.1787184009406232e-05, "loss": 0.0231, "step": 13000 }, { "epoch": 7.934175727299442, "grad_norm": 0.36619672179222107, "learning_rate": 1.0317460317460318e-05, "loss": 0.022, "step": 13500 }, { "epoch": 8.0, "eval_accuracy": 0.7842582611859856, "eval_f1": 0.1688722903304376, "eval_loss": 1.6299601793289185, "eval_precision": 0.09261733012734882, "eval_recall": 0.9558823529411765, "eval_runtime": 15.9057, "eval_samples_per_second": 427.961, "eval_steps_per_second": 53.503, "step": 13612 }, { "epoch": 8.22803408756979, "grad_norm": 1.0557399988174438, "learning_rate": 8.847736625514404e-06, "loss": 0.0164, "step": 14000 }, { "epoch": 8.521892447840141, "grad_norm": 4.32920503616333, "learning_rate": 7.37801293356849e-06, "loss": 0.0164, "step": 14500 }, { "epoch": 8.81575080811049, "grad_norm": 0.1674884408712387, "learning_rate": 5.908289241622575e-06, "loss": 0.017, "step": 15000 }, { "epoch": 8.999706141639729, "eval_accuracy": 0.7844848301762433, "eval_f1": 0.16934759532946844, "eval_loss": 1.7459304332733154, "eval_precision": 0.09292947396720136, "eval_recall": 0.953125, "eval_runtime": 16.0915, "eval_samples_per_second": 423.018, "eval_steps_per_second": 
52.885, "step": 15313 }, { "epoch": 9.10960916838084, "grad_norm": 3.3734261989593506, "learning_rate": 4.438565549676661e-06, "loss": 0.0171, "step": 15500 }, { "epoch": 9.40346752865119, "grad_norm": 0.19828377664089203, "learning_rate": 2.9688418577307467e-06, "loss": 0.013, "step": 16000 }, { "epoch": 9.69732588892154, "grad_norm": 1.491190791130066, "learning_rate": 1.4991181657848325e-06, "loss": 0.0133, "step": 16500 }, { "epoch": 9.991184249191889, "grad_norm": 2.5641109943389893, "learning_rate": 2.9394473838918286e-08, "loss": 0.0135, "step": 17000 }, { "epoch": 9.997061416397296, "eval_accuracy": 0.7845534874460183, "eval_f1": 0.16896354888689555, "eval_loss": 1.7860842943191528, "eval_precision": 0.09270693512304251, "eval_recall": 0.9522058823529411, "eval_runtime": 15.9319, "eval_samples_per_second": 427.256, "eval_steps_per_second": 53.415, "step": 17010 }, { "epoch": 9.997061416397296, "step": 17010, "total_flos": 6700722040732752.0, "train_loss": 0.08913132946046923, "train_runtime": 3337.712, "train_samples_per_second": 81.565, "train_steps_per_second": 5.096 } ], "logging_steps": 500, "max_steps": 17010, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 6700722040732752.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }