{ "best_metric": 0.46305870034683594, "best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-11910", "epoch": 9.997061416397296, "eval_steps": 500, "global_step": 17010, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.29385836027034967, "grad_norm": 1.9668159484863281, "learning_rate": 4.853027630805409e-05, "loss": 0.4174, "step": 500 }, { "epoch": 0.5877167205406993, "grad_norm": 3.246731758117676, "learning_rate": 4.7060552616108174e-05, "loss": 0.2765, "step": 1000 }, { "epoch": 0.8815750808110491, "grad_norm": 2.936720609664917, "learning_rate": 4.559082892416226e-05, "loss": 0.2596, "step": 1500 }, { "epoch": 0.9997061416397296, "eval_accuracy": 0.8358953937837708, "eval_f1": 0.3926881971617123, "eval_loss": 0.431875616312027, "eval_precision": 0.26165460347108727, "eval_recall": 0.786616752456715, "eval_runtime": 16.2639, "eval_samples_per_second": 418.535, "eval_steps_per_second": 52.325, "step": 1701 }, { "epoch": 1.1754334410813987, "grad_norm": 2.302980661392212, "learning_rate": 4.4121105232216346e-05, "loss": 0.2135, "step": 2000 }, { "epoch": 1.4692918013517484, "grad_norm": 3.7150967121124268, "learning_rate": 4.265138154027043e-05, "loss": 0.1839, "step": 2500 }, { "epoch": 1.7631501616220981, "grad_norm": 1.2019191980361938, "learning_rate": 4.118165784832452e-05, "loss": 0.1853, "step": 3000 }, { "epoch": 2.0, "eval_accuracy": 0.8644911466450625, "eval_f1": 0.44846535316981634, "eval_loss": 0.3841294050216675, "eval_precision": 0.3142374154770849, "eval_recall": 0.7828731867103416, "eval_runtime": 16.2484, "eval_samples_per_second": 418.933, "eval_steps_per_second": 52.374, "step": 3403 }, { "epoch": 2.0570085218924477, "grad_norm": 2.090308904647827, "learning_rate": 3.971193415637861e-05, "loss": 0.1684, "step": 3500 }, { "epoch": 2.3508668821627974, "grad_norm": 0.9995286464691162, "learning_rate": 3.824221046443269e-05, "loss": 0.1222, "step": 4000 }, { "epoch": 2.644725242433147, "grad_norm": 1.3526027202606201, "learning_rate": 3.677248677248677e-05, "loss": 0.1277, "step": 4500 }, { "epoch": 2.938583602703497, "grad_norm": 2.2816500663757324, "learning_rate": 3.530276308054086e-05, "loss": 0.1254, "step": 5000 }, { "epoch": 2.9997061416397295, "eval_accuracy": 0.843626202360437, "eval_f1": 0.4435178651613316, "eval_loss": 0.6409665942192078, "eval_precision": 0.30552364118426867, "eval_recall": 0.8088441740758072, "eval_runtime": 16.2306, "eval_samples_per_second": 419.394, "eval_steps_per_second": 52.432, "step": 5104 }, { "epoch": 3.2324419629738466, "grad_norm": 0.6440290808677673, "learning_rate": 3.3833039388594945e-05, "loss": 0.0929, "step": 5500 }, { "epoch": 3.5263003232441963, "grad_norm": 3.7906153202056885, "learning_rate": 3.2363315696649034e-05, "loss": 0.0896, "step": 6000 }, { "epoch": 3.820158683514546, "grad_norm": 1.0953032970428467, "learning_rate": 3.0893592004703116e-05, "loss": 0.0823, "step": 6500 }, { "epoch": 4.0, "eval_accuracy": 0.843598739452527, "eval_f1": 0.4336244267135767, "eval_loss": 0.7241775393486023, "eval_precision": 0.2964012711500472, "eval_recall": 0.8074403369209172, "eval_runtime": 16.5937, "eval_samples_per_second": 410.216, "eval_steps_per_second": 51.285, "step": 6806 }, { "epoch": 4.114017043784895, "grad_norm": 1.7823286056518555, "learning_rate": 2.9423868312757202e-05, "loss": 0.0761, "step": 7000 }, { "epoch": 4.407875404055245, "grad_norm": 1.165720820426941, "learning_rate": 2.795414462081129e-05, "loss": 0.0603, "step": 7500 }, { "epoch": 4.701733764325595, "grad_norm": 4.1130452156066895, "learning_rate": 2.648442092886537e-05, "loss": 0.0589, "step": 8000 }, { "epoch": 4.9955921245959445, "grad_norm": 0.3957385718822479, "learning_rate": 2.501469723691946e-05, "loss": 0.0597, "step": 8500 }, { "epoch": 4.99970614163973, "eval_accuracy": 0.8501829716239504, "eval_f1": 0.4494872643069798, "eval_loss": 0.7755796909332275, "eval_precision": 0.3133474771700028, "eval_recall": 0.7948058025269069, "eval_runtime": 16.1833, "eval_samples_per_second": 420.619, "eval_steps_per_second": 52.585, "step": 8507 }, { "epoch": 5.289450484866294, "grad_norm": 1.0087623596191406, "learning_rate": 2.3544973544973546e-05, "loss": 0.0423, "step": 9000 }, { "epoch": 5.583308845136644, "grad_norm": 2.162200450897217, "learning_rate": 2.2075249853027632e-05, "loss": 0.043, "step": 9500 }, { "epoch": 5.877167205406994, "grad_norm": 1.1820895671844482, "learning_rate": 2.0605526161081718e-05, "loss": 0.0446, "step": 10000 }, { "epoch": 6.0, "eval_accuracy": 0.8483223596130476, "eval_f1": 0.4512908099586152, "eval_loss": 0.8560824394226074, "eval_precision": 0.31372728103023106, "eval_recall": 0.8036967711745437, "eval_runtime": 16.2581, "eval_samples_per_second": 418.684, "eval_steps_per_second": 52.343, "step": 10209 }, { "epoch": 6.171025565677343, "grad_norm": 2.190476894378662, "learning_rate": 1.91358024691358e-05, "loss": 0.0333, "step": 10500 }, { "epoch": 6.464883925947693, "grad_norm": 1.5180469751358032, "learning_rate": 1.766607877718989e-05, "loss": 0.0333, "step": 11000 }, { "epoch": 6.758742286218043, "grad_norm": 1.4279770851135254, "learning_rate": 1.6196355085243976e-05, "loss": 0.0325, "step": 11500 }, { "epoch": 6.99970614163973, "eval_accuracy": 0.8559776451929613, "eval_f1": 0.46305870034683594, "eval_loss": 0.9498798847198486, "eval_precision": 0.32143181611701643, "eval_recall": 0.8277959756668226, "eval_runtime": 16.1891, "eval_samples_per_second": 420.467, "eval_steps_per_second": 52.566, "step": 11910 }, { "epoch": 7.052600646488393, "grad_norm": 0.6558970212936401, "learning_rate": 1.472663139329806e-05, "loss": 0.0289, "step": 12000 }, { "epoch": 7.346459006758742, "grad_norm": 0.26189878582954407, "learning_rate": 1.3256907701352148e-05, "loss": 0.0224, "step": 12500 }, { "epoch": 7.640317367029092, "grad_norm": 1.370686650276184, "learning_rate": 1.1787184009406232e-05, "loss": 0.0231, "step": 13000 }, { "epoch": 7.934175727299442, "grad_norm": 0.36619672179222107, "learning_rate": 1.0317460317460318e-05, "loss": 0.022, "step": 13500 }, { "epoch": 8.0, "eval_accuracy": 0.851041187496138, "eval_f1": 0.45333161323614785, "eval_loss": 1.045249104499817, "eval_precision": 0.3129397096802921, "eval_recall": 0.8221806270472625, "eval_runtime": 16.479, "eval_samples_per_second": 413.071, "eval_steps_per_second": 51.641, "step": 13612 }, { "epoch": 8.22803408756979, "grad_norm": 1.0557399988174438, "learning_rate": 8.847736625514404e-06, "loss": 0.0164, "step": 14000 }, { "epoch": 8.521892447840141, "grad_norm": 4.32920503616333, "learning_rate": 7.37801293356849e-06, "loss": 0.0164, "step": 14500 }, { "epoch": 8.81575080811049, "grad_norm": 0.1674884408712387, "learning_rate": 5.908289241622575e-06, "loss": 0.017, "step": 15000 }, { "epoch": 8.999706141639729, "eval_accuracy": 0.8523937357107058, "eval_f1": 0.45305514157973176, "eval_loss": 1.102479100227356, "eval_precision": 0.31328972130119187, "eval_recall": 0.8179691155825924, "eval_runtime": 16.1914, "eval_samples_per_second": 420.409, "eval_steps_per_second": 52.559, "step": 15313 }, { "epoch": 9.10960916838084, "grad_norm": 3.3734261989593506, "learning_rate": 4.438565549676661e-06, "loss": 0.0171, "step": 15500 }, { "epoch": 9.40346752865119, "grad_norm": 0.19828377664089203, "learning_rate": 2.9688418577307467e-06, "loss": 0.013, "step": 16000 }, { "epoch": 9.69732588892154, "grad_norm": 1.491190791130066, "learning_rate": 1.4991181657848325e-06, "loss": 0.0133, "step": 16500 }, { "epoch": 9.991184249191889, "grad_norm": 2.5641109943389893, "learning_rate": 2.9394473838918286e-08, "loss": 0.0135, "step": 17000 }, { "epoch": 9.997061416397296, "eval_accuracy": 0.8526409018818958, "eval_f1": 0.45504563402161957, "eval_loss": 1.1188451051712036, "eval_precision": 0.3145413870246085, "eval_recall": 0.8224145999064109, "eval_runtime": 16.3165, "eval_samples_per_second": 417.185, "eval_steps_per_second": 52.156, "step": 17010 }, { "epoch": 9.997061416397296, "step": 17010, "total_flos": 6700722040732752.0, "train_loss": 0.08913132946046923, "train_runtime": 3311.208, "train_samples_per_second": 82.218, "train_steps_per_second": 5.137 } ], "logging_steps": 500, "max_steps": 17010, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 6700722040732752.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }