{ "best_metric": 0.9352444128563532, "best_model_checkpoint": "/scratch/mrahma45/pixel/finetuned_models/canine/canine-base-finetuned-pos-ud-Chinese-GSD/checkpoint-6500", "epoch": 72.0, "global_step": 9000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.8, "learning_rate": 7.92e-05, "loss": 1.4501, "step": 100 }, { "epoch": 1.6, "learning_rate": 7.946845637583894e-05, "loss": 0.3724, "step": 200 }, { "epoch": 2.4, "learning_rate": 7.893154362416109e-05, "loss": 0.2641, "step": 300 }, { "epoch": 3.2, "learning_rate": 7.839463087248322e-05, "loss": 0.2101, "step": 400 }, { "epoch": 4.0, "learning_rate": 7.785771812080537e-05, "loss": 0.1691, "step": 500 }, { "epoch": 4.0, "eval_accuracy": 0.9264787175234936, "eval_loss": 0.23948951065540314, "eval_runtime": 1.9758, "eval_samples_per_second": 253.068, "eval_steps_per_second": 31.887, "step": 500 }, { "epoch": 4.8, "learning_rate": 7.732080536912752e-05, "loss": 0.1239, "step": 600 }, { "epoch": 5.6, "learning_rate": 7.678389261744967e-05, "loss": 0.1021, "step": 700 }, { "epoch": 6.4, "learning_rate": 7.624697986577182e-05, "loss": 0.0813, "step": 800 }, { "epoch": 7.2, "learning_rate": 7.571006711409396e-05, "loss": 0.068, "step": 900 }, { "epoch": 8.0, "learning_rate": 7.517315436241611e-05, "loss": 0.0573, "step": 1000 }, { "epoch": 8.0, "eval_accuracy": 0.9302692884782437, "eval_loss": 0.31052935123443604, "eval_runtime": 1.9708, "eval_samples_per_second": 253.701, "eval_steps_per_second": 31.966, "step": 1000 }, { "epoch": 8.8, "learning_rate": 7.463624161073826e-05, "loss": 0.0423, "step": 1100 }, { "epoch": 9.6, "learning_rate": 7.409932885906041e-05, "loss": 0.0371, "step": 1200 }, { "epoch": 10.4, "learning_rate": 7.356241610738256e-05, "loss": 0.034, "step": 1300 }, { "epoch": 11.2, "learning_rate": 7.30255033557047e-05, "loss": 0.029, "step": 1400 }, { "epoch": 12.0, "learning_rate": 7.248859060402685e-05, "loss": 0.0261, "step": 1500 }, { "epoch": 12.0, "eval_accuracy": 0.9308220800758115, "eval_loss": 0.36600401997566223, "eval_runtime": 2.0064, "eval_samples_per_second": 249.199, "eval_steps_per_second": 31.399, "step": 1500 }, { "epoch": 12.8, "learning_rate": 7.1951677852349e-05, "loss": 0.0224, "step": 1600 }, { "epoch": 13.6, "learning_rate": 7.141476510067115e-05, "loss": 0.0206, "step": 1700 }, { "epoch": 14.4, "learning_rate": 7.08778523489933e-05, "loss": 0.0196, "step": 1800 }, { "epoch": 15.2, "learning_rate": 7.034093959731545e-05, "loss": 0.0183, "step": 1900 }, { "epoch": 16.0, "learning_rate": 6.98040268456376e-05, "loss": 0.0162, "step": 2000 }, { "epoch": 16.0, "eval_accuracy": 0.9320066334991708, "eval_loss": 0.38709351420402527, "eval_runtime": 1.9738, "eval_samples_per_second": 253.318, "eval_steps_per_second": 31.918, "step": 2000 }, { "epoch": 16.8, "learning_rate": 6.926711409395974e-05, "loss": 0.0149, "step": 2100 }, { "epoch": 17.6, "learning_rate": 6.873020134228189e-05, "loss": 0.0126, "step": 2200 }, { "epoch": 18.4, "learning_rate": 6.819328859060404e-05, "loss": 0.0128, "step": 2300 }, { "epoch": 19.2, "learning_rate": 6.765637583892619e-05, "loss": 0.0116, "step": 2400 }, { "epoch": 20.0, "learning_rate": 6.711946308724834e-05, "loss": 0.0121, "step": 2500 }, { "epoch": 20.0, "eval_accuracy": 0.9307431098475875, "eval_loss": 0.4287450313568115, "eval_runtime": 1.9669, "eval_samples_per_second": 254.204, "eval_steps_per_second": 32.03, "step": 2500 }, { "epoch": 20.8, "learning_rate": 6.658255033557048e-05, "loss": 0.0109, "step": 2600 }, { "epoch": 21.6, "learning_rate": 6.604563758389262e-05, "loss": 0.0103, "step": 2700 }, { "epoch": 22.4, "learning_rate": 6.550872483221477e-05, "loss": 0.0086, "step": 2800 }, { "epoch": 23.2, "learning_rate": 6.497181208053692e-05, "loss": 0.0098, "step": 2900 }, { "epoch": 24.0, "learning_rate": 6.443489932885906e-05, "loss": 0.0089, "step": 3000 }, { "epoch": 24.0, "eval_accuracy": 0.9299534075653478, "eval_loss": 0.47967803478240967, "eval_runtime": 1.9764, "eval_samples_per_second": 252.99, "eval_steps_per_second": 31.877, "step": 3000 }, { "epoch": 24.8, "learning_rate": 6.389798657718121e-05, "loss": 0.008, "step": 3100 }, { "epoch": 25.6, "learning_rate": 6.336107382550336e-05, "loss": 0.0087, "step": 3200 }, { "epoch": 26.4, "learning_rate": 6.282416107382551e-05, "loss": 0.0076, "step": 3300 }, { "epoch": 27.2, "learning_rate": 6.228724832214766e-05, "loss": 0.0072, "step": 3400 }, { "epoch": 28.0, "learning_rate": 6.17503355704698e-05, "loss": 0.0076, "step": 3500 }, { "epoch": 28.0, "eval_accuracy": 0.9304272289346917, "eval_loss": 0.4785325825214386, "eval_runtime": 1.9748, "eval_samples_per_second": 253.192, "eval_steps_per_second": 31.902, "step": 3500 }, { "epoch": 28.8, "learning_rate": 6.121342281879195e-05, "loss": 0.0069, "step": 3600 }, { "epoch": 29.6, "learning_rate": 6.06765100671141e-05, "loss": 0.0063, "step": 3700 }, { "epoch": 30.4, "learning_rate": 6.013959731543624e-05, "loss": 0.0057, "step": 3800 }, { "epoch": 31.2, "learning_rate": 5.960268456375839e-05, "loss": 0.0052, "step": 3900 }, { "epoch": 32.0, "learning_rate": 5.906577181208054e-05, "loss": 0.0062, "step": 4000 }, { "epoch": 32.0, "eval_accuracy": 0.9330332464660823, "eval_loss": 0.4929106533527374, "eval_runtime": 1.9685, "eval_samples_per_second": 254.006, "eval_steps_per_second": 32.005, "step": 4000 }, { "epoch": 32.8, "learning_rate": 5.852885906040269e-05, "loss": 0.006, "step": 4100 }, { "epoch": 33.6, "learning_rate": 5.7991946308724836e-05, "loss": 0.0059, "step": 4200 }, { "epoch": 34.4, "learning_rate": 5.7455033557046984e-05, "loss": 0.0052, "step": 4300 }, { "epoch": 35.2, "learning_rate": 5.691812080536913e-05, "loss": 0.0057, "step": 4400 }, { "epoch": 36.0, "learning_rate": 5.638120805369128e-05, "loss": 0.0061, "step": 4500 }, { "epoch": 36.0, "eval_accuracy": 0.9331911869225302, "eval_loss": 0.4974651634693146, "eval_runtime": 1.9873, "eval_samples_per_second": 251.591, "eval_steps_per_second": 31.701, "step": 4500 }, { "epoch": 36.8, "learning_rate": 5.584429530201343e-05, "loss": 0.005, "step": 4600 }, { "epoch": 37.6, "learning_rate": 5.530738255033558e-05, "loss": 0.0056, "step": 4700 }, { "epoch": 38.4, "learning_rate": 5.4770469798657725e-05, "loss": 0.005, "step": 4800 }, { "epoch": 39.2, "learning_rate": 5.4233557046979874e-05, "loss": 0.0049, "step": 4900 }, { "epoch": 40.0, "learning_rate": 5.3696644295302015e-05, "loss": 0.0042, "step": 5000 }, { "epoch": 40.0, "eval_accuracy": 0.9307431098475875, "eval_loss": 0.5192113518714905, "eval_runtime": 1.9958, "eval_samples_per_second": 250.525, "eval_steps_per_second": 31.566, "step": 5000 }, { "epoch": 40.8, "learning_rate": 5.315973154362416e-05, "loss": 0.0034, "step": 5100 }, { "epoch": 41.6, "learning_rate": 5.262281879194631e-05, "loss": 0.0049, "step": 5200 }, { "epoch": 42.4, "learning_rate": 5.208590604026846e-05, "loss": 0.0044, "step": 5300 }, { "epoch": 43.2, "learning_rate": 5.154899328859061e-05, "loss": 0.0034, "step": 5400 }, { "epoch": 44.0, "learning_rate": 5.1012080536912756e-05, "loss": 0.004, "step": 5500 }, { "epoch": 44.0, "eval_accuracy": 0.9303482587064676, "eval_loss": 0.5481078028678894, "eval_runtime": 2.0544, "eval_samples_per_second": 243.381, "eval_steps_per_second": 30.666, "step": 5500 }, { "epoch": 44.8, "learning_rate": 5.0475167785234905e-05, "loss": 0.0041, "step": 5600 }, { "epoch": 45.6, "learning_rate": 4.993825503355705e-05, "loss": 0.0043, "step": 5700 }, { "epoch": 46.4, "learning_rate": 4.94013422818792e-05, "loss": 0.0039, "step": 5800 }, { "epoch": 47.2, "learning_rate": 4.886442953020135e-05, "loss": 0.0036, "step": 5900 }, { "epoch": 48.0, "learning_rate": 4.83275167785235e-05, "loss": 0.0036, "step": 6000 }, { "epoch": 48.0, "eval_accuracy": 0.9341388296612178, "eval_loss": 0.5258472561836243, "eval_runtime": 1.9913, "eval_samples_per_second": 251.096, "eval_steps_per_second": 31.638, "step": 6000 }, { "epoch": 48.8, "learning_rate": 4.779060402684564e-05, "loss": 0.0035, "step": 6100 }, { "epoch": 49.6, "learning_rate": 4.725369127516779e-05, "loss": 0.0036, "step": 6200 }, { "epoch": 50.4, "learning_rate": 4.6716778523489936e-05, "loss": 0.0029, "step": 6300 }, { "epoch": 51.2, "learning_rate": 4.6179865771812084e-05, "loss": 0.0031, "step": 6400 }, { "epoch": 52.0, "learning_rate": 4.564295302013423e-05, "loss": 0.0034, "step": 6500 }, { "epoch": 52.0, "eval_accuracy": 0.9352444128563532, "eval_loss": 0.545491635799408, "eval_runtime": 1.9836, "eval_samples_per_second": 252.069, "eval_steps_per_second": 31.761, "step": 6500 }, { "epoch": 52.8, "learning_rate": 4.510604026845638e-05, "loss": 0.0036, "step": 6600 }, { "epoch": 53.6, "learning_rate": 4.456912751677853e-05, "loss": 0.0042, "step": 6700 }, { "epoch": 54.4, "learning_rate": 4.403221476510068e-05, "loss": 0.0032, "step": 6800 }, { "epoch": 55.2, "learning_rate": 4.3495302013422825e-05, "loss": 0.0024, "step": 6900 }, { "epoch": 56.0, "learning_rate": 4.295838926174497e-05, "loss": 0.003, "step": 7000 }, { "epoch": 56.0, "eval_accuracy": 0.9342967701176657, "eval_loss": 0.5486834049224854, "eval_runtime": 1.9857, "eval_samples_per_second": 251.794, "eval_steps_per_second": 31.726, "step": 7000 }, { "epoch": 56.8, "learning_rate": 4.242147651006712e-05, "loss": 0.0025, "step": 7100 }, { "epoch": 57.6, "learning_rate": 4.188456375838927e-05, "loss": 0.0025, "step": 7200 }, { "epoch": 58.4, "learning_rate": 4.134765100671141e-05, "loss": 0.0026, "step": 7300 }, { "epoch": 59.2, "learning_rate": 4.081073825503356e-05, "loss": 0.0027, "step": 7400 }, { "epoch": 60.0, "learning_rate": 4.027382550335571e-05, "loss": 0.0026, "step": 7500 }, { "epoch": 60.0, "eval_accuracy": 0.9327963357814104, "eval_loss": 0.5487620830535889, "eval_runtime": 1.9816, "eval_samples_per_second": 252.318, "eval_steps_per_second": 31.792, "step": 7500 }, { "epoch": 60.8, "learning_rate": 3.9736912751677856e-05, "loss": 0.0024, "step": 7600 }, { "epoch": 61.6, "learning_rate": 3.9200000000000004e-05, "loss": 0.0024, "step": 7700 }, { "epoch": 62.4, "learning_rate": 3.866308724832215e-05, "loss": 0.0029, "step": 7800 }, { "epoch": 63.2, "learning_rate": 3.81261744966443e-05, "loss": 0.0023, "step": 7900 }, { "epoch": 64.0, "learning_rate": 3.758926174496645e-05, "loss": 0.002, "step": 8000 }, { "epoch": 64.0, "eval_accuracy": 0.9337439785200979, "eval_loss": 0.5773606300354004, "eval_runtime": 1.9747, "eval_samples_per_second": 253.197, "eval_steps_per_second": 31.903, "step": 8000 }, { "epoch": 64.8, "learning_rate": 3.705234899328859e-05, "loss": 0.0023, "step": 8100 }, { "epoch": 65.6, "learning_rate": 3.651543624161074e-05, "loss": 0.0017, "step": 8200 }, { "epoch": 66.4, "learning_rate": 3.597852348993289e-05, "loss": 0.0019, "step": 8300 }, { "epoch": 67.2, "learning_rate": 3.5441610738255035e-05, "loss": 0.0024, "step": 8400 }, { "epoch": 68.0, "learning_rate": 3.490469798657718e-05, "loss": 0.0019, "step": 8500 }, { "epoch": 68.0, "eval_accuracy": 0.9334280976072021, "eval_loss": 0.5738530158996582, "eval_runtime": 1.9799, "eval_samples_per_second": 252.543, "eval_steps_per_second": 31.82, "step": 8500 }, { "epoch": 68.8, "learning_rate": 3.436778523489933e-05, "loss": 0.0017, "step": 8600 }, { "epoch": 69.6, "learning_rate": 3.383087248322148e-05, "loss": 0.0021, "step": 8700 }, { "epoch": 70.4, "learning_rate": 3.329395973154363e-05, "loss": 0.0018, "step": 8800 }, { "epoch": 71.2, "learning_rate": 3.2757046979865776e-05, "loss": 0.002, "step": 8900 }, { "epoch": 72.0, "learning_rate": 3.2220134228187925e-05, "loss": 0.0019, "step": 9000 }, { "epoch": 72.0, "eval_accuracy": 0.9341388296612178, "eval_loss": 0.5780801177024841, "eval_runtime": 1.9799, "eval_samples_per_second": 252.532, "eval_steps_per_second": 31.819, "step": 9000 }, { "epoch": 72.0, "step": 9000, "total_flos": 4.726158700419072e+16, "train_loss": 0.03907104196647803, "train_runtime": 1295.117, "train_samples_per_second": 370.623, "train_steps_per_second": 11.582 } ], "max_steps": 15000, "num_train_epochs": 120, "total_flos": 4.726158700419072e+16, "trial_name": null, "trial_params": null }