{ "best_metric": 0.9714, "best_model_checkpoint": "models/distilcamembert-allocine\\checkpoint-5500", "epoch": 3.0, "global_step": 7500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.0000000000000001e-07, "loss": 0.7377, "step": 1 }, { "epoch": 0.05, "learning_rate": 1.25e-05, "loss": 0.5782, "step": 125 }, { "epoch": 0.1, "learning_rate": 2.5e-05, "loss": 0.1837, "step": 250 }, { "epoch": 0.15, "learning_rate": 3.7500000000000003e-05, "loss": 0.1665, "step": 375 }, { "epoch": 0.2, "learning_rate": 5e-05, "loss": 0.1504, "step": 500 }, { "epoch": 0.2, "eval_accuracy": 0.95545, "eval_f1": 0.9541784520442272, "eval_loss": 0.1289823055267334, "eval_precision": 0.9614467820499534, "eval_recall": 0.9470191915067374, "eval_runtime": 51.8793, "eval_samples_per_second": 385.51, "eval_steps_per_second": 24.094, "step": 500 }, { "epoch": 0.25, "learning_rate": 4.910714285714286e-05, "loss": 0.1469, "step": 625 }, { "epoch": 0.3, "learning_rate": 4.8214285714285716e-05, "loss": 0.1363, "step": 750 }, { "epoch": 0.35, "learning_rate": 4.732142857142857e-05, "loss": 0.1332, "step": 875 }, { "epoch": 0.4, "learning_rate": 4.642857142857143e-05, "loss": 0.1334, "step": 1000 }, { "epoch": 0.4, "eval_accuracy": 0.96235, "eval_f1": 0.9618984971917219, "eval_loss": 0.10494959354400635, "eval_precision": 0.9536470352162135, "eval_recall": 0.9702939975500204, "eval_runtime": 52.9168, "eval_samples_per_second": 377.951, "eval_steps_per_second": 23.622, "step": 1000 }, { "epoch": 0.45, "learning_rate": 4.5535714285714286e-05, "loss": 0.1286, "step": 1125 }, { "epoch": 0.5, "learning_rate": 4.464285714285715e-05, "loss": 0.1322, "step": 1250 }, { "epoch": 0.55, "learning_rate": 4.375e-05, "loss": 0.1214, "step": 1375 }, { "epoch": 0.6, "learning_rate": 4.2857142857142856e-05, "loss": 0.1158, "step": 1500 }, { "epoch": 0.6, "eval_accuracy": 0.963, "eval_f1": 0.962742926190716, "eval_loss": 0.10521914064884186, "eval_precision": 0.9498311146433539, "eval_recall": 0.9760106165781952, "eval_runtime": 52.1412, "eval_samples_per_second": 383.574, "eval_steps_per_second": 23.973, "step": 1500 }, { "epoch": 0.65, "learning_rate": 4.196428571428572e-05, "loss": 0.1146, "step": 1625 }, { "epoch": 0.7, "learning_rate": 4.107142857142857e-05, "loss": 0.1154, "step": 1750 }, { "epoch": 0.75, "learning_rate": 4.017857142857143e-05, "loss": 0.1246, "step": 1875 }, { "epoch": 0.8, "learning_rate": 3.928571428571429e-05, "loss": 0.1153, "step": 2000 }, { "epoch": 0.8, "eval_accuracy": 0.9661, "eval_f1": 0.9652770664754685, "eval_loss": 0.09492386132478714, "eval_precision": 0.9685508735868448, "eval_recall": 0.9620253164556962, "eval_runtime": 53.0859, "eval_samples_per_second": 376.748, "eval_steps_per_second": 23.547, "step": 2000 }, { "epoch": 0.85, "learning_rate": 3.839285714285715e-05, "loss": 0.1064, "step": 2125 }, { "epoch": 0.9, "learning_rate": 3.7500000000000003e-05, "loss": 0.112, "step": 2250 }, { "epoch": 0.95, "learning_rate": 3.6607142857142853e-05, "loss": 0.1062, "step": 2375 }, { "epoch": 1.0, "learning_rate": 3.571428571428572e-05, "loss": 0.1053, "step": 2500 }, { "epoch": 1.0, "eval_accuracy": 0.9666, "eval_f1": 0.9663374319693611, "eval_loss": 0.09356806427240372, "eval_precision": 0.95421974522293, "eval_recall": 0.9787668436096366, "eval_runtime": 52.3106, "eval_samples_per_second": 382.331, "eval_steps_per_second": 23.896, "step": 2500 }, { "epoch": 1.05, "learning_rate": 3.4821428571428574e-05, "loss": 0.0797, "step": 2625 }, { "epoch": 1.1, "learning_rate": 3.392857142857143e-05, "loss": 0.0684, "step": 2750 }, { "epoch": 1.15, "learning_rate": 3.303571428571429e-05, "loss": 0.0745, "step": 2875 }, { "epoch": 1.2, "learning_rate": 3.2142857142857144e-05, "loss": 0.0755, "step": 3000 }, { "epoch": 1.2, "eval_accuracy": 0.97, "eval_f1": 0.9695400548279013, "eval_loss": 0.09874136000871658, "eval_precision": 0.964350636235104, "eval_recall": 0.9747856267864434, "eval_runtime": 52.4682, "eval_samples_per_second": 381.183, "eval_steps_per_second": 23.824, "step": 3000 }, { "epoch": 1.25, "learning_rate": 3.125e-05, "loss": 0.0771, "step": 3125 }, { "epoch": 1.3, "learning_rate": 3.0357142857142857e-05, "loss": 0.0733, "step": 3250 }, { "epoch": 1.35, "learning_rate": 2.9464285714285718e-05, "loss": 0.069, "step": 3375 }, { "epoch": 1.4, "learning_rate": 2.857142857142857e-05, "loss": 0.0716, "step": 3500 }, { "epoch": 1.4, "eval_accuracy": 0.9688, "eval_f1": 0.9684370257966616, "eval_loss": 0.10781414806842804, "eval_precision": 0.9597954682173652, "eval_recall": 0.9772356063699469, "eval_runtime": 52.2793, "eval_samples_per_second": 382.561, "eval_steps_per_second": 23.91, "step": 3500 }, { "epoch": 1.45, "learning_rate": 2.767857142857143e-05, "loss": 0.0781, "step": 3625 }, { "epoch": 1.5, "learning_rate": 2.6785714285714288e-05, "loss": 0.0725, "step": 3750 }, { "epoch": 1.55, "learning_rate": 2.5892857142857148e-05, "loss": 0.0755, "step": 3875 }, { "epoch": 1.6, "learning_rate": 2.5e-05, "loss": 0.0688, "step": 4000 }, { "epoch": 1.6, "eval_accuracy": 0.9673, "eval_f1": 0.967032967032967, "eval_loss": 0.10506118088960648, "eval_precision": 0.9551882095200159, "eval_recall": 0.9791751735402205, "eval_runtime": 52.2828, "eval_samples_per_second": 382.535, "eval_steps_per_second": 23.908, "step": 4000 }, { "epoch": 1.65, "learning_rate": 2.4107142857142858e-05, "loss": 0.07, "step": 4125 }, { "epoch": 1.7, "learning_rate": 2.3214285714285715e-05, "loss": 0.0735, "step": 4250 }, { "epoch": 1.75, "learning_rate": 2.2321428571428575e-05, "loss": 0.0748, "step": 4375 }, { "epoch": 1.8, "learning_rate": 2.1428571428571428e-05, "loss": 0.0691, "step": 4500 }, { "epoch": 1.8, "eval_accuracy": 0.97095, "eval_f1": 0.9703949044585987, "eval_loss": 0.09402387589216232, "eval_precision": 0.9687658968358938, "eval_recall": 0.972029399755002, "eval_runtime": 52.5632, "eval_samples_per_second": 380.494, "eval_steps_per_second": 23.781, "step": 4500 }, { "epoch": 1.85, "learning_rate": 2.0535714285714285e-05, "loss": 0.0655, "step": 4625 }, { "epoch": 1.9, "learning_rate": 1.9642857142857145e-05, "loss": 0.0714, "step": 4750 }, { "epoch": 1.95, "learning_rate": 1.8750000000000002e-05, "loss": 0.0783, "step": 4875 }, { "epoch": 2.0, "learning_rate": 1.785714285714286e-05, "loss": 0.0733, "step": 5000 }, { "epoch": 2.0, "eval_accuracy": 0.96855, "eval_f1": 0.9683171309122047, "eval_loss": 0.10380826145410538, "eval_precision": 0.9557522123893806, "eval_recall": 0.9812168231931401, "eval_runtime": 52.269, "eval_samples_per_second": 382.636, "eval_steps_per_second": 23.915, "step": 5000 }, { "epoch": 2.05, "learning_rate": 1.6964285714285715e-05, "loss": 0.0461, "step": 5125 }, { "epoch": 2.1, "learning_rate": 1.6071428571428572e-05, "loss": 0.0451, "step": 5250 }, { "epoch": 2.15, "learning_rate": 1.5178571428571429e-05, "loss": 0.0502, "step": 5375 }, { "epoch": 2.2, "learning_rate": 1.4285714285714285e-05, "loss": 0.0476, "step": 5500 }, { "epoch": 2.2, "eval_accuracy": 0.9714, "eval_f1": 0.9709909727152854, "eval_loss": 0.10657692700624466, "eval_precision": 0.9648256399919372, "eval_recall": 0.9772356063699469, "eval_runtime": 51.6788, "eval_samples_per_second": 387.006, "eval_steps_per_second": 24.188, "step": 5500 }, { "epoch": 2.25, "learning_rate": 1.3392857142857144e-05, "loss": 0.0522, "step": 5625 }, { "epoch": 2.3, "learning_rate": 1.25e-05, "loss": 0.0476, "step": 5750 }, { "epoch": 2.35, "learning_rate": 1.1607142857142857e-05, "loss": 0.0404, "step": 5875 }, { "epoch": 2.4, "learning_rate": 1.0714285714285714e-05, "loss": 0.047, "step": 6000 }, { "epoch": 2.4, "eval_accuracy": 0.96895, "eval_f1": 0.9686316108501288, "eval_loss": 0.10983184725046158, "eval_precision": 0.9587041295870413, "eval_recall": 0.9787668436096366, "eval_runtime": 52.0939, "eval_samples_per_second": 383.922, "eval_steps_per_second": 23.995, "step": 6000 }, { "epoch": 2.45, "learning_rate": 9.821428571428573e-06, "loss": 0.0426, "step": 6125 }, { "epoch": 2.5, "learning_rate": 8.92857142857143e-06, "loss": 0.0396, "step": 6250 }, { "epoch": 2.55, "learning_rate": 8.035714285714286e-06, "loss": 0.0481, "step": 6375 }, { "epoch": 2.6, "learning_rate": 7.142857142857143e-06, "loss": 0.0431, "step": 6500 }, { "epoch": 2.6, "eval_accuracy": 0.9711, "eval_f1": 0.9706211243265224, "eval_loss": 0.11103978008031845, "eval_precision": 0.9665924276169265, "eval_recall": 0.9746835443037974, "eval_runtime": 54.7325, "eval_samples_per_second": 365.413, "eval_steps_per_second": 22.838, "step": 6500 }, { "epoch": 2.65, "learning_rate": 6.25e-06, "loss": 0.0396, "step": 6625 }, { "epoch": 2.7, "learning_rate": 5.357142857142857e-06, "loss": 0.0446, "step": 6750 }, { "epoch": 2.75, "learning_rate": 4.464285714285715e-06, "loss": 0.0362, "step": 6875 }, { "epoch": 2.8, "learning_rate": 3.5714285714285714e-06, "loss": 0.0464, "step": 7000 }, { "epoch": 2.8, "eval_accuracy": 0.9697, "eval_f1": 0.9693970306029694, "eval_loss": 0.11486733704805374, "eval_precision": 0.9592244653208075, "eval_recall": 0.9797876684360963, "eval_runtime": 52.2973, "eval_samples_per_second": 382.429, "eval_steps_per_second": 23.902, "step": 7000 }, { "epoch": 2.85, "learning_rate": 2.6785714285714285e-06, "loss": 0.0431, "step": 7125 }, { "epoch": 2.9, "learning_rate": 1.7857142857142857e-06, "loss": 0.0343, "step": 7250 }, { "epoch": 2.95, "learning_rate": 8.928571428571428e-07, "loss": 0.0467, "step": 7375 }, { "epoch": 3.0, "learning_rate": 0.0, "loss": 0.0342, "step": 7500 }, { "epoch": 3.0, "eval_accuracy": 0.9703, "eval_f1": 0.9699270959902794, "eval_loss": 0.11218974739313126, "eval_precision": 0.9621333869023704, "eval_recall": 0.9778481012658228, "eval_runtime": 52.6026, "eval_samples_per_second": 380.209, "eval_steps_per_second": 23.763, "step": 7500 }, { "epoch": 3.0, "step": 7500, "total_flos": 4.553211650587354e+16, "train_loss": 0.08937127710183461, "train_runtime": 4426.0374, "train_samples_per_second": 108.449, "train_steps_per_second": 1.695 }, { "epoch": 3.0, "eval_accuracy": 0.9714, "eval_f1": 0.9709909727152854, "eval_loss": 0.10657692700624466, "eval_precision": 0.9648256399919372, "eval_recall": 0.9772356063699469, "eval_runtime": 52.9584, "eval_samples_per_second": 377.655, "eval_steps_per_second": 23.603, "step": 7500 } ], "max_steps": 7500, "num_train_epochs": 3, "total_flos": 4.553211650587354e+16, "trial_name": null, "trial_params": null }