{ "best_metric": 1.0422136783599854, "best_model_checkpoint": "models/deberta-v3-base/1691009101/checkpoint-470", "epoch": 4.986737400530504, "global_step": 470, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "learning_rate": 1.9787234042553193e-05, "loss": 2.1681, "step": 5 }, { "epoch": 0.11, "learning_rate": 1.9574468085106384e-05, "loss": 2.1189, "step": 10 }, { "epoch": 0.16, "learning_rate": 1.9361702127659575e-05, "loss": 2.163, "step": 15 }, { "epoch": 0.21, "learning_rate": 1.914893617021277e-05, "loss": 2.111, "step": 20 }, { "epoch": 0.27, "learning_rate": 1.893617021276596e-05, "loss": 2.1833, "step": 25 }, { "epoch": 0.32, "learning_rate": 1.872340425531915e-05, "loss": 2.0812, "step": 30 }, { "epoch": 0.37, "learning_rate": 1.8510638297872342e-05, "loss": 2.1295, "step": 35 }, { "epoch": 0.42, "learning_rate": 1.8297872340425533e-05, "loss": 2.0143, "step": 40 }, { "epoch": 0.48, "learning_rate": 1.8085106382978724e-05, "loss": 2.1968, "step": 45 }, { "epoch": 0.53, "learning_rate": 1.7872340425531915e-05, "loss": 1.995, "step": 50 }, { "epoch": 0.58, "learning_rate": 1.765957446808511e-05, "loss": 2.0799, "step": 55 }, { "epoch": 0.64, "learning_rate": 1.74468085106383e-05, "loss": 1.9155, "step": 60 }, { "epoch": 0.69, "learning_rate": 1.723404255319149e-05, "loss": 1.9628, "step": 65 }, { "epoch": 0.74, "learning_rate": 1.7021276595744682e-05, "loss": 1.9793, "step": 70 }, { "epoch": 0.8, "learning_rate": 1.6808510638297873e-05, "loss": 1.8198, "step": 75 }, { "epoch": 0.85, "learning_rate": 1.6595744680851064e-05, "loss": 1.6043, "step": 80 }, { "epoch": 0.9, "learning_rate": 1.6382978723404255e-05, "loss": 1.6209, "step": 85 }, { "epoch": 0.95, "learning_rate": 1.6170212765957446e-05, "loss": 1.5354, "step": 90 }, { "epoch": 1.0, "eval_dignity_contempt_accuracy": 0.797153024911032, "eval_loss": 1.4699641466140747, "eval_mae": 0.5818505338078291, "eval_n_samples": 670, "eval_runtime": 13.4181, "eval_samples_per_second": 49.933, "eval_scorable_accuracy": 0.8776119402985074, "eval_steps_per_second": 6.26, "eval_top_1_accuracy": 0.5223880597014925, "eval_top_2_accuracy": 0.8843416370106761, "step": 94 }, { "epoch": 1.01, "learning_rate": 1.595744680851064e-05, "loss": 1.5383, "step": 95 }, { "epoch": 1.06, "learning_rate": 1.5744680851063832e-05, "loss": 1.5934, "step": 100 }, { "epoch": 1.11, "learning_rate": 1.5531914893617023e-05, "loss": 1.5562, "step": 105 }, { "epoch": 1.17, "learning_rate": 1.5319148936170214e-05, "loss": 1.4006, "step": 110 }, { "epoch": 1.22, "learning_rate": 1.5106382978723407e-05, "loss": 1.4296, "step": 115 }, { "epoch": 1.27, "learning_rate": 1.4893617021276596e-05, "loss": 1.3508, "step": 120 }, { "epoch": 1.33, "learning_rate": 1.4680851063829789e-05, "loss": 1.3246, "step": 125 }, { "epoch": 1.38, "learning_rate": 1.4468085106382981e-05, "loss": 1.2573, "step": 130 }, { "epoch": 1.43, "learning_rate": 1.425531914893617e-05, "loss": 1.4491, "step": 135 }, { "epoch": 1.49, "learning_rate": 1.4042553191489363e-05, "loss": 1.2537, "step": 140 }, { "epoch": 1.54, "learning_rate": 1.3829787234042556e-05, "loss": 1.3947, "step": 145 }, { "epoch": 1.59, "learning_rate": 1.3617021276595745e-05, "loss": 1.282, "step": 150 }, { "epoch": 1.64, "learning_rate": 1.3404255319148938e-05, "loss": 1.3167, "step": 155 }, { "epoch": 1.7, "learning_rate": 1.3191489361702127e-05, "loss": 1.3456, "step": 160 }, { "epoch": 1.75, "learning_rate": 1.297872340425532e-05, "loss": 1.2924, "step": 165 }, { "epoch": 1.8, "learning_rate": 1.2765957446808513e-05, "loss": 1.4235, "step": 170 }, { "epoch": 1.86, "learning_rate": 1.2553191489361702e-05, "loss": 1.157, "step": 175 }, { "epoch": 1.91, "learning_rate": 1.2340425531914895e-05, "loss": 1.1315, "step": 180 }, { "epoch": 1.96, "learning_rate": 1.2127659574468087e-05, "loss": 1.2265, "step": 185 }, { "epoch": 1.99, "eval_dignity_contempt_accuracy": 0.8087248322147651, "eval_loss": 1.1996970176696777, "eval_mae": 0.5134228187919463, "eval_n_samples": 670, "eval_runtime": 13.4177, "eval_samples_per_second": 49.934, "eval_scorable_accuracy": 0.9313432835820895, "eval_steps_per_second": 6.26, "eval_top_1_accuracy": 0.5492537313432836, "eval_top_2_accuracy": 0.9328859060402684, "step": 188 }, { "epoch": 2.02, "learning_rate": 1.1914893617021277e-05, "loss": 1.1528, "step": 190 }, { "epoch": 2.07, "learning_rate": 1.170212765957447e-05, "loss": 1.1732, "step": 195 }, { "epoch": 2.12, "learning_rate": 1.1489361702127662e-05, "loss": 1.1946, "step": 200 }, { "epoch": 2.18, "learning_rate": 1.1276595744680851e-05, "loss": 1.0079, "step": 205 }, { "epoch": 2.23, "learning_rate": 1.1063829787234044e-05, "loss": 0.9267, "step": 210 }, { "epoch": 2.28, "learning_rate": 1.0851063829787233e-05, "loss": 1.06, "step": 215 }, { "epoch": 2.33, "learning_rate": 1.0638297872340426e-05, "loss": 1.0085, "step": 220 }, { "epoch": 2.39, "learning_rate": 1.0425531914893619e-05, "loss": 1.0053, "step": 225 }, { "epoch": 2.44, "learning_rate": 1.0212765957446808e-05, "loss": 1.0857, "step": 230 }, { "epoch": 2.49, "learning_rate": 1e-05, "loss": 0.8903, "step": 235 }, { "epoch": 2.55, "learning_rate": 9.787234042553192e-06, "loss": 1.1211, "step": 240 }, { "epoch": 2.6, "learning_rate": 9.574468085106385e-06, "loss": 1.0351, "step": 245 }, { "epoch": 2.65, "learning_rate": 9.361702127659576e-06, "loss": 0.9636, "step": 250 }, { "epoch": 2.71, "learning_rate": 9.148936170212767e-06, "loss": 0.9087, "step": 255 }, { "epoch": 2.76, "learning_rate": 8.936170212765958e-06, "loss": 0.9624, "step": 260 }, { "epoch": 2.81, "learning_rate": 8.72340425531915e-06, "loss": 0.9403, "step": 265 }, { "epoch": 2.86, "learning_rate": 8.510638297872341e-06, "loss": 0.9479, "step": 270 }, { "epoch": 2.92, "learning_rate": 8.297872340425532e-06, "loss": 0.8829, "step": 275 }, { "epoch": 2.97, "learning_rate": 8.085106382978723e-06, "loss": 0.992, "step": 280 }, { "epoch": 2.99, "eval_dignity_contempt_accuracy": 0.8266199649737302, "eval_loss": 1.091895580291748, "eval_mae": 0.4658493870402802, "eval_n_samples": 670, "eval_runtime": 13.4128, "eval_samples_per_second": 49.952, "eval_scorable_accuracy": 0.9059701492537313, "eval_steps_per_second": 6.263, "eval_top_1_accuracy": 0.5776119402985075, "eval_top_2_accuracy": 0.9316987740805605, "step": 282 }, { "epoch": 3.02, "learning_rate": 7.872340425531916e-06, "loss": 0.9552, "step": 285 }, { "epoch": 3.08, "learning_rate": 7.659574468085107e-06, "loss": 0.8174, "step": 290 }, { "epoch": 3.13, "learning_rate": 7.446808510638298e-06, "loss": 0.766, "step": 295 }, { "epoch": 3.18, "learning_rate": 7.234042553191491e-06, "loss": 0.7773, "step": 300 }, { "epoch": 3.24, "learning_rate": 7.021276595744682e-06, "loss": 0.9016, "step": 305 }, { "epoch": 3.29, "learning_rate": 6.808510638297873e-06, "loss": 0.8505, "step": 310 }, { "epoch": 3.34, "learning_rate": 6.595744680851064e-06, "loss": 0.9318, "step": 315 }, { "epoch": 3.4, "learning_rate": 6.382978723404256e-06, "loss": 0.7373, "step": 320 }, { "epoch": 3.45, "learning_rate": 6.170212765957447e-06, "loss": 0.7445, "step": 325 }, { "epoch": 3.5, "learning_rate": 5.957446808510638e-06, "loss": 0.8805, "step": 330 }, { "epoch": 3.55, "learning_rate": 5.744680851063831e-06, "loss": 0.7931, "step": 335 }, { "epoch": 3.61, "learning_rate": 5.531914893617022e-06, "loss": 0.8024, "step": 340 }, { "epoch": 3.66, "learning_rate": 5.319148936170213e-06, "loss": 0.7106, "step": 345 }, { "epoch": 3.71, "learning_rate": 5.106382978723404e-06, "loss": 0.7161, "step": 350 }, { "epoch": 3.77, "learning_rate": 4.893617021276596e-06, "loss": 0.7998, "step": 355 }, { "epoch": 3.82, "learning_rate": 4.680851063829788e-06, "loss": 0.6772, "step": 360 }, { "epoch": 3.87, "learning_rate": 4.468085106382979e-06, "loss": 0.7802, "step": 365 }, { "epoch": 3.93, "learning_rate": 4.255319148936171e-06, "loss": 0.7525, "step": 370 }, { "epoch": 3.98, "learning_rate": 4.042553191489362e-06, "loss": 0.7967, "step": 375 }, { "epoch": 4.0, "eval_dignity_contempt_accuracy": 0.8305084745762712, "eval_loss": 1.0538846254348755, "eval_mae": 0.43559322033898307, "eval_n_samples": 670, "eval_runtime": 13.4019, "eval_samples_per_second": 49.993, "eval_scorable_accuracy": 0.926865671641791, "eval_steps_per_second": 6.268, "eval_top_1_accuracy": 0.6044776119402985, "eval_top_2_accuracy": 0.9372881355932203, "step": 377 }, { "epoch": 4.03, "learning_rate": 3.8297872340425535e-06, "loss": 0.7263, "step": 380 }, { "epoch": 4.08, "learning_rate": 3.6170212765957453e-06, "loss": 0.7746, "step": 385 }, { "epoch": 4.14, "learning_rate": 3.4042553191489363e-06, "loss": 0.7231, "step": 390 }, { "epoch": 4.19, "learning_rate": 3.191489361702128e-06, "loss": 0.7129, "step": 395 }, { "epoch": 4.24, "learning_rate": 2.978723404255319e-06, "loss": 0.7609, "step": 400 }, { "epoch": 4.3, "learning_rate": 2.765957446808511e-06, "loss": 0.7708, "step": 405 }, { "epoch": 4.35, "learning_rate": 2.553191489361702e-06, "loss": 0.6699, "step": 410 }, { "epoch": 4.4, "learning_rate": 2.340425531914894e-06, "loss": 0.5928, "step": 415 }, { "epoch": 4.46, "learning_rate": 2.1276595744680853e-06, "loss": 0.6845, "step": 420 }, { "epoch": 4.51, "learning_rate": 1.9148936170212767e-06, "loss": 0.6752, "step": 425 }, { "epoch": 4.56, "learning_rate": 1.7021276595744682e-06, "loss": 0.5874, "step": 430 }, { "epoch": 4.62, "learning_rate": 1.4893617021276596e-06, "loss": 0.6781, "step": 435 }, { "epoch": 4.67, "learning_rate": 1.276595744680851e-06, "loss": 0.6023, "step": 440 }, { "epoch": 4.72, "learning_rate": 1.0638297872340427e-06, "loss": 0.5626, "step": 445 }, { "epoch": 4.77, "learning_rate": 8.510638297872341e-07, "loss": 0.6321, "step": 450 }, { "epoch": 4.83, "learning_rate": 6.382978723404255e-07, "loss": 0.6318, "step": 455 }, { "epoch": 4.88, "learning_rate": 4.2553191489361704e-07, "loss": 0.5816, "step": 460 }, { "epoch": 4.93, "learning_rate": 2.1276595744680852e-07, "loss": 0.7118, "step": 465 }, { "epoch": 4.99, "learning_rate": 0.0, "loss": 0.6383, "step": 470 }, { "epoch": 4.99, "eval_dignity_contempt_accuracy": 0.8398637137989778, "eval_loss": 1.0422136783599854, "eval_mae": 0.42759795570698467, "eval_n_samples": 670, "eval_runtime": 13.4143, "eval_samples_per_second": 49.947, "eval_scorable_accuracy": 0.9223880597014925, "eval_steps_per_second": 6.262, "eval_top_1_accuracy": 0.6119402985074627, "eval_top_2_accuracy": 0.9369676320272572, "step": 470 } ], "max_steps": 470, "num_train_epochs": 5, "total_flos": 7904494608261120.0, "trial_name": null, "trial_params": null }