| { |
| "best_metric": 1.1135584115982056, |
| "best_model_checkpoint": "./model/emotion_classification/checkpoint-301", |
| "epoch": 30.0, |
| "eval_steps": 500, |
| "global_step": 1290, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.0, |
| "learning_rate": 9.972609476841367e-05, |
| "loss": 1.9172, |
| "step": 43 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.43333333333333335, |
| "eval_f1": 0.32631946371225834, |
| "eval_loss": 1.5750515460968018, |
| "eval_runtime": 4.0633, |
| "eval_samples_per_second": 29.533, |
| "eval_steps_per_second": 1.969, |
| "step": 43 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 9.890738003669029e-05, |
| "loss": 1.4505, |
| "step": 86 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.5333333333333333, |
| "eval_f1": 0.4650626007978949, |
| "eval_loss": 1.3041330575942993, |
| "eval_runtime": 4.1206, |
| "eval_samples_per_second": 29.122, |
| "eval_steps_per_second": 1.941, |
| "step": 86 |
| }, |
| { |
| "epoch": 3.0, |
| "learning_rate": 9.755282581475769e-05, |
| "loss": 1.1121, |
| "step": 129 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.48333333333333334, |
| "eval_f1": 0.46841408327694595, |
| "eval_loss": 1.2902252674102783, |
| "eval_runtime": 4.2082, |
| "eval_samples_per_second": 28.516, |
| "eval_steps_per_second": 1.901, |
| "step": 129 |
| }, |
| { |
| "epoch": 4.0, |
| "learning_rate": 9.567727288213005e-05, |
| "loss": 0.8491, |
| "step": 172 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.5166666666666667, |
| "eval_f1": 0.49163216814145605, |
| "eval_loss": 1.2308896780014038, |
| "eval_runtime": 4.2459, |
| "eval_samples_per_second": 28.263, |
| "eval_steps_per_second": 1.884, |
| "step": 172 |
| }, |
| { |
| "epoch": 5.0, |
| "learning_rate": 9.330127018922194e-05, |
| "loss": 0.6168, |
| "step": 215 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.5583333333333333, |
| "eval_f1": 0.5309547133900583, |
| "eval_loss": 1.2573224306106567, |
| "eval_runtime": 4.1217, |
| "eval_samples_per_second": 29.114, |
| "eval_steps_per_second": 1.941, |
| "step": 215 |
| }, |
| { |
| "epoch": 6.0, |
| "learning_rate": 9.045084971874738e-05, |
| "loss": 0.3953, |
| "step": 258 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_accuracy": 0.575, |
| "eval_f1": 0.5400530867590878, |
| "eval_loss": 1.1502172946929932, |
| "eval_runtime": 4.3017, |
| "eval_samples_per_second": 27.896, |
| "eval_steps_per_second": 1.86, |
| "step": 258 |
| }, |
| { |
| "epoch": 7.0, |
| "learning_rate": 8.715724127386972e-05, |
| "loss": 0.3048, |
| "step": 301 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_accuracy": 0.65, |
| "eval_f1": 0.6231481481481482, |
| "eval_loss": 1.1135584115982056, |
| "eval_runtime": 4.1683, |
| "eval_samples_per_second": 28.789, |
| "eval_steps_per_second": 1.919, |
| "step": 301 |
| }, |
| { |
| "epoch": 8.0, |
| "learning_rate": 8.345653031794292e-05, |
| "loss": 0.1875, |
| "step": 344 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_accuracy": 0.5666666666666667, |
| "eval_f1": 0.5597741659473293, |
| "eval_loss": 1.4224319458007812, |
| "eval_runtime": 4.2212, |
| "eval_samples_per_second": 28.428, |
| "eval_steps_per_second": 1.895, |
| "step": 344 |
| }, |
| { |
| "epoch": 9.0, |
| "learning_rate": 7.938926261462366e-05, |
| "loss": 0.1277, |
| "step": 387 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_accuracy": 0.6166666666666667, |
| "eval_f1": 0.6011135939243728, |
| "eval_loss": 1.346667766571045, |
| "eval_runtime": 3.955, |
| "eval_samples_per_second": 30.341, |
| "eval_steps_per_second": 2.023, |
| "step": 387 |
| }, |
| { |
| "epoch": 10.0, |
| "learning_rate": 7.500000000000001e-05, |
| "loss": 0.1123, |
| "step": 430 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_accuracy": 0.5833333333333334, |
| "eval_f1": 0.5656809749645115, |
| "eval_loss": 1.583767294883728, |
| "eval_runtime": 4.0704, |
| "eval_samples_per_second": 29.481, |
| "eval_steps_per_second": 1.965, |
| "step": 430 |
| }, |
| { |
| "epoch": 11.0, |
| "learning_rate": 7.033683215379002e-05, |
| "loss": 0.1123, |
| "step": 473 |
| }, |
| { |
| "epoch": 11.0, |
| "eval_accuracy": 0.5833333333333334, |
| "eval_f1": 0.5549914858886633, |
| "eval_loss": 1.50627601146698, |
| "eval_runtime": 4.0673, |
| "eval_samples_per_second": 29.504, |
| "eval_steps_per_second": 1.967, |
| "step": 473 |
| }, |
| { |
| "epoch": 12.0, |
| "learning_rate": 6.545084971874738e-05, |
| "loss": 0.0694, |
| "step": 516 |
| }, |
| { |
| "epoch": 12.0, |
| "eval_accuracy": 0.55, |
| "eval_f1": 0.5320146001860588, |
| "eval_loss": 1.7733001708984375, |
| "eval_runtime": 4.0123, |
| "eval_samples_per_second": 29.908, |
| "eval_steps_per_second": 1.994, |
| "step": 516 |
| }, |
| { |
| "epoch": 13.0, |
| "learning_rate": 6.0395584540887963e-05, |
| "loss": 0.0499, |
| "step": 559 |
| }, |
| { |
| "epoch": 13.0, |
| "eval_accuracy": 0.5833333333333334, |
| "eval_f1": 0.5536173850790786, |
| "eval_loss": 1.6328585147857666, |
| "eval_runtime": 4.0403, |
| "eval_samples_per_second": 29.701, |
| "eval_steps_per_second": 1.98, |
| "step": 559 |
| }, |
| { |
| "epoch": 14.0, |
| "learning_rate": 5.522642316338268e-05, |
| "loss": 0.0367, |
| "step": 602 |
| }, |
| { |
| "epoch": 14.0, |
| "eval_accuracy": 0.5833333333333334, |
| "eval_f1": 0.5684853336495889, |
| "eval_loss": 1.6878242492675781, |
| "eval_runtime": 3.9845, |
| "eval_samples_per_second": 30.116, |
| "eval_steps_per_second": 2.008, |
| "step": 602 |
| }, |
| { |
| "epoch": 15.0, |
| "learning_rate": 5e-05, |
| "loss": 0.0291, |
| "step": 645 |
| }, |
| { |
| "epoch": 15.0, |
| "eval_accuracy": 0.575, |
| "eval_f1": 0.5392005606664051, |
| "eval_loss": 1.685531735420227, |
| "eval_runtime": 3.9802, |
| "eval_samples_per_second": 30.149, |
| "eval_steps_per_second": 2.01, |
| "step": 645 |
| }, |
| { |
| "epoch": 16.0, |
| "learning_rate": 4.477357683661734e-05, |
| "loss": 0.0284, |
| "step": 688 |
| }, |
| { |
| "epoch": 16.0, |
| "eval_accuracy": 0.6083333333333333, |
| "eval_f1": 0.5880117663277057, |
| "eval_loss": 1.7869312763214111, |
| "eval_runtime": 4.0749, |
| "eval_samples_per_second": 29.448, |
| "eval_steps_per_second": 1.963, |
| "step": 688 |
| }, |
| { |
| "epoch": 17.0, |
| "learning_rate": 3.960441545911204e-05, |
| "loss": 0.0316, |
| "step": 731 |
| }, |
| { |
| "epoch": 17.0, |
| "eval_accuracy": 0.5916666666666667, |
| "eval_f1": 0.566969594710963, |
| "eval_loss": 1.5830930471420288, |
| "eval_runtime": 4.0457, |
| "eval_samples_per_second": 29.661, |
| "eval_steps_per_second": 1.977, |
| "step": 731 |
| }, |
| { |
| "epoch": 18.0, |
| "learning_rate": 3.4549150281252636e-05, |
| "loss": 0.0273, |
| "step": 774 |
| }, |
| { |
| "epoch": 18.0, |
| "eval_accuracy": 0.625, |
| "eval_f1": 0.598442406793843, |
| "eval_loss": 1.5932706594467163, |
| "eval_runtime": 4.1291, |
| "eval_samples_per_second": 29.062, |
| "eval_steps_per_second": 1.937, |
| "step": 774 |
| }, |
| { |
| "epoch": 19.0, |
| "learning_rate": 2.9663167846209998e-05, |
| "loss": 0.0234, |
| "step": 817 |
| }, |
| { |
| "epoch": 19.0, |
| "eval_accuracy": 0.5833333333333334, |
| "eval_f1": 0.5652118457947398, |
| "eval_loss": 1.7830352783203125, |
| "eval_runtime": 4.2401, |
| "eval_samples_per_second": 28.301, |
| "eval_steps_per_second": 1.887, |
| "step": 817 |
| }, |
| { |
| "epoch": 20.0, |
| "learning_rate": 2.500000000000001e-05, |
| "loss": 0.0194, |
| "step": 860 |
| }, |
| { |
| "epoch": 20.0, |
| "eval_accuracy": 0.6083333333333333, |
| "eval_f1": 0.5878385871868214, |
| "eval_loss": 1.6804471015930176, |
| "eval_runtime": 3.9914, |
| "eval_samples_per_second": 30.065, |
| "eval_steps_per_second": 2.004, |
| "step": 860 |
| }, |
| { |
| "epoch": 21.0, |
| "learning_rate": 2.061073738537635e-05, |
| "loss": 0.0214, |
| "step": 903 |
| }, |
| { |
| "epoch": 21.0, |
| "eval_accuracy": 0.6, |
| "eval_f1": 0.5700534489379734, |
| "eval_loss": 1.596238374710083, |
| "eval_runtime": 4.0432, |
| "eval_samples_per_second": 29.68, |
| "eval_steps_per_second": 1.979, |
| "step": 903 |
| }, |
| { |
| "epoch": 22.0, |
| "learning_rate": 1.6543469682057106e-05, |
| "loss": 0.0204, |
| "step": 946 |
| }, |
| { |
| "epoch": 22.0, |
| "eval_accuracy": 0.625, |
| "eval_f1": 0.5992033196773662, |
| "eval_loss": 1.5684361457824707, |
| "eval_runtime": 4.0843, |
| "eval_samples_per_second": 29.381, |
| "eval_steps_per_second": 1.959, |
| "step": 946 |
| }, |
| { |
| "epoch": 23.0, |
| "learning_rate": 1.2842758726130283e-05, |
| "loss": 0.0178, |
| "step": 989 |
| }, |
| { |
| "epoch": 23.0, |
| "eval_accuracy": 0.625, |
| "eval_f1": 0.5992033196773662, |
| "eval_loss": 1.592431664466858, |
| "eval_runtime": 4.0813, |
| "eval_samples_per_second": 29.402, |
| "eval_steps_per_second": 1.96, |
| "step": 989 |
| }, |
| { |
| "epoch": 24.0, |
| "learning_rate": 9.549150281252633e-06, |
| "loss": 0.0173, |
| "step": 1032 |
| }, |
| { |
| "epoch": 24.0, |
| "eval_accuracy": 0.6166666666666667, |
| "eval_f1": 0.5932522595359776, |
| "eval_loss": 1.6227874755859375, |
| "eval_runtime": 4.2004, |
| "eval_samples_per_second": 28.569, |
| "eval_steps_per_second": 1.905, |
| "step": 1032 |
| }, |
| { |
| "epoch": 25.0, |
| "learning_rate": 6.698729810778065e-06, |
| "loss": 0.016, |
| "step": 1075 |
| }, |
| { |
| "epoch": 25.0, |
| "eval_accuracy": 0.6333333333333333, |
| "eval_f1": 0.6072678358063984, |
| "eval_loss": 1.6176973581314087, |
| "eval_runtime": 4.0151, |
| "eval_samples_per_second": 29.887, |
| "eval_steps_per_second": 1.992, |
| "step": 1075 |
| }, |
| { |
| "epoch": 26.0, |
| "learning_rate": 4.322727117869951e-06, |
| "loss": 0.016, |
| "step": 1118 |
| }, |
| { |
| "epoch": 26.0, |
| "eval_accuracy": 0.625, |
| "eval_f1": 0.6009141292059813, |
| "eval_loss": 1.6267857551574707, |
| "eval_runtime": 4.055, |
| "eval_samples_per_second": 29.593, |
| "eval_steps_per_second": 1.973, |
| "step": 1118 |
| }, |
| { |
| "epoch": 27.0, |
| "learning_rate": 2.4471741852423237e-06, |
| "loss": 0.016, |
| "step": 1161 |
| }, |
| { |
| "epoch": 27.0, |
| "eval_accuracy": 0.625, |
| "eval_f1": 0.6009141292059813, |
| "eval_loss": 1.6387226581573486, |
| "eval_runtime": 4.2902, |
| "eval_samples_per_second": 27.971, |
| "eval_steps_per_second": 1.865, |
| "step": 1161 |
| }, |
| { |
| "epoch": 28.0, |
| "learning_rate": 1.0926199633097157e-06, |
| "loss": 0.0159, |
| "step": 1204 |
| }, |
| { |
| "epoch": 28.0, |
| "eval_accuracy": 0.625, |
| "eval_f1": 0.6009141292059813, |
| "eval_loss": 1.6403223276138306, |
| "eval_runtime": 4.1003, |
| "eval_samples_per_second": 29.266, |
| "eval_steps_per_second": 1.951, |
| "step": 1204 |
| }, |
| { |
| "epoch": 29.0, |
| "learning_rate": 2.7390523158633554e-07, |
| "loss": 0.0162, |
| "step": 1247 |
| }, |
| { |
| "epoch": 29.0, |
| "eval_accuracy": 0.625, |
| "eval_f1": 0.6009141292059813, |
| "eval_loss": 1.6409173011779785, |
| "eval_runtime": 4.0023, |
| "eval_samples_per_second": 29.983, |
| "eval_steps_per_second": 1.999, |
| "step": 1247 |
| }, |
| { |
| "epoch": 30.0, |
| "learning_rate": 0.0, |
| "loss": 0.018, |
| "step": 1290 |
| }, |
| { |
| "epoch": 30.0, |
| "eval_accuracy": 0.625, |
| "eval_f1": 0.6009141292059813, |
| "eval_loss": 1.6411793231964111, |
| "eval_runtime": 4.269, |
| "eval_samples_per_second": 28.11, |
| "eval_steps_per_second": 1.874, |
| "step": 1290 |
| }, |
| { |
| "epoch": 30.0, |
| "step": 1290, |
| "total_flos": 1.5809215993675776e+18, |
| "train_loss": 0.25586533430934877, |
| "train_runtime": 2391.6751, |
| "train_samples_per_second": 8.53, |
| "train_steps_per_second": 0.539 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 1290, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 30, |
| "save_steps": 500, |
| "total_flos": 1.5809215993675776e+18, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|