{ "best_metric": 0.39916983246803284, "best_model_checkpoint": "Action_all_10_class/checkpoint-1200", "epoch": 5.0, "eval_steps": 100, "global_step": 1245, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 0.00019839357429718877, "loss": 2.2294, "step": 10 }, { "epoch": 0.08, "learning_rate": 0.00019678714859437752, "loss": 2.1036, "step": 20 }, { "epoch": 0.12, "learning_rate": 0.00019518072289156628, "loss": 1.9019, "step": 30 }, { "epoch": 0.16, "learning_rate": 0.00019357429718875504, "loss": 1.6396, "step": 40 }, { "epoch": 0.2, "learning_rate": 0.00019196787148594377, "loss": 1.5942, "step": 50 }, { "epoch": 0.24, "learning_rate": 0.00019036144578313252, "loss": 1.3722, "step": 60 }, { "epoch": 0.28, "learning_rate": 0.00018875502008032128, "loss": 1.2927, "step": 70 }, { "epoch": 0.32, "learning_rate": 0.00018714859437751004, "loss": 1.2947, "step": 80 }, { "epoch": 0.36, "learning_rate": 0.0001855421686746988, "loss": 1.2353, "step": 90 }, { "epoch": 0.4, "learning_rate": 0.00018393574297188755, "loss": 1.1348, "step": 100 }, { "epoch": 0.4, "eval_accuracy": 0.698005698005698, "eval_loss": 1.0964313745498657, "eval_runtime": 13.3411, "eval_samples_per_second": 52.619, "eval_steps_per_second": 6.596, "step": 100 }, { "epoch": 0.44, "learning_rate": 0.0001823293172690763, "loss": 1.0305, "step": 110 }, { "epoch": 0.48, "learning_rate": 0.00018072289156626507, "loss": 1.0352, "step": 120 }, { "epoch": 0.52, "learning_rate": 0.00017911646586345382, "loss": 1.0957, "step": 130 }, { "epoch": 0.56, "learning_rate": 0.00017751004016064258, "loss": 1.1747, "step": 140 }, { "epoch": 0.6, "learning_rate": 0.00017590361445783134, "loss": 1.0863, "step": 150 }, { "epoch": 0.64, "learning_rate": 0.0001742971887550201, "loss": 1.0802, "step": 160 }, { "epoch": 0.68, "learning_rate": 0.00017269076305220885, "loss": 1.0912, "step": 170 }, { "epoch": 0.72, "learning_rate": 0.0001710843373493976, "loss": 1.0056, "step": 180 }, { "epoch": 0.76, "learning_rate": 0.00016947791164658636, "loss": 0.9641, "step": 190 }, { "epoch": 0.8, "learning_rate": 0.00016787148594377512, "loss": 0.9992, "step": 200 }, { "epoch": 0.8, "eval_accuracy": 0.7948717948717948, "eval_loss": 0.7362096309661865, "eval_runtime": 13.2093, "eval_samples_per_second": 53.145, "eval_steps_per_second": 6.662, "step": 200 }, { "epoch": 0.84, "learning_rate": 0.00016626506024096388, "loss": 0.9851, "step": 210 }, { "epoch": 0.88, "learning_rate": 0.00016465863453815263, "loss": 0.886, "step": 220 }, { "epoch": 0.92, "learning_rate": 0.0001630522088353414, "loss": 0.9604, "step": 230 }, { "epoch": 0.96, "learning_rate": 0.00016144578313253015, "loss": 0.7707, "step": 240 }, { "epoch": 1.0, "learning_rate": 0.00016, "loss": 0.9153, "step": 250 }, { "epoch": 1.04, "learning_rate": 0.00015839357429718874, "loss": 0.7581, "step": 260 }, { "epoch": 1.08, "learning_rate": 0.0001567871485943775, "loss": 0.6622, "step": 270 }, { "epoch": 1.12, "learning_rate": 0.00015518072289156626, "loss": 0.7251, "step": 280 }, { "epoch": 1.16, "learning_rate": 0.00015357429718875501, "loss": 0.7955, "step": 290 }, { "epoch": 1.2, "learning_rate": 0.00015196787148594377, "loss": 0.8314, "step": 300 }, { "epoch": 1.2, "eval_accuracy": 0.8205128205128205, "eval_loss": 0.6410418748855591, "eval_runtime": 12.6655, "eval_samples_per_second": 55.426, "eval_steps_per_second": 6.948, "step": 300 }, { "epoch": 1.24, "learning_rate": 0.00015036144578313253, "loss": 0.845, "step": 310 }, { "epoch": 1.29, "learning_rate": 0.00014875502008032128, "loss": 0.7132, "step": 320 }, { "epoch": 1.33, "learning_rate": 0.00014714859437751004, "loss": 0.7161, "step": 330 }, { "epoch": 1.37, "learning_rate": 0.0001455421686746988, "loss": 0.7773, "step": 340 }, { "epoch": 1.41, "learning_rate": 0.00014393574297188756, "loss": 0.7676, "step": 350 }, { "epoch": 1.45, "learning_rate": 0.0001423293172690763, "loss": 0.8516, "step": 360 }, { "epoch": 1.49, "learning_rate": 0.00014072289156626507, "loss": 0.6597, "step": 370 }, { "epoch": 1.53, "learning_rate": 0.00013911646586345383, "loss": 0.6998, "step": 380 }, { "epoch": 1.57, "learning_rate": 0.00013751004016064258, "loss": 0.8191, "step": 390 }, { "epoch": 1.61, "learning_rate": 0.00013590361445783134, "loss": 0.7359, "step": 400 }, { "epoch": 1.61, "eval_accuracy": 0.8247863247863247, "eval_loss": 0.5803518891334534, "eval_runtime": 12.2034, "eval_samples_per_second": 57.525, "eval_steps_per_second": 7.211, "step": 400 }, { "epoch": 1.65, "learning_rate": 0.0001342971887550201, "loss": 0.7354, "step": 410 }, { "epoch": 1.69, "learning_rate": 0.00013269076305220885, "loss": 0.7507, "step": 420 }, { "epoch": 1.73, "learning_rate": 0.0001310843373493976, "loss": 0.8088, "step": 430 }, { "epoch": 1.77, "learning_rate": 0.00012947791164658637, "loss": 0.7279, "step": 440 }, { "epoch": 1.81, "learning_rate": 0.00012787148594377512, "loss": 0.7254, "step": 450 }, { "epoch": 1.85, "learning_rate": 0.00012626506024096385, "loss": 0.6707, "step": 460 }, { "epoch": 1.89, "learning_rate": 0.0001246586345381526, "loss": 0.6482, "step": 470 }, { "epoch": 1.93, "learning_rate": 0.00012305220883534137, "loss": 0.7238, "step": 480 }, { "epoch": 1.97, "learning_rate": 0.00012144578313253012, "loss": 0.6626, "step": 490 }, { "epoch": 2.01, "learning_rate": 0.00011983935742971888, "loss": 0.776, "step": 500 }, { "epoch": 2.01, "eval_accuracy": 0.8376068376068376, "eval_loss": 0.5489934086799622, "eval_runtime": 12.4349, "eval_samples_per_second": 56.454, "eval_steps_per_second": 7.077, "step": 500 }, { "epoch": 2.05, "learning_rate": 0.00011823293172690764, "loss": 0.8069, "step": 510 }, { "epoch": 2.09, "learning_rate": 0.0001166265060240964, "loss": 0.5448, "step": 520 }, { "epoch": 2.13, "learning_rate": 0.00011502008032128515, "loss": 0.7318, "step": 530 }, { "epoch": 2.17, "learning_rate": 0.00011341365461847391, "loss": 0.5083, "step": 540 }, { "epoch": 2.21, "learning_rate": 0.00011180722891566267, "loss": 0.6493, "step": 550 }, { "epoch": 2.25, "learning_rate": 0.0001102008032128514, "loss": 0.4928, "step": 560 }, { "epoch": 2.29, "learning_rate": 0.00010859437751004015, "loss": 0.5395, "step": 570 }, { "epoch": 2.33, "learning_rate": 0.00010698795180722891, "loss": 0.5588, "step": 580 }, { "epoch": 2.37, "learning_rate": 0.00010538152610441767, "loss": 0.5892, "step": 590 }, { "epoch": 2.41, "learning_rate": 0.00010377510040160642, "loss": 0.614, "step": 600 }, { "epoch": 2.41, "eval_accuracy": 0.8504273504273504, "eval_loss": 0.5006864070892334, "eval_runtime": 12.2103, "eval_samples_per_second": 57.493, "eval_steps_per_second": 7.207, "step": 600 }, { "epoch": 2.45, "learning_rate": 0.00010216867469879518, "loss": 0.7658, "step": 610 }, { "epoch": 2.49, "learning_rate": 0.00010056224899598394, "loss": 0.5152, "step": 620 }, { "epoch": 2.53, "learning_rate": 9.89558232931727e-05, "loss": 0.4941, "step": 630 }, { "epoch": 2.57, "learning_rate": 9.734939759036145e-05, "loss": 0.6831, "step": 640 }, { "epoch": 2.61, "learning_rate": 9.574297188755021e-05, "loss": 0.5971, "step": 650 }, { "epoch": 2.65, "learning_rate": 9.413654618473896e-05, "loss": 0.5842, "step": 660 }, { "epoch": 2.69, "learning_rate": 9.253012048192772e-05, "loss": 0.5976, "step": 670 }, { "epoch": 2.73, "learning_rate": 9.092369477911648e-05, "loss": 0.5051, "step": 680 }, { "epoch": 2.77, "learning_rate": 8.931726907630522e-05, "loss": 0.4737, "step": 690 }, { "epoch": 2.81, "learning_rate": 8.771084337349398e-05, "loss": 0.5484, "step": 700 }, { "epoch": 2.81, "eval_accuracy": 0.8333333333333334, "eval_loss": 0.5322346091270447, "eval_runtime": 12.3228, "eval_samples_per_second": 56.968, "eval_steps_per_second": 7.141, "step": 700 }, { "epoch": 2.85, "learning_rate": 8.610441767068274e-05, "loss": 0.4436, "step": 710 }, { "epoch": 2.89, "learning_rate": 8.449799196787149e-05, "loss": 0.6452, "step": 720 }, { "epoch": 2.93, "learning_rate": 8.289156626506025e-05, "loss": 0.5724, "step": 730 }, { "epoch": 2.97, "learning_rate": 8.128514056224899e-05, "loss": 0.3933, "step": 740 }, { "epoch": 3.01, "learning_rate": 7.967871485943775e-05, "loss": 0.5753, "step": 750 }, { "epoch": 3.05, "learning_rate": 7.80722891566265e-05, "loss": 0.4426, "step": 760 }, { "epoch": 3.09, "learning_rate": 7.646586345381526e-05, "loss": 0.5442, "step": 770 }, { "epoch": 3.13, "learning_rate": 7.485943775100402e-05, "loss": 0.4839, "step": 780 }, { "epoch": 3.17, "learning_rate": 7.325301204819278e-05, "loss": 0.3711, "step": 790 }, { "epoch": 3.21, "learning_rate": 7.164658634538153e-05, "loss": 0.3844, "step": 800 }, { "epoch": 3.21, "eval_accuracy": 0.8518518518518519, "eval_loss": 0.5012323260307312, "eval_runtime": 12.9006, "eval_samples_per_second": 54.416, "eval_steps_per_second": 6.821, "step": 800 }, { "epoch": 3.25, "learning_rate": 7.004016064257029e-05, "loss": 0.513, "step": 810 }, { "epoch": 3.29, "learning_rate": 6.843373493975905e-05, "loss": 0.3505, "step": 820 }, { "epoch": 3.33, "learning_rate": 6.68273092369478e-05, "loss": 0.4022, "step": 830 }, { "epoch": 3.37, "learning_rate": 6.522088353413655e-05, "loss": 0.3574, "step": 840 }, { "epoch": 3.41, "learning_rate": 6.36144578313253e-05, "loss": 0.5703, "step": 850 }, { "epoch": 3.45, "learning_rate": 6.200803212851406e-05, "loss": 0.4585, "step": 860 }, { "epoch": 3.49, "learning_rate": 6.040160642570282e-05, "loss": 0.3114, "step": 870 }, { "epoch": 3.53, "learning_rate": 5.8795180722891576e-05, "loss": 0.3508, "step": 880 }, { "epoch": 3.57, "learning_rate": 5.718875502008032e-05, "loss": 0.3614, "step": 890 }, { "epoch": 3.61, "learning_rate": 5.5582329317269076e-05, "loss": 0.5681, "step": 900 }, { "epoch": 3.61, "eval_accuracy": 0.8589743589743589, "eval_loss": 0.49228861927986145, "eval_runtime": 13.1999, "eval_samples_per_second": 53.182, "eval_steps_per_second": 6.667, "step": 900 }, { "epoch": 3.65, "learning_rate": 5.397590361445783e-05, "loss": 0.5074, "step": 910 }, { "epoch": 3.69, "learning_rate": 5.236947791164659e-05, "loss": 0.3371, "step": 920 }, { "epoch": 3.73, "learning_rate": 5.076305220883535e-05, "loss": 0.5226, "step": 930 }, { "epoch": 3.78, "learning_rate": 4.9156626506024104e-05, "loss": 0.4462, "step": 940 }, { "epoch": 3.82, "learning_rate": 4.7550200803212854e-05, "loss": 0.4781, "step": 950 }, { "epoch": 3.86, "learning_rate": 4.594377510040161e-05, "loss": 0.4001, "step": 960 }, { "epoch": 3.9, "learning_rate": 4.433734939759036e-05, "loss": 0.4788, "step": 970 }, { "epoch": 3.94, "learning_rate": 4.273092369477912e-05, "loss": 0.4561, "step": 980 }, { "epoch": 3.98, "learning_rate": 4.1124497991967875e-05, "loss": 0.2563, "step": 990 }, { "epoch": 4.02, "learning_rate": 3.9518072289156625e-05, "loss": 0.4315, "step": 1000 }, { "epoch": 4.02, "eval_accuracy": 0.8774928774928775, "eval_loss": 0.42575880885124207, "eval_runtime": 13.0968, "eval_samples_per_second": 53.601, "eval_steps_per_second": 6.719, "step": 1000 }, { "epoch": 4.06, "learning_rate": 3.791164658634538e-05, "loss": 0.2898, "step": 1010 }, { "epoch": 4.1, "learning_rate": 3.630522088353414e-05, "loss": 0.3803, "step": 1020 }, { "epoch": 4.14, "learning_rate": 3.4698795180722896e-05, "loss": 0.2827, "step": 1030 }, { "epoch": 4.18, "learning_rate": 3.309236947791165e-05, "loss": 0.3382, "step": 1040 }, { "epoch": 4.22, "learning_rate": 3.14859437751004e-05, "loss": 0.3808, "step": 1050 }, { "epoch": 4.26, "learning_rate": 2.987951807228916e-05, "loss": 0.2551, "step": 1060 }, { "epoch": 4.3, "learning_rate": 2.827309236947791e-05, "loss": 0.3011, "step": 1070 }, { "epoch": 4.34, "learning_rate": 2.6666666666666667e-05, "loss": 0.4091, "step": 1080 }, { "epoch": 4.38, "learning_rate": 2.5060240963855423e-05, "loss": 0.286, "step": 1090 }, { "epoch": 4.42, "learning_rate": 2.345381526104418e-05, "loss": 0.3804, "step": 1100 }, { "epoch": 4.42, "eval_accuracy": 0.8675213675213675, "eval_loss": 0.4324430227279663, "eval_runtime": 12.1257, "eval_samples_per_second": 57.894, "eval_steps_per_second": 7.257, "step": 1100 }, { "epoch": 4.46, "learning_rate": 2.1847389558232934e-05, "loss": 0.2672, "step": 1110 }, { "epoch": 4.5, "learning_rate": 2.0240963855421687e-05, "loss": 0.3661, "step": 1120 }, { "epoch": 4.54, "learning_rate": 1.863453815261044e-05, "loss": 0.3126, "step": 1130 }, { "epoch": 4.58, "learning_rate": 1.7028112449799198e-05, "loss": 0.3455, "step": 1140 }, { "epoch": 4.62, "learning_rate": 1.5421686746987955e-05, "loss": 0.3604, "step": 1150 }, { "epoch": 4.66, "learning_rate": 1.3815261044176708e-05, "loss": 0.4628, "step": 1160 }, { "epoch": 4.7, "learning_rate": 1.2208835341365463e-05, "loss": 0.4074, "step": 1170 }, { "epoch": 4.74, "learning_rate": 1.0602409638554217e-05, "loss": 0.2512, "step": 1180 }, { "epoch": 4.78, "learning_rate": 8.995983935742972e-06, "loss": 0.2974, "step": 1190 }, { "epoch": 4.82, "learning_rate": 7.389558232931727e-06, "loss": 0.2887, "step": 1200 }, { "epoch": 4.82, "eval_accuracy": 0.8803418803418803, "eval_loss": 0.39916983246803284, "eval_runtime": 12.2383, "eval_samples_per_second": 57.361, "eval_steps_per_second": 7.191, "step": 1200 }, { "epoch": 4.86, "learning_rate": 5.783132530120483e-06, "loss": 0.3073, "step": 1210 }, { "epoch": 4.9, "learning_rate": 4.176706827309238e-06, "loss": 0.2825, "step": 1220 }, { "epoch": 4.94, "learning_rate": 2.570281124497992e-06, "loss": 0.288, "step": 1230 }, { "epoch": 4.98, "learning_rate": 9.638554216867472e-07, "loss": 0.3198, "step": 1240 }, { "epoch": 5.0, "step": 1245, "total_flos": 1.539101261655982e+18, "train_loss": 0.6643937945844658, "train_runtime": 748.7697, "train_samples_per_second": 26.524, "train_steps_per_second": 1.663 } ], "logging_steps": 10, "max_steps": 1245, "num_train_epochs": 5, "save_steps": 100, "total_flos": 1.539101261655982e+18, "trial_name": null, "trial_params": null }