{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 660, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.045454545454545456, "grad_norm": 11.08301830291748, "learning_rate": 4.9242424242424245e-05, "loss": 1.3636, "step": 10 }, { "epoch": 0.09090909090909091, "grad_norm": 6.645240306854248, "learning_rate": 4.848484848484849e-05, "loss": 0.948, "step": 20 }, { "epoch": 0.13636363636363635, "grad_norm": 25.287090301513672, "learning_rate": 4.772727272727273e-05, "loss": 0.9232, "step": 30 }, { "epoch": 0.18181818181818182, "grad_norm": 10.437969207763672, "learning_rate": 4.696969696969697e-05, "loss": 0.7862, "step": 40 }, { "epoch": 0.22727272727272727, "grad_norm": 4.972208499908447, "learning_rate": 4.621212121212121e-05, "loss": 0.7418, "step": 50 }, { "epoch": 0.2727272727272727, "grad_norm": 16.393909454345703, "learning_rate": 4.545454545454546e-05, "loss": 0.6632, "step": 60 }, { "epoch": 0.3181818181818182, "grad_norm": 11.954917907714844, "learning_rate": 4.46969696969697e-05, "loss": 0.7647, "step": 70 }, { "epoch": 0.36363636363636365, "grad_norm": 6.628509044647217, "learning_rate": 4.3939393939393944e-05, "loss": 0.8068, "step": 80 }, { "epoch": 0.4090909090909091, "grad_norm": 3.830747365951538, "learning_rate": 4.318181818181819e-05, "loss": 0.8173, "step": 90 }, { "epoch": 0.45454545454545453, "grad_norm": 9.427088737487793, "learning_rate": 4.242424242424243e-05, "loss": 0.5975, "step": 100 }, { "epoch": 0.5, "grad_norm": 15.573807716369629, "learning_rate": 4.166666666666667e-05, "loss": 0.76, "step": 110 }, { "epoch": 0.5454545454545454, "grad_norm": 4.234420299530029, "learning_rate": 4.0909090909090915e-05, "loss": 0.5861, "step": 120 }, { "epoch": 0.5909090909090909, "grad_norm": 10.21924114227295, "learning_rate": 4.015151515151515e-05, "loss": 0.6307, "step": 130 }, { "epoch": 0.6363636363636364, "grad_norm": 15.529552459716797, "learning_rate": 3.939393939393939e-05, "loss": 0.5305, "step": 140 }, { "epoch": 0.6818181818181818, "grad_norm": 15.605684280395508, "learning_rate": 3.8636363636363636e-05, "loss": 0.517, "step": 150 }, { "epoch": 0.7272727272727273, "grad_norm": 4.426260471343994, "learning_rate": 3.787878787878788e-05, "loss": 0.6307, "step": 160 }, { "epoch": 0.7727272727272727, "grad_norm": 6.0872673988342285, "learning_rate": 3.712121212121212e-05, "loss": 0.6244, "step": 170 }, { "epoch": 0.8181818181818182, "grad_norm": 8.042257308959961, "learning_rate": 3.6363636363636364e-05, "loss": 0.657, "step": 180 }, { "epoch": 0.8636363636363636, "grad_norm": 5.940113067626953, "learning_rate": 3.560606060606061e-05, "loss": 0.5958, "step": 190 }, { "epoch": 0.9090909090909091, "grad_norm": 8.203106880187988, "learning_rate": 3.484848484848485e-05, "loss": 0.4277, "step": 200 }, { "epoch": 0.9545454545454546, "grad_norm": 6.048317909240723, "learning_rate": 3.409090909090909e-05, "loss": 0.5817, "step": 210 }, { "epoch": 1.0, "grad_norm": 10.408190727233887, "learning_rate": 3.3333333333333335e-05, "loss": 0.4926, "step": 220 }, { "epoch": 1.0, "eval_accuracy": 0.8351063829787234, "eval_accuracy_label_communication_issue": 0.0, "eval_accuracy_label_general_query": 0.0, "eval_accuracy_label_other": 0.847887323943662, "eval_accuracy_label_praise": 0.7315436241610739, "eval_accuracy_label_service_issue": 0.9780346820809248, "eval_f1": 0.7977035566821362, "eval_loss": 0.476932555437088, "eval_precision": 0.7653675138091269, 
"eval_recall": 0.8351063829787234, "eval_runtime": 47.9926, "eval_samples_per_second": 31.338, "eval_steps_per_second": 1.959, "step": 220 }, { "epoch": 1.0454545454545454, "grad_norm": 5.125838756561279, "learning_rate": 3.257575757575758e-05, "loss": 0.46, "step": 230 }, { "epoch": 1.0909090909090908, "grad_norm": 6.349830150604248, "learning_rate": 3.181818181818182e-05, "loss": 0.4112, "step": 240 }, { "epoch": 1.1363636363636362, "grad_norm": 19.836454391479492, "learning_rate": 3.106060606060606e-05, "loss": 0.3519, "step": 250 }, { "epoch": 1.1818181818181819, "grad_norm": 7.154183864593506, "learning_rate": 3.0303030303030306e-05, "loss": 0.3793, "step": 260 }, { "epoch": 1.2272727272727273, "grad_norm": 9.307719230651855, "learning_rate": 2.954545454545455e-05, "loss": 0.5391, "step": 270 }, { "epoch": 1.2727272727272727, "grad_norm": 27.64773941040039, "learning_rate": 2.878787878787879e-05, "loss": 0.4752, "step": 280 }, { "epoch": 1.3181818181818181, "grad_norm": 13.110358238220215, "learning_rate": 2.803030303030303e-05, "loss": 0.5639, "step": 290 }, { "epoch": 1.3636363636363638, "grad_norm": 10.741105079650879, "learning_rate": 2.7272727272727273e-05, "loss": 0.3713, "step": 300 }, { "epoch": 1.4090909090909092, "grad_norm": 6.00734806060791, "learning_rate": 2.6515151515151516e-05, "loss": 0.3944, "step": 310 }, { "epoch": 1.4545454545454546, "grad_norm": 17.730316162109375, "learning_rate": 2.575757575757576e-05, "loss": 0.45, "step": 320 }, { "epoch": 1.5, "grad_norm": 13.08516788482666, "learning_rate": 2.5e-05, "loss": 0.4115, "step": 330 }, { "epoch": 1.5454545454545454, "grad_norm": 10.093771934509277, "learning_rate": 2.4242424242424244e-05, "loss": 0.2893, "step": 340 }, { "epoch": 1.5909090909090908, "grad_norm": 10.081781387329102, "learning_rate": 2.3484848484848487e-05, "loss": 0.277, "step": 350 }, { "epoch": 1.6363636363636362, "grad_norm": 17.387615203857422, "learning_rate": 2.272727272727273e-05, "loss": 0.5264, "step": 360 }, { "epoch": 1.6818181818181817, "grad_norm": 9.769563674926758, "learning_rate": 2.1969696969696972e-05, "loss": 0.5075, "step": 370 }, { "epoch": 1.7272727272727273, "grad_norm": 9.551799774169922, "learning_rate": 2.1212121212121215e-05, "loss": 0.4975, "step": 380 }, { "epoch": 1.7727272727272727, "grad_norm": 3.705615758895874, "learning_rate": 2.0454545454545457e-05, "loss": 0.3067, "step": 390 }, { "epoch": 1.8181818181818183, "grad_norm": 15.211760520935059, "learning_rate": 1.9696969696969697e-05, "loss": 0.4204, "step": 400 }, { "epoch": 1.8636363636363638, "grad_norm": 11.120340347290039, "learning_rate": 1.893939393939394e-05, "loss": 0.4252, "step": 410 }, { "epoch": 1.9090909090909092, "grad_norm": 24.251697540283203, "learning_rate": 1.8181818181818182e-05, "loss": 0.2903, "step": 420 }, { "epoch": 1.9545454545454546, "grad_norm": 6.106864929199219, "learning_rate": 1.7424242424242425e-05, "loss": 0.3099, "step": 430 }, { "epoch": 2.0, "grad_norm": 497.832275390625, "learning_rate": 1.6666666666666667e-05, "loss": 0.4722, "step": 440 }, { "epoch": 2.0, "eval_accuracy": 0.8138297872340425, "eval_accuracy_label_communication_issue": 0.5918367346938775, "eval_accuracy_label_general_query": 0.2972972972972973, "eval_accuracy_label_other": 0.8591549295774648, "eval_accuracy_label_praise": 0.8523489932885906, "eval_accuracy_label_service_issue": 0.8358381502890173, "eval_f1": 0.833458233857523, "eval_loss": 0.4489530324935913, "eval_precision": 0.8684601912444823, "eval_recall": 0.8138297872340425, "eval_runtime": 48.2944, 
"eval_samples_per_second": 31.142, "eval_steps_per_second": 1.946, "step": 440 }, { "epoch": 2.0454545454545454, "grad_norm": 4.993196964263916, "learning_rate": 1.590909090909091e-05, "loss": 0.3753, "step": 450 }, { "epoch": 2.090909090909091, "grad_norm": 10.294816017150879, "learning_rate": 1.5151515151515153e-05, "loss": 0.227, "step": 460 }, { "epoch": 2.1363636363636362, "grad_norm": 11.7720365524292, "learning_rate": 1.4393939393939396e-05, "loss": 0.3358, "step": 470 }, { "epoch": 2.1818181818181817, "grad_norm": 4.280463695526123, "learning_rate": 1.3636363636363637e-05, "loss": 0.2411, "step": 480 }, { "epoch": 2.227272727272727, "grad_norm": 5.480221271514893, "learning_rate": 1.287878787878788e-05, "loss": 0.2354, "step": 490 }, { "epoch": 2.2727272727272725, "grad_norm": 11.005305290222168, "learning_rate": 1.2121212121212122e-05, "loss": 0.3645, "step": 500 }, { "epoch": 2.3181818181818183, "grad_norm": 8.943399429321289, "learning_rate": 1.1363636363636365e-05, "loss": 0.2661, "step": 510 }, { "epoch": 2.3636363636363638, "grad_norm": 16.569984436035156, "learning_rate": 1.0606060606060607e-05, "loss": 0.3468, "step": 520 }, { "epoch": 2.409090909090909, "grad_norm": 16.938270568847656, "learning_rate": 9.848484848484848e-06, "loss": 0.2875, "step": 530 }, { "epoch": 2.4545454545454546, "grad_norm": 16.039297103881836, "learning_rate": 9.090909090909091e-06, "loss": 0.2259, "step": 540 }, { "epoch": 2.5, "grad_norm": 8.718212127685547, "learning_rate": 8.333333333333334e-06, "loss": 0.2414, "step": 550 }, { "epoch": 2.5454545454545454, "grad_norm": 5.3960137367248535, "learning_rate": 7.5757575757575764e-06, "loss": 0.1906, "step": 560 }, { "epoch": 2.590909090909091, "grad_norm": 12.368656158447266, "learning_rate": 6.818181818181818e-06, "loss": 0.292, "step": 570 }, { "epoch": 2.6363636363636362, "grad_norm": 7.215673923492432, "learning_rate": 6.060606060606061e-06, "loss": 0.2161, "step": 580 }, { "epoch": 2.6818181818181817, "grad_norm": 10.76251220703125, "learning_rate": 5.303030303030304e-06, "loss": 0.2502, "step": 590 }, { "epoch": 2.7272727272727275, "grad_norm": 3.9787635803222656, "learning_rate": 4.5454545454545455e-06, "loss": 0.3901, "step": 600 }, { "epoch": 2.7727272727272725, "grad_norm": 17.325733184814453, "learning_rate": 3.7878787878787882e-06, "loss": 0.318, "step": 610 }, { "epoch": 2.8181818181818183, "grad_norm": 2.0341148376464844, "learning_rate": 3.0303030303030305e-06, "loss": 0.2546, "step": 620 }, { "epoch": 2.8636363636363638, "grad_norm": 4.3380279541015625, "learning_rate": 2.2727272727272728e-06, "loss": 0.3142, "step": 630 }, { "epoch": 2.909090909090909, "grad_norm": 57.76769256591797, "learning_rate": 1.5151515151515152e-06, "loss": 0.3151, "step": 640 }, { "epoch": 2.9545454545454546, "grad_norm": 13.915431022644043, "learning_rate": 7.575757575757576e-07, "loss": 0.2874, "step": 650 }, { "epoch": 3.0, "grad_norm": 11.066320419311523, "learning_rate": 0.0, "loss": 0.1949, "step": 660 }, { "epoch": 3.0, "eval_accuracy": 0.8650265957446809, "eval_accuracy_label_communication_issue": 0.3877551020408163, "eval_accuracy_label_general_query": 0.5135135135135135, "eval_accuracy_label_other": 0.8535211267605634, "eval_accuracy_label_praise": 0.7986577181208053, "eval_accuracy_label_service_issue": 0.9502890173410404, "eval_f1": 0.8646844293958481, "eval_loss": 0.4295935034751892, "eval_precision": 0.8662149933902713, "eval_recall": 0.8650265957446809, "eval_runtime": 48.0636, "eval_samples_per_second": 31.292, "eval_steps_per_second": 
1.956, "step": 660 }, { "epoch": 3.0, "step": 660, "total_flos": 2769055331291136.0, "train_loss": 0.4658627540776224, "train_runtime": 1450.7799, "train_samples_per_second": 7.254, "train_steps_per_second": 0.455 }, { "epoch": 3.0, "eval_accuracy": 0.8650265957446809, "eval_accuracy_label_communication_issue": 0.3877551020408163, "eval_accuracy_label_general_query": 0.5135135135135135, "eval_accuracy_label_other": 0.8535211267605634, "eval_accuracy_label_praise": 0.7986577181208053, "eval_accuracy_label_service_issue": 0.9502890173410404, "eval_f1": 0.8646844293958481, "eval_loss": 0.4295935034751892, "eval_precision": 0.8662149933902713, "eval_recall": 0.8650265957446809, "eval_runtime": 43.1294, "eval_samples_per_second": 34.872, "eval_steps_per_second": 2.179, "step": 660 } ], "logging_steps": 10, "max_steps": 660, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2769055331291136.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }