{ "best_metric": 0.3858742415904999, "best_model_checkpoint": "few-shot-learning-classification-bert-sm-1K-1/checkpoint-1280", "epoch": 2.0, "eval_steps": 500, "global_step": 1280, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0390625, "grad_norm": 7.191597938537598, "learning_rate": 1.4375e-06, "loss": 1.4451, "step": 25 }, { "epoch": 0.078125, "grad_norm": 5.644325256347656, "learning_rate": 3e-06, "loss": 1.4487, "step": 50 }, { "epoch": 0.1171875, "grad_norm": 13.361610412597656, "learning_rate": 4.5e-06, "loss": 1.319, "step": 75 }, { "epoch": 0.15625, "grad_norm": 14.431085586547852, "learning_rate": 6.0625e-06, "loss": 1.2472, "step": 100 }, { "epoch": 0.1953125, "grad_norm": 22.173587799072266, "learning_rate": 7.625e-06, "loss": 1.1274, "step": 125 }, { "epoch": 0.234375, "grad_norm": 14.653789520263672, "learning_rate": 9.1875e-06, "loss": 0.966, "step": 150 }, { "epoch": 0.2734375, "grad_norm": 8.065316200256348, "learning_rate": 1.075e-05, "loss": 0.8051, "step": 175 }, { "epoch": 0.3125, "grad_norm": 9.035745620727539, "learning_rate": 1.2312500000000001e-05, "loss": 0.7299, "step": 200 }, { "epoch": 0.3515625, "grad_norm": 7.260746002197266, "learning_rate": 1.3875e-05, "loss": 0.5627, "step": 225 }, { "epoch": 0.390625, "grad_norm": 11.369009017944336, "learning_rate": 1.54375e-05, "loss": 0.6384, "step": 250 }, { "epoch": 0.4296875, "grad_norm": 9.945943832397461, "learning_rate": 1.7e-05, "loss": 0.5355, "step": 275 }, { "epoch": 0.46875, "grad_norm": 8.494958877563477, "learning_rate": 1.85625e-05, "loss": 0.4033, "step": 300 }, { "epoch": 0.5078125, "grad_norm": 13.045336723327637, "learning_rate": 1.9986111111111114e-05, "loss": 0.3506, "step": 325 }, { "epoch": 0.546875, "grad_norm": 1.5317492485046387, "learning_rate": 1.98125e-05, "loss": 0.4383, "step": 350 }, { "epoch": 0.5859375, "grad_norm": 51.39152908325195, "learning_rate": 1.963888888888889e-05, "loss": 0.4716, "step": 375 }, { "epoch": 0.625, "grad_norm": 0.5919365882873535, "learning_rate": 1.946527777777778e-05, "loss": 0.2496, "step": 400 }, { "epoch": 0.6640625, "grad_norm": 0.27208948135375977, "learning_rate": 1.9291666666666667e-05, "loss": 0.326, "step": 425 }, { "epoch": 0.703125, "grad_norm": 0.17534124851226807, "learning_rate": 1.9118055555555557e-05, "loss": 0.3539, "step": 450 }, { "epoch": 0.7421875, "grad_norm": 0.4635702967643738, "learning_rate": 1.8944444444444447e-05, "loss": 0.3726, "step": 475 }, { "epoch": 0.78125, "grad_norm": 18.82594108581543, "learning_rate": 1.8770833333333337e-05, "loss": 0.5068, "step": 500 }, { "epoch": 0.8203125, "grad_norm": 24.958038330078125, "learning_rate": 1.8604166666666667e-05, "loss": 0.3299, "step": 525 }, { "epoch": 0.859375, "grad_norm": 61.970924377441406, "learning_rate": 1.8430555555555557e-05, "loss": 0.5022, "step": 550 }, { "epoch": 0.8984375, "grad_norm": 0.2635006606578827, "learning_rate": 1.8256944444444447e-05, "loss": 0.4954, "step": 575 }, { "epoch": 0.9375, "grad_norm": 6.639841556549072, "learning_rate": 1.8083333333333334e-05, "loss": 0.3813, "step": 600 }, { "epoch": 0.9765625, "grad_norm": 2.1155128479003906, "learning_rate": 1.7909722222222223e-05, "loss": 0.4112, "step": 625 }, { "epoch": 1.0, "eval_accuracy": 0.8675, "eval_f1_macro": 0.8671356661653469, "eval_f1_micro": 0.8675, "eval_f1_weighted": 0.8671356661653468, "eval_loss": 0.5370703339576721, "eval_precision_macro": 0.8730718238693385, "eval_precision_micro": 0.8675, "eval_precision_weighted": 
0.8730718238693385, "eval_recall_macro": 0.8674999999999999, "eval_recall_micro": 0.8675, "eval_recall_weighted": 0.8675, "eval_runtime": 6.2872, "eval_samples_per_second": 127.242, "eval_steps_per_second": 63.621, "step": 640 }, { "epoch": 1.015625, "grad_norm": 31.680767059326172, "learning_rate": 1.773611111111111e-05, "loss": 0.3012, "step": 650 }, { "epoch": 1.0546875, "grad_norm": 31.29473114013672, "learning_rate": 1.7562500000000003e-05, "loss": 0.2361, "step": 675 }, { "epoch": 1.09375, "grad_norm": 0.18347026407718658, "learning_rate": 1.738888888888889e-05, "loss": 0.2887, "step": 700 }, { "epoch": 1.1328125, "grad_norm": 1.14323091506958, "learning_rate": 1.721527777777778e-05, "loss": 0.2525, "step": 725 }, { "epoch": 1.171875, "grad_norm": 0.09450375288724899, "learning_rate": 1.7041666666666666e-05, "loss": 0.3185, "step": 750 }, { "epoch": 1.2109375, "grad_norm": 0.28490039706230164, "learning_rate": 1.6868055555555556e-05, "loss": 0.2852, "step": 775 }, { "epoch": 1.25, "grad_norm": 0.4697229862213135, "learning_rate": 1.6694444444444446e-05, "loss": 0.4275, "step": 800 }, { "epoch": 1.2890625, "grad_norm": 0.09609019011259079, "learning_rate": 1.6520833333333336e-05, "loss": 0.0966, "step": 825 }, { "epoch": 1.328125, "grad_norm": 34.88169479370117, "learning_rate": 1.6347222222222223e-05, "loss": 0.3033, "step": 850 }, { "epoch": 1.3671875, "grad_norm": 45.051490783691406, "learning_rate": 1.6173611111111113e-05, "loss": 0.4049, "step": 875 }, { "epoch": 1.40625, "grad_norm": 0.3537954092025757, "learning_rate": 1.6000000000000003e-05, "loss": 0.2878, "step": 900 }, { "epoch": 1.4453125, "grad_norm": 91.70577239990234, "learning_rate": 1.582638888888889e-05, "loss": 0.2305, "step": 925 }, { "epoch": 1.484375, "grad_norm": 0.0591551698744297, "learning_rate": 1.565277777777778e-05, "loss": 0.2653, "step": 950 }, { "epoch": 1.5234375, "grad_norm": 13.28045654296875, "learning_rate": 1.5479166666666666e-05, "loss": 0.3921, "step": 975 }, { "epoch": 1.5625, "grad_norm": 0.16305220127105713, "learning_rate": 1.5305555555555556e-05, "loss": 0.1752, "step": 1000 }, { "epoch": 1.6015625, "grad_norm": 0.06767012178897858, "learning_rate": 1.5131944444444446e-05, "loss": 0.3607, "step": 1025 }, { "epoch": 1.640625, "grad_norm": 1.8044508695602417, "learning_rate": 1.4958333333333336e-05, "loss": 0.2305, "step": 1050 }, { "epoch": 1.6796875, "grad_norm": 0.15962672233581543, "learning_rate": 1.4784722222222224e-05, "loss": 0.3076, "step": 1075 }, { "epoch": 1.71875, "grad_norm": 48.03413391113281, "learning_rate": 1.4611111111111112e-05, "loss": 0.3941, "step": 1100 }, { "epoch": 1.7578125, "grad_norm": 20.047348022460938, "learning_rate": 1.4437500000000002e-05, "loss": 0.2488, "step": 1125 }, { "epoch": 1.796875, "grad_norm": 0.09602449089288712, "learning_rate": 1.426388888888889e-05, "loss": 0.3055, "step": 1150 }, { "epoch": 1.8359375, "grad_norm": 6.260819435119629, "learning_rate": 1.4090277777777778e-05, "loss": 0.2926, "step": 1175 }, { "epoch": 1.875, "grad_norm": 0.12005895376205444, "learning_rate": 1.3916666666666667e-05, "loss": 0.4649, "step": 1200 }, { "epoch": 1.9140625, "grad_norm": 6.192780494689941, "learning_rate": 1.3743055555555555e-05, "loss": 0.2597, "step": 1225 }, { "epoch": 1.953125, "grad_norm": 0.2921382188796997, "learning_rate": 1.3576388888888889e-05, "loss": 0.3106, "step": 1250 }, { "epoch": 1.9921875, "grad_norm": 6.799332618713379, "learning_rate": 1.3402777777777779e-05, "loss": 0.501, "step": 1275 }, { "epoch": 2.0, "eval_accuracy": 0.9075, 
"eval_f1_macro": 0.9076167120902879, "eval_f1_micro": 0.9075, "eval_f1_weighted": 0.9076167120902879, "eval_loss": 0.3858742415904999, "eval_precision_macro": 0.9080215201299329, "eval_precision_micro": 0.9075, "eval_precision_weighted": 0.9080215201299328, "eval_recall_macro": 0.9075, "eval_recall_micro": 0.9075, "eval_recall_weighted": 0.9075, "eval_runtime": 6.273, "eval_samples_per_second": 127.53, "eval_steps_per_second": 63.765, "step": 1280 } ], "logging_steps": 25, "max_steps": 3200, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "total_flos": 1683940992614400.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }