|
{ |
|
"best_metric": 0.3858742415904999, |
|
"best_model_checkpoint": "few-shot-learning-classification-bert-sm-1K-1/checkpoint-1280", |
|
"epoch": 2.0, |
|
"eval_steps": 500, |
|
"global_step": 1280, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0390625, |
|
"grad_norm": 7.191597938537598, |
|
"learning_rate": 1.4375e-06, |
|
"loss": 1.4451, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.078125, |
|
"grad_norm": 5.644325256347656, |
|
"learning_rate": 3e-06, |
|
"loss": 1.4487, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.1171875, |
|
"grad_norm": 13.361610412597656, |
|
"learning_rate": 4.5e-06, |
|
"loss": 1.319, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.15625, |
|
"grad_norm": 14.431085586547852, |
|
"learning_rate": 6.0625e-06, |
|
"loss": 1.2472, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.1953125, |
|
"grad_norm": 22.173587799072266, |
|
"learning_rate": 7.625e-06, |
|
"loss": 1.1274, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.234375, |
|
"grad_norm": 14.653789520263672, |
|
"learning_rate": 9.1875e-06, |
|
"loss": 0.966, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.2734375, |
|
"grad_norm": 8.065316200256348, |
|
"learning_rate": 1.075e-05, |
|
"loss": 0.8051, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.3125, |
|
"grad_norm": 9.035745620727539, |
|
"learning_rate": 1.2312500000000001e-05, |
|
"loss": 0.7299, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.3515625, |
|
"grad_norm": 7.260746002197266, |
|
"learning_rate": 1.3875e-05, |
|
"loss": 0.5627, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.390625, |
|
"grad_norm": 11.369009017944336, |
|
"learning_rate": 1.54375e-05, |
|
"loss": 0.6384, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.4296875, |
|
"grad_norm": 9.945943832397461, |
|
"learning_rate": 1.7e-05, |
|
"loss": 0.5355, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.46875, |
|
"grad_norm": 8.494958877563477, |
|
"learning_rate": 1.85625e-05, |
|
"loss": 0.4033, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.5078125, |
|
"grad_norm": 13.045336723327637, |
|
"learning_rate": 1.9986111111111114e-05, |
|
"loss": 0.3506, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.546875, |
|
"grad_norm": 1.5317492485046387, |
|
"learning_rate": 1.98125e-05, |
|
"loss": 0.4383, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.5859375, |
|
"grad_norm": 51.39152908325195, |
|
"learning_rate": 1.963888888888889e-05, |
|
"loss": 0.4716, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.625, |
|
"grad_norm": 0.5919365882873535, |
|
"learning_rate": 1.946527777777778e-05, |
|
"loss": 0.2496, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.6640625, |
|
"grad_norm": 0.27208948135375977, |
|
"learning_rate": 1.9291666666666667e-05, |
|
"loss": 0.326, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.703125, |
|
"grad_norm": 0.17534124851226807, |
|
"learning_rate": 1.9118055555555557e-05, |
|
"loss": 0.3539, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.7421875, |
|
"grad_norm": 0.4635702967643738, |
|
"learning_rate": 1.8944444444444447e-05, |
|
"loss": 0.3726, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.78125, |
|
"grad_norm": 18.82594108581543, |
|
"learning_rate": 1.8770833333333337e-05, |
|
"loss": 0.5068, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8203125, |
|
"grad_norm": 24.958038330078125, |
|
"learning_rate": 1.8604166666666667e-05, |
|
"loss": 0.3299, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.859375, |
|
"grad_norm": 61.970924377441406, |
|
"learning_rate": 1.8430555555555557e-05, |
|
"loss": 0.5022, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.8984375, |
|
"grad_norm": 0.2635006606578827, |
|
"learning_rate": 1.8256944444444447e-05, |
|
"loss": 0.4954, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.9375, |
|
"grad_norm": 6.639841556549072, |
|
"learning_rate": 1.8083333333333334e-05, |
|
"loss": 0.3813, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.9765625, |
|
"grad_norm": 2.1155128479003906, |
|
"learning_rate": 1.7909722222222223e-05, |
|
"loss": 0.4112, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8675, |
|
"eval_f1_macro": 0.8671356661653469, |
|
"eval_f1_micro": 0.8675, |
|
"eval_f1_weighted": 0.8671356661653468, |
|
"eval_loss": 0.5370703339576721, |
|
"eval_precision_macro": 0.8730718238693385, |
|
"eval_precision_micro": 0.8675, |
|
"eval_precision_weighted": 0.8730718238693385, |
|
"eval_recall_macro": 0.8674999999999999, |
|
"eval_recall_micro": 0.8675, |
|
"eval_recall_weighted": 0.8675, |
|
"eval_runtime": 6.2872, |
|
"eval_samples_per_second": 127.242, |
|
"eval_steps_per_second": 63.621, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.015625, |
|
"grad_norm": 31.680767059326172, |
|
"learning_rate": 1.773611111111111e-05, |
|
"loss": 0.3012, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.0546875, |
|
"grad_norm": 31.29473114013672, |
|
"learning_rate": 1.7562500000000003e-05, |
|
"loss": 0.2361, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 1.09375, |
|
"grad_norm": 0.18347026407718658, |
|
"learning_rate": 1.738888888888889e-05, |
|
"loss": 0.2887, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.1328125, |
|
"grad_norm": 1.14323091506958, |
|
"learning_rate": 1.721527777777778e-05, |
|
"loss": 0.2525, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 1.171875, |
|
"grad_norm": 0.09450375288724899, |
|
"learning_rate": 1.7041666666666666e-05, |
|
"loss": 0.3185, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.2109375, |
|
"grad_norm": 0.28490039706230164, |
|
"learning_rate": 1.6868055555555556e-05, |
|
"loss": 0.2852, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 0.4697229862213135, |
|
"learning_rate": 1.6694444444444446e-05, |
|
"loss": 0.4275, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.2890625, |
|
"grad_norm": 0.09609019011259079, |
|
"learning_rate": 1.6520833333333336e-05, |
|
"loss": 0.0966, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 1.328125, |
|
"grad_norm": 34.88169479370117, |
|
"learning_rate": 1.6347222222222223e-05, |
|
"loss": 0.3033, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.3671875, |
|
"grad_norm": 45.051490783691406, |
|
"learning_rate": 1.6173611111111113e-05, |
|
"loss": 0.4049, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 1.40625, |
|
"grad_norm": 0.3537954092025757, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"loss": 0.2878, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.4453125, |
|
"grad_norm": 91.70577239990234, |
|
"learning_rate": 1.582638888888889e-05, |
|
"loss": 0.2305, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 1.484375, |
|
"grad_norm": 0.0591551698744297, |
|
"learning_rate": 1.565277777777778e-05, |
|
"loss": 0.2653, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.5234375, |
|
"grad_norm": 13.28045654296875, |
|
"learning_rate": 1.5479166666666666e-05, |
|
"loss": 0.3921, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 1.5625, |
|
"grad_norm": 0.16305220127105713, |
|
"learning_rate": 1.5305555555555556e-05, |
|
"loss": 0.1752, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.6015625, |
|
"grad_norm": 0.06767012178897858, |
|
"learning_rate": 1.5131944444444446e-05, |
|
"loss": 0.3607, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 1.640625, |
|
"grad_norm": 1.8044508695602417, |
|
"learning_rate": 1.4958333333333336e-05, |
|
"loss": 0.2305, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.6796875, |
|
"grad_norm": 0.15962672233581543, |
|
"learning_rate": 1.4784722222222224e-05, |
|
"loss": 0.3076, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 1.71875, |
|
"grad_norm": 48.03413391113281, |
|
"learning_rate": 1.4611111111111112e-05, |
|
"loss": 0.3941, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.7578125, |
|
"grad_norm": 20.047348022460938, |
|
"learning_rate": 1.4437500000000002e-05, |
|
"loss": 0.2488, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 1.796875, |
|
"grad_norm": 0.09602449089288712, |
|
"learning_rate": 1.426388888888889e-05, |
|
"loss": 0.3055, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.8359375, |
|
"grad_norm": 6.260819435119629, |
|
"learning_rate": 1.4090277777777778e-05, |
|
"loss": 0.2926, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 1.875, |
|
"grad_norm": 0.12005895376205444, |
|
"learning_rate": 1.3916666666666667e-05, |
|
"loss": 0.4649, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.9140625, |
|
"grad_norm": 6.192780494689941, |
|
"learning_rate": 1.3743055555555555e-05, |
|
"loss": 0.2597, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 1.953125, |
|
"grad_norm": 0.2921382188796997, |
|
"learning_rate": 1.3576388888888889e-05, |
|
"loss": 0.3106, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.9921875, |
|
"grad_norm": 6.799332618713379, |
|
"learning_rate": 1.3402777777777779e-05, |
|
"loss": 0.501, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9075, |
|
"eval_f1_macro": 0.9076167120902879, |
|
"eval_f1_micro": 0.9075, |
|
"eval_f1_weighted": 0.9076167120902879, |
|
"eval_loss": 0.3858742415904999, |
|
"eval_precision_macro": 0.9080215201299329, |
|
"eval_precision_micro": 0.9075, |
|
"eval_precision_weighted": 0.9080215201299328, |
|
"eval_recall_macro": 0.9075, |
|
"eval_recall_micro": 0.9075, |
|
"eval_recall_weighted": 0.9075, |
|
"eval_runtime": 6.273, |
|
"eval_samples_per_second": 127.53, |
|
"eval_steps_per_second": 63.765, |
|
"step": 1280 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 3200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"total_flos": 1683940992614400.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|