{
"best_metric": 0.3858742415904999,
"best_model_checkpoint": "few-shot-learning-classification-bert-sm-1K-1/checkpoint-1280",
"epoch": 2.0,
"eval_steps": 500,
"global_step": 1280,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0390625,
"grad_norm": 7.191597938537598,
"learning_rate": 1.4375e-06,
"loss": 1.4451,
"step": 25
},
{
"epoch": 0.078125,
"grad_norm": 5.644325256347656,
"learning_rate": 3e-06,
"loss": 1.4487,
"step": 50
},
{
"epoch": 0.1171875,
"grad_norm": 13.361610412597656,
"learning_rate": 4.5e-06,
"loss": 1.319,
"step": 75
},
{
"epoch": 0.15625,
"grad_norm": 14.431085586547852,
"learning_rate": 6.0625e-06,
"loss": 1.2472,
"step": 100
},
{
"epoch": 0.1953125,
"grad_norm": 22.173587799072266,
"learning_rate": 7.625e-06,
"loss": 1.1274,
"step": 125
},
{
"epoch": 0.234375,
"grad_norm": 14.653789520263672,
"learning_rate": 9.1875e-06,
"loss": 0.966,
"step": 150
},
{
"epoch": 0.2734375,
"grad_norm": 8.065316200256348,
"learning_rate": 1.075e-05,
"loss": 0.8051,
"step": 175
},
{
"epoch": 0.3125,
"grad_norm": 9.035745620727539,
"learning_rate": 1.2312500000000001e-05,
"loss": 0.7299,
"step": 200
},
{
"epoch": 0.3515625,
"grad_norm": 7.260746002197266,
"learning_rate": 1.3875e-05,
"loss": 0.5627,
"step": 225
},
{
"epoch": 0.390625,
"grad_norm": 11.369009017944336,
"learning_rate": 1.54375e-05,
"loss": 0.6384,
"step": 250
},
{
"epoch": 0.4296875,
"grad_norm": 9.945943832397461,
"learning_rate": 1.7e-05,
"loss": 0.5355,
"step": 275
},
{
"epoch": 0.46875,
"grad_norm": 8.494958877563477,
"learning_rate": 1.85625e-05,
"loss": 0.4033,
"step": 300
},
{
"epoch": 0.5078125,
"grad_norm": 13.045336723327637,
"learning_rate": 1.9986111111111114e-05,
"loss": 0.3506,
"step": 325
},
{
"epoch": 0.546875,
"grad_norm": 1.5317492485046387,
"learning_rate": 1.98125e-05,
"loss": 0.4383,
"step": 350
},
{
"epoch": 0.5859375,
"grad_norm": 51.39152908325195,
"learning_rate": 1.963888888888889e-05,
"loss": 0.4716,
"step": 375
},
{
"epoch": 0.625,
"grad_norm": 0.5919365882873535,
"learning_rate": 1.946527777777778e-05,
"loss": 0.2496,
"step": 400
},
{
"epoch": 0.6640625,
"grad_norm": 0.27208948135375977,
"learning_rate": 1.9291666666666667e-05,
"loss": 0.326,
"step": 425
},
{
"epoch": 0.703125,
"grad_norm": 0.17534124851226807,
"learning_rate": 1.9118055555555557e-05,
"loss": 0.3539,
"step": 450
},
{
"epoch": 0.7421875,
"grad_norm": 0.4635702967643738,
"learning_rate": 1.8944444444444447e-05,
"loss": 0.3726,
"step": 475
},
{
"epoch": 0.78125,
"grad_norm": 18.82594108581543,
"learning_rate": 1.8770833333333337e-05,
"loss": 0.5068,
"step": 500
},
{
"epoch": 0.8203125,
"grad_norm": 24.958038330078125,
"learning_rate": 1.8604166666666667e-05,
"loss": 0.3299,
"step": 525
},
{
"epoch": 0.859375,
"grad_norm": 61.970924377441406,
"learning_rate": 1.8430555555555557e-05,
"loss": 0.5022,
"step": 550
},
{
"epoch": 0.8984375,
"grad_norm": 0.2635006606578827,
"learning_rate": 1.8256944444444447e-05,
"loss": 0.4954,
"step": 575
},
{
"epoch": 0.9375,
"grad_norm": 6.639841556549072,
"learning_rate": 1.8083333333333334e-05,
"loss": 0.3813,
"step": 600
},
{
"epoch": 0.9765625,
"grad_norm": 2.1155128479003906,
"learning_rate": 1.7909722222222223e-05,
"loss": 0.4112,
"step": 625
},
{
"epoch": 1.0,
"eval_accuracy": 0.8675,
"eval_f1_macro": 0.8671356661653469,
"eval_f1_micro": 0.8675,
"eval_f1_weighted": 0.8671356661653468,
"eval_loss": 0.5370703339576721,
"eval_precision_macro": 0.8730718238693385,
"eval_precision_micro": 0.8675,
"eval_precision_weighted": 0.8730718238693385,
"eval_recall_macro": 0.8674999999999999,
"eval_recall_micro": 0.8675,
"eval_recall_weighted": 0.8675,
"eval_runtime": 6.2872,
"eval_samples_per_second": 127.242,
"eval_steps_per_second": 63.621,
"step": 640
},
{
"epoch": 1.015625,
"grad_norm": 31.680767059326172,
"learning_rate": 1.773611111111111e-05,
"loss": 0.3012,
"step": 650
},
{
"epoch": 1.0546875,
"grad_norm": 31.29473114013672,
"learning_rate": 1.7562500000000003e-05,
"loss": 0.2361,
"step": 675
},
{
"epoch": 1.09375,
"grad_norm": 0.18347026407718658,
"learning_rate": 1.738888888888889e-05,
"loss": 0.2887,
"step": 700
},
{
"epoch": 1.1328125,
"grad_norm": 1.14323091506958,
"learning_rate": 1.721527777777778e-05,
"loss": 0.2525,
"step": 725
},
{
"epoch": 1.171875,
"grad_norm": 0.09450375288724899,
"learning_rate": 1.7041666666666666e-05,
"loss": 0.3185,
"step": 750
},
{
"epoch": 1.2109375,
"grad_norm": 0.28490039706230164,
"learning_rate": 1.6868055555555556e-05,
"loss": 0.2852,
"step": 775
},
{
"epoch": 1.25,
"grad_norm": 0.4697229862213135,
"learning_rate": 1.6694444444444446e-05,
"loss": 0.4275,
"step": 800
},
{
"epoch": 1.2890625,
"grad_norm": 0.09609019011259079,
"learning_rate": 1.6520833333333336e-05,
"loss": 0.0966,
"step": 825
},
{
"epoch": 1.328125,
"grad_norm": 34.88169479370117,
"learning_rate": 1.6347222222222223e-05,
"loss": 0.3033,
"step": 850
},
{
"epoch": 1.3671875,
"grad_norm": 45.051490783691406,
"learning_rate": 1.6173611111111113e-05,
"loss": 0.4049,
"step": 875
},
{
"epoch": 1.40625,
"grad_norm": 0.3537954092025757,
"learning_rate": 1.6000000000000003e-05,
"loss": 0.2878,
"step": 900
},
{
"epoch": 1.4453125,
"grad_norm": 91.70577239990234,
"learning_rate": 1.582638888888889e-05,
"loss": 0.2305,
"step": 925
},
{
"epoch": 1.484375,
"grad_norm": 0.0591551698744297,
"learning_rate": 1.565277777777778e-05,
"loss": 0.2653,
"step": 950
},
{
"epoch": 1.5234375,
"grad_norm": 13.28045654296875,
"learning_rate": 1.5479166666666666e-05,
"loss": 0.3921,
"step": 975
},
{
"epoch": 1.5625,
"grad_norm": 0.16305220127105713,
"learning_rate": 1.5305555555555556e-05,
"loss": 0.1752,
"step": 1000
},
{
"epoch": 1.6015625,
"grad_norm": 0.06767012178897858,
"learning_rate": 1.5131944444444446e-05,
"loss": 0.3607,
"step": 1025
},
{
"epoch": 1.640625,
"grad_norm": 1.8044508695602417,
"learning_rate": 1.4958333333333336e-05,
"loss": 0.2305,
"step": 1050
},
{
"epoch": 1.6796875,
"grad_norm": 0.15962672233581543,
"learning_rate": 1.4784722222222224e-05,
"loss": 0.3076,
"step": 1075
},
{
"epoch": 1.71875,
"grad_norm": 48.03413391113281,
"learning_rate": 1.4611111111111112e-05,
"loss": 0.3941,
"step": 1100
},
{
"epoch": 1.7578125,
"grad_norm": 20.047348022460938,
"learning_rate": 1.4437500000000002e-05,
"loss": 0.2488,
"step": 1125
},
{
"epoch": 1.796875,
"grad_norm": 0.09602449089288712,
"learning_rate": 1.426388888888889e-05,
"loss": 0.3055,
"step": 1150
},
{
"epoch": 1.8359375,
"grad_norm": 6.260819435119629,
"learning_rate": 1.4090277777777778e-05,
"loss": 0.2926,
"step": 1175
},
{
"epoch": 1.875,
"grad_norm": 0.12005895376205444,
"learning_rate": 1.3916666666666667e-05,
"loss": 0.4649,
"step": 1200
},
{
"epoch": 1.9140625,
"grad_norm": 6.192780494689941,
"learning_rate": 1.3743055555555555e-05,
"loss": 0.2597,
"step": 1225
},
{
"epoch": 1.953125,
"grad_norm": 0.2921382188796997,
"learning_rate": 1.3576388888888889e-05,
"loss": 0.3106,
"step": 1250
},
{
"epoch": 1.9921875,
"grad_norm": 6.799332618713379,
"learning_rate": 1.3402777777777779e-05,
"loss": 0.501,
"step": 1275
},
{
"epoch": 2.0,
"eval_accuracy": 0.9075,
"eval_f1_macro": 0.9076167120902879,
"eval_f1_micro": 0.9075,
"eval_f1_weighted": 0.9076167120902879,
"eval_loss": 0.3858742415904999,
"eval_precision_macro": 0.9080215201299329,
"eval_precision_micro": 0.9075,
"eval_precision_weighted": 0.9080215201299328,
"eval_recall_macro": 0.9075,
"eval_recall_micro": 0.9075,
"eval_recall_weighted": 0.9075,
"eval_runtime": 6.273,
"eval_samples_per_second": 127.53,
"eval_steps_per_second": 63.765,
"step": 1280
}
],
"logging_steps": 25,
"max_steps": 3200,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"total_flos": 1683940992614400.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}