GuntramG's picture
Upload folder using huggingface_hub
aac3138 verified
{
"best_metric": 0.18285594880580902,
"best_model_checkpoint": "autotrain-6doma-5m8vf/checkpoint-1107",
"epoch": 3.0,
"eval_steps": 500,
"global_step": 1107,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.06775067750677506,
"grad_norm": 16.566085815429688,
"learning_rate": 9.90990990990991e-06,
"loss": 1.5667,
"step": 25
},
{
"epoch": 0.13550135501355012,
"grad_norm": 25.27834129333496,
"learning_rate": 2.117117117117117e-05,
"loss": 1.2958,
"step": 50
},
{
"epoch": 0.2032520325203252,
"grad_norm": 14.882951736450195,
"learning_rate": 3.2432432432432436e-05,
"loss": 1.0021,
"step": 75
},
{
"epoch": 0.27100271002710025,
"grad_norm": 12.05569839477539,
"learning_rate": 4.369369369369369e-05,
"loss": 0.7635,
"step": 100
},
{
"epoch": 0.33875338753387535,
"grad_norm": 15.701233863830566,
"learning_rate": 4.944779116465864e-05,
"loss": 0.572,
"step": 125
},
{
"epoch": 0.4065040650406504,
"grad_norm": 45.54197692871094,
"learning_rate": 4.8242971887550205e-05,
"loss": 0.4778,
"step": 150
},
{
"epoch": 0.4742547425474255,
"grad_norm": 20.10284996032715,
"learning_rate": 4.698795180722892e-05,
"loss": 0.4213,
"step": 175
},
{
"epoch": 0.5420054200542005,
"grad_norm": 52.795291900634766,
"learning_rate": 4.573293172690764e-05,
"loss": 0.4171,
"step": 200
},
{
"epoch": 0.6097560975609756,
"grad_norm": 32.24135208129883,
"learning_rate": 4.447791164658635e-05,
"loss": 0.4031,
"step": 225
},
{
"epoch": 0.6775067750677507,
"grad_norm": 48.17521286010742,
"learning_rate": 4.3222891566265064e-05,
"loss": 0.3206,
"step": 250
},
{
"epoch": 0.7452574525745257,
"grad_norm": 27.5257511138916,
"learning_rate": 4.196787148594378e-05,
"loss": 0.3616,
"step": 275
},
{
"epoch": 0.8130081300813008,
"grad_norm": 15.912370681762695,
"learning_rate": 4.071285140562249e-05,
"loss": 0.508,
"step": 300
},
{
"epoch": 0.8807588075880759,
"grad_norm": 12.763589859008789,
"learning_rate": 3.9508032128514064e-05,
"loss": 0.3685,
"step": 325
},
{
"epoch": 0.948509485094851,
"grad_norm": 20.36044692993164,
"learning_rate": 3.8253012048192774e-05,
"loss": 0.393,
"step": 350
},
{
"epoch": 1.0,
"eval_accuracy": 0.9189005768578216,
"eval_f1_macro": 0.8913236764060113,
"eval_f1_micro": 0.9189005768578216,
"eval_f1_weighted": 0.9195807716070247,
"eval_loss": 0.2322985827922821,
"eval_precision_macro": 0.8992556342366311,
"eval_precision_micro": 0.9189005768578216,
"eval_precision_weighted": 0.9235420876186199,
"eval_recall_macro": 0.8887207219589304,
"eval_recall_micro": 0.9189005768578216,
"eval_recall_weighted": 0.9189005768578216,
"eval_runtime": 19.8492,
"eval_samples_per_second": 148.469,
"eval_steps_per_second": 9.32,
"step": 369
},
{
"epoch": 1.016260162601626,
"grad_norm": 14.446638107299805,
"learning_rate": 3.699799196787149e-05,
"loss": 0.2934,
"step": 375
},
{
"epoch": 1.084010840108401,
"grad_norm": 29.589651107788086,
"learning_rate": 3.57429718875502e-05,
"loss": 0.3801,
"step": 400
},
{
"epoch": 1.151761517615176,
"grad_norm": 23.76276397705078,
"learning_rate": 3.4487951807228916e-05,
"loss": 0.2276,
"step": 425
},
{
"epoch": 1.2195121951219512,
"grad_norm": 33.06072998046875,
"learning_rate": 3.323293172690763e-05,
"loss": 0.2623,
"step": 450
},
{
"epoch": 1.2872628726287263,
"grad_norm": 31.562694549560547,
"learning_rate": 3.197791164658634e-05,
"loss": 0.3324,
"step": 475
},
{
"epoch": 1.3550135501355014,
"grad_norm": 25.050046920776367,
"learning_rate": 3.072289156626506e-05,
"loss": 0.3613,
"step": 500
},
{
"epoch": 1.4227642276422765,
"grad_norm": 5.65738582611084,
"learning_rate": 2.9467871485943778e-05,
"loss": 0.3689,
"step": 525
},
{
"epoch": 1.4905149051490514,
"grad_norm": 30.50360870361328,
"learning_rate": 2.821285140562249e-05,
"loss": 0.2128,
"step": 550
},
{
"epoch": 1.5582655826558267,
"grad_norm": 31.306838989257812,
"learning_rate": 2.6957831325301207e-05,
"loss": 0.3329,
"step": 575
},
{
"epoch": 1.6260162601626016,
"grad_norm": 5.569540023803711,
"learning_rate": 2.570281124497992e-05,
"loss": 0.3934,
"step": 600
},
{
"epoch": 1.6937669376693767,
"grad_norm": 79.83793640136719,
"learning_rate": 2.4447791164658633e-05,
"loss": 0.3329,
"step": 625
},
{
"epoch": 1.7615176151761518,
"grad_norm": 11.711432456970215,
"learning_rate": 2.319277108433735e-05,
"loss": 0.3065,
"step": 650
},
{
"epoch": 1.8292682926829267,
"grad_norm": 27.71021842956543,
"learning_rate": 2.1937751004016066e-05,
"loss": 0.3361,
"step": 675
},
{
"epoch": 1.897018970189702,
"grad_norm": 24.346481323242188,
"learning_rate": 2.068273092369478e-05,
"loss": 0.3967,
"step": 700
},
{
"epoch": 1.9647696476964769,
"grad_norm": 7.5306549072265625,
"learning_rate": 1.9427710843373495e-05,
"loss": 0.3304,
"step": 725
},
{
"epoch": 2.0,
"eval_accuracy": 0.9284017645062775,
"eval_f1_macro": 0.903605865288441,
"eval_f1_micro": 0.9284017645062775,
"eval_f1_weighted": 0.9277100982185731,
"eval_loss": 0.2046061009168625,
"eval_precision_macro": 0.9095877174004062,
"eval_precision_micro": 0.9284017645062775,
"eval_precision_weighted": 0.9281331331487362,
"eval_recall_macro": 0.8989112570392443,
"eval_recall_micro": 0.9284017645062775,
"eval_recall_weighted": 0.9284017645062775,
"eval_runtime": 19.9432,
"eval_samples_per_second": 147.769,
"eval_steps_per_second": 9.276,
"step": 738
},
{
"epoch": 2.032520325203252,
"grad_norm": 28.1395206451416,
"learning_rate": 1.822289156626506e-05,
"loss": 0.332,
"step": 750
},
{
"epoch": 2.100271002710027,
"grad_norm": 7.682183265686035,
"learning_rate": 1.6967871485943776e-05,
"loss": 0.2995,
"step": 775
},
{
"epoch": 2.168021680216802,
"grad_norm": 23.640390396118164,
"learning_rate": 1.5712851405622492e-05,
"loss": 0.3089,
"step": 800
},
{
"epoch": 2.2357723577235773,
"grad_norm": 3.6244945526123047,
"learning_rate": 1.4457831325301205e-05,
"loss": 0.2557,
"step": 825
},
{
"epoch": 2.303523035230352,
"grad_norm": 109.68293762207031,
"learning_rate": 1.3202811244979921e-05,
"loss": 0.3734,
"step": 850
},
{
"epoch": 2.3712737127371275,
"grad_norm": 28.19609832763672,
"learning_rate": 1.1947791164658636e-05,
"loss": 0.3144,
"step": 875
},
{
"epoch": 2.4390243902439024,
"grad_norm": 17.588850021362305,
"learning_rate": 1.069277108433735e-05,
"loss": 0.4061,
"step": 900
},
{
"epoch": 2.5067750677506773,
"grad_norm": 21.384654998779297,
"learning_rate": 9.437751004016063e-06,
"loss": 0.2626,
"step": 925
},
{
"epoch": 2.5745257452574526,
"grad_norm": 0.35269397497177124,
"learning_rate": 8.18273092369478e-06,
"loss": 0.2822,
"step": 950
},
{
"epoch": 2.642276422764228,
"grad_norm": 21.37306785583496,
"learning_rate": 6.927710843373494e-06,
"loss": 0.4436,
"step": 975
},
{
"epoch": 2.710027100271003,
"grad_norm": 17.97796630859375,
"learning_rate": 5.672690763052209e-06,
"loss": 0.2517,
"step": 1000
},
{
"epoch": 2.7777777777777777,
"grad_norm": 9.19117259979248,
"learning_rate": 4.417670682730924e-06,
"loss": 0.2395,
"step": 1025
},
{
"epoch": 2.845528455284553,
"grad_norm": 64.22978210449219,
"learning_rate": 3.1626506024096387e-06,
"loss": 0.3387,
"step": 1050
},
{
"epoch": 2.913279132791328,
"grad_norm": 1.4875394105911255,
"learning_rate": 1.9076305220883537e-06,
"loss": 0.2559,
"step": 1075
},
{
"epoch": 2.9810298102981028,
"grad_norm": 0.5281310677528381,
"learning_rate": 6.526104417670682e-07,
"loss": 0.3605,
"step": 1100
},
{
"epoch": 3.0,
"eval_accuracy": 0.9365456396335257,
"eval_f1_macro": 0.9148614413559308,
"eval_f1_micro": 0.9365456396335257,
"eval_f1_weighted": 0.9364564915178187,
"eval_loss": 0.18285594880580902,
"eval_precision_macro": 0.9159613318061458,
"eval_precision_micro": 0.9365456396335257,
"eval_precision_weighted": 0.9365041505044936,
"eval_recall_macro": 0.9139276800740521,
"eval_recall_micro": 0.9365456396335257,
"eval_recall_weighted": 0.9365456396335257,
"eval_runtime": 19.7723,
"eval_samples_per_second": 149.047,
"eval_steps_per_second": 9.357,
"step": 1107
}
],
"logging_steps": 25,
"max_steps": 1107,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"total_flos": 2.225421168402862e+17,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}