GuntramG's picture
Upload folder using huggingface_hub
ef03452 verified
raw
history blame
No virus
10.8 kB
{
"best_metric": 0.19515299797058105,
"best_model_checkpoint": "autotrain-rtvyh-y5ben/checkpoint-1146",
"epoch": 3.0,
"eval_steps": 500,
"global_step": 1146,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.06544502617801047,
"grad_norm": 27.387327194213867,
"learning_rate": 9.130434782608697e-06,
"loss": 1.9398,
"step": 25
},
{
"epoch": 0.13089005235602094,
"grad_norm": 22.39668846130371,
"learning_rate": 2e-05,
"loss": 1.7177,
"step": 50
},
{
"epoch": 0.19633507853403143,
"grad_norm": 17.08791160583496,
"learning_rate": 3.086956521739131e-05,
"loss": 1.3264,
"step": 75
},
{
"epoch": 0.2617801047120419,
"grad_norm": 14.287014961242676,
"learning_rate": 4.1739130434782605e-05,
"loss": 0.9999,
"step": 100
},
{
"epoch": 0.32722513089005234,
"grad_norm": 5.287769794464111,
"learning_rate": 4.97090203685742e-05,
"loss": 0.7594,
"step": 125
},
{
"epoch": 0.39267015706806285,
"grad_norm": 37.07988739013672,
"learning_rate": 4.849660523763337e-05,
"loss": 0.5061,
"step": 150
},
{
"epoch": 0.4581151832460733,
"grad_norm": 2.10866641998291,
"learning_rate": 4.728419010669253e-05,
"loss": 0.4617,
"step": 175
},
{
"epoch": 0.5235602094240838,
"grad_norm": 37.698177337646484,
"learning_rate": 4.6071774975751696e-05,
"loss": 0.3837,
"step": 200
},
{
"epoch": 0.5890052356020943,
"grad_norm": 20.22072410583496,
"learning_rate": 4.485935984481087e-05,
"loss": 0.4478,
"step": 225
},
{
"epoch": 0.6544502617801047,
"grad_norm": 37.976768493652344,
"learning_rate": 4.364694471387003e-05,
"loss": 0.4318,
"step": 250
},
{
"epoch": 0.7198952879581152,
"grad_norm": 25.226699829101562,
"learning_rate": 4.2434529582929193e-05,
"loss": 0.3503,
"step": 275
},
{
"epoch": 0.7853403141361257,
"grad_norm": 38.52054977416992,
"learning_rate": 4.1270611057225994e-05,
"loss": 0.4691,
"step": 300
},
{
"epoch": 0.8507853403141361,
"grad_norm": 36.57845687866211,
"learning_rate": 4.005819592628517e-05,
"loss": 0.3103,
"step": 325
},
{
"epoch": 0.9162303664921466,
"grad_norm": 21.468103408813477,
"learning_rate": 3.8845780795344326e-05,
"loss": 0.4161,
"step": 350
},
{
"epoch": 0.981675392670157,
"grad_norm": 1.5287563800811768,
"learning_rate": 3.763336566440349e-05,
"loss": 0.2762,
"step": 375
},
{
"epoch": 1.0,
"eval_accuracy": 0.912303664921466,
"eval_f1_macro": 0.8881661676824378,
"eval_f1_micro": 0.912303664921466,
"eval_f1_weighted": 0.9139427487256797,
"eval_loss": 0.24220755696296692,
"eval_precision_macro": 0.878941728706775,
"eval_precision_micro": 0.912303664921466,
"eval_precision_weighted": 0.9193888936099365,
"eval_recall_macro": 0.903916491565807,
"eval_recall_micro": 0.912303664921466,
"eval_recall_weighted": 0.912303664921466,
"eval_runtime": 18.3017,
"eval_samples_per_second": 166.979,
"eval_steps_per_second": 10.436,
"step": 382
},
{
"epoch": 1.0471204188481675,
"grad_norm": 82.7063980102539,
"learning_rate": 3.6420950533462664e-05,
"loss": 0.4792,
"step": 400
},
{
"epoch": 1.112565445026178,
"grad_norm": 1.9939672946929932,
"learning_rate": 3.520853540252182e-05,
"loss": 0.3641,
"step": 425
},
{
"epoch": 1.1780104712041886,
"grad_norm": 2.1117889881134033,
"learning_rate": 3.399612027158099e-05,
"loss": 0.3835,
"step": 450
},
{
"epoch": 1.243455497382199,
"grad_norm": 12.421843528747559,
"learning_rate": 3.278370514064016e-05,
"loss": 0.3868,
"step": 475
},
{
"epoch": 1.3089005235602094,
"grad_norm": 4.019728183746338,
"learning_rate": 3.157129000969932e-05,
"loss": 0.3204,
"step": 500
},
{
"epoch": 1.3743455497382198,
"grad_norm": 23.721004486083984,
"learning_rate": 3.0358874878758486e-05,
"loss": 0.2277,
"step": 525
},
{
"epoch": 1.4397905759162304,
"grad_norm": 37.32318878173828,
"learning_rate": 2.9146459747817655e-05,
"loss": 0.4918,
"step": 550
},
{
"epoch": 1.5052356020942408,
"grad_norm": 0.5763813257217407,
"learning_rate": 2.793404461687682e-05,
"loss": 0.2702,
"step": 575
},
{
"epoch": 1.5706806282722514,
"grad_norm": 25.39264488220215,
"learning_rate": 2.6721629485935983e-05,
"loss": 0.2803,
"step": 600
},
{
"epoch": 1.6361256544502618,
"grad_norm": 48.61098861694336,
"learning_rate": 2.5509214354995155e-05,
"loss": 0.3219,
"step": 625
},
{
"epoch": 1.7015706806282722,
"grad_norm": 19.207382202148438,
"learning_rate": 2.4296799224054317e-05,
"loss": 0.4093,
"step": 650
},
{
"epoch": 1.7670157068062826,
"grad_norm": 19.071319580078125,
"learning_rate": 2.3084384093113483e-05,
"loss": 0.3342,
"step": 675
},
{
"epoch": 1.8324607329842932,
"grad_norm": 24.700105667114258,
"learning_rate": 2.187196896217265e-05,
"loss": 0.3489,
"step": 700
},
{
"epoch": 1.8979057591623036,
"grad_norm": 25.030014038085938,
"learning_rate": 2.0659553831231815e-05,
"loss": 0.3604,
"step": 725
},
{
"epoch": 1.9633507853403143,
"grad_norm": 51.47434616088867,
"learning_rate": 1.944713870029098e-05,
"loss": 0.2948,
"step": 750
},
{
"epoch": 2.0,
"eval_accuracy": 0.9195026178010471,
"eval_f1_macro": 0.8825867245438248,
"eval_f1_micro": 0.9195026178010471,
"eval_f1_weighted": 0.9191139958840788,
"eval_loss": 0.22944672405719757,
"eval_precision_macro": 0.8833301888913111,
"eval_precision_micro": 0.9195026178010471,
"eval_precision_weighted": 0.9239632346357177,
"eval_recall_macro": 0.8933703672169516,
"eval_recall_micro": 0.9195026178010471,
"eval_recall_weighted": 0.9195026178010471,
"eval_runtime": 18.2799,
"eval_samples_per_second": 167.178,
"eval_steps_per_second": 10.449,
"step": 764
},
{
"epoch": 2.0287958115183247,
"grad_norm": 13.710552215576172,
"learning_rate": 1.8234723569350146e-05,
"loss": 0.2163,
"step": 775
},
{
"epoch": 2.094240837696335,
"grad_norm": 14.117116928100586,
"learning_rate": 1.702230843840931e-05,
"loss": 0.286,
"step": 800
},
{
"epoch": 2.1596858638743455,
"grad_norm": 3.677631378173828,
"learning_rate": 1.5809893307468477e-05,
"loss": 0.421,
"step": 825
},
{
"epoch": 2.225130890052356,
"grad_norm": 19.017574310302734,
"learning_rate": 1.4597478176527643e-05,
"loss": 0.3262,
"step": 850
},
{
"epoch": 2.2905759162303667,
"grad_norm": 4.1396331787109375,
"learning_rate": 1.338506304558681e-05,
"loss": 0.3032,
"step": 875
},
{
"epoch": 2.356020942408377,
"grad_norm": 14.411179542541504,
"learning_rate": 1.2172647914645975e-05,
"loss": 0.3508,
"step": 900
},
{
"epoch": 2.4214659685863875,
"grad_norm": 3.900756597518921,
"learning_rate": 1.096023278370514e-05,
"loss": 0.4431,
"step": 925
},
{
"epoch": 2.486910994764398,
"grad_norm": 3.200969696044922,
"learning_rate": 9.747817652764308e-06,
"loss": 0.2815,
"step": 950
},
{
"epoch": 2.5523560209424083,
"grad_norm": 17.05501365661621,
"learning_rate": 8.535402521823473e-06,
"loss": 0.3593,
"step": 975
},
{
"epoch": 2.6178010471204187,
"grad_norm": 15.708697319030762,
"learning_rate": 7.322987390882638e-06,
"loss": 0.2373,
"step": 1000
},
{
"epoch": 2.683246073298429,
"grad_norm": 30.304662704467773,
"learning_rate": 6.159068865179437e-06,
"loss": 0.3447,
"step": 1025
},
{
"epoch": 2.7486910994764395,
"grad_norm": 18.696701049804688,
"learning_rate": 4.946653734238604e-06,
"loss": 0.225,
"step": 1050
},
{
"epoch": 2.8141361256544504,
"grad_norm": 8.534065246582031,
"learning_rate": 3.734238603297769e-06,
"loss": 0.2218,
"step": 1075
},
{
"epoch": 2.8795811518324608,
"grad_norm": 2.1658194065093994,
"learning_rate": 2.521823472356935e-06,
"loss": 0.2223,
"step": 1100
},
{
"epoch": 2.945026178010471,
"grad_norm": 61.290462493896484,
"learning_rate": 1.309408341416101e-06,
"loss": 0.3373,
"step": 1125
},
{
"epoch": 3.0,
"eval_accuracy": 0.931282722513089,
"eval_f1_macro": 0.9028776359481251,
"eval_f1_micro": 0.931282722513089,
"eval_f1_weighted": 0.9312850320245419,
"eval_loss": 0.19515299797058105,
"eval_precision_macro": 0.8955979809260896,
"eval_precision_micro": 0.931282722513089,
"eval_precision_weighted": 0.9330235887739936,
"eval_recall_macro": 0.9159046117460236,
"eval_recall_micro": 0.931282722513089,
"eval_recall_weighted": 0.931282722513089,
"eval_runtime": 18.4024,
"eval_samples_per_second": 166.066,
"eval_steps_per_second": 10.379,
"step": 1146
}
],
"logging_steps": 25,
"max_steps": 1146,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.01
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.3077959759396864e+17,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}