|
{ |
|
"best_metric": 0.18285594880580902, |
|
"best_model_checkpoint": "autotrain-6doma-5m8vf/checkpoint-1107", |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 1107, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.06775067750677506, |
|
"grad_norm": 16.566085815429688, |
|
"learning_rate": 9.90990990990991e-06, |
|
"loss": 1.5667, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.13550135501355012, |
|
"grad_norm": 25.27834129333496, |
|
"learning_rate": 2.117117117117117e-05, |
|
"loss": 1.2958, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.2032520325203252, |
|
"grad_norm": 14.882951736450195, |
|
"learning_rate": 3.2432432432432436e-05, |
|
"loss": 1.0021, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.27100271002710025, |
|
"grad_norm": 12.05569839477539, |
|
"learning_rate": 4.369369369369369e-05, |
|
"loss": 0.7635, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.33875338753387535, |
|
"grad_norm": 15.701233863830566, |
|
"learning_rate": 4.944779116465864e-05, |
|
"loss": 0.572, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.4065040650406504, |
|
"grad_norm": 45.54197692871094, |
|
"learning_rate": 4.8242971887550205e-05, |
|
"loss": 0.4778, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.4742547425474255, |
|
"grad_norm": 20.10284996032715, |
|
"learning_rate": 4.698795180722892e-05, |
|
"loss": 0.4213, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.5420054200542005, |
|
"grad_norm": 52.795291900634766, |
|
"learning_rate": 4.573293172690764e-05, |
|
"loss": 0.4171, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.6097560975609756, |
|
"grad_norm": 32.24135208129883, |
|
"learning_rate": 4.447791164658635e-05, |
|
"loss": 0.4031, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.6775067750677507, |
|
"grad_norm": 48.17521286010742, |
|
"learning_rate": 4.3222891566265064e-05, |
|
"loss": 0.3206, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.7452574525745257, |
|
"grad_norm": 27.5257511138916, |
|
"learning_rate": 4.196787148594378e-05, |
|
"loss": 0.3616, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.8130081300813008, |
|
"grad_norm": 15.912370681762695, |
|
"learning_rate": 4.071285140562249e-05, |
|
"loss": 0.508, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.8807588075880759, |
|
"grad_norm": 12.763589859008789, |
|
"learning_rate": 3.9508032128514064e-05, |
|
"loss": 0.3685, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.948509485094851, |
|
"grad_norm": 20.36044692993164, |
|
"learning_rate": 3.8253012048192774e-05, |
|
"loss": 0.393, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.9189005768578216, |
|
"eval_f1_macro": 0.8913236764060113, |
|
"eval_f1_micro": 0.9189005768578216, |
|
"eval_f1_weighted": 0.9195807716070247, |
|
"eval_loss": 0.2322985827922821, |
|
"eval_precision_macro": 0.8992556342366311, |
|
"eval_precision_micro": 0.9189005768578216, |
|
"eval_precision_weighted": 0.9235420876186199, |
|
"eval_recall_macro": 0.8887207219589304, |
|
"eval_recall_micro": 0.9189005768578216, |
|
"eval_recall_weighted": 0.9189005768578216, |
|
"eval_runtime": 19.8492, |
|
"eval_samples_per_second": 148.469, |
|
"eval_steps_per_second": 9.32, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 1.016260162601626, |
|
"grad_norm": 14.446638107299805, |
|
"learning_rate": 3.699799196787149e-05, |
|
"loss": 0.2934, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 1.084010840108401, |
|
"grad_norm": 29.589651107788086, |
|
"learning_rate": 3.57429718875502e-05, |
|
"loss": 0.3801, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.151761517615176, |
|
"grad_norm": 23.76276397705078, |
|
"learning_rate": 3.4487951807228916e-05, |
|
"loss": 0.2276, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 1.2195121951219512, |
|
"grad_norm": 33.06072998046875, |
|
"learning_rate": 3.323293172690763e-05, |
|
"loss": 0.2623, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.2872628726287263, |
|
"grad_norm": 31.562694549560547, |
|
"learning_rate": 3.197791164658634e-05, |
|
"loss": 0.3324, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 1.3550135501355014, |
|
"grad_norm": 25.050046920776367, |
|
"learning_rate": 3.072289156626506e-05, |
|
"loss": 0.3613, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.4227642276422765, |
|
"grad_norm": 5.65738582611084, |
|
"learning_rate": 2.9467871485943778e-05, |
|
"loss": 0.3689, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 1.4905149051490514, |
|
"grad_norm": 30.50360870361328, |
|
"learning_rate": 2.821285140562249e-05, |
|
"loss": 0.2128, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.5582655826558267, |
|
"grad_norm": 31.306838989257812, |
|
"learning_rate": 2.6957831325301207e-05, |
|
"loss": 0.3329, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 1.6260162601626016, |
|
"grad_norm": 5.569540023803711, |
|
"learning_rate": 2.570281124497992e-05, |
|
"loss": 0.3934, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.6937669376693767, |
|
"grad_norm": 79.83793640136719, |
|
"learning_rate": 2.4447791164658633e-05, |
|
"loss": 0.3329, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 1.7615176151761518, |
|
"grad_norm": 11.711432456970215, |
|
"learning_rate": 2.319277108433735e-05, |
|
"loss": 0.3065, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.8292682926829267, |
|
"grad_norm": 27.71021842956543, |
|
"learning_rate": 2.1937751004016066e-05, |
|
"loss": 0.3361, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 1.897018970189702, |
|
"grad_norm": 24.346481323242188, |
|
"learning_rate": 2.068273092369478e-05, |
|
"loss": 0.3967, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.9647696476964769, |
|
"grad_norm": 7.5306549072265625, |
|
"learning_rate": 1.9427710843373495e-05, |
|
"loss": 0.3304, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9284017645062775, |
|
"eval_f1_macro": 0.903605865288441, |
|
"eval_f1_micro": 0.9284017645062775, |
|
"eval_f1_weighted": 0.9277100982185731, |
|
"eval_loss": 0.2046061009168625, |
|
"eval_precision_macro": 0.9095877174004062, |
|
"eval_precision_micro": 0.9284017645062775, |
|
"eval_precision_weighted": 0.9281331331487362, |
|
"eval_recall_macro": 0.8989112570392443, |
|
"eval_recall_micro": 0.9284017645062775, |
|
"eval_recall_weighted": 0.9284017645062775, |
|
"eval_runtime": 19.9432, |
|
"eval_samples_per_second": 147.769, |
|
"eval_steps_per_second": 9.276, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 2.032520325203252, |
|
"grad_norm": 28.1395206451416, |
|
"learning_rate": 1.822289156626506e-05, |
|
"loss": 0.332, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.100271002710027, |
|
"grad_norm": 7.682183265686035, |
|
"learning_rate": 1.6967871485943776e-05, |
|
"loss": 0.2995, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 2.168021680216802, |
|
"grad_norm": 23.640390396118164, |
|
"learning_rate": 1.5712851405622492e-05, |
|
"loss": 0.3089, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.2357723577235773, |
|
"grad_norm": 3.6244945526123047, |
|
"learning_rate": 1.4457831325301205e-05, |
|
"loss": 0.2557, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 2.303523035230352, |
|
"grad_norm": 109.68293762207031, |
|
"learning_rate": 1.3202811244979921e-05, |
|
"loss": 0.3734, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 2.3712737127371275, |
|
"grad_norm": 28.19609832763672, |
|
"learning_rate": 1.1947791164658636e-05, |
|
"loss": 0.3144, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 2.4390243902439024, |
|
"grad_norm": 17.588850021362305, |
|
"learning_rate": 1.069277108433735e-05, |
|
"loss": 0.4061, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.5067750677506773, |
|
"grad_norm": 21.384654998779297, |
|
"learning_rate": 9.437751004016063e-06, |
|
"loss": 0.2626, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 2.5745257452574526, |
|
"grad_norm": 0.35269397497177124, |
|
"learning_rate": 8.18273092369478e-06, |
|
"loss": 0.2822, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.642276422764228, |
|
"grad_norm": 21.37306785583496, |
|
"learning_rate": 6.927710843373494e-06, |
|
"loss": 0.4436, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 2.710027100271003, |
|
"grad_norm": 17.97796630859375, |
|
"learning_rate": 5.672690763052209e-06, |
|
"loss": 0.2517, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.7777777777777777, |
|
"grad_norm": 9.19117259979248, |
|
"learning_rate": 4.417670682730924e-06, |
|
"loss": 0.2395, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 2.845528455284553, |
|
"grad_norm": 64.22978210449219, |
|
"learning_rate": 3.1626506024096387e-06, |
|
"loss": 0.3387, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 2.913279132791328, |
|
"grad_norm": 1.4875394105911255, |
|
"learning_rate": 1.9076305220883537e-06, |
|
"loss": 0.2559, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 2.9810298102981028, |
|
"grad_norm": 0.5281310677528381, |
|
"learning_rate": 6.526104417670682e-07, |
|
"loss": 0.3605, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9365456396335257, |
|
"eval_f1_macro": 0.9148614413559308, |
|
"eval_f1_micro": 0.9365456396335257, |
|
"eval_f1_weighted": 0.9364564915178187, |
|
"eval_loss": 0.18285594880580902, |
|
"eval_precision_macro": 0.9159613318061458, |
|
"eval_precision_micro": 0.9365456396335257, |
|
"eval_precision_weighted": 0.9365041505044936, |
|
"eval_recall_macro": 0.9139276800740521, |
|
"eval_recall_micro": 0.9365456396335257, |
|
"eval_recall_weighted": 0.9365456396335257, |
|
"eval_runtime": 19.7723, |
|
"eval_samples_per_second": 149.047, |
|
"eval_steps_per_second": 9.357, |
|
"step": 1107 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 1107, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 2.225421168402862e+17, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|