|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 10.999912914743534, |
|
"global_step": 31578, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.9419279907084783e-05, |
|
"loss": 0.0381, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.883855981416957e-05, |
|
"loss": 0.0241, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.825783972125435e-05, |
|
"loss": 0.022, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.767711962833915e-05, |
|
"loss": 0.0207, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.709639953542393e-05, |
|
"loss": 0.0199, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.9726582157682948, |
|
"eval_f1_HIRIQ": 0.7791055764551668, |
|
"eval_f1_HOLAM": 0.6713693875265927, |
|
"eval_f1_KUBUTZ": 0.7119160348309771, |
|
"eval_f1_PATACH": 0.851339663936741, |
|
"eval_f1_SHADDA": 0.8292565235143462, |
|
"eval_f1_SHVA": 0.8089031685917613, |
|
"eval_f1_TSERE": 0.7485289696344972, |
|
"eval_loss": 0.013756499625742435, |
|
"eval_macro_f1": 0.7714884749271546, |
|
"eval_micro_f1": 0.8119978846741942, |
|
"eval_runtime": 51.0176, |
|
"eval_samples_per_second": 750.232, |
|
"eval_steps_per_second": 7.507, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 4.6515679442508716e-05, |
|
"loss": 0.0189, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 4.59349593495935e-05, |
|
"loss": 0.0177, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 4.5354239256678285e-05, |
|
"loss": 0.0174, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 4.4773519163763066e-05, |
|
"loss": 0.0172, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 4.4192799070847854e-05, |
|
"loss": 0.0167, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 4.361207897793264e-05, |
|
"loss": 0.0166, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9761089815978242, |
|
"eval_f1_HIRIQ": 0.8106474124231328, |
|
"eval_f1_HOLAM": 0.7086283185840708, |
|
"eval_f1_KUBUTZ": 0.7472128945601075, |
|
"eval_f1_PATACH": 0.8701410253039187, |
|
"eval_f1_SHADDA": 0.8522099185169709, |
|
"eval_f1_SHVA": 0.8329891346774823, |
|
"eval_f1_TSERE": 0.7836281031964952, |
|
"eval_loss": 0.012357393279671669, |
|
"eval_macro_f1": 0.8007795438945969, |
|
"eval_micro_f1": 0.8363289062525998, |
|
"eval_runtime": 52.8127, |
|
"eval_samples_per_second": 724.731, |
|
"eval_steps_per_second": 7.252, |
|
"step": 5741 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 4.303135888501742e-05, |
|
"loss": 0.0157, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 4.245063879210221e-05, |
|
"loss": 0.0151, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 4.186991869918699e-05, |
|
"loss": 0.015, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 4.128919860627178e-05, |
|
"loss": 0.0148, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 4.070847851335656e-05, |
|
"loss": 0.0148, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 4.012775842044135e-05, |
|
"loss": 0.0147, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9780662605569959, |
|
"eval_f1_HIRIQ": 0.8294876571557304, |
|
"eval_f1_HOLAM": 0.7245056654076871, |
|
"eval_f1_KUBUTZ": 0.7711184824965379, |
|
"eval_f1_PATACH": 0.8810396563640023, |
|
"eval_f1_SHADDA": 0.8664105216491798, |
|
"eval_f1_SHVA": 0.8441380510188548, |
|
"eval_f1_TSERE": 0.7970168612191959, |
|
"eval_loss": 0.011536195874214172, |
|
"eval_macro_f1": 0.8162452707587411, |
|
"eval_micro_f1": 0.8498705967528521, |
|
"eval_runtime": 54.0777, |
|
"eval_samples_per_second": 707.778, |
|
"eval_steps_per_second": 7.082, |
|
"step": 8612 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 3.9547038327526136e-05, |
|
"loss": 0.0137, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 3.8966318234610924e-05, |
|
"loss": 0.0134, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 3.8385598141695705e-05, |
|
"loss": 0.0135, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 3.780487804878049e-05, |
|
"loss": 0.0135, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 3.7224157955865274e-05, |
|
"loss": 0.0132, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9792273312868799, |
|
"eval_f1_HIRIQ": 0.8373507327062703, |
|
"eval_f1_HOLAM": 0.7442917433178227, |
|
"eval_f1_KUBUTZ": 0.7827533265097235, |
|
"eval_f1_PATACH": 0.8867928661237257, |
|
"eval_f1_SHADDA": 0.8725204092940786, |
|
"eval_f1_SHVA": 0.8508812530764879, |
|
"eval_f1_TSERE": 0.801735428945645, |
|
"eval_loss": 0.010950990952551365, |
|
"eval_macro_f1": 0.8251893942819649, |
|
"eval_micro_f1": 0.8567964095958783, |
|
"eval_runtime": 52.4162, |
|
"eval_samples_per_second": 730.213, |
|
"eval_steps_per_second": 7.307, |
|
"step": 11483 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 3.664343786295006e-05, |
|
"loss": 0.0131, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 3.606271777003484e-05, |
|
"loss": 0.0122, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 3.548199767711963e-05, |
|
"loss": 0.0122, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"learning_rate": 3.490127758420442e-05, |
|
"loss": 0.0123, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 3.43205574912892e-05, |
|
"loss": 0.0122, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"learning_rate": 3.373983739837399e-05, |
|
"loss": 0.0122, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.9799653269288884, |
|
"eval_f1_HIRIQ": 0.8439961236895427, |
|
"eval_f1_HOLAM": 0.7499999999999999, |
|
"eval_f1_KUBUTZ": 0.7890396727485228, |
|
"eval_f1_PATACH": 0.8902453557194496, |
|
"eval_f1_SHADDA": 0.8791103582869978, |
|
"eval_f1_SHVA": 0.8568982423275623, |
|
"eval_f1_TSERE": 0.8086938522045125, |
|
"eval_loss": 0.010681645944714546, |
|
"eval_macro_f1": 0.8311405149966554, |
|
"eval_micro_f1": 0.8620291101643662, |
|
"eval_runtime": 49.3354, |
|
"eval_samples_per_second": 775.812, |
|
"eval_steps_per_second": 7.763, |
|
"step": 14353 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 3.315911730545877e-05, |
|
"loss": 0.012, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 5.23, |
|
"learning_rate": 3.2578397212543556e-05, |
|
"loss": 0.011, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"learning_rate": 3.199767711962834e-05, |
|
"loss": 0.0114, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"learning_rate": 3.1416957026713125e-05, |
|
"loss": 0.0112, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"learning_rate": 3.083623693379791e-05, |
|
"loss": 0.0112, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 5.92, |
|
"learning_rate": 3.0255516840882698e-05, |
|
"loss": 0.0112, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9805248675902215, |
|
"eval_f1_HIRIQ": 0.8487574675668987, |
|
"eval_f1_HOLAM": 0.7595126122274476, |
|
"eval_f1_KUBUTZ": 0.797991035951556, |
|
"eval_f1_PATACH": 0.8928041018387551, |
|
"eval_f1_SHADDA": 0.8846275811897255, |
|
"eval_f1_SHVA": 0.8607482938928253, |
|
"eval_f1_TSERE": 0.8122762990085161, |
|
"eval_loss": 0.010463288053870201, |
|
"eval_macro_f1": 0.8366739130965322, |
|
"eval_micro_f1": 0.8659091900525827, |
|
"eval_runtime": 49.2152, |
|
"eval_samples_per_second": 777.707, |
|
"eval_steps_per_second": 7.782, |
|
"step": 17224 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"learning_rate": 2.9674796747967482e-05, |
|
"loss": 0.0107, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"learning_rate": 2.9094076655052267e-05, |
|
"loss": 0.0105, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 6.44, |
|
"learning_rate": 2.851335656213705e-05, |
|
"loss": 0.0103, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 6.62, |
|
"learning_rate": 2.7932636469221835e-05, |
|
"loss": 0.0106, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 6.79, |
|
"learning_rate": 2.735191637630662e-05, |
|
"loss": 0.0105, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"learning_rate": 2.6771196283391408e-05, |
|
"loss": 0.0104, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.9811321234870294, |
|
"eval_f1_HIRIQ": 0.8550038223948401, |
|
"eval_f1_HOLAM": 0.763222830168437, |
|
"eval_f1_KUBUTZ": 0.8037383177570094, |
|
"eval_f1_PATACH": 0.8963010344751529, |
|
"eval_f1_SHADDA": 0.8854757929883139, |
|
"eval_f1_SHVA": 0.8649184871804224, |
|
"eval_f1_TSERE": 0.8171559407964675, |
|
"eval_loss": 0.010495145805180073, |
|
"eval_macro_f1": 0.8408308893943776, |
|
"eval_micro_f1": 0.8700826091477137, |
|
"eval_runtime": 52.1218, |
|
"eval_samples_per_second": 734.337, |
|
"eval_steps_per_second": 7.348, |
|
"step": 20095 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 2.6190476190476192e-05, |
|
"loss": 0.0098, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 7.32, |
|
"learning_rate": 2.5609756097560977e-05, |
|
"loss": 0.0097, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 7.49, |
|
"learning_rate": 2.502903600464576e-05, |
|
"loss": 0.0097, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 7.66, |
|
"learning_rate": 2.4448315911730546e-05, |
|
"loss": 0.0098, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 7.84, |
|
"learning_rate": 2.3867595818815333e-05, |
|
"loss": 0.0097, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.981536112480715, |
|
"eval_f1_HIRIQ": 0.8594682202461781, |
|
"eval_f1_HOLAM": 0.7635878626208735, |
|
"eval_f1_KUBUTZ": 0.8044461430664374, |
|
"eval_f1_PATACH": 0.8982397971622214, |
|
"eval_f1_SHADDA": 0.888189855746859, |
|
"eval_f1_SHVA": 0.8671149394978173, |
|
"eval_f1_TSERE": 0.8201351021404737, |
|
"eval_loss": 0.010434958152472973, |
|
"eval_macro_f1": 0.8430259886401229, |
|
"eval_micro_f1": 0.8725397588417411, |
|
"eval_runtime": 50.6576, |
|
"eval_samples_per_second": 755.562, |
|
"eval_steps_per_second": 7.561, |
|
"step": 22966 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 2.3286875725900118e-05, |
|
"loss": 0.0096, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 8.19, |
|
"learning_rate": 2.2706155632984902e-05, |
|
"loss": 0.009, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 8.36, |
|
"learning_rate": 2.2125435540069687e-05, |
|
"loss": 0.0091, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 8.53, |
|
"learning_rate": 2.1544715447154475e-05, |
|
"loss": 0.0091, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 8.71, |
|
"learning_rate": 2.096399535423926e-05, |
|
"loss": 0.0092, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 8.88, |
|
"learning_rate": 2.038327526132404e-05, |
|
"loss": 0.0091, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.9816429946081784, |
|
"eval_f1_HIRIQ": 0.8617756604590732, |
|
"eval_f1_HOLAM": 0.7760372565622354, |
|
"eval_f1_KUBUTZ": 0.8087252634284247, |
|
"eval_f1_PATACH": 0.8981484778351434, |
|
"eval_f1_SHADDA": 0.8896060289509028, |
|
"eval_f1_SHVA": 0.8676933023775238, |
|
"eval_f1_TSERE": 0.8195844976309076, |
|
"eval_loss": 0.010549969039857388, |
|
"eval_macro_f1": 0.8459386410348871, |
|
"eval_micro_f1": 0.8734815959940515, |
|
"eval_runtime": 50.289, |
|
"eval_samples_per_second": 761.101, |
|
"eval_steps_per_second": 7.616, |
|
"step": 25836 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"learning_rate": 1.9802555168408828e-05, |
|
"loss": 0.0088, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 9.23, |
|
"learning_rate": 1.9221835075493612e-05, |
|
"loss": 0.0085, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 9.41, |
|
"learning_rate": 1.8641114982578397e-05, |
|
"loss": 0.0086, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 9.58, |
|
"learning_rate": 1.806039488966318e-05, |
|
"loss": 0.0085, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 9.75, |
|
"learning_rate": 1.747967479674797e-05, |
|
"loss": 0.0086, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 9.93, |
|
"learning_rate": 1.6898954703832754e-05, |
|
"loss": 0.0086, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9820323509296518, |
|
"eval_f1_HIRIQ": 0.8644184686296934, |
|
"eval_f1_HOLAM": 0.7807301793043854, |
|
"eval_f1_KUBUTZ": 0.8107445035070613, |
|
"eval_f1_PATACH": 0.9007614706453949, |
|
"eval_f1_SHADDA": 0.8914874936954775, |
|
"eval_f1_SHVA": 0.8700223746362294, |
|
"eval_f1_TSERE": 0.8237858288440186, |
|
"eval_loss": 0.010514745488762856, |
|
"eval_macro_f1": 0.8488500456088944, |
|
"eval_micro_f1": 0.876108330623615, |
|
"eval_runtime": 51.4328, |
|
"eval_samples_per_second": 744.175, |
|
"eval_steps_per_second": 7.447, |
|
"step": 28707 |
|
}, |
|
{ |
|
"epoch": 10.1, |
|
"learning_rate": 1.6318234610917538e-05, |
|
"loss": 0.0083, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 10.28, |
|
"learning_rate": 1.5737514518002326e-05, |
|
"loss": 0.008, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 10.45, |
|
"learning_rate": 1.5156794425087109e-05, |
|
"loss": 0.0082, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 10.62, |
|
"learning_rate": 1.4576074332171893e-05, |
|
"loss": 0.0081, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 10.8, |
|
"learning_rate": 1.3995354239256678e-05, |
|
"loss": 0.0081, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 10.97, |
|
"learning_rate": 1.3414634146341466e-05, |
|
"loss": 0.0081, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.9822652012787683, |
|
"eval_f1_HIRIQ": 0.8650168041552093, |
|
"eval_f1_HOLAM": 0.7809975223526877, |
|
"eval_f1_KUBUTZ": 0.8156180840973759, |
|
"eval_f1_PATACH": 0.9017915187001648, |
|
"eval_f1_SHADDA": 0.893456266013353, |
|
"eval_f1_SHVA": 0.8708873379860418, |
|
"eval_f1_TSERE": 0.824460194494808, |
|
"eval_loss": 0.010741644538939, |
|
"eval_macro_f1": 0.85031824682852, |
|
"eval_micro_f1": 0.8772879261954959, |
|
"eval_runtime": 49.6631, |
|
"eval_samples_per_second": 770.693, |
|
"eval_steps_per_second": 7.712, |
|
"step": 31578 |
|
} |
|
], |
|
"max_steps": 43050, |
|
"num_train_epochs": 15, |
|
"total_flos": 1.540842233529006e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|