{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.999912914743534, "global_step": 31578, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.17, "learning_rate": 4.9419279907084783e-05, "loss": 0.0381, "step": 500 }, { "epoch": 0.35, "learning_rate": 4.883855981416957e-05, "loss": 0.0241, "step": 1000 }, { "epoch": 0.52, "learning_rate": 4.825783972125435e-05, "loss": 0.022, "step": 1500 }, { "epoch": 0.7, "learning_rate": 4.767711962833915e-05, "loss": 0.0207, "step": 2000 }, { "epoch": 0.87, "learning_rate": 4.709639953542393e-05, "loss": 0.0199, "step": 2500 }, { "epoch": 1.0, "eval_accuracy": 0.9726582157682948, "eval_f1_HIRIQ": 0.7791055764551668, "eval_f1_HOLAM": 0.6713693875265927, "eval_f1_KUBUTZ": 0.7119160348309771, "eval_f1_PATACH": 0.851339663936741, "eval_f1_SHADDA": 0.8292565235143462, "eval_f1_SHVA": 0.8089031685917613, "eval_f1_TSERE": 0.7485289696344972, "eval_loss": 0.013756499625742435, "eval_macro_f1": 0.7714884749271546, "eval_micro_f1": 0.8119978846741942, "eval_runtime": 51.0176, "eval_samples_per_second": 750.232, "eval_steps_per_second": 7.507, "step": 2870 }, { "epoch": 1.05, "learning_rate": 4.6515679442508716e-05, "loss": 0.0189, "step": 3000 }, { "epoch": 1.22, "learning_rate": 4.59349593495935e-05, "loss": 0.0177, "step": 3500 }, { "epoch": 1.39, "learning_rate": 4.5354239256678285e-05, "loss": 0.0174, "step": 4000 }, { "epoch": 1.57, "learning_rate": 4.4773519163763066e-05, "loss": 0.0172, "step": 4500 }, { "epoch": 1.74, "learning_rate": 4.4192799070847854e-05, "loss": 0.0167, "step": 5000 }, { "epoch": 1.92, "learning_rate": 4.361207897793264e-05, "loss": 0.0166, "step": 5500 }, { "epoch": 2.0, "eval_accuracy": 0.9761089815978242, "eval_f1_HIRIQ": 0.8106474124231328, "eval_f1_HOLAM": 0.7086283185840708, "eval_f1_KUBUTZ": 0.7472128945601075, "eval_f1_PATACH": 0.8701410253039187, "eval_f1_SHADDA": 0.8522099185169709, "eval_f1_SHVA": 0.8329891346774823, "eval_f1_TSERE": 0.7836281031964952, "eval_loss": 0.012357393279671669, "eval_macro_f1": 0.8007795438945969, "eval_micro_f1": 0.8363289062525998, "eval_runtime": 52.8127, "eval_samples_per_second": 724.731, "eval_steps_per_second": 7.252, "step": 5741 }, { "epoch": 2.09, "learning_rate": 4.303135888501742e-05, "loss": 0.0157, "step": 6000 }, { "epoch": 2.26, "learning_rate": 4.245063879210221e-05, "loss": 0.0151, "step": 6500 }, { "epoch": 2.44, "learning_rate": 4.186991869918699e-05, "loss": 0.015, "step": 7000 }, { "epoch": 2.61, "learning_rate": 4.128919860627178e-05, "loss": 0.0148, "step": 7500 }, { "epoch": 2.79, "learning_rate": 4.070847851335656e-05, "loss": 0.0148, "step": 8000 }, { "epoch": 2.96, "learning_rate": 4.012775842044135e-05, "loss": 0.0147, "step": 8500 }, { "epoch": 3.0, "eval_accuracy": 0.9780662605569959, "eval_f1_HIRIQ": 0.8294876571557304, "eval_f1_HOLAM": 0.7245056654076871, "eval_f1_KUBUTZ": 0.7711184824965379, "eval_f1_PATACH": 0.8810396563640023, "eval_f1_SHADDA": 0.8664105216491798, "eval_f1_SHVA": 0.8441380510188548, "eval_f1_TSERE": 0.7970168612191959, "eval_loss": 0.011536195874214172, "eval_macro_f1": 0.8162452707587411, "eval_micro_f1": 0.8498705967528521, "eval_runtime": 54.0777, "eval_samples_per_second": 707.778, "eval_steps_per_second": 7.082, "step": 8612 }, { "epoch": 3.14, "learning_rate": 3.9547038327526136e-05, "loss": 0.0137, "step": 9000 }, { "epoch": 3.31, "learning_rate": 3.8966318234610924e-05, "loss": 0.0134, "step": 9500 }, { "epoch": 3.48, "learning_rate": 3.8385598141695705e-05, "loss": 0.0135, "step": 10000 }, { "epoch": 3.66, "learning_rate": 3.780487804878049e-05, "loss": 0.0135, "step": 10500 }, { "epoch": 3.83, "learning_rate": 3.7224157955865274e-05, "loss": 0.0132, "step": 11000 }, { "epoch": 4.0, "eval_accuracy": 0.9792273312868799, "eval_f1_HIRIQ": 0.8373507327062703, "eval_f1_HOLAM": 0.7442917433178227, "eval_f1_KUBUTZ": 0.7827533265097235, "eval_f1_PATACH": 0.8867928661237257, "eval_f1_SHADDA": 0.8725204092940786, "eval_f1_SHVA": 0.8508812530764879, "eval_f1_TSERE": 0.801735428945645, "eval_loss": 0.010950990952551365, "eval_macro_f1": 0.8251893942819649, "eval_micro_f1": 0.8567964095958783, "eval_runtime": 52.4162, "eval_samples_per_second": 730.213, "eval_steps_per_second": 7.307, "step": 11483 }, { "epoch": 4.01, "learning_rate": 3.664343786295006e-05, "loss": 0.0131, "step": 11500 }, { "epoch": 4.18, "learning_rate": 3.606271777003484e-05, "loss": 0.0122, "step": 12000 }, { "epoch": 4.35, "learning_rate": 3.548199767711963e-05, "loss": 0.0122, "step": 12500 }, { "epoch": 4.53, "learning_rate": 3.490127758420442e-05, "loss": 0.0123, "step": 13000 }, { "epoch": 4.7, "learning_rate": 3.43205574912892e-05, "loss": 0.0122, "step": 13500 }, { "epoch": 4.88, "learning_rate": 3.373983739837399e-05, "loss": 0.0122, "step": 14000 }, { "epoch": 5.0, "eval_accuracy": 0.9799653269288884, "eval_f1_HIRIQ": 0.8439961236895427, "eval_f1_HOLAM": 0.7499999999999999, "eval_f1_KUBUTZ": 0.7890396727485228, "eval_f1_PATACH": 0.8902453557194496, "eval_f1_SHADDA": 0.8791103582869978, "eval_f1_SHVA": 0.8568982423275623, "eval_f1_TSERE": 0.8086938522045125, "eval_loss": 0.010681645944714546, "eval_macro_f1": 0.8311405149966554, "eval_micro_f1": 0.8620291101643662, "eval_runtime": 49.3354, "eval_samples_per_second": 775.812, "eval_steps_per_second": 7.763, "step": 14353 }, { "epoch": 5.05, "learning_rate": 3.315911730545877e-05, "loss": 0.012, "step": 14500 }, { "epoch": 5.23, "learning_rate": 3.2578397212543556e-05, "loss": 0.011, "step": 15000 }, { "epoch": 5.4, "learning_rate": 3.199767711962834e-05, "loss": 0.0114, "step": 15500 }, { "epoch": 5.57, "learning_rate": 3.1416957026713125e-05, "loss": 0.0112, "step": 16000 }, { "epoch": 5.75, "learning_rate": 3.083623693379791e-05, "loss": 0.0112, "step": 16500 }, { "epoch": 5.92, "learning_rate": 3.0255516840882698e-05, "loss": 0.0112, "step": 17000 }, { "epoch": 6.0, "eval_accuracy": 0.9805248675902215, "eval_f1_HIRIQ": 0.8487574675668987, "eval_f1_HOLAM": 0.7595126122274476, "eval_f1_KUBUTZ": 0.797991035951556, "eval_f1_PATACH": 0.8928041018387551, "eval_f1_SHADDA": 0.8846275811897255, "eval_f1_SHVA": 0.8607482938928253, "eval_f1_TSERE": 0.8122762990085161, "eval_loss": 0.010463288053870201, "eval_macro_f1": 0.8366739130965322, "eval_micro_f1": 0.8659091900525827, "eval_runtime": 49.2152, "eval_samples_per_second": 777.707, "eval_steps_per_second": 7.782, "step": 17224 }, { "epoch": 6.1, "learning_rate": 2.9674796747967482e-05, "loss": 0.0107, "step": 17500 }, { "epoch": 6.27, "learning_rate": 2.9094076655052267e-05, "loss": 0.0105, "step": 18000 }, { "epoch": 6.44, "learning_rate": 2.851335656213705e-05, "loss": 0.0103, "step": 18500 }, { "epoch": 6.62, "learning_rate": 2.7932636469221835e-05, "loss": 0.0106, "step": 19000 }, { "epoch": 6.79, "learning_rate": 2.735191637630662e-05, "loss": 0.0105, "step": 19500 }, { "epoch": 6.97, "learning_rate": 2.6771196283391408e-05, "loss": 0.0104, "step": 20000 }, { "epoch": 7.0, "eval_accuracy": 0.9811321234870294, "eval_f1_HIRIQ": 0.8550038223948401, "eval_f1_HOLAM": 0.763222830168437, "eval_f1_KUBUTZ": 0.8037383177570094, "eval_f1_PATACH": 0.8963010344751529, "eval_f1_SHADDA": 0.8854757929883139, "eval_f1_SHVA": 0.8649184871804224, "eval_f1_TSERE": 0.8171559407964675, "eval_loss": 0.010495145805180073, "eval_macro_f1": 0.8408308893943776, "eval_micro_f1": 0.8700826091477137, "eval_runtime": 52.1218, "eval_samples_per_second": 734.337, "eval_steps_per_second": 7.348, "step": 20095 }, { "epoch": 7.14, "learning_rate": 2.6190476190476192e-05, "loss": 0.0098, "step": 20500 }, { "epoch": 7.32, "learning_rate": 2.5609756097560977e-05, "loss": 0.0097, "step": 21000 }, { "epoch": 7.49, "learning_rate": 2.502903600464576e-05, "loss": 0.0097, "step": 21500 }, { "epoch": 7.66, "learning_rate": 2.4448315911730546e-05, "loss": 0.0098, "step": 22000 }, { "epoch": 7.84, "learning_rate": 2.3867595818815333e-05, "loss": 0.0097, "step": 22500 }, { "epoch": 8.0, "eval_accuracy": 0.981536112480715, "eval_f1_HIRIQ": 0.8594682202461781, "eval_f1_HOLAM": 0.7635878626208735, "eval_f1_KUBUTZ": 0.8044461430664374, "eval_f1_PATACH": 0.8982397971622214, "eval_f1_SHADDA": 0.888189855746859, "eval_f1_SHVA": 0.8671149394978173, "eval_f1_TSERE": 0.8201351021404737, "eval_loss": 0.010434958152472973, "eval_macro_f1": 0.8430259886401229, "eval_micro_f1": 0.8725397588417411, "eval_runtime": 50.6576, "eval_samples_per_second": 755.562, "eval_steps_per_second": 7.561, "step": 22966 }, { "epoch": 8.01, "learning_rate": 2.3286875725900118e-05, "loss": 0.0096, "step": 23000 }, { "epoch": 8.19, "learning_rate": 2.2706155632984902e-05, "loss": 0.009, "step": 23500 }, { "epoch": 8.36, "learning_rate": 2.2125435540069687e-05, "loss": 0.0091, "step": 24000 }, { "epoch": 8.53, "learning_rate": 2.1544715447154475e-05, "loss": 0.0091, "step": 24500 }, { "epoch": 8.71, "learning_rate": 2.096399535423926e-05, "loss": 0.0092, "step": 25000 }, { "epoch": 8.88, "learning_rate": 2.038327526132404e-05, "loss": 0.0091, "step": 25500 }, { "epoch": 9.0, "eval_accuracy": 0.9816429946081784, "eval_f1_HIRIQ": 0.8617756604590732, "eval_f1_HOLAM": 0.7760372565622354, "eval_f1_KUBUTZ": 0.8087252634284247, "eval_f1_PATACH": 0.8981484778351434, "eval_f1_SHADDA": 0.8896060289509028, "eval_f1_SHVA": 0.8676933023775238, "eval_f1_TSERE": 0.8195844976309076, "eval_loss": 0.010549969039857388, "eval_macro_f1": 0.8459386410348871, "eval_micro_f1": 0.8734815959940515, "eval_runtime": 50.289, "eval_samples_per_second": 761.101, "eval_steps_per_second": 7.616, "step": 25836 }, { "epoch": 9.06, "learning_rate": 1.9802555168408828e-05, "loss": 0.0088, "step": 26000 }, { "epoch": 9.23, "learning_rate": 1.9221835075493612e-05, "loss": 0.0085, "step": 26500 }, { "epoch": 9.41, "learning_rate": 1.8641114982578397e-05, "loss": 0.0086, "step": 27000 }, { "epoch": 9.58, "learning_rate": 1.806039488966318e-05, "loss": 0.0085, "step": 27500 }, { "epoch": 9.75, "learning_rate": 1.747967479674797e-05, "loss": 0.0086, "step": 28000 }, { "epoch": 9.93, "learning_rate": 1.6898954703832754e-05, "loss": 0.0086, "step": 28500 }, { "epoch": 10.0, "eval_accuracy": 0.9820323509296518, "eval_f1_HIRIQ": 0.8644184686296934, "eval_f1_HOLAM": 0.7807301793043854, "eval_f1_KUBUTZ": 0.8107445035070613, "eval_f1_PATACH": 0.9007614706453949, "eval_f1_SHADDA": 0.8914874936954775, "eval_f1_SHVA": 0.8700223746362294, "eval_f1_TSERE": 0.8237858288440186, "eval_loss": 0.010514745488762856, "eval_macro_f1": 0.8488500456088944, "eval_micro_f1": 0.876108330623615, "eval_runtime": 51.4328, "eval_samples_per_second": 744.175, "eval_steps_per_second": 7.447, "step": 28707 }, { "epoch": 10.1, "learning_rate": 1.6318234610917538e-05, "loss": 0.0083, "step": 29000 }, { "epoch": 10.28, "learning_rate": 1.5737514518002326e-05, "loss": 0.008, "step": 29500 }, { "epoch": 10.45, "learning_rate": 1.5156794425087109e-05, "loss": 0.0082, "step": 30000 }, { "epoch": 10.62, "learning_rate": 1.4576074332171893e-05, "loss": 0.0081, "step": 30500 }, { "epoch": 10.8, "learning_rate": 1.3995354239256678e-05, "loss": 0.0081, "step": 31000 }, { "epoch": 10.97, "learning_rate": 1.3414634146341466e-05, "loss": 0.0081, "step": 31500 }, { "epoch": 11.0, "eval_accuracy": 0.9822652012787683, "eval_f1_HIRIQ": 0.8650168041552093, "eval_f1_HOLAM": 0.7809975223526877, "eval_f1_KUBUTZ": 0.8156180840973759, "eval_f1_PATACH": 0.9017915187001648, "eval_f1_SHADDA": 0.893456266013353, "eval_f1_SHVA": 0.8708873379860418, "eval_f1_TSERE": 0.824460194494808, "eval_loss": 0.010741644538939, "eval_macro_f1": 0.85031824682852, "eval_micro_f1": 0.8772879261954959, "eval_runtime": 49.6631, "eval_samples_per_second": 770.693, "eval_steps_per_second": 7.712, "step": 31578 } ], "max_steps": 43050, "num_train_epochs": 15, "total_flos": 1.540842233529006e+17, "trial_name": null, "trial_params": null }