{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.453662604578592, "global_step": 11400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "eval_exact_match": 14.738231412230533, "eval_f1": 19.992446610541194, "step": 100 }, { "epoch": 0.04, "learning_rate": 0.0005, "loss": 3.6292, "step": 200 }, { "epoch": 0.04, "eval_exact_match": 21.381434227892655, "eval_f1": 31.43956554541397, "step": 200 }, { "epoch": 0.05, "eval_exact_match": 24.175098988121427, "eval_f1": 34.101042049094296, "step": 300 }, { "epoch": 0.07, "learning_rate": 0.0005, "loss": 2.7193, "step": 400 }, { "epoch": 0.07, "eval_exact_match": 24.950592885375492, "eval_f1": 38.217244544211226, "step": 400 }, { "epoch": 0.11, "learning_rate": 0.0005, "loss": 2.4559, "step": 600 }, { "epoch": 0.11, "eval_exact_match": 28.40909090909091, "eval_f1": 44.64064089265683, "step": 600 }, { "epoch": 0.14, "learning_rate": 0.0005, "loss": 2.2801, "step": 800 }, { "epoch": 0.14, "eval_exact_match": 26.630434782608695, "eval_f1": 45.504003307885625, "step": 800 }, { "epoch": 0.18, "learning_rate": 0.0005, "loss": 2.1285, "step": 1000 }, { "epoch": 0.18, "eval_exact_match": 30.8300395256917, "eval_f1": 50.359736600917515, "step": 1000 }, { "epoch": 0.21, "learning_rate": 0.0005, "loss": 2.026, "step": 1200 }, { "epoch": 0.21, "eval_exact_match": 31.175889328063242, "eval_f1": 51.623685774579855, "step": 1200 }, { "epoch": 0.25, "learning_rate": 0.0005, "loss": 1.9281, "step": 1400 }, { "epoch": 0.25, "eval_exact_match": 35.869565217391305, "eval_f1": 56.21242269358832, "step": 1400 }, { "epoch": 0.28, "learning_rate": 0.0005, "loss": 1.8836, "step": 1600 }, { "epoch": 0.28, "eval_exact_match": 35.0296442687747, "eval_f1": 57.21764925319378, "step": 1600 }, { "epoch": 0.32, "learning_rate": 0.0005, "loss": 1.8377, "step": 1800 }, { "epoch": 0.32, "eval_exact_match": 36.61067193675889, "eval_f1": 58.4380904170952, "step": 1800 }, { "epoch": 0.35, "learning_rate": 0.0005, "loss": 1.7926, "step": 2000 }, { "epoch": 0.35, "eval_exact_match": 36.85770750988142, "eval_f1": 59.677808943848156, "step": 2000 }, { "epoch": 0.39, "learning_rate": 0.0005, "loss": 1.7356, "step": 2200 }, { "epoch": 0.39, "eval_exact_match": 35.62252964426877, "eval_f1": 59.02520911162799, "step": 2200 }, { "epoch": 0.42, "learning_rate": 0.0005, "loss": 1.7001, "step": 2400 }, { "epoch": 0.42, "eval_exact_match": 36.31422924901186, "eval_f1": 60.89754377178488, "step": 2400 }, { "epoch": 0.46, "learning_rate": 0.0005, "loss": 1.675, "step": 2600 }, { "epoch": 0.46, "eval_exact_match": 36.26482213438735, "eval_f1": 59.783069358423525, "step": 2600 }, { "epoch": 0.49, "learning_rate": 0.0005, "loss": 1.6594, "step": 2800 }, { "epoch": 0.49, "eval_exact_match": 36.56126482213439, "eval_f1": 61.449547007432926, "step": 2800 }, { "epoch": 0.53, "learning_rate": 0.0005, "loss": 1.6435, "step": 3000 }, { "epoch": 0.53, "eval_exact_match": 39.92094861660079, "eval_f1": 63.03510761568375, "step": 3000 }, { "epoch": 0.56, "learning_rate": 0.0005, "loss": 1.6116, "step": 3200 }, { "epoch": 0.56, "eval_exact_match": 37.30237154150198, "eval_f1": 62.953985227470696, "step": 3200 }, { "epoch": 0.6, "learning_rate": 0.0005, "loss": 1.5701, "step": 3400 }, { "epoch": 0.6, "eval_exact_match": 36.6600790513834, "eval_f1": 61.820603199741704, "step": 3400 }, { "epoch": 0.63, "learning_rate": 0.0005, "loss": 1.5627, "step": 3600 }, { "epoch": 0.63, "eval_exact_match": 38.04347826086956, "eval_f1": 63.56696764038496, "step": 3600 }, { "epoch": 0.67, "learning_rate": 0.0005, "loss": 1.5525, "step": 3800 }, { "epoch": 0.67, "eval_exact_match": 39.03162055335969, "eval_f1": 64.72028168259759, "step": 3800 }, { "epoch": 0.83, "learning_rate": 0.0005, "loss": 1.4518, "step": 3850 }, { "epoch": 0.84, "learning_rate": 0.0005, "loss": 1.4245, "step": 3900 }, { "epoch": 0.85, "learning_rate": 0.0005, "loss": 1.4164, "step": 3950 }, { "epoch": 0.86, "learning_rate": 0.0005, "loss": 1.4627, "step": 4000 }, { "epoch": 0.86, "eval_exact_match": 42.5, "eval_f1": 67.3159921257088, "step": 4000 }, { "epoch": 0.87, "learning_rate": 0.0005, "loss": 1.4369, "step": 4050 }, { "epoch": 0.88, "learning_rate": 0.0005, "loss": 1.4205, "step": 4100 }, { "epoch": 0.89, "learning_rate": 0.0005, "loss": 1.4189, "step": 4150 }, { "epoch": 0.9, "learning_rate": 0.0005, "loss": 1.4127, "step": 4200 }, { "epoch": 0.9, "eval_exact_match": 44.95, "eval_f1": 68.47203537029696, "step": 4200 }, { "epoch": 0.91, "learning_rate": 0.0005, "loss": 1.4329, "step": 4250 }, { "epoch": 0.93, "learning_rate": 0.0005, "loss": 1.4233, "step": 4300 }, { "epoch": 0.94, "learning_rate": 0.0005, "loss": 1.4426, "step": 4350 }, { "epoch": 0.95, "learning_rate": 0.0005, "loss": 1.3969, "step": 4400 }, { "epoch": 0.95, "eval_exact_match": 46.9, "eval_f1": 69.7906665651929, "step": 4400 }, { "epoch": 0.96, "learning_rate": 0.0005, "loss": 1.4589, "step": 4450 }, { "epoch": 0.97, "learning_rate": 0.0005, "loss": 1.4199, "step": 4500 }, { "epoch": 0.98, "learning_rate": 0.0005, "loss": 1.3795, "step": 4550 }, { "epoch": 0.99, "learning_rate": 0.0005, "loss": 1.3794, "step": 4600 }, { "epoch": 0.99, "eval_exact_match": 50.95, "eval_f1": 71.26934663680022, "step": 4600 }, { "epoch": 1.0, "learning_rate": 0.0005, "loss": 1.3577, "step": 4650 }, { "epoch": 1.01, "learning_rate": 0.0005, "loss": 1.3633, "step": 4700 }, { "epoch": 1.02, "learning_rate": 0.0005, "loss": 1.3672, "step": 4750 }, { "epoch": 1.03, "learning_rate": 0.0005, "loss": 1.3568, "step": 4800 }, { "epoch": 1.03, "eval_exact_match": 49.5, "eval_f1": 71.44716194869687, "step": 4800 }, { "epoch": 1.04, "learning_rate": 0.0005, "loss": 1.334, "step": 4850 }, { "epoch": 1.05, "learning_rate": 0.0005, "loss": 1.3173, "step": 4900 }, { "epoch": 1.07, "learning_rate": 0.0005, "loss": 1.3332, "step": 4950 }, { "epoch": 1.08, "learning_rate": 0.0005, "loss": 1.3858, "step": 5000 }, { "epoch": 1.08, "eval_exact_match": 50.6, "eval_f1": 71.4031953884389, "step": 5000 }, { "epoch": 1.09, "learning_rate": 0.0005, "loss": 1.3724, "step": 5050 }, { "epoch": 1.1, "learning_rate": 0.0005, "loss": 1.3356, "step": 5100 }, { "epoch": 1.11, "learning_rate": 0.0005, "loss": 1.4173, "step": 5150 }, { "epoch": 1.12, "learning_rate": 0.0005, "loss": 1.3499, "step": 5200 }, { "epoch": 1.12, "eval_exact_match": 50.45, "eval_f1": 70.96365965036652, "step": 5200 }, { "epoch": 1.13, "learning_rate": 0.0005, "loss": 1.3853, "step": 5250 }, { "epoch": 1.14, "learning_rate": 0.0005, "loss": 1.337, "step": 5300 }, { "epoch": 1.15, "learning_rate": 0.0005, "loss": 1.3366, "step": 5350 }, { "epoch": 1.16, "learning_rate": 0.0005, "loss": 1.342, "step": 5400 }, { "epoch": 1.16, "eval_exact_match": 51.25, "eval_f1": 72.29202655492661, "step": 5400 }, { "epoch": 1.17, "learning_rate": 0.0005, "loss": 1.3289, "step": 5450 }, { "epoch": 1.18, "learning_rate": 0.0005, "loss": 1.3894, "step": 5500 }, { "epoch": 1.19, "learning_rate": 0.0005, "loss": 1.3393, "step": 5550 }, { "epoch": 1.21, "learning_rate": 0.0005, "loss": 1.2983, "step": 5600 }, { "epoch": 1.21, "eval_exact_match": 52.15, "eval_f1": 72.57033769389315, "step": 5600 }, { "epoch": 1.22, "learning_rate": 0.0005, "loss": 1.3181, "step": 5650 }, { "epoch": 1.23, "learning_rate": 0.0005, "loss": 1.3672, "step": 5700 }, { "epoch": 1.24, "learning_rate": 0.0005, "loss": 1.3064, "step": 5750 }, { "epoch": 1.25, "learning_rate": 0.0005, "loss": 1.3316, "step": 5800 }, { "epoch": 1.25, "eval_exact_match": 52.6, "eval_f1": 72.91182293082619, "step": 5800 }, { "epoch": 1.26, "learning_rate": 0.0005, "loss": 1.3099, "step": 5850 }, { "epoch": 1.27, "learning_rate": 0.0005, "loss": 1.293, "step": 5900 }, { "epoch": 1.28, "learning_rate": 0.0005, "loss": 1.3386, "step": 5950 }, { "epoch": 1.29, "learning_rate": 0.0005, "loss": 1.3475, "step": 6000 }, { "epoch": 1.29, "eval_exact_match": 50.7, "eval_f1": 72.13671184582012, "step": 6000 }, { "epoch": 1.3, "learning_rate": 0.0005, "loss": 1.2548, "step": 6050 }, { "epoch": 1.31, "learning_rate": 0.0005, "loss": 1.3243, "step": 6100 }, { "epoch": 1.32, "learning_rate": 0.0005, "loss": 1.322, "step": 6150 }, { "epoch": 1.33, "learning_rate": 0.0005, "loss": 1.3049, "step": 6200 }, { "epoch": 1.33, "eval_exact_match": 51.25, "eval_f1": 72.62424474108035, "step": 6200 }, { "epoch": 1.35, "learning_rate": 0.0005, "loss": 1.274, "step": 6250 }, { "epoch": 1.36, "learning_rate": 0.0005, "loss": 1.3015, "step": 6300 }, { "epoch": 1.37, "learning_rate": 0.0005, "loss": 1.303, "step": 6350 }, { "epoch": 1.38, "learning_rate": 0.0005, "loss": 1.2727, "step": 6400 }, { "epoch": 1.38, "eval_exact_match": 52.35, "eval_f1": 73.1319111267331, "step": 6400 }, { "epoch": 1.39, "learning_rate": 0.0005, "loss": 1.2866, "step": 6450 }, { "epoch": 1.4, "learning_rate": 0.0005, "loss": 1.3302, "step": 6500 }, { "epoch": 1.41, "learning_rate": 0.0005, "loss": 1.2281, "step": 6550 }, { "epoch": 1.42, "learning_rate": 0.0005, "loss": 1.3037, "step": 6600 }, { "epoch": 1.42, "eval_exact_match": 53.2, "eval_f1": 73.01971722088678, "step": 6600 }, { "epoch": 1.43, "learning_rate": 0.0005, "loss": 1.2822, "step": 6650 }, { "epoch": 1.44, "learning_rate": 0.0005, "loss": 1.2871, "step": 6700 }, { "epoch": 1.45, "learning_rate": 0.0005, "loss": 1.3176, "step": 6750 }, { "epoch": 1.46, "learning_rate": 0.0005, "loss": 1.2828, "step": 6800 }, { "epoch": 1.46, "eval_exact_match": 52.85, "eval_f1": 73.01360270382577, "step": 6800 }, { "epoch": 1.47, "learning_rate": 0.0005, "loss": 1.3448, "step": 6850 }, { "epoch": 1.49, "learning_rate": 0.0005, "loss": 1.3102, "step": 6900 }, { "epoch": 1.5, "learning_rate": 0.0005, "loss": 1.3273, "step": 6950 }, { "epoch": 1.51, "learning_rate": 0.0005, "loss": 1.3016, "step": 7000 }, { "epoch": 1.51, "eval_exact_match": 54.0, "eval_f1": 73.98223052649728, "step": 7000 }, { "epoch": 1.52, "learning_rate": 0.0005, "loss": 1.2767, "step": 7050 }, { "epoch": 1.53, "learning_rate": 0.0005, "loss": 1.262, "step": 7100 }, { "epoch": 1.54, "learning_rate": 0.0005, "loss": 1.2846, "step": 7150 }, { "epoch": 1.55, "learning_rate": 0.0005, "loss": 1.2642, "step": 7200 }, { "epoch": 1.55, "eval_exact_match": 52.45, "eval_f1": 73.66687625952305, "step": 7200 }, { "epoch": 1.56, "learning_rate": 0.0005, "loss": 1.2875, "step": 7250 }, { "epoch": 1.57, "learning_rate": 0.0005, "loss": 1.247, "step": 7300 }, { "epoch": 1.58, "learning_rate": 0.0005, "loss": 1.2501, "step": 7350 }, { "epoch": 1.59, "learning_rate": 0.0005, "loss": 1.293, "step": 7400 }, { "epoch": 1.59, "eval_exact_match": 52.3, "eval_f1": 73.46398897242841, "step": 7400 }, { "epoch": 1.6, "learning_rate": 0.0005, "loss": 1.2373, "step": 7450 }, { "epoch": 1.61, "learning_rate": 0.0005, "loss": 1.2477, "step": 7500 }, { "epoch": 1.62, "learning_rate": 0.0005, "loss": 1.2647, "step": 7550 }, { "epoch": 1.64, "learning_rate": 0.0005, "loss": 1.2947, "step": 7600 }, { "epoch": 1.64, "eval_exact_match": 53.05, "eval_f1": 73.40471749318357, "step": 7600 }, { "epoch": 1.65, "learning_rate": 0.0005, "loss": 1.2237, "step": 7650 }, { "epoch": 1.66, "learning_rate": 0.0005, "loss": 1.2996, "step": 7700 }, { "epoch": 1.67, "learning_rate": 0.0005, "loss": 1.2833, "step": 7750 }, { "epoch": 1.68, "learning_rate": 0.0005, "loss": 1.2663, "step": 7800 }, { "epoch": 1.68, "eval_exact_match": 53.95, "eval_f1": 74.59089034170034, "step": 7800 }, { "epoch": 1.69, "learning_rate": 0.0005, "loss": 1.2519, "step": 7850 }, { "epoch": 1.7, "learning_rate": 0.0005, "loss": 1.2365, "step": 7900 }, { "epoch": 1.71, "learning_rate": 0.0005, "loss": 1.2152, "step": 7950 }, { "epoch": 1.72, "learning_rate": 0.0005, "loss": 1.2309, "step": 8000 }, { "epoch": 1.72, "eval_exact_match": 53.35, "eval_f1": 74.02883287579421, "step": 8000 }, { "epoch": 1.73, "learning_rate": 0.0005, "loss": 1.2532, "step": 8050 }, { "epoch": 1.74, "learning_rate": 0.0005, "loss": 1.2607, "step": 8100 }, { "epoch": 1.75, "learning_rate": 0.0005, "loss": 1.1885, "step": 8150 }, { "epoch": 1.76, "learning_rate": 0.0005, "loss": 1.2169, "step": 8200 }, { "epoch": 1.76, "eval_exact_match": 54.35, "eval_f1": 74.273655582778, "step": 8200 }, { "epoch": 1.78, "learning_rate": 0.0005, "loss": 1.2543, "step": 8250 }, { "epoch": 1.79, "learning_rate": 0.0005, "loss": 1.2352, "step": 8300 }, { "epoch": 1.8, "learning_rate": 0.0005, "loss": 1.2142, "step": 8350 }, { "epoch": 1.81, "learning_rate": 0.0005, "loss": 1.223, "step": 8400 }, { "epoch": 1.81, "eval_exact_match": 52.7, "eval_f1": 73.99393883664169, "step": 8400 }, { "epoch": 1.82, "learning_rate": 0.0005, "loss": 1.2145, "step": 8450 }, { "epoch": 1.83, "learning_rate": 0.0005, "loss": 1.1973, "step": 8500 }, { "epoch": 1.84, "learning_rate": 0.0005, "loss": 1.2277, "step": 8550 }, { "epoch": 1.85, "learning_rate": 0.0005, "loss": 1.2552, "step": 8600 }, { "epoch": 1.85, "eval_exact_match": 53.8, "eval_f1": 74.61861738151018, "step": 8600 }, { "epoch": 1.86, "learning_rate": 0.0005, "loss": 1.228, "step": 8650 }, { "epoch": 1.87, "learning_rate": 0.0005, "loss": 1.2117, "step": 8700 }, { "epoch": 1.88, "learning_rate": 0.0005, "loss": 1.2429, "step": 8750 }, { "epoch": 1.89, "learning_rate": 0.0005, "loss": 1.2279, "step": 8800 }, { "epoch": 1.89, "eval_exact_match": 54.4, "eval_f1": 74.93345914261613, "step": 8800 }, { "epoch": 1.9, "learning_rate": 0.0005, "loss": 1.2134, "step": 8850 }, { "epoch": 1.92, "learning_rate": 0.0005, "loss": 1.245, "step": 8900 }, { "epoch": 1.93, "learning_rate": 0.0005, "loss": 1.2179, "step": 8950 }, { "epoch": 1.94, "learning_rate": 0.0005, "loss": 1.2006, "step": 9000 }, { "epoch": 1.94, "eval_exact_match": 55.4, "eval_f1": 75.75427604493635, "step": 9000 }, { "epoch": 1.95, "learning_rate": 0.0005, "loss": 1.2037, "step": 9050 }, { "epoch": 1.96, "learning_rate": 0.0005, "loss": 1.1985, "step": 9100 }, { "epoch": 1.97, "learning_rate": 0.0005, "loss": 1.2012, "step": 9150 }, { "epoch": 1.98, "learning_rate": 0.0005, "loss": 1.2194, "step": 9200 }, { "epoch": 1.98, "eval_exact_match": 54.0, "eval_f1": 74.89545612203896, "step": 9200 }, { "epoch": 1.99, "learning_rate": 0.0005, "loss": 1.2173, "step": 9250 }, { "epoch": 2.0, "learning_rate": 0.0005, "loss": 1.2616, "step": 9300 }, { "epoch": 2.01, "learning_rate": 0.0005, "loss": 1.1785, "step": 9350 }, { "epoch": 2.02, "learning_rate": 0.0005, "loss": 1.1664, "step": 9400 }, { "epoch": 2.02, "eval_exact_match": 55.2, "eval_f1": 75.50544659346157, "step": 9400 }, { "epoch": 2.03, "learning_rate": 0.0005, "loss": 1.1271, "step": 9450 }, { "epoch": 2.04, "learning_rate": 0.0005, "loss": 1.1806, "step": 9500 }, { "epoch": 2.06, "learning_rate": 0.0005, "loss": 1.1865, "step": 9550 }, { "epoch": 2.07, "learning_rate": 0.0005, "loss": 1.198, "step": 9600 }, { "epoch": 2.07, "eval_exact_match": 54.65, "eval_f1": 75.02151439633785, "step": 9600 }, { "epoch": 2.08, "learning_rate": 0.0005, "loss": 1.1726, "step": 9650 }, { "epoch": 2.09, "learning_rate": 0.0005, "loss": 1.1799, "step": 9700 }, { "epoch": 2.1, "learning_rate": 0.0005, "loss": 1.1107, "step": 9750 }, { "epoch": 2.11, "learning_rate": 0.0005, "loss": 1.1585, "step": 9800 }, { "epoch": 2.11, "eval_exact_match": 53.9, "eval_f1": 74.15646190187827, "step": 9800 }, { "epoch": 2.12, "learning_rate": 0.0005, "loss": 1.1436, "step": 9850 }, { "epoch": 2.13, "learning_rate": 0.0005, "loss": 1.1806, "step": 9900 }, { "epoch": 2.14, "learning_rate": 0.0005, "loss": 1.1444, "step": 9950 }, { "epoch": 2.15, "learning_rate": 0.0005, "loss": 1.1738, "step": 10000 }, { "epoch": 2.15, "eval_exact_match": 54.85, "eval_f1": 75.11255802008874, "step": 10000 }, { "epoch": 2.16, "learning_rate": 0.0005, "loss": 1.1095, "step": 10050 }, { "epoch": 2.17, "learning_rate": 0.0005, "loss": 1.1511, "step": 10100 }, { "epoch": 2.18, "learning_rate": 0.0005, "loss": 1.1297, "step": 10150 }, { "epoch": 2.2, "learning_rate": 0.0005, "loss": 1.1505, "step": 10200 }, { "epoch": 2.2, "eval_exact_match": 54.85, "eval_f1": 75.52882358671673, "step": 10200 }, { "epoch": 2.2, "learning_rate": 5e-05, "loss": 1.1081, "step": 10210 }, { "epoch": 2.2, "learning_rate": 5e-05, "loss": 1.1572, "step": 10220 }, { "epoch": 2.2, "learning_rate": 5e-05, "loss": 1.0839, "step": 10230 }, { "epoch": 2.2, "learning_rate": 5e-05, "loss": 1.0971, "step": 10240 }, { "epoch": 2.21, "learning_rate": 5e-05, "loss": 1.0737, "step": 10250 }, { "epoch": 2.21, "learning_rate": 5e-05, "loss": 1.1049, "step": 10260 }, { "epoch": 2.21, "learning_rate": 5e-05, "loss": 1.202, "step": 10270 }, { "epoch": 2.21, "learning_rate": 5e-05, "loss": 1.1848, "step": 10280 }, { "epoch": 2.21, "learning_rate": 5e-05, "loss": 1.1095, "step": 10290 }, { "epoch": 2.22, "learning_rate": 5e-05, "loss": 1.2268, "step": 10300 }, { "epoch": 2.22, "learning_rate": 5e-05, "loss": 1.1422, "step": 10310 }, { "epoch": 2.22, "learning_rate": 5e-05, "loss": 1.1222, "step": 10320 }, { "epoch": 2.22, "learning_rate": 5e-05, "loss": 1.1251, "step": 10330 }, { "epoch": 2.23, "learning_rate": 5e-05, "loss": 1.0694, "step": 10340 }, { "epoch": 2.23, "learning_rate": 5e-05, "loss": 1.1571, "step": 10350 }, { "epoch": 2.23, "learning_rate": 5e-05, "loss": 1.1546, "step": 10360 }, { "epoch": 2.23, "learning_rate": 5e-05, "loss": 1.2171, "step": 10370 }, { "epoch": 2.23, "learning_rate": 5e-05, "loss": 1.1283, "step": 10380 }, { "epoch": 2.24, "learning_rate": 5e-05, "loss": 1.1215, "step": 10390 }, { "epoch": 2.24, "learning_rate": 5e-05, "loss": 1.127, "step": 10400 }, { "epoch": 2.24, "eval_exact_match": 55.35, "eval_f1": 75.70498337751204, "step": 10400 }, { "epoch": 2.24, "learning_rate": 5e-05, "loss": 1.0492, "step": 10410 }, { "epoch": 2.24, "learning_rate": 5e-05, "loss": 1.1658, "step": 10420 }, { "epoch": 2.24, "learning_rate": 5e-05, "loss": 1.1677, "step": 10430 }, { "epoch": 2.25, "learning_rate": 5e-05, "loss": 1.1758, "step": 10440 }, { "epoch": 2.25, "learning_rate": 5e-05, "loss": 1.1968, "step": 10450 }, { "epoch": 2.25, "learning_rate": 5e-05, "loss": 1.1315, "step": 10460 }, { "epoch": 2.25, "learning_rate": 5e-05, "loss": 1.1138, "step": 10470 }, { "epoch": 2.26, "learning_rate": 5e-05, "loss": 1.1375, "step": 10480 }, { "epoch": 2.26, "learning_rate": 5e-05, "loss": 1.1892, "step": 10490 }, { "epoch": 2.26, "learning_rate": 5e-05, "loss": 1.131, "step": 10500 }, { "epoch": 2.26, "learning_rate": 5e-05, "loss": 1.1195, "step": 10510 }, { "epoch": 2.26, "learning_rate": 5e-05, "loss": 1.1044, "step": 10520 }, { "epoch": 2.27, "learning_rate": 5e-05, "loss": 1.1371, "step": 10530 }, { "epoch": 2.27, "learning_rate": 5e-05, "loss": 1.1382, "step": 10540 }, { "epoch": 2.27, "learning_rate": 5e-05, "loss": 1.1273, "step": 10550 }, { "epoch": 2.27, "learning_rate": 5e-05, "loss": 1.1566, "step": 10560 }, { "epoch": 2.28, "learning_rate": 5e-05, "loss": 1.209, "step": 10570 }, { "epoch": 2.28, "learning_rate": 5e-05, "loss": 1.1336, "step": 10580 }, { "epoch": 2.28, "learning_rate": 5e-05, "loss": 1.0522, "step": 10590 }, { "epoch": 2.28, "learning_rate": 5e-05, "loss": 1.2019, "step": 10600 }, { "epoch": 2.28, "eval_exact_match": 55.55, "eval_f1": 76.02151178378499, "step": 10600 }, { "epoch": 2.28, "learning_rate": 5e-05, "loss": 1.1227, "step": 10610 }, { "epoch": 2.29, "learning_rate": 5e-05, "loss": 1.2202, "step": 10620 }, { "epoch": 2.29, "learning_rate": 5e-05, "loss": 1.0762, "step": 10630 }, { "epoch": 2.29, "learning_rate": 5e-05, "loss": 1.2074, "step": 10640 }, { "epoch": 2.29, "learning_rate": 5e-05, "loss": 1.1283, "step": 10650 }, { "epoch": 2.29, "learning_rate": 5e-05, "loss": 1.1631, "step": 10660 }, { "epoch": 2.3, "learning_rate": 5e-05, "loss": 1.0954, "step": 10670 }, { "epoch": 2.3, "learning_rate": 5e-05, "loss": 1.2253, "step": 10680 }, { "epoch": 2.3, "learning_rate": 5e-05, "loss": 1.1141, "step": 10690 }, { "epoch": 2.3, "learning_rate": 5e-05, "loss": 1.2454, "step": 10700 }, { "epoch": 2.31, "learning_rate": 5e-05, "loss": 1.0997, "step": 10710 }, { "epoch": 2.31, "learning_rate": 5e-05, "loss": 1.1408, "step": 10720 }, { "epoch": 2.31, "learning_rate": 5e-05, "loss": 1.098, "step": 10730 }, { "epoch": 2.31, "learning_rate": 5e-05, "loss": 1.2331, "step": 10740 }, { "epoch": 2.31, "learning_rate": 5e-05, "loss": 1.1006, "step": 10750 }, { "epoch": 2.32, "learning_rate": 5e-05, "loss": 1.1057, "step": 10760 }, { "epoch": 2.32, "learning_rate": 5e-05, "loss": 1.2075, "step": 10770 }, { "epoch": 2.32, "learning_rate": 5e-05, "loss": 1.0302, "step": 10780 }, { "epoch": 2.32, "learning_rate": 5e-05, "loss": 1.1484, "step": 10790 }, { "epoch": 2.32, "learning_rate": 5e-05, "loss": 1.1272, "step": 10800 }, { "epoch": 2.32, "eval_exact_match": 55.6, "eval_f1": 75.90896740958824, "step": 10800 }, { "epoch": 2.33, "learning_rate": 5e-05, "loss": 1.1306, "step": 10810 }, { "epoch": 2.33, "learning_rate": 5e-05, "loss": 1.1356, "step": 10820 }, { "epoch": 2.33, "learning_rate": 5e-05, "loss": 1.1776, "step": 10830 }, { "epoch": 2.33, "learning_rate": 5e-05, "loss": 1.1216, "step": 10840 }, { "epoch": 2.34, "learning_rate": 5e-05, "loss": 1.1211, "step": 10850 }, { "epoch": 2.34, "learning_rate": 5e-05, "loss": 1.2693, "step": 10860 }, { "epoch": 2.34, "learning_rate": 5e-05, "loss": 1.1473, "step": 10870 }, { "epoch": 2.34, "learning_rate": 5e-05, "loss": 1.0641, "step": 10880 }, { "epoch": 2.34, "learning_rate": 5e-05, "loss": 1.2424, "step": 10890 }, { "epoch": 2.35, "learning_rate": 5e-05, "loss": 1.1371, "step": 10900 }, { "epoch": 2.35, "learning_rate": 5e-05, "loss": 1.1217, "step": 10910 }, { "epoch": 2.35, "learning_rate": 5e-05, "loss": 1.2007, "step": 10920 }, { "epoch": 2.35, "learning_rate": 5e-05, "loss": 1.1501, "step": 10930 }, { "epoch": 2.35, "learning_rate": 5e-05, "loss": 1.1135, "step": 10940 }, { "epoch": 2.36, "learning_rate": 5e-05, "loss": 1.137, "step": 10950 }, { "epoch": 2.36, "learning_rate": 5e-05, "loss": 1.1422, "step": 10960 }, { "epoch": 2.36, "learning_rate": 5e-05, "loss": 1.1248, "step": 10970 }, { "epoch": 2.36, "learning_rate": 5e-05, "loss": 1.1899, "step": 10980 }, { "epoch": 2.37, "learning_rate": 5e-05, "loss": 1.102, "step": 10990 }, { "epoch": 2.37, "learning_rate": 5e-05, "loss": 1.1877, "step": 11000 }, { "epoch": 2.37, "eval_exact_match": 55.95, "eval_f1": 76.45373153349801, "step": 11000 }, { "epoch": 2.37, "learning_rate": 5e-05, "loss": 1.1474, "step": 11010 }, { "epoch": 2.37, "learning_rate": 5e-05, "loss": 1.1536, "step": 11020 }, { "epoch": 2.37, "learning_rate": 5e-05, "loss": 1.2218, "step": 11030 }, { "epoch": 2.38, "learning_rate": 5e-05, "loss": 1.0967, "step": 11040 }, { "epoch": 2.38, "learning_rate": 5e-05, "loss": 1.1305, "step": 11050 }, { "epoch": 2.38, "learning_rate": 5e-05, "loss": 1.087, "step": 11060 }, { "epoch": 2.38, "learning_rate": 5e-05, "loss": 1.0908, "step": 11070 }, { "epoch": 2.38, "learning_rate": 5e-05, "loss": 1.0134, "step": 11080 }, { "epoch": 2.39, "learning_rate": 5e-05, "loss": 1.0997, "step": 11090 }, { "epoch": 2.39, "learning_rate": 5e-05, "loss": 1.066, "step": 11100 }, { "epoch": 2.39, "learning_rate": 5e-05, "loss": 1.2601, "step": 11110 }, { "epoch": 2.39, "learning_rate": 5e-05, "loss": 1.1191, "step": 11120 }, { "epoch": 2.4, "learning_rate": 5e-05, "loss": 1.1025, "step": 11130 }, { "epoch": 2.4, "learning_rate": 5e-05, "loss": 1.112, "step": 11140 }, { "epoch": 2.4, "learning_rate": 5e-05, "loss": 1.0794, "step": 11150 }, { "epoch": 2.4, "learning_rate": 5e-05, "loss": 1.112, "step": 11160 }, { "epoch": 2.4, "learning_rate": 5e-05, "loss": 1.1411, "step": 11170 }, { "epoch": 2.41, "learning_rate": 5e-05, "loss": 1.1118, "step": 11180 }, { "epoch": 2.41, "learning_rate": 5e-05, "loss": 1.1464, "step": 11190 }, { "epoch": 2.41, "learning_rate": 5e-05, "loss": 1.1496, "step": 11200 }, { "epoch": 2.41, "eval_exact_match": 56.15, "eval_f1": 76.4356306603872, "step": 11200 }, { "epoch": 2.41, "learning_rate": 5e-05, "loss": 1.1588, "step": 11210 }, { "epoch": 2.41, "learning_rate": 5e-05, "loss": 1.1369, "step": 11220 }, { "epoch": 2.42, "learning_rate": 5e-05, "loss": 1.1026, "step": 11230 }, { "epoch": 2.42, "learning_rate": 5e-05, "loss": 1.1764, "step": 11240 }, { "epoch": 2.42, "learning_rate": 5e-05, "loss": 1.1449, "step": 11250 }, { "epoch": 2.42, "learning_rate": 5e-05, "loss": 1.1712, "step": 11260 }, { "epoch": 2.43, "learning_rate": 5e-05, "loss": 1.0465, "step": 11270 }, { "epoch": 2.43, "learning_rate": 5e-05, "loss": 1.1362, "step": 11280 }, { "epoch": 2.43, "learning_rate": 5e-05, "loss": 1.1022, "step": 11290 }, { "epoch": 2.43, "learning_rate": 5e-05, "loss": 1.1401, "step": 11300 }, { "epoch": 2.43, "learning_rate": 5e-05, "loss": 1.0913, "step": 11310 }, { "epoch": 2.44, "learning_rate": 5e-05, "loss": 1.1922, "step": 11320 }, { "epoch": 2.44, "learning_rate": 5e-05, "loss": 1.101, "step": 11330 }, { "epoch": 2.44, "learning_rate": 5e-05, "loss": 1.0261, "step": 11340 }, { "epoch": 2.44, "learning_rate": 5e-05, "loss": 1.1109, "step": 11350 }, { "epoch": 2.45, "learning_rate": 5e-05, "loss": 1.0958, "step": 11360 }, { "epoch": 2.45, "learning_rate": 5e-05, "loss": 1.027, "step": 11370 }, { "epoch": 2.45, "learning_rate": 5e-05, "loss": 1.117, "step": 11380 }, { "epoch": 2.45, "learning_rate": 5e-05, "loss": 1.1295, "step": 11390 }, { "epoch": 2.45, "learning_rate": 5e-05, "loss": 1.1344, "step": 11400 }, { "epoch": 2.45, "eval_exact_match": 56.45, "eval_f1": 76.81870981288014, "step": 11400 } ], "max_steps": 92920, "num_train_epochs": 20, "total_flos": 3.858021871727411e+17, "trial_name": null, "trial_params": null }