| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.4109589041095891, | |
| "eval_steps": 5, | |
| "global_step": 721, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0019569471624266144, | |
| "grad_norm": 3.932948112487793, | |
| "learning_rate": 7.8125e-08, | |
| "loss": 0.107, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.003913894324853229, | |
| "grad_norm": 4.482716083526611, | |
| "learning_rate": 1.5625e-07, | |
| "loss": 0.1529, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.005870841487279843, | |
| "grad_norm": 4.672689437866211, | |
| "learning_rate": 2.3437500000000003e-07, | |
| "loss": 0.1874, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.007827788649706457, | |
| "grad_norm": 4.226949214935303, | |
| "learning_rate": 3.125e-07, | |
| "loss": 0.1682, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.009784735812133072, | |
| "grad_norm": 4.327479362487793, | |
| "learning_rate": 3.90625e-07, | |
| "loss": 0.1438, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.009784735812133072, | |
| "eval_loss": 0.1470455378293991, | |
| "eval_runtime": 107.3614, | |
| "eval_samples_per_second": 28.427, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8861388036460539, | |
| "eval_sts-test_pearson_dot": 0.8769528313548112, | |
| "eval_sts-test_pearson_euclidean": 0.9079831987750276, | |
| "eval_sts-test_pearson_manhattan": 0.9086786527495163, | |
| "eval_sts-test_pearson_max": 0.9086786527495163, | |
| "eval_sts-test_spearman_cosine": 0.9077902566323186, | |
| "eval_sts-test_spearman_dot": 0.8794770733264693, | |
| "eval_sts-test_spearman_euclidean": 0.903967335376697, | |
| "eval_sts-test_spearman_manhattan": 0.9043498244078092, | |
| "eval_sts-test_spearman_max": 0.9077902566323186, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.011741682974559686, | |
| "grad_norm": 5.27250337600708, | |
| "learning_rate": 4.6875000000000006e-07, | |
| "loss": 0.2961, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.0136986301369863, | |
| "grad_norm": 5.903276443481445, | |
| "learning_rate": 5.468750000000001e-07, | |
| "loss": 0.3019, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.015655577299412915, | |
| "grad_norm": 4.000335693359375, | |
| "learning_rate": 6.25e-07, | |
| "loss": 0.1184, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.01761252446183953, | |
| "grad_norm": 5.876769065856934, | |
| "learning_rate": 7.03125e-07, | |
| "loss": 0.3176, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.019569471624266144, | |
| "grad_norm": 4.8437933921813965, | |
| "learning_rate": 7.8125e-07, | |
| "loss": 0.2234, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.019569471624266144, | |
| "eval_loss": 0.1467687040567398, | |
| "eval_runtime": 107.2549, | |
| "eval_samples_per_second": 28.456, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8861409457129842, | |
| "eval_sts-test_pearson_dot": 0.876972814890145, | |
| "eval_sts-test_pearson_euclidean": 0.9080268416052204, | |
| "eval_sts-test_pearson_manhattan": 0.9087444298597203, | |
| "eval_sts-test_pearson_max": 0.9087444298597203, | |
| "eval_sts-test_spearman_cosine": 0.9078342918735278, | |
| "eval_sts-test_spearman_dot": 0.8794190309404447, | |
| "eval_sts-test_spearman_euclidean": 0.9039501508923226, | |
| "eval_sts-test_spearman_manhattan": 0.9044244247605487, | |
| "eval_sts-test_spearman_max": 0.9078342918735278, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.021526418786692758, | |
| "grad_norm": 4.726498603820801, | |
| "learning_rate": 8.59375e-07, | |
| "loss": 0.1881, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.023483365949119372, | |
| "grad_norm": 4.818070411682129, | |
| "learning_rate": 9.375000000000001e-07, | |
| "loss": 0.1593, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.025440313111545987, | |
| "grad_norm": 4.98201322555542, | |
| "learning_rate": 1.0156250000000001e-06, | |
| "loss": 0.1833, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.0273972602739726, | |
| "grad_norm": 4.269514560699463, | |
| "learning_rate": 1.0937500000000001e-06, | |
| "loss": 0.1352, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.029354207436399216, | |
| "grad_norm": 6.1525492668151855, | |
| "learning_rate": 1.1718750000000001e-06, | |
| "loss": 0.3143, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.029354207436399216, | |
| "eval_loss": 0.1462097316980362, | |
| "eval_runtime": 107.0721, | |
| "eval_samples_per_second": 28.504, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8860829119688085, | |
| "eval_sts-test_pearson_dot": 0.8768990080043222, | |
| "eval_sts-test_pearson_euclidean": 0.9080646402781543, | |
| "eval_sts-test_pearson_manhattan": 0.9088063929836994, | |
| "eval_sts-test_pearson_max": 0.9088063929836994, | |
| "eval_sts-test_spearman_cosine": 0.907713597721555, | |
| "eval_sts-test_spearman_dot": 0.8795110842851269, | |
| "eval_sts-test_spearman_euclidean": 0.9040110126078148, | |
| "eval_sts-test_spearman_manhattan": 0.9045081991218733, | |
| "eval_sts-test_spearman_max": 0.907713597721555, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.03131115459882583, | |
| "grad_norm": 4.751354694366455, | |
| "learning_rate": 1.25e-06, | |
| "loss": 0.1583, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.033268101761252444, | |
| "grad_norm": 5.435980319976807, | |
| "learning_rate": 1.328125e-06, | |
| "loss": 0.2015, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.03522504892367906, | |
| "grad_norm": 4.1765851974487305, | |
| "learning_rate": 1.40625e-06, | |
| "loss": 0.1476, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.03718199608610567, | |
| "grad_norm": 4.689794540405273, | |
| "learning_rate": 1.484375e-06, | |
| "loss": 0.1676, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.03913894324853229, | |
| "grad_norm": 4.203744888305664, | |
| "learning_rate": 1.5625e-06, | |
| "loss": 0.1525, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.03913894324853229, | |
| "eval_loss": 0.14544810354709625, | |
| "eval_runtime": 107.1845, | |
| "eval_samples_per_second": 28.474, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8861436293943533, | |
| "eval_sts-test_pearson_dot": 0.8769239163708102, | |
| "eval_sts-test_pearson_euclidean": 0.9082269545633608, | |
| "eval_sts-test_pearson_manhattan": 0.9089828403051001, | |
| "eval_sts-test_pearson_max": 0.9089828403051001, | |
| "eval_sts-test_spearman_cosine": 0.907929343552723, | |
| "eval_sts-test_spearman_dot": 0.8796122221358714, | |
| "eval_sts-test_spearman_euclidean": 0.9043074002120102, | |
| "eval_sts-test_spearman_manhattan": 0.9047217521412333, | |
| "eval_sts-test_spearman_max": 0.907929343552723, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.0410958904109589, | |
| "grad_norm": 5.152130603790283, | |
| "learning_rate": 1.640625e-06, | |
| "loss": 0.1717, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.043052837573385516, | |
| "grad_norm": 5.343059062957764, | |
| "learning_rate": 1.71875e-06, | |
| "loss": 0.198, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.04500978473581213, | |
| "grad_norm": 5.224748134613037, | |
| "learning_rate": 1.796875e-06, | |
| "loss": 0.3062, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.046966731898238745, | |
| "grad_norm": 4.6179423332214355, | |
| "learning_rate": 1.8750000000000003e-06, | |
| "loss": 0.1241, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.04892367906066536, | |
| "grad_norm": 4.200148105621338, | |
| "learning_rate": 1.953125e-06, | |
| "loss": 0.1087, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.04892367906066536, | |
| "eval_loss": 0.14457188546657562, | |
| "eval_runtime": 107.3809, | |
| "eval_samples_per_second": 28.422, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8862905994058754, | |
| "eval_sts-test_pearson_dot": 0.877015249192232, | |
| "eval_sts-test_pearson_euclidean": 0.9085054742522269, | |
| "eval_sts-test_pearson_manhattan": 0.9092575877809899, | |
| "eval_sts-test_pearson_max": 0.9092575877809899, | |
| "eval_sts-test_spearman_cosine": 0.9082294902628751, | |
| "eval_sts-test_spearman_dot": 0.8798810429630494, | |
| "eval_sts-test_spearman_euclidean": 0.9047149499495015, | |
| "eval_sts-test_spearman_manhattan": 0.9051023616193669, | |
| "eval_sts-test_spearman_max": 0.9082294902628751, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.050880626223091974, | |
| "grad_norm": 4.890737533569336, | |
| "learning_rate": 2.0312500000000002e-06, | |
| "loss": 0.1767, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.05283757338551859, | |
| "grad_norm": 4.683767795562744, | |
| "learning_rate": 2.109375e-06, | |
| "loss": 0.1951, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.0547945205479452, | |
| "grad_norm": 4.656280040740967, | |
| "learning_rate": 2.1875000000000002e-06, | |
| "loss": 0.1621, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.05675146771037182, | |
| "grad_norm": 4.446409702301025, | |
| "learning_rate": 2.265625e-06, | |
| "loss": 0.221, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.05870841487279843, | |
| "grad_norm": 5.765133857727051, | |
| "learning_rate": 2.3437500000000002e-06, | |
| "loss": 0.2241, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.05870841487279843, | |
| "eval_loss": 0.14350731670856476, | |
| "eval_runtime": 107.3747, | |
| "eval_samples_per_second": 28.424, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8863784941826807, | |
| "eval_sts-test_pearson_dot": 0.8768948467465629, | |
| "eval_sts-test_pearson_euclidean": 0.9088066170487232, | |
| "eval_sts-test_pearson_manhattan": 0.9095658568102677, | |
| "eval_sts-test_pearson_max": 0.9095658568102677, | |
| "eval_sts-test_spearman_cosine": 0.9082580415676429, | |
| "eval_sts-test_spearman_dot": 0.8801849487791585, | |
| "eval_sts-test_spearman_euclidean": 0.9051721735871375, | |
| "eval_sts-test_spearman_manhattan": 0.9054862826908437, | |
| "eval_sts-test_spearman_max": 0.9082580415676429, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.060665362035225046, | |
| "grad_norm": 5.359245777130127, | |
| "learning_rate": 2.421875e-06, | |
| "loss": 0.2093, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.06262230919765166, | |
| "grad_norm": 4.439486503601074, | |
| "learning_rate": 2.5e-06, | |
| "loss": 0.1615, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.06457925636007827, | |
| "grad_norm": 3.689824342727661, | |
| "learning_rate": 2.5781250000000004e-06, | |
| "loss": 0.1615, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.06653620352250489, | |
| "grad_norm": 4.842885494232178, | |
| "learning_rate": 2.65625e-06, | |
| "loss": 0.1772, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.0684931506849315, | |
| "grad_norm": 5.209301948547363, | |
| "learning_rate": 2.7343750000000004e-06, | |
| "loss": 0.2324, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.0684931506849315, | |
| "eval_loss": 0.14226235449314117, | |
| "eval_runtime": 107.3108, | |
| "eval_samples_per_second": 28.441, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8863574366132135, | |
| "eval_sts-test_pearson_dot": 0.8765683077424664, | |
| "eval_sts-test_pearson_euclidean": 0.9091012263251723, | |
| "eval_sts-test_pearson_manhattan": 0.9098631032540263, | |
| "eval_sts-test_pearson_max": 0.9098631032540263, | |
| "eval_sts-test_spearman_cosine": 0.9083728733043733, | |
| "eval_sts-test_spearman_dot": 0.8800282746130272, | |
| "eval_sts-test_spearman_euclidean": 0.9052579170039636, | |
| "eval_sts-test_spearman_manhattan": 0.9059997586640487, | |
| "eval_sts-test_spearman_max": 0.9083728733043733, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.07045009784735812, | |
| "grad_norm": 4.740983009338379, | |
| "learning_rate": 2.8125e-06, | |
| "loss": 0.2611, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.07240704500978473, | |
| "grad_norm": 5.090059757232666, | |
| "learning_rate": 2.8906250000000004e-06, | |
| "loss": 0.214, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.07436399217221135, | |
| "grad_norm": 5.123153209686279, | |
| "learning_rate": 2.96875e-06, | |
| "loss": 0.1985, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.07632093933463796, | |
| "grad_norm": 5.401946067810059, | |
| "learning_rate": 3.0468750000000004e-06, | |
| "loss": 0.1855, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.07827788649706457, | |
| "grad_norm": 4.838700294494629, | |
| "learning_rate": 3.125e-06, | |
| "loss": 0.1234, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.07827788649706457, | |
| "eval_loss": 0.14100149273872375, | |
| "eval_runtime": 107.3059, | |
| "eval_samples_per_second": 28.442, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8864265749012155, | |
| "eval_sts-test_pearson_dot": 0.8764612424174422, | |
| "eval_sts-test_pearson_euclidean": 0.9094092487009695, | |
| "eval_sts-test_pearson_manhattan": 0.9101707626021143, | |
| "eval_sts-test_pearson_max": 0.9101707626021143, | |
| "eval_sts-test_spearman_cosine": 0.908505695048183, | |
| "eval_sts-test_spearman_dot": 0.8802103674956289, | |
| "eval_sts-test_spearman_euclidean": 0.9054564783507572, | |
| "eval_sts-test_spearman_manhattan": 0.9063046490079084, | |
| "eval_sts-test_spearman_max": 0.908505695048183, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.08023483365949119, | |
| "grad_norm": 3.8856801986694336, | |
| "learning_rate": 3.2031250000000004e-06, | |
| "loss": 0.1492, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.0821917808219178, | |
| "grad_norm": 5.678151607513428, | |
| "learning_rate": 3.28125e-06, | |
| "loss": 0.2022, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.08414872798434442, | |
| "grad_norm": 5.104148864746094, | |
| "learning_rate": 3.3593750000000003e-06, | |
| "loss": 0.2146, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.08610567514677103, | |
| "grad_norm": 4.76043701171875, | |
| "learning_rate": 3.4375e-06, | |
| "loss": 0.1688, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.08806262230919765, | |
| "grad_norm": 5.128803730010986, | |
| "learning_rate": 3.5156250000000003e-06, | |
| "loss": 0.175, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.08806262230919765, | |
| "eval_loss": 0.13962982594966888, | |
| "eval_runtime": 107.4144, | |
| "eval_samples_per_second": 28.413, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.886410908658177, | |
| "eval_sts-test_pearson_dot": 0.8762836795862763, | |
| "eval_sts-test_pearson_euclidean": 0.9096890242379734, | |
| "eval_sts-test_pearson_manhattan": 0.9104590803642174, | |
| "eval_sts-test_pearson_max": 0.9104590803642174, | |
| "eval_sts-test_spearman_cosine": 0.9086694846648755, | |
| "eval_sts-test_spearman_dot": 0.8801346931126159, | |
| "eval_sts-test_spearman_euclidean": 0.9057376952773407, | |
| "eval_sts-test_spearman_manhattan": 0.9064708999439774, | |
| "eval_sts-test_spearman_max": 0.9086694846648755, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.09001956947162426, | |
| "grad_norm": 4.968522548675537, | |
| "learning_rate": 3.59375e-06, | |
| "loss": 0.2123, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.09197651663405088, | |
| "grad_norm": 4.343472957611084, | |
| "learning_rate": 3.6718750000000003e-06, | |
| "loss": 0.1118, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.09393346379647749, | |
| "grad_norm": 6.252938270568848, | |
| "learning_rate": 3.7500000000000005e-06, | |
| "loss": 0.3009, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.0958904109589041, | |
| "grad_norm": 3.411029815673828, | |
| "learning_rate": 3.828125000000001e-06, | |
| "loss": 0.1071, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.09784735812133072, | |
| "grad_norm": 5.379226207733154, | |
| "learning_rate": 3.90625e-06, | |
| "loss": 0.2608, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.09784735812133072, | |
| "eval_loss": 0.13823722302913666, | |
| "eval_runtime": 107.3656, | |
| "eval_samples_per_second": 28.426, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8863074884351817, | |
| "eval_sts-test_pearson_dot": 0.8763122134205692, | |
| "eval_sts-test_pearson_euclidean": 0.9097700018848961, | |
| "eval_sts-test_pearson_manhattan": 0.9105724410858811, | |
| "eval_sts-test_pearson_max": 0.9105724410858811, | |
| "eval_sts-test_spearman_cosine": 0.9085105281844131, | |
| "eval_sts-test_spearman_dot": 0.8801239975611433, | |
| "eval_sts-test_spearman_euclidean": 0.9059798443527296, | |
| "eval_sts-test_spearman_manhattan": 0.9065691737139927, | |
| "eval_sts-test_spearman_max": 0.9085105281844131, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.09980430528375733, | |
| "grad_norm": 4.599095821380615, | |
| "learning_rate": 3.984375e-06, | |
| "loss": 0.1368, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.10176125244618395, | |
| "grad_norm": 5.634761333465576, | |
| "learning_rate": 4.0625000000000005e-06, | |
| "loss": 0.2307, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.10371819960861056, | |
| "grad_norm": 4.678525924682617, | |
| "learning_rate": 4.140625000000001e-06, | |
| "loss": 0.1366, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.10567514677103718, | |
| "grad_norm": 4.931070327758789, | |
| "learning_rate": 4.21875e-06, | |
| "loss": 0.1857, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.10763209393346379, | |
| "grad_norm": 4.903087139129639, | |
| "learning_rate": 4.296875e-06, | |
| "loss": 0.2155, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.10763209393346379, | |
| "eval_loss": 0.1367325782775879, | |
| "eval_runtime": 107.3012, | |
| "eval_samples_per_second": 28.443, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.88603017002284, | |
| "eval_sts-test_pearson_dot": 0.8761626193697236, | |
| "eval_sts-test_pearson_euclidean": 0.9096799681812165, | |
| "eval_sts-test_pearson_manhattan": 0.9104977957475867, | |
| "eval_sts-test_pearson_max": 0.9104977957475867, | |
| "eval_sts-test_spearman_cosine": 0.9084685067499666, | |
| "eval_sts-test_spearman_dot": 0.8802836700617878, | |
| "eval_sts-test_spearman_euclidean": 0.9058409364373706, | |
| "eval_sts-test_spearman_manhattan": 0.9064240006220393, | |
| "eval_sts-test_spearman_max": 0.9084685067499666, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.1095890410958904, | |
| "grad_norm": 5.408311367034912, | |
| "learning_rate": 4.3750000000000005e-06, | |
| "loss": 0.2022, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.11154598825831702, | |
| "grad_norm": 4.5926713943481445, | |
| "learning_rate": 4.453125000000001e-06, | |
| "loss": 0.2076, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.11350293542074363, | |
| "grad_norm": 6.475535869598389, | |
| "learning_rate": 4.53125e-06, | |
| "loss": 0.4133, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.11545988258317025, | |
| "grad_norm": 4.997581481933594, | |
| "learning_rate": 4.609375e-06, | |
| "loss": 0.1823, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.11741682974559686, | |
| "grad_norm": 3.899284601211548, | |
| "learning_rate": 4.6875000000000004e-06, | |
| "loss": 0.1136, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.11741682974559686, | |
| "eval_loss": 0.13528631627559662, | |
| "eval_runtime": 107.3435, | |
| "eval_samples_per_second": 28.432, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8860224650016398, | |
| "eval_sts-test_pearson_dot": 0.8762739756970772, | |
| "eval_sts-test_pearson_euclidean": 0.9099016820022997, | |
| "eval_sts-test_pearson_manhattan": 0.9107281338135995, | |
| "eval_sts-test_pearson_max": 0.9107281338135995, | |
| "eval_sts-test_spearman_cosine": 0.9087510214631306, | |
| "eval_sts-test_spearman_dot": 0.8808623486228402, | |
| "eval_sts-test_spearman_euclidean": 0.9060555634870038, | |
| "eval_sts-test_spearman_manhattan": 0.9067256241238172, | |
| "eval_sts-test_spearman_max": 0.9087510214631306, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.11937377690802348, | |
| "grad_norm": 4.476404190063477, | |
| "learning_rate": 4.765625000000001e-06, | |
| "loss": 0.1687, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.12133072407045009, | |
| "grad_norm": 4.893277168273926, | |
| "learning_rate": 4.84375e-06, | |
| "loss": 0.1591, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.1232876712328767, | |
| "grad_norm": 4.510354042053223, | |
| "learning_rate": 4.921875e-06, | |
| "loss": 0.1653, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.12524461839530332, | |
| "grad_norm": 4.400285243988037, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1799, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.12720156555772993, | |
| "grad_norm": 4.631839752197266, | |
| "learning_rate": 5.078125000000001e-06, | |
| "loss": 0.1578, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.12720156555772993, | |
| "eval_loss": 0.1336735188961029, | |
| "eval_runtime": 107.4984, | |
| "eval_samples_per_second": 28.391, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.886014179849858, | |
| "eval_sts-test_pearson_dot": 0.8762492282837839, | |
| "eval_sts-test_pearson_euclidean": 0.9101155794045166, | |
| "eval_sts-test_pearson_manhattan": 0.9109538919103571, | |
| "eval_sts-test_pearson_max": 0.9109538919103571, | |
| "eval_sts-test_spearman_cosine": 0.9089514176116413, | |
| "eval_sts-test_spearman_dot": 0.8810853441583534, | |
| "eval_sts-test_spearman_euclidean": 0.9061670836303911, | |
| "eval_sts-test_spearman_manhattan": 0.9072153371772234, | |
| "eval_sts-test_spearman_max": 0.9089514176116413, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.12915851272015655, | |
| "grad_norm": 4.043459415435791, | |
| "learning_rate": 5.156250000000001e-06, | |
| "loss": 0.1844, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.13111545988258316, | |
| "grad_norm": 4.447835922241211, | |
| "learning_rate": 5.234375e-06, | |
| "loss": 0.1489, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.13307240704500978, | |
| "grad_norm": 5.372109889984131, | |
| "learning_rate": 5.3125e-06, | |
| "loss": 0.1845, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.1350293542074364, | |
| "grad_norm": 3.5112483501434326, | |
| "learning_rate": 5.390625000000001e-06, | |
| "loss": 0.1364, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.136986301369863, | |
| "grad_norm": 4.305239200592041, | |
| "learning_rate": 5.468750000000001e-06, | |
| "loss": 0.1584, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.136986301369863, | |
| "eval_loss": 0.1320798397064209, | |
| "eval_runtime": 107.505, | |
| "eval_samples_per_second": 28.389, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.88578311613969, | |
| "eval_sts-test_pearson_dot": 0.875928774505713, | |
| "eval_sts-test_pearson_euclidean": 0.91024619729973, | |
| "eval_sts-test_pearson_manhattan": 0.9110959495329505, | |
| "eval_sts-test_pearson_max": 0.9110959495329505, | |
| "eval_sts-test_spearman_cosine": 0.9086066538938818, | |
| "eval_sts-test_spearman_dot": 0.8801235500485294, | |
| "eval_sts-test_spearman_euclidean": 0.9060052183179386, | |
| "eval_sts-test_spearman_manhattan": 0.907439182986703, | |
| "eval_sts-test_spearman_max": 0.9086066538938818, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.13894324853228962, | |
| "grad_norm": 5.093306064605713, | |
| "learning_rate": 5.546875e-06, | |
| "loss": 0.2279, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.14090019569471623, | |
| "grad_norm": 4.953585147857666, | |
| "learning_rate": 5.625e-06, | |
| "loss": 0.2028, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.14285714285714285, | |
| "grad_norm": 4.1561102867126465, | |
| "learning_rate": 5.7031250000000006e-06, | |
| "loss": 0.2291, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.14481409001956946, | |
| "grad_norm": 5.00941801071167, | |
| "learning_rate": 5.781250000000001e-06, | |
| "loss": 0.2419, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.14677103718199608, | |
| "grad_norm": 3.6476099491119385, | |
| "learning_rate": 5.859375e-06, | |
| "loss": 0.1329, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.14677103718199608, | |
| "eval_loss": 0.13061992824077606, | |
| "eval_runtime": 107.3395, | |
| "eval_samples_per_second": 28.433, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8854112983780439, | |
| "eval_sts-test_pearson_dot": 0.8752625071185561, | |
| "eval_sts-test_pearson_euclidean": 0.9103378320010516, | |
| "eval_sts-test_pearson_manhattan": 0.9112261622276095, | |
| "eval_sts-test_pearson_max": 0.9112261622276095, | |
| "eval_sts-test_spearman_cosine": 0.9082604133844965, | |
| "eval_sts-test_spearman_dot": 0.8794192099454903, | |
| "eval_sts-test_spearman_euclidean": 0.9060063370994732, | |
| "eval_sts-test_spearman_manhattan": 0.90766132824825, | |
| "eval_sts-test_spearman_max": 0.9082604133844965, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.1487279843444227, | |
| "grad_norm": 4.10636568069458, | |
| "learning_rate": 5.9375e-06, | |
| "loss": 0.204, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.1506849315068493, | |
| "grad_norm": 4.767779350280762, | |
| "learning_rate": 6.0156250000000005e-06, | |
| "loss": 0.2239, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.15264187866927592, | |
| "grad_norm": 5.366302490234375, | |
| "learning_rate": 6.093750000000001e-06, | |
| "loss": 0.2181, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.15459882583170254, | |
| "grad_norm": 4.087960720062256, | |
| "learning_rate": 6.171875e-06, | |
| "loss": 0.1285, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.15655577299412915, | |
| "grad_norm": 3.7557668685913086, | |
| "learning_rate": 6.25e-06, | |
| "loss": 0.1067, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.15655577299412915, | |
| "eval_loss": 0.12924787402153015, | |
| "eval_runtime": 107.2528, | |
| "eval_samples_per_second": 28.456, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8850894038300653, | |
| "eval_sts-test_pearson_dot": 0.874941916465686, | |
| "eval_sts-test_pearson_euclidean": 0.9101863990952803, | |
| "eval_sts-test_pearson_manhattan": 0.9110826056950171, | |
| "eval_sts-test_pearson_max": 0.9110826056950171, | |
| "eval_sts-test_spearman_cosine": 0.9078700928826409, | |
| "eval_sts-test_spearman_dot": 0.8792947566875607, | |
| "eval_sts-test_spearman_euclidean": 0.9059290069197888, | |
| "eval_sts-test_spearman_manhattan": 0.9075206750336968, | |
| "eval_sts-test_spearman_max": 0.9078700928826409, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.15851272015655576, | |
| "grad_norm": 3.5708839893341064, | |
| "learning_rate": 6.3281250000000005e-06, | |
| "loss": 0.1189, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.16046966731898238, | |
| "grad_norm": 4.602839469909668, | |
| "learning_rate": 6.406250000000001e-06, | |
| "loss": 0.236, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.162426614481409, | |
| "grad_norm": 4.304513931274414, | |
| "learning_rate": 6.484375000000001e-06, | |
| "loss": 0.1584, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.1643835616438356, | |
| "grad_norm": 4.165163516998291, | |
| "learning_rate": 6.5625e-06, | |
| "loss": 0.1925, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.16634050880626222, | |
| "grad_norm": 3.9157192707061768, | |
| "learning_rate": 6.6406250000000005e-06, | |
| "loss": 0.129, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.16634050880626222, | |
| "eval_loss": 0.1278335303068161, | |
| "eval_runtime": 107.1978, | |
| "eval_samples_per_second": 28.471, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8845993101894516, | |
| "eval_sts-test_pearson_dot": 0.8740701762146532, | |
| "eval_sts-test_pearson_euclidean": 0.9100055922999684, | |
| "eval_sts-test_pearson_manhattan": 0.9108899080028133, | |
| "eval_sts-test_pearson_max": 0.9108899080028133, | |
| "eval_sts-test_spearman_cosine": 0.9078923342595523, | |
| "eval_sts-test_spearman_dot": 0.8788126513485913, | |
| "eval_sts-test_spearman_euclidean": 0.9057257466905491, | |
| "eval_sts-test_spearman_manhattan": 0.9070083178420268, | |
| "eval_sts-test_spearman_max": 0.9078923342595523, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.16829745596868884, | |
| "grad_norm": 4.233823776245117, | |
| "learning_rate": 6.718750000000001e-06, | |
| "loss": 0.1376, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.17025440313111545, | |
| "grad_norm": 4.670790195465088, | |
| "learning_rate": 6.796875000000001e-06, | |
| "loss": 0.1691, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.17221135029354206, | |
| "grad_norm": 3.742030382156372, | |
| "learning_rate": 6.875e-06, | |
| "loss": 0.1045, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.17416829745596868, | |
| "grad_norm": 4.242702960968018, | |
| "learning_rate": 6.9531250000000004e-06, | |
| "loss": 0.165, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.1761252446183953, | |
| "grad_norm": 5.499476909637451, | |
| "learning_rate": 7.031250000000001e-06, | |
| "loss": 0.2926, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.1761252446183953, | |
| "eval_loss": 0.12669824063777924, | |
| "eval_runtime": 107.2778, | |
| "eval_samples_per_second": 28.45, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8844194771150324, | |
| "eval_sts-test_pearson_dot": 0.873458365713796, | |
| "eval_sts-test_pearson_euclidean": 0.9099396625521212, | |
| "eval_sts-test_pearson_manhattan": 0.910745898918033, | |
| "eval_sts-test_pearson_max": 0.910745898918033, | |
| "eval_sts-test_spearman_cosine": 0.907622707909669, | |
| "eval_sts-test_spearman_dot": 0.8783740442356941, | |
| "eval_sts-test_spearman_euclidean": 0.9058808545625318, | |
| "eval_sts-test_spearman_manhattan": 0.906889458491771, | |
| "eval_sts-test_spearman_max": 0.907622707909669, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.1780821917808219, | |
| "grad_norm": 2.992021083831787, | |
| "learning_rate": 7.109375000000001e-06, | |
| "loss": 0.1048, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.18003913894324852, | |
| "grad_norm": 4.298286437988281, | |
| "learning_rate": 7.1875e-06, | |
| "loss": 0.1596, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.18199608610567514, | |
| "grad_norm": 5.210509300231934, | |
| "learning_rate": 7.265625e-06, | |
| "loss": 0.2474, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.18395303326810175, | |
| "grad_norm": 4.527407169342041, | |
| "learning_rate": 7.343750000000001e-06, | |
| "loss": 0.1652, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.18590998043052837, | |
| "grad_norm": 5.302050590515137, | |
| "learning_rate": 7.421875000000001e-06, | |
| "loss": 0.2483, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.18590998043052837, | |
| "eval_loss": 0.1252526491880417, | |
| "eval_runtime": 107.5519, | |
| "eval_samples_per_second": 28.377, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.884272350180128, | |
| "eval_sts-test_pearson_dot": 0.8727334938335432, | |
| "eval_sts-test_pearson_euclidean": 0.9099441972021025, | |
| "eval_sts-test_pearson_manhattan": 0.9106991509833859, | |
| "eval_sts-test_pearson_max": 0.9106991509833859, | |
| "eval_sts-test_spearman_cosine": 0.9075948278738224, | |
| "eval_sts-test_spearman_dot": 0.87780624023116, | |
| "eval_sts-test_spearman_euclidean": 0.9060086194138042, | |
| "eval_sts-test_spearman_manhattan": 0.9069788267607697, | |
| "eval_sts-test_spearman_max": 0.9075948278738224, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.18786692759295498, | |
| "grad_norm": 3.690441608428955, | |
| "learning_rate": 7.500000000000001e-06, | |
| "loss": 0.1623, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.1898238747553816, | |
| "grad_norm": 4.585984706878662, | |
| "learning_rate": 7.578125e-06, | |
| "loss": 0.1955, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.1917808219178082, | |
| "grad_norm": 4.493942737579346, | |
| "learning_rate": 7.656250000000001e-06, | |
| "loss": 0.2023, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.19373776908023482, | |
| "grad_norm": 4.569936275482178, | |
| "learning_rate": 7.734375e-06, | |
| "loss": 0.1886, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.19569471624266144, | |
| "grad_norm": 3.7703664302825928, | |
| "learning_rate": 7.8125e-06, | |
| "loss": 0.1284, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.19569471624266144, | |
| "eval_loss": 0.12290485948324203, | |
| "eval_runtime": 107.6958, | |
| "eval_samples_per_second": 28.339, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.8836376979322419, | |
| "eval_sts-test_pearson_dot": 0.8710695777275684, | |
| "eval_sts-test_pearson_euclidean": 0.9098265834859519, | |
| "eval_sts-test_pearson_manhattan": 0.9106248996071287, | |
| "eval_sts-test_pearson_max": 0.9106248996071287, | |
| "eval_sts-test_spearman_cosine": 0.9078868298544011, | |
| "eval_sts-test_spearman_dot": 0.8773200625274038, | |
| "eval_sts-test_spearman_euclidean": 0.9063156130669492, | |
| "eval_sts-test_spearman_manhattan": 0.9071474495136926, | |
| "eval_sts-test_spearman_max": 0.9078868298544011, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.19765166340508805, | |
| "grad_norm": 4.356619358062744, | |
| "learning_rate": 7.890625e-06, | |
| "loss": 0.2005, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.19960861056751467, | |
| "grad_norm": 4.293449878692627, | |
| "learning_rate": 7.96875e-06, | |
| "loss": 0.2301, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.20156555772994128, | |
| "grad_norm": 4.654509544372559, | |
| "learning_rate": 8.046875e-06, | |
| "loss": 0.2249, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.2035225048923679, | |
| "grad_norm": 4.510340213775635, | |
| "learning_rate": 8.125000000000001e-06, | |
| "loss": 0.214, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.2054794520547945, | |
| "grad_norm": 3.880908489227295, | |
| "learning_rate": 8.203125000000001e-06, | |
| "loss": 0.1429, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.2054794520547945, | |
| "eval_loss": 0.12076468020677567, | |
| "eval_runtime": 107.7074, | |
| "eval_samples_per_second": 28.336, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.8828542959864998, | |
| "eval_sts-test_pearson_dot": 0.8689355363147886, | |
| "eval_sts-test_pearson_euclidean": 0.9096459762354197, | |
| "eval_sts-test_pearson_manhattan": 0.9104979967855148, | |
| "eval_sts-test_pearson_max": 0.9104979967855148, | |
| "eval_sts-test_spearman_cosine": 0.9076751563880199, | |
| "eval_sts-test_spearman_dot": 0.8750991469270715, | |
| "eval_sts-test_spearman_euclidean": 0.906379383614432, | |
| "eval_sts-test_spearman_manhattan": 0.9071111562407043, | |
| "eval_sts-test_spearman_max": 0.9076751563880199, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.20743639921722112, | |
| "grad_norm": 3.8524463176727295, | |
| "learning_rate": 8.281250000000001e-06, | |
| "loss": 0.17, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.20939334637964774, | |
| "grad_norm": 4.660905838012695, | |
| "learning_rate": 8.359375e-06, | |
| "loss": 0.1955, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.21135029354207435, | |
| "grad_norm": 4.391407012939453, | |
| "learning_rate": 8.4375e-06, | |
| "loss": 0.1964, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.21330724070450097, | |
| "grad_norm": 3.908740758895874, | |
| "learning_rate": 8.515625e-06, | |
| "loss": 0.1246, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.21526418786692758, | |
| "grad_norm": 3.295600414276123, | |
| "learning_rate": 8.59375e-06, | |
| "loss": 0.1295, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.21526418786692758, | |
| "eval_loss": 0.11901199817657471, | |
| "eval_runtime": 107.5373, | |
| "eval_samples_per_second": 28.381, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.8820675142963768, | |
| "eval_sts-test_pearson_dot": 0.8664913359514981, | |
| "eval_sts-test_pearson_euclidean": 0.9093761405951237, | |
| "eval_sts-test_pearson_manhattan": 0.910248319457324, | |
| "eval_sts-test_pearson_max": 0.910248319457324, | |
| "eval_sts-test_spearman_cosine": 0.9071699146469111, | |
| "eval_sts-test_spearman_dot": 0.8726812810253556, | |
| "eval_sts-test_spearman_euclidean": 0.9064896954737618, | |
| "eval_sts-test_spearman_manhattan": 0.9068174537121922, | |
| "eval_sts-test_spearman_max": 0.9071699146469111, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.2172211350293542, | |
| "grad_norm": 5.0308518409729, | |
| "learning_rate": 8.671875e-06, | |
| "loss": 0.2203, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.2191780821917808, | |
| "grad_norm": 4.501624584197998, | |
| "learning_rate": 8.750000000000001e-06, | |
| "loss": 0.2195, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.22113502935420742, | |
| "grad_norm": 4.200097560882568, | |
| "learning_rate": 8.828125000000001e-06, | |
| "loss": 0.1823, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.22309197651663404, | |
| "grad_norm": 3.6750545501708984, | |
| "learning_rate": 8.906250000000001e-06, | |
| "loss": 0.174, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.22504892367906065, | |
| "grad_norm": 4.105295181274414, | |
| "learning_rate": 8.984375000000002e-06, | |
| "loss": 0.207, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.22504892367906065, | |
| "eval_loss": 0.11745984107255936, | |
| "eval_runtime": 107.5979, | |
| "eval_samples_per_second": 28.365, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.882042560326929, | |
| "eval_sts-test_pearson_dot": 0.8653067979173212, | |
| "eval_sts-test_pearson_euclidean": 0.9095832495385563, | |
| "eval_sts-test_pearson_manhattan": 0.9103602950988618, | |
| "eval_sts-test_pearson_max": 0.9103602950988618, | |
| "eval_sts-test_spearman_cosine": 0.9068824772949942, | |
| "eval_sts-test_spearman_dot": 0.8714208617482668, | |
| "eval_sts-test_spearman_euclidean": 0.906395180809703, | |
| "eval_sts-test_spearman_manhattan": 0.9068741088091138, | |
| "eval_sts-test_spearman_max": 0.9068824772949942, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.22700587084148727, | |
| "grad_norm": 4.654273509979248, | |
| "learning_rate": 9.0625e-06, | |
| "loss": 0.2156, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.22896281800391388, | |
| "grad_norm": 4.661588191986084, | |
| "learning_rate": 9.140625e-06, | |
| "loss": 0.2202, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.2309197651663405, | |
| "grad_norm": 5.366416931152344, | |
| "learning_rate": 9.21875e-06, | |
| "loss": 0.2718, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.2328767123287671, | |
| "grad_norm": 3.672802448272705, | |
| "learning_rate": 9.296875e-06, | |
| "loss": 0.1387, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.23483365949119372, | |
| "grad_norm": 3.7878501415252686, | |
| "learning_rate": 9.375000000000001e-06, | |
| "loss": 0.1506, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.23483365949119372, | |
| "eval_loss": 0.11679373681545258, | |
| "eval_runtime": 107.6687, | |
| "eval_samples_per_second": 28.346, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.882107468031623, | |
| "eval_sts-test_pearson_dot": 0.8647556765462645, | |
| "eval_sts-test_pearson_euclidean": 0.9099443435071429, | |
| "eval_sts-test_pearson_manhattan": 0.9105934104125866, | |
| "eval_sts-test_pearson_max": 0.9105934104125866, | |
| "eval_sts-test_spearman_cosine": 0.9068624287298908, | |
| "eval_sts-test_spearman_dot": 0.8710628964083971, | |
| "eval_sts-test_spearman_euclidean": 0.906624531024334, | |
| "eval_sts-test_spearman_manhattan": 0.9069254385059298, | |
| "eval_sts-test_spearman_max": 0.9069254385059298, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.23679060665362034, | |
| "grad_norm": 3.4761197566986084, | |
| "learning_rate": 9.453125000000001e-06, | |
| "loss": 0.1185, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.23874755381604695, | |
| "grad_norm": 3.9917871952056885, | |
| "learning_rate": 9.531250000000001e-06, | |
| "loss": 0.1681, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.24070450097847357, | |
| "grad_norm": 4.491674423217773, | |
| "learning_rate": 9.609375000000001e-06, | |
| "loss": 0.2321, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.24266144814090018, | |
| "grad_norm": 3.903496503829956, | |
| "learning_rate": 9.6875e-06, | |
| "loss": 0.1457, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.2446183953033268, | |
| "grad_norm": 5.046339988708496, | |
| "learning_rate": 9.765625e-06, | |
| "loss": 0.2027, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.2446183953033268, | |
| "eval_loss": 0.11647585779428482, | |
| "eval_runtime": 107.5396, | |
| "eval_samples_per_second": 28.38, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.8824938293263067, | |
| "eval_sts-test_pearson_dot": 0.8653100788410637, | |
| "eval_sts-test_pearson_euclidean": 0.9104636052712812, | |
| "eval_sts-test_pearson_manhattan": 0.9109341151161342, | |
| "eval_sts-test_pearson_max": 0.9109341151161342, | |
| "eval_sts-test_spearman_cosine": 0.9070702535877924, | |
| "eval_sts-test_spearman_dot": 0.8716920543922986, | |
| "eval_sts-test_spearman_euclidean": 0.9070027239343528, | |
| "eval_sts-test_spearman_manhattan": 0.9073061822378479, | |
| "eval_sts-test_spearman_max": 0.9073061822378479, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.2465753424657534, | |
| "grad_norm": 4.304446697235107, | |
| "learning_rate": 9.84375e-06, | |
| "loss": 0.1821, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.24853228962818003, | |
| "grad_norm": 3.208357810974121, | |
| "learning_rate": 9.921875e-06, | |
| "loss": 0.1258, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.25048923679060664, | |
| "grad_norm": 4.275379657745361, | |
| "learning_rate": 1e-05, | |
| "loss": 0.184, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.25244618395303325, | |
| "grad_norm": 4.408608436584473, | |
| "learning_rate": 1.0078125000000001e-05, | |
| "loss": 0.2015, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.25440313111545987, | |
| "grad_norm": 3.565253973007202, | |
| "learning_rate": 1.0156250000000001e-05, | |
| "loss": 0.1323, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.25440313111545987, | |
| "eval_loss": 0.1154385656118393, | |
| "eval_runtime": 107.5442, | |
| "eval_samples_per_second": 28.379, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.8820850631122565, | |
| "eval_sts-test_pearson_dot": 0.8648589750662984, | |
| "eval_sts-test_pearson_euclidean": 0.9105884442785888, | |
| "eval_sts-test_pearson_manhattan": 0.9109040210291837, | |
| "eval_sts-test_pearson_max": 0.9109040210291837, | |
| "eval_sts-test_spearman_cosine": 0.9074317095260507, | |
| "eval_sts-test_spearman_dot": 0.8710452196601474, | |
| "eval_sts-test_spearman_euclidean": 0.9070635408985837, | |
| "eval_sts-test_spearman_manhattan": 0.9074422260724778, | |
| "eval_sts-test_spearman_max": 0.9074422260724778, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.2563600782778865, | |
| "grad_norm": 4.261953353881836, | |
| "learning_rate": 1.0234375000000001e-05, | |
| "loss": 0.1939, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.2583170254403131, | |
| "grad_norm": 3.806480646133423, | |
| "learning_rate": 1.0312500000000002e-05, | |
| "loss": 0.1428, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.2602739726027397, | |
| "grad_norm": 2.824733257293701, | |
| "learning_rate": 1.0390625e-05, | |
| "loss": 0.1063, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.2622309197651663, | |
| "grad_norm": 4.076455116271973, | |
| "learning_rate": 1.046875e-05, | |
| "loss": 0.1602, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.26418786692759294, | |
| "grad_norm": 3.7571659088134766, | |
| "learning_rate": 1.0546875e-05, | |
| "loss": 0.1814, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.26418786692759294, | |
| "eval_loss": 0.11387230455875397, | |
| "eval_runtime": 107.5968, | |
| "eval_samples_per_second": 28.365, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.8812889193869892, | |
| "eval_sts-test_pearson_dot": 0.8634898982579755, | |
| "eval_sts-test_pearson_euclidean": 0.9104977472627025, | |
| "eval_sts-test_pearson_manhattan": 0.9107178140804983, | |
| "eval_sts-test_pearson_max": 0.9107178140804983, | |
| "eval_sts-test_spearman_cosine": 0.9066986391131981, | |
| "eval_sts-test_spearman_dot": 0.870129116588204, | |
| "eval_sts-test_spearman_euclidean": 0.9070359293703052, | |
| "eval_sts-test_spearman_manhattan": 0.9073414909830857, | |
| "eval_sts-test_spearman_max": 0.9073414909830857, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.26614481409001955, | |
| "grad_norm": 3.864948034286499, | |
| "learning_rate": 1.0625e-05, | |
| "loss": 0.1518, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.26810176125244617, | |
| "grad_norm": 3.5900001525878906, | |
| "learning_rate": 1.0703125000000001e-05, | |
| "loss": 0.1379, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.2700587084148728, | |
| "grad_norm": 4.291954517364502, | |
| "learning_rate": 1.0781250000000001e-05, | |
| "loss": 0.1708, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.2720156555772994, | |
| "grad_norm": 3.8340342044830322, | |
| "learning_rate": 1.0859375000000001e-05, | |
| "loss": 0.2046, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.273972602739726, | |
| "grad_norm": 3.749396562576294, | |
| "learning_rate": 1.0937500000000002e-05, | |
| "loss": 0.1259, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.273972602739726, | |
| "eval_loss": 0.1124362125992775, | |
| "eval_runtime": 107.5142, | |
| "eval_samples_per_second": 28.387, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.8805714116282963, | |
| "eval_sts-test_pearson_dot": 0.8618911680351633, | |
| "eval_sts-test_pearson_euclidean": 0.9102979980912764, | |
| "eval_sts-test_pearson_manhattan": 0.9105232760600299, | |
| "eval_sts-test_pearson_max": 0.9105232760600299, | |
| "eval_sts-test_spearman_cosine": 0.9063180743863257, | |
| "eval_sts-test_spearman_dot": 0.8687826406354595, | |
| "eval_sts-test_spearman_euclidean": 0.9070556199253175, | |
| "eval_sts-test_spearman_manhattan": 0.9073570196707885, | |
| "eval_sts-test_spearman_max": 0.9073570196707885, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.2759295499021526, | |
| "grad_norm": 2.8815276622772217, | |
| "learning_rate": 1.1015625e-05, | |
| "loss": 0.1181, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.27788649706457924, | |
| "grad_norm": 3.766554355621338, | |
| "learning_rate": 1.109375e-05, | |
| "loss": 0.2144, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.27984344422700586, | |
| "grad_norm": 4.289268493652344, | |
| "learning_rate": 1.1171875e-05, | |
| "loss": 0.1822, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.28180039138943247, | |
| "grad_norm": 3.9036617279052734, | |
| "learning_rate": 1.125e-05, | |
| "loss": 0.1667, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.2837573385518591, | |
| "grad_norm": 3.321366786956787, | |
| "learning_rate": 1.1328125000000001e-05, | |
| "loss": 0.0779, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.2837573385518591, | |
| "eval_loss": 0.1118142157793045, | |
| "eval_runtime": 107.3173, | |
| "eval_samples_per_second": 28.439, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8796044904115364, | |
| "eval_sts-test_pearson_dot": 0.8607678603166254, | |
| "eval_sts-test_pearson_euclidean": 0.9097479995877322, | |
| "eval_sts-test_pearson_manhattan": 0.9098650580518599, | |
| "eval_sts-test_pearson_max": 0.9098650580518599, | |
| "eval_sts-test_spearman_cosine": 0.9059690592987342, | |
| "eval_sts-test_spearman_dot": 0.8685229490656053, | |
| "eval_sts-test_spearman_euclidean": 0.90680836920613, | |
| "eval_sts-test_spearman_manhattan": 0.9069437865231001, | |
| "eval_sts-test_spearman_max": 0.9069437865231001, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.2857142857142857, | |
| "grad_norm": 3.460301160812378, | |
| "learning_rate": 1.1406250000000001e-05, | |
| "loss": 0.147, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.2876712328767123, | |
| "grad_norm": 3.8999266624450684, | |
| "learning_rate": 1.1484375000000001e-05, | |
| "loss": 0.1913, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.2896281800391389, | |
| "grad_norm": 3.539788007736206, | |
| "learning_rate": 1.1562500000000002e-05, | |
| "loss": 0.1357, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.29158512720156554, | |
| "grad_norm": 3.499439001083374, | |
| "learning_rate": 1.1640625000000002e-05, | |
| "loss": 0.1128, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.29354207436399216, | |
| "grad_norm": 3.2960240840911865, | |
| "learning_rate": 1.171875e-05, | |
| "loss": 0.0996, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.29354207436399216, | |
| "eval_loss": 0.11132737249135971, | |
| "eval_runtime": 107.5867, | |
| "eval_samples_per_second": 28.368, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.8787852416493207, | |
| "eval_sts-test_pearson_dot": 0.8593025559452621, | |
| "eval_sts-test_pearson_euclidean": 0.9091617970047303, | |
| "eval_sts-test_pearson_manhattan": 0.9091664157178929, | |
| "eval_sts-test_pearson_max": 0.9091664157178929, | |
| "eval_sts-test_spearman_cosine": 0.9054375485671886, | |
| "eval_sts-test_spearman_dot": 0.867029912731804, | |
| "eval_sts-test_spearman_euclidean": 0.9062253050214613, | |
| "eval_sts-test_spearman_manhattan": 0.9062610165280517, | |
| "eval_sts-test_spearman_max": 0.9062610165280517, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.29549902152641877, | |
| "grad_norm": 4.271719932556152, | |
| "learning_rate": 1.1796875e-05, | |
| "loss": 0.1956, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.2974559686888454, | |
| "grad_norm": 3.168663501739502, | |
| "learning_rate": 1.1875e-05, | |
| "loss": 0.0942, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.299412915851272, | |
| "grad_norm": 3.816993236541748, | |
| "learning_rate": 1.1953125000000001e-05, | |
| "loss": 0.1406, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.3013698630136986, | |
| "grad_norm": 5.383023738861084, | |
| "learning_rate": 1.2031250000000001e-05, | |
| "loss": 0.2868, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.30332681017612523, | |
| "grad_norm": 3.123462677001953, | |
| "learning_rate": 1.2109375000000001e-05, | |
| "loss": 0.1102, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.30332681017612523, | |
| "eval_loss": 0.11142811924219131, | |
| "eval_runtime": 107.3019, | |
| "eval_samples_per_second": 28.443, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8780761726881443, | |
| "eval_sts-test_pearson_dot": 0.8581767032057357, | |
| "eval_sts-test_pearson_euclidean": 0.9081534036571242, | |
| "eval_sts-test_pearson_manhattan": 0.9081724370385316, | |
| "eval_sts-test_pearson_max": 0.9081724370385316, | |
| "eval_sts-test_spearman_cosine": 0.9048428490545583, | |
| "eval_sts-test_spearman_dot": 0.8670075818523697, | |
| "eval_sts-test_spearman_euclidean": 0.9052714766361651, | |
| "eval_sts-test_spearman_manhattan": 0.9054467225757737, | |
| "eval_sts-test_spearman_max": 0.9054467225757737, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.30528375733855184, | |
| "grad_norm": 4.1034979820251465, | |
| "learning_rate": 1.2187500000000001e-05, | |
| "loss": 0.1659, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.30724070450097846, | |
| "grad_norm": 3.60249400138855, | |
| "learning_rate": 1.2265625000000002e-05, | |
| "loss": 0.1645, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.30919765166340507, | |
| "grad_norm": 3.771853446960449, | |
| "learning_rate": 1.234375e-05, | |
| "loss": 0.151, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.3111545988258317, | |
| "grad_norm": 4.291686058044434, | |
| "learning_rate": 1.2421875e-05, | |
| "loss": 0.158, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.3131115459882583, | |
| "grad_norm": 5.1689453125, | |
| "learning_rate": 1.25e-05, | |
| "loss": 0.2323, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.3131115459882583, | |
| "eval_loss": 0.11126424372196198, | |
| "eval_runtime": 107.301, | |
| "eval_samples_per_second": 28.443, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8777597983330929, | |
| "eval_sts-test_pearson_dot": 0.8577739588604719, | |
| "eval_sts-test_pearson_euclidean": 0.9075483317216817, | |
| "eval_sts-test_pearson_manhattan": 0.9075908461381532, | |
| "eval_sts-test_pearson_max": 0.9075908461381532, | |
| "eval_sts-test_spearman_cosine": 0.9047649818597372, | |
| "eval_sts-test_spearman_dot": 0.867389712873391, | |
| "eval_sts-test_spearman_euclidean": 0.9048189966322366, | |
| "eval_sts-test_spearman_manhattan": 0.9049692713679889, | |
| "eval_sts-test_spearman_max": 0.9049692713679889, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.3150684931506849, | |
| "grad_norm": 3.304703712463379, | |
| "learning_rate": 1.2578125e-05, | |
| "loss": 0.1157, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.31702544031311153, | |
| "grad_norm": 4.064731121063232, | |
| "learning_rate": 1.2656250000000001e-05, | |
| "loss": 0.1507, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.31898238747553814, | |
| "grad_norm": 4.615545749664307, | |
| "learning_rate": 1.2734375000000001e-05, | |
| "loss": 0.1879, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.32093933463796476, | |
| "grad_norm": 3.767533540725708, | |
| "learning_rate": 1.2812500000000001e-05, | |
| "loss": 0.143, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.32289628180039137, | |
| "grad_norm": 4.727967262268066, | |
| "learning_rate": 1.2890625000000002e-05, | |
| "loss": 0.2227, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.32289628180039137, | |
| "eval_loss": 0.11155427247285843, | |
| "eval_runtime": 107.2898, | |
| "eval_samples_per_second": 28.446, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8775899700998113, | |
| "eval_sts-test_pearson_dot": 0.8571711542435376, | |
| "eval_sts-test_pearson_euclidean": 0.907399950708088, | |
| "eval_sts-test_pearson_manhattan": 0.9073879045697356, | |
| "eval_sts-test_pearson_max": 0.907399950708088, | |
| "eval_sts-test_spearman_cosine": 0.9049959431197784, | |
| "eval_sts-test_spearman_dot": 0.8667648957618442, | |
| "eval_sts-test_spearman_euclidean": 0.9048916279294749, | |
| "eval_sts-test_spearman_manhattan": 0.9050786882020909, | |
| "eval_sts-test_spearman_max": 0.9050786882020909, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.324853228962818, | |
| "grad_norm": 4.0150017738342285, | |
| "learning_rate": 1.2968750000000002e-05, | |
| "loss": 0.1624, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.3268101761252446, | |
| "grad_norm": 3.021153450012207, | |
| "learning_rate": 1.3046875e-05, | |
| "loss": 0.1345, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.3287671232876712, | |
| "grad_norm": 3.869710922241211, | |
| "learning_rate": 1.3125e-05, | |
| "loss": 0.1765, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.33072407045009783, | |
| "grad_norm": 3.538076162338257, | |
| "learning_rate": 1.3203125e-05, | |
| "loss": 0.1368, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.33268101761252444, | |
| "grad_norm": 3.378551483154297, | |
| "learning_rate": 1.3281250000000001e-05, | |
| "loss": 0.0962, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.33268101761252444, | |
| "eval_loss": 0.11131894588470459, | |
| "eval_runtime": 107.3532, | |
| "eval_samples_per_second": 28.43, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8782576778514848, | |
| "eval_sts-test_pearson_dot": 0.8576530243239538, | |
| "eval_sts-test_pearson_euclidean": 0.9077401564122008, | |
| "eval_sts-test_pearson_manhattan": 0.907609849534313, | |
| "eval_sts-test_pearson_max": 0.9077401564122008, | |
| "eval_sts-test_spearman_cosine": 0.9055560946586144, | |
| "eval_sts-test_spearman_dot": 0.8666707838591381, | |
| "eval_sts-test_spearman_euclidean": 0.9054064016892602, | |
| "eval_sts-test_spearman_manhattan": 0.9054834186101147, | |
| "eval_sts-test_spearman_max": 0.9055560946586144, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.33463796477495106, | |
| "grad_norm": 4.588249683380127, | |
| "learning_rate": 1.3359375000000001e-05, | |
| "loss": 0.1783, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.33659491193737767, | |
| "grad_norm": 4.370199680328369, | |
| "learning_rate": 1.3437500000000001e-05, | |
| "loss": 0.2019, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.3385518590998043, | |
| "grad_norm": 4.000157356262207, | |
| "learning_rate": 1.3515625000000002e-05, | |
| "loss": 0.1761, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.3405088062622309, | |
| "grad_norm": 4.3335862159729, | |
| "learning_rate": 1.3593750000000002e-05, | |
| "loss": 0.1855, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.3424657534246575, | |
| "grad_norm": 4.247244358062744, | |
| "learning_rate": 1.3671875e-05, | |
| "loss": 0.1922, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.3424657534246575, | |
| "eval_loss": 0.1105586364865303, | |
| "eval_runtime": 107.3507, | |
| "eval_samples_per_second": 28.43, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8775475016000731, | |
| "eval_sts-test_pearson_dot": 0.8543732981082479, | |
| "eval_sts-test_pearson_euclidean": 0.9076643456809551, | |
| "eval_sts-test_pearson_manhattan": 0.9075054089199206, | |
| "eval_sts-test_pearson_max": 0.9076643456809551, | |
| "eval_sts-test_spearman_cosine": 0.905357578063082, | |
| "eval_sts-test_spearman_dot": 0.8628476388472094, | |
| "eval_sts-test_spearman_euclidean": 0.9054710672619708, | |
| "eval_sts-test_spearman_manhattan": 0.9055309444497123, | |
| "eval_sts-test_spearman_max": 0.9055309444497123, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.34442270058708413, | |
| "grad_norm": 3.881108522415161, | |
| "learning_rate": 1.375e-05, | |
| "loss": 0.1538, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.34637964774951074, | |
| "grad_norm": 3.4271416664123535, | |
| "learning_rate": 1.3828125e-05, | |
| "loss": 0.1049, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.34833659491193736, | |
| "grad_norm": 3.7847940921783447, | |
| "learning_rate": 1.3906250000000001e-05, | |
| "loss": 0.1619, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.350293542074364, | |
| "grad_norm": 2.3725311756134033, | |
| "learning_rate": 1.3984375000000001e-05, | |
| "loss": 0.0731, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.3522504892367906, | |
| "grad_norm": 3.6820032596588135, | |
| "learning_rate": 1.4062500000000001e-05, | |
| "loss": 0.1205, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.3522504892367906, | |
| "eval_loss": 0.10974939167499542, | |
| "eval_runtime": 107.353, | |
| "eval_samples_per_second": 28.43, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8782123578217031, | |
| "eval_sts-test_pearson_dot": 0.852106566478191, | |
| "eval_sts-test_pearson_euclidean": 0.9088860377565003, | |
| "eval_sts-test_pearson_manhattan": 0.9087269620613702, | |
| "eval_sts-test_pearson_max": 0.9088860377565003, | |
| "eval_sts-test_spearman_cosine": 0.9058966517578029, | |
| "eval_sts-test_spearman_dot": 0.8595467858069799, | |
| "eval_sts-test_spearman_euclidean": 0.9064047128283795, | |
| "eval_sts-test_spearman_manhattan": 0.9067846510375924, | |
| "eval_sts-test_spearman_max": 0.9067846510375924, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.3542074363992172, | |
| "grad_norm": 3.7714688777923584, | |
| "learning_rate": 1.4140625000000002e-05, | |
| "loss": 0.169, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.3561643835616438, | |
| "grad_norm": 3.7113559246063232, | |
| "learning_rate": 1.4218750000000002e-05, | |
| "loss": 0.1688, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.35812133072407043, | |
| "grad_norm": 3.1639597415924072, | |
| "learning_rate": 1.4296875000000002e-05, | |
| "loss": 0.1274, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.36007827788649704, | |
| "grad_norm": 4.144288539886475, | |
| "learning_rate": 1.4375e-05, | |
| "loss": 0.1477, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.36203522504892366, | |
| "grad_norm": 3.4342098236083984, | |
| "learning_rate": 1.4453125e-05, | |
| "loss": 0.1418, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.36203522504892366, | |
| "eval_loss": 0.10942607372999191, | |
| "eval_runtime": 107.2679, | |
| "eval_samples_per_second": 28.452, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8778855142398189, | |
| "eval_sts-test_pearson_dot": 0.8501658695420333, | |
| "eval_sts-test_pearson_euclidean": 0.9088432870055996, | |
| "eval_sts-test_pearson_manhattan": 0.9086435133118579, | |
| "eval_sts-test_pearson_max": 0.9088432870055996, | |
| "eval_sts-test_spearman_cosine": 0.9055185931015683, | |
| "eval_sts-test_spearman_dot": 0.8575025481866207, | |
| "eval_sts-test_spearman_euclidean": 0.9063994321795352, | |
| "eval_sts-test_spearman_manhattan": 0.9064969899293684, | |
| "eval_sts-test_spearman_max": 0.9064969899293684, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.3639921722113503, | |
| "grad_norm": 4.744626045227051, | |
| "learning_rate": 1.453125e-05, | |
| "loss": 0.2477, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.3659491193737769, | |
| "grad_norm": 4.062248229980469, | |
| "learning_rate": 1.4609375000000001e-05, | |
| "loss": 0.1713, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.3679060665362035, | |
| "grad_norm": 3.989694833755493, | |
| "learning_rate": 1.4687500000000001e-05, | |
| "loss": 0.1703, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.3698630136986301, | |
| "grad_norm": 3.3543660640716553, | |
| "learning_rate": 1.4765625000000001e-05, | |
| "loss": 0.1176, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.37181996086105673, | |
| "grad_norm": 4.307045936584473, | |
| "learning_rate": 1.4843750000000002e-05, | |
| "loss": 0.1811, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.37181996086105673, | |
| "eval_loss": 0.10837770998477936, | |
| "eval_runtime": 107.3429, | |
| "eval_samples_per_second": 28.432, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8774103555789884, | |
| "eval_sts-test_pearson_dot": 0.84942827650618, | |
| "eval_sts-test_pearson_euclidean": 0.9086430009253119, | |
| "eval_sts-test_pearson_manhattan": 0.9084642534632353, | |
| "eval_sts-test_pearson_max": 0.9086430009253119, | |
| "eval_sts-test_spearman_cosine": 0.9048482639571866, | |
| "eval_sts-test_spearman_dot": 0.8562155914115267, | |
| "eval_sts-test_spearman_euclidean": 0.9060070531196555, | |
| "eval_sts-test_spearman_manhattan": 0.9061608184537963, | |
| "eval_sts-test_spearman_max": 0.9061608184537963, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.37377690802348335, | |
| "grad_norm": 4.140930652618408, | |
| "learning_rate": 1.4921875000000002e-05, | |
| "loss": 0.162, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.37573385518590996, | |
| "grad_norm": 2.7555642127990723, | |
| "learning_rate": 1.5000000000000002e-05, | |
| "loss": 0.1141, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.3776908023483366, | |
| "grad_norm": 4.070343017578125, | |
| "learning_rate": 1.5078125e-05, | |
| "loss": 0.154, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.3796477495107632, | |
| "grad_norm": 4.453440189361572, | |
| "learning_rate": 1.515625e-05, | |
| "loss": 0.2461, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.3816046966731898, | |
| "grad_norm": 3.7656772136688232, | |
| "learning_rate": 1.5234375000000001e-05, | |
| "loss": 0.1573, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.3816046966731898, | |
| "eval_loss": 0.10762027651071548, | |
| "eval_runtime": 107.299, | |
| "eval_samples_per_second": 28.444, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8779461080888007, | |
| "eval_sts-test_pearson_dot": 0.8521074278329072, | |
| "eval_sts-test_pearson_euclidean": 0.9087045359990432, | |
| "eval_sts-test_pearson_manhattan": 0.9086340705654771, | |
| "eval_sts-test_pearson_max": 0.9087045359990432, | |
| "eval_sts-test_spearman_cosine": 0.9045706718827756, | |
| "eval_sts-test_spearman_dot": 0.8584340456924826, | |
| "eval_sts-test_spearman_euclidean": 0.9055143864829975, | |
| "eval_sts-test_spearman_manhattan": 0.9058283613329196, | |
| "eval_sts-test_spearman_max": 0.9058283613329196, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.3835616438356164, | |
| "grad_norm": 3.063400983810425, | |
| "learning_rate": 1.5312500000000003e-05, | |
| "loss": 0.1197, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.38551859099804303, | |
| "grad_norm": 3.893153429031372, | |
| "learning_rate": 1.5390625e-05, | |
| "loss": 0.1395, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.38747553816046965, | |
| "grad_norm": 2.95540714263916, | |
| "learning_rate": 1.546875e-05, | |
| "loss": 0.0847, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.38943248532289626, | |
| "grad_norm": 3.4665300846099854, | |
| "learning_rate": 1.5546875e-05, | |
| "loss": 0.1848, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.3913894324853229, | |
| "grad_norm": 3.6926543712615967, | |
| "learning_rate": 1.5625e-05, | |
| "loss": 0.1377, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.3913894324853229, | |
| "eval_loss": 0.10723523795604706, | |
| "eval_runtime": 107.245, | |
| "eval_samples_per_second": 28.458, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.877994665901344, | |
| "eval_sts-test_pearson_dot": 0.854134605280733, | |
| "eval_sts-test_pearson_euclidean": 0.9085191117850383, | |
| "eval_sts-test_pearson_manhattan": 0.9086424100414001, | |
| "eval_sts-test_pearson_max": 0.9086424100414001, | |
| "eval_sts-test_spearman_cosine": 0.904685279863199, | |
| "eval_sts-test_spearman_dot": 0.8598855528557127, | |
| "eval_sts-test_spearman_euclidean": 0.9052407772708506, | |
| "eval_sts-test_spearman_manhattan": 0.9058868959828196, | |
| "eval_sts-test_spearman_max": 0.9058868959828196, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.3933463796477495, | |
| "grad_norm": 3.303112268447876, | |
| "learning_rate": 1.5703125e-05, | |
| "loss": 0.1109, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.3953033268101761, | |
| "grad_norm": 3.4490058422088623, | |
| "learning_rate": 1.578125e-05, | |
| "loss": 0.1051, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.3972602739726027, | |
| "grad_norm": 2.6598286628723145, | |
| "learning_rate": 1.5859375e-05, | |
| "loss": 0.0975, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.39921722113502933, | |
| "grad_norm": 3.373512029647827, | |
| "learning_rate": 1.59375e-05, | |
| "loss": 0.127, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.40117416829745595, | |
| "grad_norm": 3.1471354961395264, | |
| "learning_rate": 1.6015625e-05, | |
| "loss": 0.1297, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.40117416829745595, | |
| "eval_loss": 0.10685314983129501, | |
| "eval_runtime": 107.3321, | |
| "eval_samples_per_second": 28.435, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8785914848590666, | |
| "eval_sts-test_pearson_dot": 0.8570818659891223, | |
| "eval_sts-test_pearson_euclidean": 0.9086611488562145, | |
| "eval_sts-test_pearson_manhattan": 0.9087606701935215, | |
| "eval_sts-test_pearson_max": 0.9087606701935215, | |
| "eval_sts-test_spearman_cosine": 0.9048987433800361, | |
| "eval_sts-test_spearman_dot": 0.8616398023022556, | |
| "eval_sts-test_spearman_euclidean": 0.9052247563192726, | |
| "eval_sts-test_spearman_manhattan": 0.9056138237858093, | |
| "eval_sts-test_spearman_max": 0.9056138237858093, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.40313111545988256, | |
| "grad_norm": 2.6924684047698975, | |
| "learning_rate": 1.609375e-05, | |
| "loss": 0.0783, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.4050880626223092, | |
| "grad_norm": 2.1100542545318604, | |
| "learning_rate": 1.6171875000000002e-05, | |
| "loss": 0.053, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.4070450097847358, | |
| "grad_norm": 3.7984156608581543, | |
| "learning_rate": 1.6250000000000002e-05, | |
| "loss": 0.1916, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.4090019569471624, | |
| "grad_norm": 4.329834461212158, | |
| "learning_rate": 1.6328125000000002e-05, | |
| "loss": 0.178, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.410958904109589, | |
| "grad_norm": 4.427723407745361, | |
| "learning_rate": 1.6406250000000002e-05, | |
| "loss": 0.2343, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.410958904109589, | |
| "eval_loss": 0.10670512914657593, | |
| "eval_runtime": 107.2313, | |
| "eval_samples_per_second": 28.462, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8788965355860006, | |
| "eval_sts-test_pearson_dot": 0.8580075676260999, | |
| "eval_sts-test_pearson_euclidean": 0.908776492246521, | |
| "eval_sts-test_pearson_manhattan": 0.9089340980301853, | |
| "eval_sts-test_pearson_max": 0.9089340980301853, | |
| "eval_sts-test_spearman_cosine": 0.90530862018312, | |
| "eval_sts-test_spearman_dot": 0.8630207814775328, | |
| "eval_sts-test_spearman_euclidean": 0.905449362900196, | |
| "eval_sts-test_spearman_manhattan": 0.9056519071092534, | |
| "eval_sts-test_spearman_max": 0.9056519071092534, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.41291585127201563, | |
| "grad_norm": 3.890899419784546, | |
| "learning_rate": 1.6484375000000003e-05, | |
| "loss": 0.1816, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.41487279843444225, | |
| "grad_norm": 4.071934700012207, | |
| "learning_rate": 1.6562500000000003e-05, | |
| "loss": 0.2522, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.41682974559686886, | |
| "grad_norm": 3.8046796321868896, | |
| "learning_rate": 1.6640625000000003e-05, | |
| "loss": 0.1787, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.4187866927592955, | |
| "grad_norm": 3.357276201248169, | |
| "learning_rate": 1.671875e-05, | |
| "loss": 0.1913, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.4207436399217221, | |
| "grad_norm": 3.8679873943328857, | |
| "learning_rate": 1.6796875e-05, | |
| "loss": 0.175, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.4207436399217221, | |
| "eval_loss": 0.10552908480167389, | |
| "eval_runtime": 107.6412, | |
| "eval_samples_per_second": 28.353, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.8791676817178924, | |
| "eval_sts-test_pearson_dot": 0.8573342496118925, | |
| "eval_sts-test_pearson_euclidean": 0.909475190469058, | |
| "eval_sts-test_pearson_manhattan": 0.9097533727394405, | |
| "eval_sts-test_pearson_max": 0.9097533727394405, | |
| "eval_sts-test_spearman_cosine": 0.9056468502167161, | |
| "eval_sts-test_spearman_dot": 0.8624976392318674, | |
| "eval_sts-test_spearman_euclidean": 0.9066117769148375, | |
| "eval_sts-test_spearman_manhattan": 0.9069566301351195, | |
| "eval_sts-test_spearman_max": 0.9069566301351195, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.4227005870841487, | |
| "grad_norm": 3.436488389968872, | |
| "learning_rate": 1.6875e-05, | |
| "loss": 0.1533, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.4246575342465753, | |
| "grad_norm": 3.891040563583374, | |
| "learning_rate": 1.6953125e-05, | |
| "loss": 0.1819, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.42661448140900193, | |
| "grad_norm": 4.554884910583496, | |
| "learning_rate": 1.703125e-05, | |
| "loss": 0.2541, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.42857142857142855, | |
| "grad_norm": 3.4431850910186768, | |
| "learning_rate": 1.7109375e-05, | |
| "loss": 0.1103, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.43052837573385516, | |
| "grad_norm": 3.5396361351013184, | |
| "learning_rate": 1.71875e-05, | |
| "loss": 0.1693, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.43052837573385516, | |
| "eval_loss": 0.10396925359964371, | |
| "eval_runtime": 107.33, | |
| "eval_samples_per_second": 28.436, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8794186180626897, | |
| "eval_sts-test_pearson_dot": 0.8555325369075935, | |
| "eval_sts-test_pearson_euclidean": 0.9099071011406157, | |
| "eval_sts-test_pearson_manhattan": 0.9104095617945829, | |
| "eval_sts-test_pearson_max": 0.9104095617945829, | |
| "eval_sts-test_spearman_cosine": 0.9061536582519738, | |
| "eval_sts-test_spearman_dot": 0.8609769018672648, | |
| "eval_sts-test_spearman_euclidean": 0.9068523149448162, | |
| "eval_sts-test_spearman_manhattan": 0.9075606826613808, | |
| "eval_sts-test_spearman_max": 0.9075606826613808, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.4324853228962818, | |
| "grad_norm": 3.4416589736938477, | |
| "learning_rate": 1.7265625e-05, | |
| "loss": 0.1233, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.4344422700587084, | |
| "grad_norm": 2.9554316997528076, | |
| "learning_rate": 1.734375e-05, | |
| "loss": 0.0922, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.436399217221135, | |
| "grad_norm": 3.1570141315460205, | |
| "learning_rate": 1.7421875e-05, | |
| "loss": 0.1243, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.4383561643835616, | |
| "grad_norm": 3.8479344844818115, | |
| "learning_rate": 1.7500000000000002e-05, | |
| "loss": 0.1613, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.44031311154598823, | |
| "grad_norm": 3.004990339279175, | |
| "learning_rate": 1.7578125000000002e-05, | |
| "loss": 0.1188, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.44031311154598823, | |
| "eval_loss": 0.1029738187789917, | |
| "eval_runtime": 107.2661, | |
| "eval_samples_per_second": 28.453, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8796765477862789, | |
| "eval_sts-test_pearson_dot": 0.8576485629522204, | |
| "eval_sts-test_pearson_euclidean": 0.9098263075403831, | |
| "eval_sts-test_pearson_manhattan": 0.9104321398639006, | |
| "eval_sts-test_pearson_max": 0.9104321398639006, | |
| "eval_sts-test_spearman_cosine": 0.9064603386462892, | |
| "eval_sts-test_spearman_dot": 0.8635142088856343, | |
| "eval_sts-test_spearman_euclidean": 0.9066103896257344, | |
| "eval_sts-test_spearman_manhattan": 0.9076328216947436, | |
| "eval_sts-test_spearman_max": 0.9076328216947436, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.44227005870841485, | |
| "grad_norm": 3.595667839050293, | |
| "learning_rate": 1.7656250000000002e-05, | |
| "loss": 0.196, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.44422700587084146, | |
| "grad_norm": 3.9599428176879883, | |
| "learning_rate": 1.7734375000000002e-05, | |
| "loss": 0.2254, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.4461839530332681, | |
| "grad_norm": 3.2490875720977783, | |
| "learning_rate": 1.7812500000000003e-05, | |
| "loss": 0.1162, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.4481409001956947, | |
| "grad_norm": 4.811342239379883, | |
| "learning_rate": 1.7890625000000003e-05, | |
| "loss": 0.2579, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.4500978473581213, | |
| "grad_norm": 2.993255138397217, | |
| "learning_rate": 1.7968750000000003e-05, | |
| "loss": 0.1203, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.4500978473581213, | |
| "eval_loss": 0.102933868765831, | |
| "eval_runtime": 107.2515, | |
| "eval_samples_per_second": 28.456, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8799758353085696, | |
| "eval_sts-test_pearson_dot": 0.8592997081846103, | |
| "eval_sts-test_pearson_euclidean": 0.9101945793558552, | |
| "eval_sts-test_pearson_manhattan": 0.9106837055219174, | |
| "eval_sts-test_pearson_max": 0.9106837055219174, | |
| "eval_sts-test_spearman_cosine": 0.9071432428951217, | |
| "eval_sts-test_spearman_dot": 0.865314059867535, | |
| "eval_sts-test_spearman_euclidean": 0.9072587906520344, | |
| "eval_sts-test_spearman_manhattan": 0.9077949555147645, | |
| "eval_sts-test_spearman_max": 0.9077949555147645, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.4520547945205479, | |
| "grad_norm": 3.654191017150879, | |
| "learning_rate": 1.8046875e-05, | |
| "loss": 0.1654, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.45401174168297453, | |
| "grad_norm": 3.429565668106079, | |
| "learning_rate": 1.8125e-05, | |
| "loss": 0.1808, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.45596868884540115, | |
| "grad_norm": 3.5679566860198975, | |
| "learning_rate": 1.8203125e-05, | |
| "loss": 0.1397, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.45792563600782776, | |
| "grad_norm": 3.9862124919891357, | |
| "learning_rate": 1.828125e-05, | |
| "loss": 0.2177, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.4598825831702544, | |
| "grad_norm": 3.536984443664551, | |
| "learning_rate": 1.8359375e-05, | |
| "loss": 0.162, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.4598825831702544, | |
| "eval_loss": 0.10404225438833237, | |
| "eval_runtime": 107.254, | |
| "eval_samples_per_second": 28.456, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8802088610554777, | |
| "eval_sts-test_pearson_dot": 0.8618209119350905, | |
| "eval_sts-test_pearson_euclidean": 0.9103461475031536, | |
| "eval_sts-test_pearson_manhattan": 0.9106782364335553, | |
| "eval_sts-test_pearson_max": 0.9106782364335553, | |
| "eval_sts-test_spearman_cosine": 0.9077748174471387, | |
| "eval_sts-test_spearman_dot": 0.8686349167216066, | |
| "eval_sts-test_spearman_euclidean": 0.907571109705285, | |
| "eval_sts-test_spearman_manhattan": 0.9080472631264893, | |
| "eval_sts-test_spearman_max": 0.9080472631264893, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.461839530332681, | |
| "grad_norm": 3.2987570762634277, | |
| "learning_rate": 1.84375e-05, | |
| "loss": 0.177, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.4637964774951076, | |
| "grad_norm": 1.792919397354126, | |
| "learning_rate": 1.8515625e-05, | |
| "loss": 0.0556, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.4657534246575342, | |
| "grad_norm": 3.8270483016967773, | |
| "learning_rate": 1.859375e-05, | |
| "loss": 0.2285, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.46771037181996084, | |
| "grad_norm": 3.2458577156066895, | |
| "learning_rate": 1.8671875e-05, | |
| "loss": 0.1657, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.46966731898238745, | |
| "grad_norm": 4.352839469909668, | |
| "learning_rate": 1.8750000000000002e-05, | |
| "loss": 0.2555, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.46966731898238745, | |
| "eval_loss": 0.10528620332479477, | |
| "eval_runtime": 107.3201, | |
| "eval_samples_per_second": 28.438, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8794205585889476, | |
| "eval_sts-test_pearson_dot": 0.8616236846471828, | |
| "eval_sts-test_pearson_euclidean": 0.9100171674371834, | |
| "eval_sts-test_pearson_manhattan": 0.9102120642982687, | |
| "eval_sts-test_pearson_max": 0.9102120642982687, | |
| "eval_sts-test_spearman_cosine": 0.9076779309662261, | |
| "eval_sts-test_spearman_dot": 0.8702396969551023, | |
| "eval_sts-test_spearman_euclidean": 0.9078436896384199, | |
| "eval_sts-test_spearman_manhattan": 0.9080407741935878, | |
| "eval_sts-test_spearman_max": 0.9080407741935878, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.47162426614481406, | |
| "grad_norm": 3.644327163696289, | |
| "learning_rate": 1.8828125000000002e-05, | |
| "loss": 0.1606, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.4735812133072407, | |
| "grad_norm": 3.0316474437713623, | |
| "learning_rate": 1.8906250000000002e-05, | |
| "loss": 0.1257, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.4755381604696673, | |
| "grad_norm": 3.8527326583862305, | |
| "learning_rate": 1.8984375000000002e-05, | |
| "loss": 0.1898, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.4774951076320939, | |
| "grad_norm": 3.91603422164917, | |
| "learning_rate": 1.9062500000000003e-05, | |
| "loss": 0.1621, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.4794520547945205, | |
| "grad_norm": 3.6845171451568604, | |
| "learning_rate": 1.9140625000000003e-05, | |
| "loss": 0.1606, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.4794520547945205, | |
| "eval_loss": 0.10541080683469772, | |
| "eval_runtime": 107.3443, | |
| "eval_samples_per_second": 28.432, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8782579310286232, | |
| "eval_sts-test_pearson_dot": 0.8596847230641689, | |
| "eval_sts-test_pearson_euclidean": 0.909741577402618, | |
| "eval_sts-test_pearson_manhattan": 0.9098438643121189, | |
| "eval_sts-test_pearson_max": 0.9098438643121189, | |
| "eval_sts-test_spearman_cosine": 0.9078928712746891, | |
| "eval_sts-test_spearman_dot": 0.8682800392187727, | |
| "eval_sts-test_spearman_euclidean": 0.9083291960732551, | |
| "eval_sts-test_spearman_manhattan": 0.908423397478484, | |
| "eval_sts-test_spearman_max": 0.908423397478484, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.48140900195694714, | |
| "grad_norm": 3.31758451461792, | |
| "learning_rate": 1.9218750000000003e-05, | |
| "loss": 0.0983, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.48336594911937375, | |
| "grad_norm": 3.8613622188568115, | |
| "learning_rate": 1.9296875000000003e-05, | |
| "loss": 0.2028, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.48532289628180036, | |
| "grad_norm": 2.792924165725708, | |
| "learning_rate": 1.9375e-05, | |
| "loss": 0.0997, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.487279843444227, | |
| "grad_norm": 3.4162261486053467, | |
| "learning_rate": 1.9453125e-05, | |
| "loss": 0.1582, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.4892367906066536, | |
| "grad_norm": 4.499621391296387, | |
| "learning_rate": 1.953125e-05, | |
| "loss": 0.2394, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.4892367906066536, | |
| "eval_loss": 0.10517927259206772, | |
| "eval_runtime": 107.2761, | |
| "eval_samples_per_second": 28.45, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8775291091187776, | |
| "eval_sts-test_pearson_dot": 0.8591957018286404, | |
| "eval_sts-test_pearson_euclidean": 0.9092406666480166, | |
| "eval_sts-test_pearson_manhattan": 0.909395200356788, | |
| "eval_sts-test_pearson_max": 0.909395200356788, | |
| "eval_sts-test_spearman_cosine": 0.9073655224104529, | |
| "eval_sts-test_spearman_dot": 0.866218124850164, | |
| "eval_sts-test_spearman_euclidean": 0.9077081380676655, | |
| "eval_sts-test_spearman_manhattan": 0.907968321901395, | |
| "eval_sts-test_spearman_max": 0.907968321901395, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.4911937377690802, | |
| "grad_norm": 4.491675853729248, | |
| "learning_rate": 1.9609375e-05, | |
| "loss": 0.2186, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.4931506849315068, | |
| "grad_norm": 2.9051578044891357, | |
| "learning_rate": 1.96875e-05, | |
| "loss": 0.0993, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.49510763209393344, | |
| "grad_norm": 3.53365421295166, | |
| "learning_rate": 1.9765625e-05, | |
| "loss": 0.1805, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.49706457925636005, | |
| "grad_norm": 3.2181098461151123, | |
| "learning_rate": 1.984375e-05, | |
| "loss": 0.1178, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.49902152641878667, | |
| "grad_norm": 4.045453071594238, | |
| "learning_rate": 1.9921875e-05, | |
| "loss": 0.2198, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.49902152641878667, | |
| "eval_loss": 0.10428859293460846, | |
| "eval_runtime": 107.2698, | |
| "eval_samples_per_second": 28.452, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8777129617944619, | |
| "eval_sts-test_pearson_dot": 0.8588391652180615, | |
| "eval_sts-test_pearson_euclidean": 0.9093230964292308, | |
| "eval_sts-test_pearson_manhattan": 0.9095932968076137, | |
| "eval_sts-test_pearson_max": 0.9095932968076137, | |
| "eval_sts-test_spearman_cosine": 0.9069800350448274, | |
| "eval_sts-test_spearman_dot": 0.8639776976998651, | |
| "eval_sts-test_spearman_euclidean": 0.9072912800678044, | |
| "eval_sts-test_spearman_manhattan": 0.9080281095866138, | |
| "eval_sts-test_spearman_max": 0.9080281095866138, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.5009784735812133, | |
| "grad_norm": 2.8251521587371826, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1064, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.50293542074364, | |
| "grad_norm": 3.3597464561462402, | |
| "learning_rate": 1.999924308128909e-05, | |
| "loss": 0.1436, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.5048923679060665, | |
| "grad_norm": 2.580488920211792, | |
| "learning_rate": 1.9996972439741537e-05, | |
| "loss": 0.0859, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.5068493150684932, | |
| "grad_norm": 3.937856674194336, | |
| "learning_rate": 1.9993188419095562e-05, | |
| "loss": 0.2157, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.5088062622309197, | |
| "grad_norm": 3.344531774520874, | |
| "learning_rate": 1.9987891592190367e-05, | |
| "loss": 0.1455, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.5088062622309197, | |
| "eval_loss": 0.10292962938547134, | |
| "eval_runtime": 107.2285, | |
| "eval_samples_per_second": 28.463, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8767515459977318, | |
| "eval_sts-test_pearson_dot": 0.8564862360521637, | |
| "eval_sts-test_pearson_euclidean": 0.9083760527634203, | |
| "eval_sts-test_pearson_manhattan": 0.9086626400377007, | |
| "eval_sts-test_pearson_max": 0.9086626400377007, | |
| "eval_sts-test_spearman_cosine": 0.9057508521481897, | |
| "eval_sts-test_spearman_dot": 0.8601081456298736, | |
| "eval_sts-test_spearman_euclidean": 0.9063700753520626, | |
| "eval_sts-test_spearman_manhattan": 0.9068438122051519, | |
| "eval_sts-test_spearman_max": 0.9068438122051519, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.5107632093933464, | |
| "grad_norm": 3.7637484073638916, | |
| "learning_rate": 1.9981082760879432e-05, | |
| "loss": 0.1974, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.512720156555773, | |
| "grad_norm": 3.182102918624878, | |
| "learning_rate": 1.997276295590912e-05, | |
| "loss": 0.1667, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.5146771037181996, | |
| "grad_norm": 3.7908170223236084, | |
| "learning_rate": 1.9962933436762644e-05, | |
| "loss": 0.1512, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.5166340508806262, | |
| "grad_norm": 3.4492650032043457, | |
| "learning_rate": 1.9951595691469397e-05, | |
| "loss": 0.1684, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.5185909980430529, | |
| "grad_norm": 3.816772222518921, | |
| "learning_rate": 1.9938751436379684e-05, | |
| "loss": 0.2132, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.5185909980430529, | |
| "eval_loss": 0.10117975622415543, | |
| "eval_runtime": 107.3212, | |
| "eval_samples_per_second": 28.438, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8770393502752714, | |
| "eval_sts-test_pearson_dot": 0.8567524208989885, | |
| "eval_sts-test_pearson_euclidean": 0.9080912956763092, | |
| "eval_sts-test_pearson_manhattan": 0.908247948105785, | |
| "eval_sts-test_pearson_max": 0.908247948105785, | |
| "eval_sts-test_spearman_cosine": 0.9053279079767796, | |
| "eval_sts-test_spearman_dot": 0.8598375795035011, | |
| "eval_sts-test_spearman_euclidean": 0.9057662913333698, | |
| "eval_sts-test_spearman_manhattan": 0.9061448870047409, | |
| "eval_sts-test_spearman_max": 0.9061448870047409, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.5205479452054794, | |
| "grad_norm": 3.5570499897003174, | |
| "learning_rate": 1.992440261590491e-05, | |
| "loss": 0.1645, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.5225048923679061, | |
| "grad_norm": 4.160579681396484, | |
| "learning_rate": 1.9908551402223218e-05, | |
| "loss": 0.203, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.5244618395303327, | |
| "grad_norm": 3.5718774795532227, | |
| "learning_rate": 1.9891200194950644e-05, | |
| "loss": 0.1539, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.5264187866927593, | |
| "grad_norm": 3.604438066482544, | |
| "learning_rate": 1.9872351620777883e-05, | |
| "loss": 0.1445, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.5283757338551859, | |
| "grad_norm": 3.4854915142059326, | |
| "learning_rate": 1.9852008533072627e-05, | |
| "loss": 0.1377, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.5283757338551859, | |
| "eval_loss": 0.09936786442995071, | |
| "eval_runtime": 107.3119, | |
| "eval_samples_per_second": 28.44, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8772155384897071, | |
| "eval_sts-test_pearson_dot": 0.8577040756637748, | |
| "eval_sts-test_pearson_euclidean": 0.9081962404777727, | |
| "eval_sts-test_pearson_manhattan": 0.9082660411148933, | |
| "eval_sts-test_pearson_max": 0.9082660411148933, | |
| "eval_sts-test_spearman_cosine": 0.9056296657323417, | |
| "eval_sts-test_spearman_dot": 0.8627456954737598, | |
| "eval_sts-test_spearman_euclidean": 0.9061553587999066, | |
| "eval_sts-test_spearman_manhattan": 0.9063870360801298, | |
| "eval_sts-test_spearman_max": 0.9063870360801298, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.5303326810176126, | |
| "grad_norm": 3.662992238998413, | |
| "learning_rate": 1.9830174011447617e-05, | |
| "loss": 0.1719, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.5322896281800391, | |
| "grad_norm": 3.5594613552093506, | |
| "learning_rate": 1.980685136129445e-05, | |
| "loss": 0.1896, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.5342465753424658, | |
| "grad_norm": 3.257335662841797, | |
| "learning_rate": 1.978204411328318e-05, | |
| "loss": 0.1452, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.5362035225048923, | |
| "grad_norm": 3.292863368988037, | |
| "learning_rate": 1.9755756022827847e-05, | |
| "loss": 0.1275, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.538160469667319, | |
| "grad_norm": 4.065443515777588, | |
| "learning_rate": 1.972799106951796e-05, | |
| "loss": 0.1883, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.538160469667319, | |
| "eval_loss": 0.09800439327955246, | |
| "eval_runtime": 107.2596, | |
| "eval_samples_per_second": 28.454, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8773598628753827, | |
| "eval_sts-test_pearson_dot": 0.8578251655808844, | |
| "eval_sts-test_pearson_euclidean": 0.9082603623937704, | |
| "eval_sts-test_pearson_manhattan": 0.9081101963076783, | |
| "eval_sts-test_pearson_max": 0.9082603623937704, | |
| "eval_sts-test_spearman_cosine": 0.9056689328319392, | |
| "eval_sts-test_spearman_dot": 0.8647132741833555, | |
| "eval_sts-test_spearman_euclidean": 0.9063065285608867, | |
| "eval_sts-test_spearman_manhattan": 0.9067770433231558, | |
| "eval_sts-test_spearman_max": 0.9067770433231558, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.5401174168297456, | |
| "grad_norm": 3.7186553478240967, | |
| "learning_rate": 1.9698753456516047e-05, | |
| "loss": 0.1462, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.5420743639921722, | |
| "grad_norm": 3.5399951934814453, | |
| "learning_rate": 1.9668047609921382e-05, | |
| "loss": 0.1595, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.5440313111545988, | |
| "grad_norm": 3.6143035888671875, | |
| "learning_rate": 1.963587817809993e-05, | |
| "loss": 0.1693, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.5459882583170255, | |
| "grad_norm": 4.133859634399414, | |
| "learning_rate": 1.9602250030980657e-05, | |
| "loss": 0.1929, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.547945205479452, | |
| "grad_norm": 3.6929726600646973, | |
| "learning_rate": 1.9567168259318324e-05, | |
| "loss": 0.154, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.547945205479452, | |
| "eval_loss": 0.0969705730676651, | |
| "eval_runtime": 107.333, | |
| "eval_samples_per_second": 28.435, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8772326487842304, | |
| "eval_sts-test_pearson_dot": 0.8584362564160372, | |
| "eval_sts-test_pearson_euclidean": 0.9077579223693962, | |
| "eval_sts-test_pearson_manhattan": 0.9072827835669532, | |
| "eval_sts-test_pearson_max": 0.9077579223693962, | |
| "eval_sts-test_spearman_cosine": 0.9052923754752349, | |
| "eval_sts-test_spearman_dot": 0.866326959917868, | |
| "eval_sts-test_spearman_euclidean": 0.9057464665245734, | |
| "eval_sts-test_spearman_manhattan": 0.9059635996448444, | |
| "eval_sts-test_spearman_max": 0.9059635996448444, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.5499021526418787, | |
| "grad_norm": 3.515667200088501, | |
| "learning_rate": 1.953063817392281e-05, | |
| "loss": 0.1468, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.5518590998043053, | |
| "grad_norm": 2.3627371788024902, | |
| "learning_rate": 1.949266530485513e-05, | |
| "loss": 0.0898, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.5538160469667319, | |
| "grad_norm": 3.26710844039917, | |
| "learning_rate": 1.945325540059032e-05, | |
| "loss": 0.1425, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.5557729941291585, | |
| "grad_norm": 3.6672258377075195, | |
| "learning_rate": 1.941241442714716e-05, | |
| "loss": 0.1362, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.5577299412915852, | |
| "grad_norm": 3.306119203567505, | |
| "learning_rate": 1.9370148567185043e-05, | |
| "loss": 0.1025, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.5577299412915852, | |
| "eval_loss": 0.09782103449106216, | |
| "eval_runtime": 107.2147, | |
| "eval_samples_per_second": 28.466, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8780373092719852, | |
| "eval_sts-test_pearson_dot": 0.861128415219923, | |
| "eval_sts-test_pearson_euclidean": 0.9076094585437832, | |
| "eval_sts-test_pearson_manhattan": 0.9068707688162918, | |
| "eval_sts-test_pearson_max": 0.9076094585437832, | |
| "eval_sts-test_spearman_cosine": 0.9052606468309083, | |
| "eval_sts-test_spearman_dot": 0.868469739815811, | |
| "eval_sts-test_spearman_euclidean": 0.9051151604801249, | |
| "eval_sts-test_spearman_manhattan": 0.9048908224067698, | |
| "eval_sts-test_spearman_max": 0.9052606468309083, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.5596868884540117, | |
| "grad_norm": 3.2606685161590576, | |
| "learning_rate": 1.9326464219068023e-05, | |
| "loss": 0.1578, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.5616438356164384, | |
| "grad_norm": 3.5152740478515625, | |
| "learning_rate": 1.9281367995896187e-05, | |
| "loss": 0.1235, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.5636007827788649, | |
| "grad_norm": 2.8671882152557373, | |
| "learning_rate": 1.9234866724504554e-05, | |
| "loss": 0.1109, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.5655577299412916, | |
| "grad_norm": 2.315185785293579, | |
| "learning_rate": 1.9186967444429613e-05, | |
| "loss": 0.0746, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.5675146771037182, | |
| "grad_norm": 3.4961392879486084, | |
| "learning_rate": 1.913767740684362e-05, | |
| "loss": 0.1471, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.5675146771037182, | |
| "eval_loss": 0.09924904257059097, | |
| "eval_runtime": 107.3422, | |
| "eval_samples_per_second": 28.432, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8783681424807572, | |
| "eval_sts-test_pearson_dot": 0.861120631953773, | |
| "eval_sts-test_pearson_euclidean": 0.9077238606316402, | |
| "eval_sts-test_pearson_manhattan": 0.9069786963498391, | |
| "eval_sts-test_pearson_max": 0.9077238606316402, | |
| "eval_sts-test_spearman_cosine": 0.9052591700392825, | |
| "eval_sts-test_spearman_dot": 0.8684268233561366, | |
| "eval_sts-test_spearman_euclidean": 0.9046835793152661, | |
| "eval_sts-test_spearman_manhattan": 0.9045985071673613, | |
| "eval_sts-test_spearman_max": 0.9052591700392825, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.5694716242661448, | |
| "grad_norm": 4.221432209014893, | |
| "learning_rate": 1.9087004073456926e-05, | |
| "loss": 0.2631, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.5714285714285714, | |
| "grad_norm": 3.4570438861846924, | |
| "learning_rate": 1.9034955115388364e-05, | |
| "loss": 0.11, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.5733855185909981, | |
| "grad_norm": 3.6059136390686035, | |
| "learning_rate": 1.898153841200398e-05, | |
| "loss": 0.1834, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.5753424657534246, | |
| "grad_norm": 3.3278088569641113, | |
| "learning_rate": 1.892676204972423e-05, | |
| "loss": 0.1277, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.5772994129158513, | |
| "grad_norm": 4.314577579498291, | |
| "learning_rate": 1.8870634320799822e-05, | |
| "loss": 0.2104, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.5772994129158513, | |
| "eval_loss": 0.09903673827648163, | |
| "eval_runtime": 107.6434, | |
| "eval_samples_per_second": 28.353, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.8781909348259072, | |
| "eval_sts-test_pearson_dot": 0.8596231931866185, | |
| "eval_sts-test_pearson_euclidean": 0.9076411156234586, | |
| "eval_sts-test_pearson_manhattan": 0.9069147632233857, | |
| "eval_sts-test_pearson_max": 0.9076411156234586, | |
| "eval_sts-test_spearman_cosine": 0.9042011607174669, | |
| "eval_sts-test_spearman_dot": 0.8660264551976247, | |
| "eval_sts-test_spearman_euclidean": 0.9044265280698341, | |
| "eval_sts-test_spearman_manhattan": 0.9041656729671835, | |
| "eval_sts-test_spearman_max": 0.9044265280698341, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.5792563600782779, | |
| "grad_norm": 3.195991039276123, | |
| "learning_rate": 1.8813163722056397e-05, | |
| "loss": 0.1294, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.5812133072407045, | |
| "grad_norm": 3.6352145671844482, | |
| "learning_rate": 1.875435895360826e-05, | |
| "loss": 0.1672, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.5831702544031311, | |
| "grad_norm": 3.7248518466949463, | |
| "learning_rate": 1.8694228917541313e-05, | |
| "loss": 0.2171, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.5851272015655578, | |
| "grad_norm": 3.459801435470581, | |
| "learning_rate": 1.8632782716565438e-05, | |
| "loss": 0.1451, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.5870841487279843, | |
| "grad_norm": 2.6911542415618896, | |
| "learning_rate": 1.857002965263648e-05, | |
| "loss": 0.0871, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.5870841487279843, | |
| "eval_loss": 0.09800251573324203, | |
| "eval_runtime": 107.2338, | |
| "eval_samples_per_second": 28.461, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8772925626670083, | |
| "eval_sts-test_pearson_dot": 0.8566016359384749, | |
| "eval_sts-test_pearson_euclidean": 0.9070931796775764, | |
| "eval_sts-test_pearson_manhattan": 0.9064105714529896, | |
| "eval_sts-test_pearson_max": 0.9070931796775764, | |
| "eval_sts-test_spearman_cosine": 0.9032592361677008, | |
| "eval_sts-test_spearman_dot": 0.8623085204012272, | |
| "eval_sts-test_spearman_euclidean": 0.9038942565668446, | |
| "eval_sts-test_spearman_manhattan": 0.9033954590073763, | |
| "eval_sts-test_spearman_max": 0.9038942565668446, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.589041095890411, | |
| "grad_norm": 2.913508653640747, | |
| "learning_rate": 1.850597922554809e-05, | |
| "loss": 0.0897, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.5909980430528375, | |
| "grad_norm": 3.2928783893585205, | |
| "learning_rate": 1.844064113149361e-05, | |
| "loss": 0.1296, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.5929549902152642, | |
| "grad_norm": 3.2551913261413574, | |
| "learning_rate": 1.8374025261598224e-05, | |
| "loss": 0.1206, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.5949119373776908, | |
| "grad_norm": 3.246716022491455, | |
| "learning_rate": 1.8306141700421606e-05, | |
| "loss": 0.1665, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.5968688845401174, | |
| "grad_norm": 3.980085611343384, | |
| "learning_rate": 1.8237000724431283e-05, | |
| "loss": 0.1511, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.5968688845401174, | |
| "eval_loss": 0.09785618633031845, | |
| "eval_runtime": 107.3879, | |
| "eval_samples_per_second": 28.42, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.8767817266460346, | |
| "eval_sts-test_pearson_dot": 0.8544828510438696, | |
| "eval_sts-test_pearson_euclidean": 0.9070553577944469, | |
| "eval_sts-test_pearson_manhattan": 0.9065146784679962, | |
| "eval_sts-test_pearson_max": 0.9070553577944469, | |
| "eval_sts-test_spearman_cosine": 0.9032290290662617, | |
| "eval_sts-test_spearman_dot": 0.8599922398628699, | |
| "eval_sts-test_spearman_euclidean": 0.9039456310149221, | |
| "eval_sts-test_spearman_manhattan": 0.9035283702537087, | |
| "eval_sts-test_spearman_max": 0.9039456310149221, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.598825831702544, | |
| "grad_norm": 3.576425790786743, | |
| "learning_rate": 1.8166612800446927e-05, | |
| "loss": 0.1566, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.6007827788649707, | |
| "grad_norm": 3.3370437622070312, | |
| "learning_rate": 1.809498858405589e-05, | |
| "loss": 0.1339, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.6027397260273972, | |
| "grad_norm": 3.3882863521575928, | |
| "learning_rate": 1.802213891800007e-05, | |
| "loss": 0.1474, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.6046966731898239, | |
| "grad_norm": 2.9576971530914307, | |
| "learning_rate": 1.7948074830534535e-05, | |
| "loss": 0.1022, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.6066536203522505, | |
| "grad_norm": 3.737396001815796, | |
| "learning_rate": 1.7872807533758007e-05, | |
| "loss": 0.1263, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.6066536203522505, | |
| "eval_loss": 0.09827280789613724, | |
| "eval_runtime": 107.5911, | |
| "eval_samples_per_second": 28.367, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.8774447336518928, | |
| "eval_sts-test_pearson_dot": 0.8526883126161577, | |
| "eval_sts-test_pearson_euclidean": 0.9083025051320742, | |
| "eval_sts-test_pearson_manhattan": 0.9079128512948802, | |
| "eval_sts-test_pearson_max": 0.9083025051320742, | |
| "eval_sts-test_spearman_cosine": 0.9043404713941784, | |
| "eval_sts-test_spearman_dot": 0.8595169367156317, | |
| "eval_sts-test_spearman_euclidean": 0.9055969973115261, | |
| "eval_sts-test_spearman_manhattan": 0.9051065234866762, | |
| "eval_sts-test_spearman_max": 0.9055969973115261, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.6086105675146771, | |
| "grad_norm": 3.6634974479675293, | |
| "learning_rate": 1.7796348421915536e-05, | |
| "loss": 0.1713, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.6105675146771037, | |
| "grad_norm": 4.3175225257873535, | |
| "learning_rate": 1.7718709069673595e-05, | |
| "loss": 0.1628, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.6125244618395304, | |
| "grad_norm": 3.73574161529541, | |
| "learning_rate": 1.763990123036787e-05, | |
| "loss": 0.1585, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.6144814090019569, | |
| "grad_norm": 3.8439183235168457, | |
| "learning_rate": 1.7559936834223982e-05, | |
| "loss": 0.1419, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.6164383561643836, | |
| "grad_norm": 2.908531904220581, | |
| "learning_rate": 1.747882798655147e-05, | |
| "loss": 0.1136, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.6164383561643836, | |
| "eval_loss": 0.09831386059522629, | |
| "eval_runtime": 107.369, | |
| "eval_samples_per_second": 28.425, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8766201899554078, | |
| "eval_sts-test_pearson_dot": 0.8489007626542151, | |
| "eval_sts-test_pearson_euclidean": 0.907814904603313, | |
| "eval_sts-test_pearson_manhattan": 0.9075136258935672, | |
| "eval_sts-test_pearson_max": 0.907814904603313, | |
| "eval_sts-test_spearman_cosine": 0.9040450683177336, | |
| "eval_sts-test_spearman_dot": 0.856758468963466, | |
| "eval_sts-test_spearman_euclidean": 0.9053801326988233, | |
| "eval_sts-test_spearman_manhattan": 0.9047017483273913, | |
| "eval_sts-test_spearman_max": 0.9053801326988233, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.6183953033268101, | |
| "grad_norm": 4.284037113189697, | |
| "learning_rate": 1.739658696591121e-05, | |
| "loss": 0.255, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.6203522504892368, | |
| "grad_norm": 3.051182270050049, | |
| "learning_rate": 1.7313226222256675e-05, | |
| "loss": 0.1262, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.6223091976516634, | |
| "grad_norm": 3.270893096923828, | |
| "learning_rate": 1.7228758375049186e-05, | |
| "loss": 0.1393, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.62426614481409, | |
| "grad_norm": 3.4940428733825684, | |
| "learning_rate": 1.714319621134755e-05, | |
| "loss": 0.1134, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.6262230919765166, | |
| "grad_norm": 3.899348258972168, | |
| "learning_rate": 1.705655268387229e-05, | |
| "loss": 0.1441, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.6262230919765166, | |
| "eval_loss": 0.09844871610403061, | |
| "eval_runtime": 107.3824, | |
| "eval_samples_per_second": 28.422, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8758902555910153, | |
| "eval_sts-test_pearson_dot": 0.8487463478874753, | |
| "eval_sts-test_pearson_euclidean": 0.9067076042499689, | |
| "eval_sts-test_pearson_manhattan": 0.9065947885559749, | |
| "eval_sts-test_pearson_max": 0.9067076042499689, | |
| "eval_sts-test_spearman_cosine": 0.9032847891379552, | |
| "eval_sts-test_spearman_dot": 0.8557776108162892, | |
| "eval_sts-test_spearman_euclidean": 0.9042920057780914, | |
| "eval_sts-test_spearman_manhattan": 0.9038587688165614, | |
| "eval_sts-test_spearman_max": 0.9042920057780914, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.6281800391389433, | |
| "grad_norm": 4.422016143798828, | |
| "learning_rate": 1.696884090904484e-05, | |
| "loss": 0.1744, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.6301369863013698, | |
| "grad_norm": 3.950225353240967, | |
| "learning_rate": 1.6880074165001906e-05, | |
| "loss": 0.2124, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.6320939334637965, | |
| "grad_norm": 3.2186155319213867, | |
| "learning_rate": 1.6790265889585377e-05, | |
| "loss": 0.1267, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.6340508806262231, | |
| "grad_norm": 3.156022548675537, | |
| "learning_rate": 1.669942967830807e-05, | |
| "loss": 0.1435, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.6360078277886497, | |
| "grad_norm": 3.511422634124756, | |
| "learning_rate": 1.6607579282295572e-05, | |
| "loss": 0.1705, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.6360078277886497, | |
| "eval_loss": 0.09894353151321411, | |
| "eval_runtime": 107.3913, | |
| "eval_samples_per_second": 28.419, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.8748236722620941, | |
| "eval_sts-test_pearson_dot": 0.8489660029264176, | |
| "eval_sts-test_pearson_euclidean": 0.9056717167596496, | |
| "eval_sts-test_pearson_manhattan": 0.9057306950198961, | |
| "eval_sts-test_pearson_max": 0.9057306950198961, | |
| "eval_sts-test_spearman_cosine": 0.9023375391880836, | |
| "eval_sts-test_spearman_dot": 0.8556132394331987, | |
| "eval_sts-test_spearman_euclidean": 0.9032693499527753, | |
| "eval_sts-test_spearman_manhattan": 0.9032065639330431, | |
| "eval_sts-test_spearman_max": 0.9032693499527753, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.6379647749510763, | |
| "grad_norm": 3.2097976207733154, | |
| "learning_rate": 1.651472860620455e-05, | |
| "loss": 0.1441, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.639921722113503, | |
| "grad_norm": 3.0201833248138428, | |
| "learning_rate": 1.6420891706117818e-05, | |
| "loss": 0.118, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.6418786692759295, | |
| "grad_norm": 3.370908737182617, | |
| "learning_rate": 1.6326082787416465e-05, | |
| "loss": 0.1956, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.6438356164383562, | |
| "grad_norm": 2.768566131591797, | |
| "learning_rate": 1.6230316202629393e-05, | |
| "loss": 0.0803, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.6457925636007827, | |
| "grad_norm": 3.2455928325653076, | |
| "learning_rate": 1.613360644926059e-05, | |
| "loss": 0.1651, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.6457925636007827, | |
| "eval_loss": 0.09914453327655792, | |
| "eval_runtime": 107.2995, | |
| "eval_samples_per_second": 28.444, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8743455111936658, | |
| "eval_sts-test_pearson_dot": 0.8479229638933452, | |
| "eval_sts-test_pearson_euclidean": 0.9055198964101038, | |
| "eval_sts-test_pearson_manhattan": 0.9055992524553022, | |
| "eval_sts-test_pearson_max": 0.9055992524553022, | |
| "eval_sts-test_spearman_cosine": 0.9022275405875834, | |
| "eval_sts-test_spearman_dot": 0.8543106197166178, | |
| "eval_sts-test_spearman_euclidean": 0.9029672341871219, | |
| "eval_sts-test_spearman_manhattan": 0.9028108285285591, | |
| "eval_sts-test_spearman_max": 0.9029672341871219, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.6477495107632094, | |
| "grad_norm": 3.465236186981201, | |
| "learning_rate": 1.603596816759442e-05, | |
| "loss": 0.1498, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.649706457925636, | |
| "grad_norm": 3.303255558013916, | |
| "learning_rate": 1.5937416138479344e-05, | |
| "loss": 0.1171, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.6516634050880626, | |
| "grad_norm": 3.893554449081421, | |
| "learning_rate": 1.5837965281090334e-05, | |
| "loss": 0.1976, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.6536203522504892, | |
| "grad_norm": 2.688338041305542, | |
| "learning_rate": 1.5737630650670336e-05, | |
| "loss": 0.0926, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.6555772994129159, | |
| "grad_norm": 3.4313673973083496, | |
| "learning_rate": 1.5636427436251182e-05, | |
| "loss": 0.1496, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.6555772994129159, | |
| "eval_loss": 0.09911184757947922, | |
| "eval_runtime": 107.4795, | |
| "eval_samples_per_second": 28.396, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.8746652537201496, | |
| "eval_sts-test_pearson_dot": 0.8492828619583224, | |
| "eval_sts-test_pearson_euclidean": 0.906047319803132, | |
| "eval_sts-test_pearson_manhattan": 0.9060247174283395, | |
| "eval_sts-test_pearson_max": 0.906047319803132, | |
| "eval_sts-test_spearman_cosine": 0.9026310179602884, | |
| "eval_sts-test_spearman_dot": 0.856069836553175, | |
| "eval_sts-test_spearman_euclidean": 0.9034376594468683, | |
| "eval_sts-test_spearman_manhattan": 0.9036356837785253, | |
| "eval_sts-test_spearman_max": 0.9036356837785253, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.6575342465753424, | |
| "grad_norm": 3.2240829467773438, | |
| "learning_rate": 1.5534370958354184e-05, | |
| "loss": 0.1131, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.6594911937377691, | |
| "grad_norm": 3.2019200325012207, | |
| "learning_rate": 1.5431476666670885e-05, | |
| "loss": 0.1352, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.6614481409001957, | |
| "grad_norm": 3.5696215629577637, | |
| "learning_rate": 1.5327760137724213e-05, | |
| "loss": 0.1608, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.6634050880626223, | |
| "grad_norm": 3.2444350719451904, | |
| "learning_rate": 1.5223237072510433e-05, | |
| "loss": 0.1239, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.6653620352250489, | |
| "grad_norm": 3.1613712310791016, | |
| "learning_rate": 1.5117923294122312e-05, | |
| "loss": 0.1227, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.6653620352250489, | |
| "eval_loss": 0.09929565340280533, | |
| "eval_runtime": 107.2604, | |
| "eval_samples_per_second": 28.454, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.874603261003018, | |
| "eval_sts-test_pearson_dot": 0.8513553588561518, | |
| "eval_sts-test_pearson_euclidean": 0.9056124511024704, | |
| "eval_sts-test_pearson_manhattan": 0.9053134930024975, | |
| "eval_sts-test_pearson_max": 0.9056124511024704, | |
| "eval_sts-test_spearman_cosine": 0.9019408192558488, | |
| "eval_sts-test_spearman_dot": 0.8587178581922269, | |
| "eval_sts-test_spearman_euclidean": 0.9026150865112329, | |
| "eval_sts-test_spearman_manhattan": 0.9023037967369943, | |
| "eval_sts-test_spearman_max": 0.9026150865112329, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.6673189823874756, | |
| "grad_norm": 3.3650641441345215, | |
| "learning_rate": 1.5011834745353725e-05, | |
| "loss": 0.1452, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.6692759295499021, | |
| "grad_norm": 3.7061643600463867, | |
| "learning_rate": 1.4904987486286184e-05, | |
| "loss": 0.1992, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.6712328767123288, | |
| "grad_norm": 3.262500286102295, | |
| "learning_rate": 1.4797397691857614e-05, | |
| "loss": 0.1349, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.6731898238747553, | |
| "grad_norm": 3.4780774116516113, | |
| "learning_rate": 1.468908164941371e-05, | |
| "loss": 0.1702, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.675146771037182, | |
| "grad_norm": 2.908043146133423, | |
| "learning_rate": 1.4580055756242315e-05, | |
| "loss": 0.1033, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.675146771037182, | |
| "eval_loss": 0.09903653711080551, | |
| "eval_runtime": 107.5298, | |
| "eval_samples_per_second": 28.383, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.8748590023241125, | |
| "eval_sts-test_pearson_dot": 0.8538817887560792, | |
| "eval_sts-test_pearson_euclidean": 0.905694726781384, | |
| "eval_sts-test_pearson_manhattan": 0.9051916896005284, | |
| "eval_sts-test_pearson_max": 0.905694726781384, | |
| "eval_sts-test_spearman_cosine": 0.9022368936012142, | |
| "eval_sts-test_spearman_dot": 0.86127843586652, | |
| "eval_sts-test_spearman_euclidean": 0.9024703161806319, | |
| "eval_sts-test_spearman_manhattan": 0.9023726241770144, | |
| "eval_sts-test_spearman_max": 0.9024703161806319, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.6771037181996086, | |
| "grad_norm": 3.7118523120880127, | |
| "learning_rate": 1.4470336517091139e-05, | |
| "loss": 0.1788, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.6790606653620352, | |
| "grad_norm": 3.106895923614502, | |
| "learning_rate": 1.435994054166919e-05, | |
| "loss": 0.1084, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.6810176125244618, | |
| "grad_norm": 3.782027244567871, | |
| "learning_rate": 1.4248884542132348e-05, | |
| "loss": 0.1325, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.6829745596868885, | |
| "grad_norm": 3.8729352951049805, | |
| "learning_rate": 1.4137185330553416e-05, | |
| "loss": 0.1537, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.684931506849315, | |
| "grad_norm": 3.7617311477661133, | |
| "learning_rate": 1.4024859816377046e-05, | |
| "loss": 0.2099, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.684931506849315, | |
| "eval_loss": 0.09886621683835983, | |
| "eval_runtime": 107.402, | |
| "eval_samples_per_second": 28.417, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.8751440351237663, | |
| "eval_sts-test_pearson_dot": 0.8544171062513735, | |
| "eval_sts-test_pearson_euclidean": 0.906084032839116, | |
| "eval_sts-test_pearson_manhattan": 0.9052674845128671, | |
| "eval_sts-test_pearson_max": 0.906084032839116, | |
| "eval_sts-test_spearman_cosine": 0.9021938876390173, | |
| "eval_sts-test_spearman_dot": 0.861093657908235, | |
| "eval_sts-test_spearman_euclidean": 0.902945753581654, | |
| "eval_sts-test_spearman_manhattan": 0.9021002232489249, | |
| "eval_sts-test_spearman_max": 0.902945753581654, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.6868884540117417, | |
| "grad_norm": 3.419968366622925, | |
| "learning_rate": 1.3911925003859907e-05, | |
| "loss": 0.1603, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.6888454011741683, | |
| "grad_norm": 3.050192356109619, | |
| "learning_rate": 1.3798397989496549e-05, | |
| "loss": 0.0982, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.6908023483365949, | |
| "grad_norm": 3.8518471717834473, | |
| "learning_rate": 1.3684295959431241e-05, | |
| "loss": 0.1537, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.6927592954990215, | |
| "grad_norm": 3.516019582748413, | |
| "learning_rate": 1.3569636186856286e-05, | |
| "loss": 0.1758, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.6947162426614482, | |
| "grad_norm": 3.678056240081787, | |
| "learning_rate": 1.3454436029397135e-05, | |
| "loss": 0.1521, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.6947162426614482, | |
| "eval_loss": 0.09901077300310135, | |
| "eval_runtime": 107.3981, | |
| "eval_samples_per_second": 28.418, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.8757904015245911, | |
| "eval_sts-test_pearson_dot": 0.8540145186864471, | |
| "eval_sts-test_pearson_euclidean": 0.9064035478905541, | |
| "eval_sts-test_pearson_manhattan": 0.9052325995566524, | |
| "eval_sts-test_pearson_max": 0.9064035478905541, | |
| "eval_sts-test_spearman_cosine": 0.9019130734737861, | |
| "eval_sts-test_spearman_dot": 0.859719212417121, | |
| "eval_sts-test_spearman_euclidean": 0.9030878388365718, | |
| "eval_sts-test_spearman_manhattan": 0.9015811981193075, | |
| "eval_sts-test_spearman_max": 0.9030878388365718, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.6966731898238747, | |
| "grad_norm": 2.466977119445801, | |
| "learning_rate": 1.3338712926484722e-05, | |
| "loss": 0.089, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.6986301369863014, | |
| "grad_norm": 3.8731167316436768, | |
| "learning_rate": 1.322248439671543e-05, | |
| "loss": 0.1509, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.700587084148728, | |
| "grad_norm": 4.406742572784424, | |
| "learning_rate": 1.3105768035199033e-05, | |
| "loss": 0.1943, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.7025440313111546, | |
| "grad_norm": 3.6811671257019043, | |
| "learning_rate": 1.2988581510895118e-05, | |
| "loss": 0.1582, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.7045009784735812, | |
| "grad_norm": 3.6019861698150635, | |
| "learning_rate": 1.2870942563938265e-05, | |
| "loss": 0.1527, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.7045009784735812, | |
| "eval_loss": 0.09933393448591232, | |
| "eval_runtime": 107.4727, | |
| "eval_samples_per_second": 28.398, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.8759901613929976, | |
| "eval_sts-test_pearson_dot": 0.8539317550786957, | |
| "eval_sts-test_pearson_euclidean": 0.9058198972317745, | |
| "eval_sts-test_pearson_manhattan": 0.9045671303429671, | |
| "eval_sts-test_pearson_max": 0.9058198972317745, | |
| "eval_sts-test_spearman_cosine": 0.9007536577936579, | |
| "eval_sts-test_spearman_dot": 0.8583788673871872, | |
| "eval_sts-test_spearman_euclidean": 0.9019342408204244, | |
| "eval_sts-test_spearman_manhattan": 0.9006401238435077, | |
| "eval_sts-test_spearman_max": 0.9019342408204244, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.7064579256360078, | |
| "grad_norm": 2.7014875411987305, | |
| "learning_rate": 1.2752869002952492e-05, | |
| "loss": 0.0754, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.7084148727984344, | |
| "grad_norm": 3.4292407035827637, | |
| "learning_rate": 1.2634378702355317e-05, | |
| "loss": 0.122, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.7103718199608611, | |
| "grad_norm": 3.9553112983703613, | |
| "learning_rate": 1.2515489599651846e-05, | |
| "loss": 0.1727, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.7123287671232876, | |
| "grad_norm": 2.3133935928344727, | |
| "learning_rate": 1.2396219692719364e-05, | |
| "loss": 0.074, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.7142857142857143, | |
| "grad_norm": 4.136401176452637, | |
| "learning_rate": 1.2276587037082707e-05, | |
| "loss": 0.1822, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.7142857142857143, | |
| "eval_loss": 0.09977750480175018, | |
| "eval_runtime": 107.5146, | |
| "eval_samples_per_second": 28.387, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.8763860642424627, | |
| "eval_sts-test_pearson_dot": 0.8548955898076025, | |
| "eval_sts-test_pearson_euclidean": 0.9053617182443816, | |
| "eval_sts-test_pearson_manhattan": 0.9041710686717819, | |
| "eval_sts-test_pearson_max": 0.9053617182443816, | |
| "eval_sts-test_spearman_cosine": 0.9004566884230645, | |
| "eval_sts-test_spearman_dot": 0.8588638368068857, | |
| "eval_sts-test_spearman_euclidean": 0.90101585543415, | |
| "eval_sts-test_spearman_manhattan": 0.9002594696141124, | |
| "eval_sts-test_spearman_max": 0.90101585543415, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.7162426614481409, | |
| "grad_norm": 3.4787533283233643, | |
| "learning_rate": 1.215660974318097e-05, | |
| "loss": 0.1344, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.7181996086105675, | |
| "grad_norm": 3.3478775024414062, | |
| "learning_rate": 1.2036305973625881e-05, | |
| "loss": 0.1819, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.7201565557729941, | |
| "grad_norm": 3.435234546661377, | |
| "learning_rate": 1.191569394045228e-05, | |
| "loss": 0.1811, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.7221135029354208, | |
| "grad_norm": 3.827272653579712, | |
| "learning_rate": 1.1794791902361095e-05, | |
| "loss": 0.1564, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.7240704500978473, | |
| "grad_norm": 4.088834762573242, | |
| "learning_rate": 1.1673618161955288e-05, | |
| "loss": 0.1522, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.7240704500978473, | |
| "eval_loss": 0.09972013533115387, | |
| "eval_runtime": 107.5001, | |
| "eval_samples_per_second": 28.391, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.8760486874181325, | |
| "eval_sts-test_pearson_dot": 0.8547648526750222, | |
| "eval_sts-test_pearson_euclidean": 0.9048215352945358, | |
| "eval_sts-test_pearson_manhattan": 0.9037328672460638, | |
| "eval_sts-test_pearson_max": 0.9048215352945358, | |
| "eval_sts-test_spearman_cosine": 0.8997958912973589, | |
| "eval_sts-test_spearman_dot": 0.859370958100973, | |
| "eval_sts-test_spearman_euclidean": 0.9005396125104228, | |
| "eval_sts-test_spearman_manhattan": 0.8996819545858564, | |
| "eval_sts-test_spearman_max": 0.9005396125104228, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.726027397260274, | |
| "grad_norm": 3.117750406265259, | |
| "learning_rate": 1.1552191062969147e-05, | |
| "loss": 0.1379, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.7279843444227005, | |
| "grad_norm": 2.870415449142456, | |
| "learning_rate": 1.1430528987491303e-05, | |
| "loss": 0.082, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.7299412915851272, | |
| "grad_norm": 3.0934267044067383, | |
| "learning_rate": 1.1308650353182036e-05, | |
| "loss": 0.1288, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.7318982387475538, | |
| "grad_norm": 4.175031661987305, | |
| "learning_rate": 1.1186573610485099e-05, | |
| "loss": 0.1809, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.7338551859099804, | |
| "grad_norm": 4.580765724182129, | |
| "learning_rate": 1.1064317239834628e-05, | |
| "loss": 0.2418, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.7338551859099804, | |
| "eval_loss": 0.0990942195057869, | |
| "eval_runtime": 107.4134, | |
| "eval_samples_per_second": 28.414, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.8759927956331243, | |
| "eval_sts-test_pearson_dot": 0.8545842177056733, | |
| "eval_sts-test_pearson_euclidean": 0.9050231511967292, | |
| "eval_sts-test_pearson_manhattan": 0.9040000709018708, | |
| "eval_sts-test_pearson_max": 0.9050231511967292, | |
| "eval_sts-test_spearman_cosine": 0.9005535301527153, | |
| "eval_sts-test_spearman_dot": 0.8591386543030902, | |
| "eval_sts-test_spearman_euclidean": 0.9009244733583888, | |
| "eval_sts-test_spearman_manhattan": 0.9002575453098726, | |
| "eval_sts-test_spearman_max": 0.9009244733583888, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.735812133072407, | |
| "grad_norm": 2.8520517349243164, | |
| "learning_rate": 1.094189974885752e-05, | |
| "loss": 0.0789, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.7377690802348337, | |
| "grad_norm": 3.5163254737854004, | |
| "learning_rate": 1.081933966957167e-05, | |
| "loss": 0.132, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.7397260273972602, | |
| "grad_norm": 3.160409688949585, | |
| "learning_rate": 1.0696655555580527e-05, | |
| "loss": 0.1425, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.7416829745596869, | |
| "grad_norm": 3.013707160949707, | |
| "learning_rate": 1.0573865979264362e-05, | |
| "loss": 0.1514, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.7436399217221135, | |
| "grad_norm": 2.8930296897888184, | |
| "learning_rate": 1.0450989528968747e-05, | |
| "loss": 0.0997, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.7436399217221135, | |
| "eval_loss": 0.09842444956302643, | |
| "eval_runtime": 107.304, | |
| "eval_samples_per_second": 28.443, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8764482210558782, | |
| "eval_sts-test_pearson_dot": 0.8556595611954538, | |
| "eval_sts-test_pearson_euclidean": 0.9055523774056253, | |
| "eval_sts-test_pearson_manhattan": 0.9046224074077234, | |
| "eval_sts-test_pearson_max": 0.9055523774056253, | |
| "eval_sts-test_spearman_cosine": 0.9017774324005088, | |
| "eval_sts-test_spearman_dot": 0.860274840931642, | |
| "eval_sts-test_spearman_euclidean": 0.9016713719110112, | |
| "eval_sts-test_spearman_manhattan": 0.9011951289872838, | |
| "eval_sts-test_spearman_max": 0.9017774324005088, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.7455968688845401, | |
| "grad_norm": 4.028687477111816, | |
| "learning_rate": 1.0328044806190547e-05, | |
| "loss": 0.2002, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.7475538160469667, | |
| "grad_norm": 3.8006536960601807, | |
| "learning_rate": 1.0205050422761989e-05, | |
| "loss": 0.1943, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.7495107632093934, | |
| "grad_norm": 2.689953088760376, | |
| "learning_rate": 1.0082024998033092e-05, | |
| "loss": 0.1198, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.7514677103718199, | |
| "grad_norm": 3.1326684951782227, | |
| "learning_rate": 9.95898715605304e-06, | |
| "loss": 0.1171, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.7534246575342466, | |
| "grad_norm": 2.8200089931488037, | |
| "learning_rate": 9.835955522750789e-06, | |
| "loss": 0.0872, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.7534246575342466, | |
| "eval_loss": 0.09781364351511002, | |
| "eval_runtime": 107.4026, | |
| "eval_samples_per_second": 28.416, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.8764091455338202, | |
| "eval_sts-test_pearson_dot": 0.8558042892931453, | |
| "eval_sts-test_pearson_euclidean": 0.9058212793054227, | |
| "eval_sts-test_pearson_manhattan": 0.9049414370234095, | |
| "eval_sts-test_pearson_max": 0.9058212793054227, | |
| "eval_sts-test_spearman_cosine": 0.902642877044557, | |
| "eval_sts-test_spearman_dot": 0.861618590204356, | |
| "eval_sts-test_spearman_euclidean": 0.9022327317339048, | |
| "eval_sts-test_spearman_manhattan": 0.9017312490987527, | |
| "eval_sts-test_spearman_max": 0.902642877044557, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.7553816046966731, | |
| "grad_norm": 3.0478882789611816, | |
| "learning_rate": 9.712948723115384e-06, | |
| "loss": 0.0937, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.7573385518590998, | |
| "grad_norm": 3.0198819637298584, | |
| "learning_rate": 9.589985378376474e-06, | |
| "loss": 0.0933, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.7592954990215264, | |
| "grad_norm": 3.319575786590576, | |
| "learning_rate": 9.46708410318533e-06, | |
| "loss": 0.1109, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.761252446183953, | |
| "grad_norm": 3.1134960651397705, | |
| "learning_rate": 9.344263502796918e-06, | |
| "loss": 0.0999, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.7632093933463796, | |
| "grad_norm": 3.596510887145996, | |
| "learning_rate": 9.221542170253334e-06, | |
| "loss": 0.1625, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.7632093933463796, | |
| "eval_loss": 0.09730728715658188, | |
| "eval_runtime": 107.3398, | |
| "eval_samples_per_second": 28.433, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8764763861944456, | |
| "eval_sts-test_pearson_dot": 0.8561556513692123, | |
| "eval_sts-test_pearson_euclidean": 0.9062056976600675, | |
| "eval_sts-test_pearson_manhattan": 0.9053202975786792, | |
| "eval_sts-test_pearson_max": 0.9062056976600675, | |
| "eval_sts-test_spearman_cosine": 0.9034457146739187, | |
| "eval_sts-test_spearman_dot": 0.8626341305791112, | |
| "eval_sts-test_spearman_euclidean": 0.9030662687285811, | |
| "eval_sts-test_spearman_manhattan": 0.9025959777226189, | |
| "eval_sts-test_spearman_max": 0.9034457146739187, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.7651663405088063, | |
| "grad_norm": 2.9087250232696533, | |
| "learning_rate": 9.098938683569155e-06, | |
| "loss": 0.1357, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.7671232876712328, | |
| "grad_norm": 3.40970516204834, | |
| "learning_rate": 8.97647160291899e-06, | |
| "loss": 0.1202, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.7690802348336595, | |
| "grad_norm": 2.962822437286377, | |
| "learning_rate": 8.854159467827808e-06, | |
| "loss": 0.116, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.7710371819960861, | |
| "grad_norm": 3.2842860221862793, | |
| "learning_rate": 8.732020794364327e-06, | |
| "loss": 0.1256, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.7729941291585127, | |
| "grad_norm": 3.8854291439056396, | |
| "learning_rate": 8.610074072338006e-06, | |
| "loss": 0.2402, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.7729941291585127, | |
| "eval_loss": 0.09690071642398834, | |
| "eval_runtime": 107.3243, | |
| "eval_samples_per_second": 28.437, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8764916753522476, | |
| "eval_sts-test_pearson_dot": 0.8559063099369891, | |
| "eval_sts-test_pearson_euclidean": 0.906553160229111, | |
| "eval_sts-test_pearson_manhattan": 0.9056446394888255, | |
| "eval_sts-test_pearson_max": 0.906553160229111, | |
| "eval_sts-test_spearman_cosine": 0.9036960084788808, | |
| "eval_sts-test_spearman_dot": 0.8628471465833341, | |
| "eval_sts-test_spearman_euclidean": 0.9032961559583487, | |
| "eval_sts-test_spearman_manhattan": 0.9026765747443849, | |
| "eval_sts-test_spearman_max": 0.9036960084788808, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.7749510763209393, | |
| "grad_norm": 4.216285705566406, | |
| "learning_rate": 8.488337762499971e-06, | |
| "loss": 0.2413, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.776908023483366, | |
| "grad_norm": 3.007631540298462, | |
| "learning_rate": 8.366830293748364e-06, | |
| "loss": 0.1144, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.7788649706457925, | |
| "grad_norm": 3.3882946968078613, | |
| "learning_rate": 8.245570060338511e-06, | |
| "loss": 0.1198, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.7808219178082192, | |
| "grad_norm": 3.6439969539642334, | |
| "learning_rate": 8.124575419098321e-06, | |
| "loss": 0.1361, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.7827788649706457, | |
| "grad_norm": 3.3682761192321777, | |
| "learning_rate": 8.003864686649369e-06, | |
| "loss": 0.1496, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.7827788649706457, | |
| "eval_loss": 0.09637484699487686, | |
| "eval_runtime": 107.2784, | |
| "eval_samples_per_second": 28.449, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8766720014290486, | |
| "eval_sts-test_pearson_dot": 0.8562700614904787, | |
| "eval_sts-test_pearson_euclidean": 0.9067249528004879, | |
| "eval_sts-test_pearson_manhattan": 0.905876462094101, | |
| "eval_sts-test_pearson_max": 0.9067249528004879, | |
| "eval_sts-test_spearman_cosine": 0.9038214015132995, | |
| "eval_sts-test_spearman_dot": 0.8632292328530939, | |
| "eval_sts-test_spearman_euclidean": 0.9036244959631774, | |
| "eval_sts-test_spearman_manhattan": 0.9027354226531145, | |
| "eval_sts-test_spearman_max": 0.9038214015132995, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.7847358121330724, | |
| "grad_norm": 3.5519731044769287, | |
| "learning_rate": 7.883456136634053e-06, | |
| "loss": 0.1606, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.786692759295499, | |
| "grad_norm": 4.0106329917907715, | |
| "learning_rate": 7.763367996949262e-06, | |
| "loss": 0.1739, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.7886497064579256, | |
| "grad_norm": 3.347114086151123, | |
| "learning_rate": 7.64361844698699e-06, | |
| "loss": 0.1121, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.7906066536203522, | |
| "grad_norm": 3.429165840148926, | |
| "learning_rate": 7.524225614882216e-06, | |
| "loss": 0.1176, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.7925636007827789, | |
| "grad_norm": 3.169438362121582, | |
| "learning_rate": 7.4052075747686625e-06, | |
| "loss": 0.1024, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.7925636007827789, | |
| "eval_loss": 0.09552557021379471, | |
| "eval_runtime": 107.3797, | |
| "eval_samples_per_second": 28.423, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8765829897566032, | |
| "eval_sts-test_pearson_dot": 0.856005218083783, | |
| "eval_sts-test_pearson_euclidean": 0.9066659750196452, | |
| "eval_sts-test_pearson_manhattan": 0.9058351541345429, | |
| "eval_sts-test_pearson_max": 0.9066659750196452, | |
| "eval_sts-test_spearman_cosine": 0.9033677132253135, | |
| "eval_sts-test_spearman_dot": 0.8628391808588065, | |
| "eval_sts-test_spearman_euclidean": 0.9033363425910781, | |
| "eval_sts-test_spearman_manhattan": 0.9027725662000693, | |
| "eval_sts-test_spearman_max": 0.9033677132253135, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.7945205479452054, | |
| "grad_norm": 3.571244239807129, | |
| "learning_rate": 7.286582344042625e-06, | |
| "loss": 0.1256, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.7964774951076321, | |
| "grad_norm": 3.144022226333618, | |
| "learning_rate": 7.168367880635454e-06, | |
| "loss": 0.1424, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.7984344422700587, | |
| "grad_norm": 3.899695634841919, | |
| "learning_rate": 7.050582080294996e-06, | |
| "loss": 0.181, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.8003913894324853, | |
| "grad_norm": 2.7152762413024902, | |
| "learning_rate": 6.933242773876481e-06, | |
| "loss": 0.0829, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.8023483365949119, | |
| "grad_norm": 4.248819351196289, | |
| "learning_rate": 6.816367724643225e-06, | |
| "loss": 0.2329, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.8023483365949119, | |
| "eval_loss": 0.09485543519258499, | |
| "eval_runtime": 107.35, | |
| "eval_samples_per_second": 28.43, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.876662263871305, | |
| "eval_sts-test_pearson_dot": 0.8565606973853341, | |
| "eval_sts-test_pearson_euclidean": 0.906503720630583, | |
| "eval_sts-test_pearson_manhattan": 0.9056869432887309, | |
| "eval_sts-test_pearson_max": 0.906503720630583, | |
| "eval_sts-test_spearman_cosine": 0.9031667800616662, | |
| "eval_sts-test_spearman_dot": 0.8635160884386128, | |
| "eval_sts-test_spearman_euclidean": 0.9027914064811151, | |
| "eval_sts-test_spearman_manhattan": 0.9026432798059095, | |
| "eval_sts-test_spearman_max": 0.9031667800616662, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.8043052837573386, | |
| "grad_norm": 2.724637508392334, | |
| "learning_rate": 6.699974625577545e-06, | |
| "loss": 0.075, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.8062622309197651, | |
| "grad_norm": 2.825380325317383, | |
| "learning_rate": 6.584081096702343e-06, | |
| "loss": 0.1157, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.8082191780821918, | |
| "grad_norm": 3.5397439002990723, | |
| "learning_rate": 6.4687046824137115e-06, | |
| "loss": 0.1383, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.8101761252446184, | |
| "grad_norm": 2.7954063415527344, | |
| "learning_rate": 6.353862848825011e-06, | |
| "loss": 0.1042, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.812133072407045, | |
| "grad_norm": 3.3894357681274414, | |
| "learning_rate": 6.2395729811227635e-06, | |
| "loss": 0.1352, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.812133072407045, | |
| "eval_loss": 0.09433061629533768, | |
| "eval_runtime": 107.4406, | |
| "eval_samples_per_second": 28.406, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.8762374184553781, | |
| "eval_sts-test_pearson_dot": 0.8565984619458994, | |
| "eval_sts-test_pearson_euclidean": 0.9059975243362595, | |
| "eval_sts-test_pearson_manhattan": 0.9052369107641345, | |
| "eval_sts-test_pearson_max": 0.9059975243362595, | |
| "eval_sts-test_spearman_cosine": 0.9025091804482506, | |
| "eval_sts-test_spearman_dot": 0.8638915067704253, | |
| "eval_sts-test_spearman_euclidean": 0.9023325717980691, | |
| "eval_sts-test_spearman_manhattan": 0.9021176314896061, | |
| "eval_sts-test_spearman_max": 0.9025091804482506, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.8140900195694716, | |
| "grad_norm": 2.5251049995422363, | |
| "learning_rate": 6.125852380934841e-06, | |
| "loss": 0.0778, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.8160469667318982, | |
| "grad_norm": 2.7683308124542236, | |
| "learning_rate": 6.012718263711261e-06, | |
| "loss": 0.1006, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.8180039138943248, | |
| "grad_norm": 4.330955982208252, | |
| "learning_rate": 5.900187756118055e-06, | |
| "loss": 0.2188, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.8199608610567515, | |
| "grad_norm": 4.001932621002197, | |
| "learning_rate": 5.788277893444574e-06, | |
| "loss": 0.1338, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.821917808219178, | |
| "grad_norm": 3.1434426307678223, | |
| "learning_rate": 5.677005617024618e-06, | |
| "loss": 0.1314, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.821917808219178, | |
| "eval_loss": 0.09401828795671463, | |
| "eval_runtime": 107.3518, | |
| "eval_samples_per_second": 28.43, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.875859101651175, | |
| "eval_sts-test_pearson_dot": 0.85667243561015, | |
| "eval_sts-test_pearson_euclidean": 0.9054683468020812, | |
| "eval_sts-test_pearson_manhattan": 0.9047678088467972, | |
| "eval_sts-test_pearson_max": 0.9054683468020812, | |
| "eval_sts-test_spearman_cosine": 0.9022520195275646, | |
| "eval_sts-test_spearman_dot": 0.8640136777140237, | |
| "eval_sts-test_spearman_euclidean": 0.9018411134454687, | |
| "eval_sts-test_spearman_manhattan": 0.9017817285216024, | |
| "eval_sts-test_spearman_max": 0.9022520195275646, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.8238747553816047, | |
| "grad_norm": 3.4323763847351074, | |
| "learning_rate": 5.566387771671788e-06, | |
| "loss": 0.1298, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.8258317025440313, | |
| "grad_norm": 3.4278817176818848, | |
| "learning_rate": 5.4564411031294695e-06, | |
| "loss": 0.1573, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.8277886497064579, | |
| "grad_norm": 3.182588577270508, | |
| "learning_rate": 5.34718225553579e-06, | |
| "loss": 0.1283, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.8297455968688845, | |
| "grad_norm": 3.8493266105651855, | |
| "learning_rate": 5.238627768903952e-06, | |
| "loss": 0.1998, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.8317025440313112, | |
| "grad_norm": 2.763507127761841, | |
| "learning_rate": 5.130794076618395e-06, | |
| "loss": 0.0747, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.8317025440313112, | |
| "eval_loss": 0.09391138702630997, | |
| "eval_runtime": 107.4165, | |
| "eval_samples_per_second": 28.413, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.8759008831174315, | |
| "eval_sts-test_pearson_dot": 0.8568367129286429, | |
| "eval_sts-test_pearson_euclidean": 0.9053146972146215, | |
| "eval_sts-test_pearson_manhattan": 0.9046055003987086, | |
| "eval_sts-test_pearson_max": 0.9053146972146215, | |
| "eval_sts-test_spearman_cosine": 0.9019464579147841, | |
| "eval_sts-test_spearman_dot": 0.8642364494932302, | |
| "eval_sts-test_spearman_euclidean": 0.9015885820774372, | |
| "eval_sts-test_spearman_manhattan": 0.9014194670606389, | |
| "eval_sts-test_spearman_max": 0.9019464579147841, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.8336594911937377, | |
| "grad_norm": 3.613516092300415, | |
| "learning_rate": 5.02369750294697e-06, | |
| "loss": 0.142, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.8356164383561644, | |
| "grad_norm": 3.1161856651306152, | |
| "learning_rate": 4.917354260569775e-06, | |
| "loss": 0.131, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.837573385518591, | |
| "grad_norm": 2.7324466705322266, | |
| "learning_rate": 4.811780448124812e-06, | |
| "loss": 0.1037, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.8395303326810176, | |
| "grad_norm": 3.0681588649749756, | |
| "learning_rate": 4.706992047770877e-06, | |
| "loss": 0.1145, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.8414872798434442, | |
| "grad_norm": 3.202045202255249, | |
| "learning_rate": 4.6030049227681484e-06, | |
| "loss": 0.1371, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.8414872798434442, | |
| "eval_loss": 0.09390870481729507, | |
| "eval_runtime": 107.3691, | |
| "eval_samples_per_second": 28.425, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8761008129035103, | |
| "eval_sts-test_pearson_dot": 0.8571168858418021, | |
| "eval_sts-test_pearson_euclidean": 0.9053023363687696, | |
| "eval_sts-test_pearson_manhattan": 0.9045322392753761, | |
| "eval_sts-test_pearson_max": 0.9053023363687696, | |
| "eval_sts-test_spearman_cosine": 0.9020062008487415, | |
| "eval_sts-test_spearman_dot": 0.8644168418278989, | |
| "eval_sts-test_spearman_euclidean": 0.9017023845351555, | |
| "eval_sts-test_spearman_manhattan": 0.9012797088713137, | |
| "eval_sts-test_spearman_max": 0.9020062008487415, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.8434442270058709, | |
| "grad_norm": 3.2915422916412354, | |
| "learning_rate": 4.4998348150767525e-06, | |
| "loss": 0.1457, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.8454011741682974, | |
| "grad_norm": 2.914283514022827, | |
| "learning_rate": 4.397497342973677e-06, | |
| "loss": 0.123, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.8473581213307241, | |
| "grad_norm": 2.803455114364624, | |
| "learning_rate": 4.296007998688405e-06, | |
| "loss": 0.1312, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.8493150684931506, | |
| "grad_norm": 3.1922266483306885, | |
| "learning_rate": 4.195382146057672e-06, | |
| "loss": 0.1301, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.8512720156555773, | |
| "grad_norm": 3.4597818851470947, | |
| "learning_rate": 4.095635018199612e-06, | |
| "loss": 0.1324, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.8512720156555773, | |
| "eval_loss": 0.09401452541351318, | |
| "eval_runtime": 107.4416, | |
| "eval_samples_per_second": 28.406, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.8763389911185249, | |
| "eval_sts-test_pearson_dot": 0.8577101143502727, | |
| "eval_sts-test_pearson_euclidean": 0.9054263092988353, | |
| "eval_sts-test_pearson_manhattan": 0.904625946001658, | |
| "eval_sts-test_pearson_max": 0.9054263092988353, | |
| "eval_sts-test_spearman_cosine": 0.902128819304954, | |
| "eval_sts-test_spearman_dot": 0.8650113175842223, | |
| "eval_sts-test_spearman_euclidean": 0.9018843879152342, | |
| "eval_sts-test_spearman_manhattan": 0.9012396564923683, | |
| "eval_sts-test_spearman_max": 0.902128819304954, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.8532289628180039, | |
| "grad_norm": 4.107654571533203, | |
| "learning_rate": 3.996781715207706e-06, | |
| "loss": 0.1826, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.8551859099804305, | |
| "grad_norm": 3.786817789077759, | |
| "learning_rate": 3.898837201864893e-06, | |
| "loss": 0.2145, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.8571428571428571, | |
| "grad_norm": 3.1700594425201416, | |
| "learning_rate": 3.8018163053781243e-06, | |
| "loss": 0.132, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.8590998043052838, | |
| "grad_norm": 3.451833963394165, | |
| "learning_rate": 3.7057337131337822e-06, | |
| "loss": 0.1263, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.8610567514677103, | |
| "grad_norm": 3.5724422931671143, | |
| "learning_rate": 3.610603970474239e-06, | |
| "loss": 0.1461, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.8610567514677103, | |
| "eval_loss": 0.09419582784175873, | |
| "eval_runtime": 107.4099, | |
| "eval_samples_per_second": 28.415, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.8765822162435379, | |
| "eval_sts-test_pearson_dot": 0.858518150878541, | |
| "eval_sts-test_pearson_euclidean": 0.9054994480778518, | |
| "eval_sts-test_pearson_manhattan": 0.9046991633393977, | |
| "eval_sts-test_pearson_max": 0.9054994480778518, | |
| "eval_sts-test_spearman_cosine": 0.9021761661395061, | |
| "eval_sts-test_spearman_dot": 0.8659527051188517, | |
| "eval_sts-test_spearman_euclidean": 0.9019459656509089, | |
| "eval_sts-test_spearman_manhattan": 0.9013235203562157, | |
| "eval_sts-test_spearman_max": 0.9021761661395061, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.863013698630137, | |
| "grad_norm": 3.693023204803467, | |
| "learning_rate": 3.5164414784959368e-06, | |
| "loss": 0.1349, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.8649706457925636, | |
| "grad_norm": 2.6913726329803467, | |
| "learning_rate": 3.423260491869276e-06, | |
| "loss": 0.0824, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.8669275929549902, | |
| "grad_norm": 3.2380688190460205, | |
| "learning_rate": 3.3310751166807186e-06, | |
| "loss": 0.1639, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.8688845401174168, | |
| "grad_norm": 3.5086116790771484, | |
| "learning_rate": 3.2398993082973294e-06, | |
| "loss": 0.1403, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.8708414872798435, | |
| "grad_norm": 3.3980696201324463, | |
| "learning_rate": 3.1497468692541812e-06, | |
| "loss": 0.1766, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.8708414872798435, | |
| "eval_loss": 0.09430497139692307, | |
| "eval_runtime": 107.5105, | |
| "eval_samples_per_second": 28.388, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.8769023883634329, | |
| "eval_sts-test_pearson_dot": 0.8592785166761266, | |
| "eval_sts-test_pearson_euclidean": 0.9057520441250233, | |
| "eval_sts-test_pearson_manhattan": 0.9049586591092414, | |
| "eval_sts-test_pearson_max": 0.9057520441250233, | |
| "eval_sts-test_spearman_cosine": 0.9023938810261752, | |
| "eval_sts-test_spearman_dot": 0.8662211679359387, | |
| "eval_sts-test_spearman_euclidean": 0.9022854934710853, | |
| "eval_sts-test_spearman_manhattan": 0.9016112262157012, | |
| "eval_sts-test_spearman_max": 0.9023938810261752, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.87279843444227, | |
| "grad_norm": 3.6057686805725098, | |
| "learning_rate": 3.0606314471648667e-06, | |
| "loss": 0.1402, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.8747553816046967, | |
| "grad_norm": 3.3049042224884033, | |
| "learning_rate": 2.972566532655462e-06, | |
| "loss": 0.1203, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.8767123287671232, | |
| "grad_norm": 3.3699026107788086, | |
| "learning_rate": 2.8855654573222824e-06, | |
| "loss": 0.1398, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.8786692759295499, | |
| "grad_norm": 4.141995906829834, | |
| "learning_rate": 2.79964139171369e-06, | |
| "loss": 0.2226, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.8806262230919765, | |
| "grad_norm": 3.005411386489868, | |
| "learning_rate": 2.7148073433362732e-06, | |
| "loss": 0.0943, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.8806262230919765, | |
| "eval_loss": 0.09433107823133469, | |
| "eval_runtime": 107.5997, | |
| "eval_samples_per_second": 28.364, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.8770850204583311, | |
| "eval_sts-test_pearson_dot": 0.859396639463007, | |
| "eval_sts-test_pearson_euclidean": 0.9058757986724881, | |
| "eval_sts-test_pearson_manhattan": 0.9050657797722572, | |
| "eval_sts-test_pearson_max": 0.9058757986724881, | |
| "eval_sts-test_spearman_cosine": 0.9023927622446406, | |
| "eval_sts-test_spearman_dot": 0.8658192568573825, | |
| "eval_sts-test_spearman_euclidean": 0.9024802062093993, | |
| "eval_sts-test_spearman_manhattan": 0.9016976856527094, | |
| "eval_sts-test_spearman_max": 0.9024802062093993, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.8825831702544031, | |
| "grad_norm": 3.1855523586273193, | |
| "learning_rate": 2.6310761546857433e-06, | |
| "loss": 0.1101, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.8845401174168297, | |
| "grad_norm": 3.945880651473999, | |
| "learning_rate": 2.5484605013027753e-06, | |
| "loss": 0.1536, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.8864970645792564, | |
| "grad_norm": 3.068859100341797, | |
| "learning_rate": 2.4669728898541456e-06, | |
| "loss": 0.1159, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.8884540117416829, | |
| "grad_norm": 2.9130120277404785, | |
| "learning_rate": 2.3866256562394084e-06, | |
| "loss": 0.1373, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.8904109589041096, | |
| "grad_norm": 3.2793118953704834, | |
| "learning_rate": 2.3074309637234673e-06, | |
| "loss": 0.1412, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.8904109589041096, | |
| "eval_loss": 0.0943305641412735, | |
| "eval_runtime": 107.5498, | |
| "eval_samples_per_second": 28.378, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.8773045053021702, | |
| "eval_sts-test_pearson_dot": 0.8595737170903098, | |
| "eval_sts-test_pearson_euclidean": 0.9060339759187682, | |
| "eval_sts-test_pearson_manhattan": 0.9052200019449477, | |
| "eval_sts-test_pearson_max": 0.9060339759187682, | |
| "eval_sts-test_spearman_cosine": 0.902556909871424, | |
| "eval_sts-test_spearman_dot": 0.8656299142704356, | |
| "eval_sts-test_spearman_euclidean": 0.9027095564240303, | |
| "eval_sts-test_spearman_manhattan": 0.9019418485348608, | |
| "eval_sts-test_spearman_max": 0.9027095564240303, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.8923679060665362, | |
| "grad_norm": 2.4695982933044434, | |
| "learning_rate": 2.229400801095235e-06, | |
| "loss": 0.0626, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.8943248532289628, | |
| "grad_norm": 3.314206838607788, | |
| "learning_rate": 2.15254698085274e-06, | |
| "loss": 0.1447, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.8962818003913894, | |
| "grad_norm": 2.9935507774353027, | |
| "learning_rate": 2.07688113741488e-06, | |
| "loss": 0.1296, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.898238747553816, | |
| "grad_norm": 3.59999942779541, | |
| "learning_rate": 2.0024147253601957e-06, | |
| "loss": 0.1407, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.9001956947162426, | |
| "grad_norm": 3.781248092651367, | |
| "learning_rate": 1.92915901769281e-06, | |
| "loss": 0.1966, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.9001956947162426, | |
| "eval_loss": 0.0943402424454689, | |
| "eval_runtime": 107.5535, | |
| "eval_samples_per_second": 28.377, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.8773605385802614, | |
| "eval_sts-test_pearson_dot": 0.8594795998035439, | |
| "eval_sts-test_pearson_euclidean": 0.906101891634431, | |
| "eval_sts-test_pearson_manhattan": 0.9052843548013115, | |
| "eval_sts-test_pearson_max": 0.906101891634431, | |
| "eval_sts-test_spearman_cosine": 0.9026339267922787, | |
| "eval_sts-test_spearman_dot": 0.8655708873566604, | |
| "eval_sts-test_spearman_euclidean": 0.9027678225663618, | |
| "eval_sts-test_spearman_manhattan": 0.9020037842806264, | |
| "eval_sts-test_spearman_max": 0.9027678225663618, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.9021526418786693, | |
| "grad_norm": 3.1703717708587646, | |
| "learning_rate": 1.8571251041358895e-06, | |
| "loss": 0.1019, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.9041095890410958, | |
| "grad_norm": 3.5208213329315186, | |
| "learning_rate": 1.786323889452828e-06, | |
| "loss": 0.1332, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.9060665362035225, | |
| "grad_norm": 3.7119576930999756, | |
| "learning_rate": 1.7167660917964557e-06, | |
| "loss": 0.1417, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.9080234833659491, | |
| "grad_norm": 3.853672981262207, | |
| "learning_rate": 1.6484622410864815e-06, | |
| "loss": 0.1782, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.9099804305283757, | |
| "grad_norm": 3.186000347137451, | |
| "learning_rate": 1.5814226774154351e-06, | |
| "loss": 0.1068, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.9099804305283757, | |
| "eval_loss": 0.09426674991846085, | |
| "eval_runtime": 107.6056, | |
| "eval_samples_per_second": 28.363, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.8773874434779254, | |
| "eval_sts-test_pearson_dot": 0.8592176171000234, | |
| "eval_sts-test_pearson_euclidean": 0.9061215425720242, | |
| "eval_sts-test_pearson_manhattan": 0.9052975958981246, | |
| "eval_sts-test_pearson_max": 0.9061215425720242, | |
| "eval_sts-test_spearman_cosine": 0.9025369508088437, | |
| "eval_sts-test_spearman_dot": 0.8649846905836943, | |
| "eval_sts-test_spearman_euclidean": 0.9027074083634833, | |
| "eval_sts-test_spearman_manhattan": 0.9020343941434181, | |
| "eval_sts-test_spearman_max": 0.9027074083634833, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.9119373776908023, | |
| "grad_norm": 3.7655446529388428, | |
| "learning_rate": 1.515657549483328e-06, | |
| "loss": 0.1292, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.913894324853229, | |
| "grad_norm": 4.406928539276123, | |
| "learning_rate": 1.4511768130613434e-06, | |
| "loss": 0.1896, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.9158512720156555, | |
| "grad_norm": 3.7682666778564453, | |
| "learning_rate": 1.3879902294846559e-06, | |
| "loss": 0.1597, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.9178082191780822, | |
| "grad_norm": 3.596527338027954, | |
| "learning_rate": 1.3261073641747358e-06, | |
| "loss": 0.1588, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.9197651663405088, | |
| "grad_norm": 3.1445114612579346, | |
| "learning_rate": 1.2655375851913209e-06, | |
| "loss": 0.13, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.9197651663405088, | |
| "eval_loss": 0.09422677010297775, | |
| "eval_runtime": 107.5943, | |
| "eval_samples_per_second": 28.366, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.8774123494705477, | |
| "eval_sts-test_pearson_dot": 0.8591397738989044, | |
| "eval_sts-test_pearson_euclidean": 0.9061478507699354, | |
| "eval_sts-test_pearson_manhattan": 0.905330010345305, | |
| "eval_sts-test_pearson_max": 0.9061478507699354, | |
| "eval_sts-test_spearman_cosine": 0.9025525689990692, | |
| "eval_sts-test_spearman_dot": 0.8648119954659849, | |
| "eval_sts-test_spearman_euclidean": 0.9027778468489135, | |
| "eval_sts-test_spearman_manhattan": 0.9020008754486362, | |
| "eval_sts-test_spearman_max": 0.9027778468489135, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.9217221135029354, | |
| "grad_norm": 3.6816020011901855, | |
| "learning_rate": 1.2062900618142136e-06, | |
| "loss": 0.1593, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.923679060665362, | |
| "grad_norm": 3.369281530380249, | |
| "learning_rate": 1.1483737631552161e-06, | |
| "loss": 0.1137, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.9256360078277887, | |
| "grad_norm": 3.330564260482788, | |
| "learning_rate": 1.0917974568003531e-06, | |
| "loss": 0.1295, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.9275929549902152, | |
| "grad_norm": 3.9396634101867676, | |
| "learning_rate": 1.0365697074826043e-06, | |
| "loss": 0.1367, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.9295499021526419, | |
| "grad_norm": 3.385772228240967, | |
| "learning_rate": 9.82698875785325e-07, | |
| "loss": 0.107, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.9295499021526419, | |
| "eval_loss": 0.09417176246643066, | |
| "eval_runtime": 107.4989, | |
| "eval_samples_per_second": 28.391, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.8774075619796129, | |
| "eval_sts-test_pearson_dot": 0.8590208674593851, | |
| "eval_sts-test_pearson_euclidean": 0.9061610232160783, | |
| "eval_sts-test_pearson_manhattan": 0.9053388130087265, | |
| "eval_sts-test_pearson_max": 0.9061610232160783, | |
| "eval_sts-test_spearman_cosine": 0.9025310883936013, | |
| "eval_sts-test_spearman_dot": 0.8647318907080943, | |
| "eval_sts-test_spearman_euclidean": 0.9027404347943903, | |
| "eval_sts-test_spearman_manhattan": 0.9020158671212021, | |
| "eval_sts-test_spearman_max": 0.9027404347943903, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.9315068493150684, | |
| "grad_norm": 2.921172618865967, | |
| "learning_rate": 9.301931168766165e-07, | |
| "loss": 0.1442, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.9334637964774951, | |
| "grad_norm": 4.05568790435791, | |
| "learning_rate": 8.790603792747499e-07, | |
| "loss": 0.1841, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.9354207436399217, | |
| "grad_norm": 1.999931812286377, | |
| "learning_rate": 8.293084036448895e-07, | |
| "loss": 0.0436, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.9373776908023483, | |
| "grad_norm": 2.164313793182373, | |
| "learning_rate": 7.809447216272892e-07, | |
| "loss": 0.0908, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.9393346379647749, | |
| "grad_norm": 3.318042755126953, | |
| "learning_rate": 7.33976654697115e-07, | |
| "loss": 0.1233, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.9393346379647749, | |
| "eval_loss": 0.09411043673753738, | |
| "eval_runtime": 107.5241, | |
| "eval_samples_per_second": 28.384, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.8774878626138263, | |
| "eval_sts-test_pearson_dot": 0.8591304690756056, | |
| "eval_sts-test_pearson_euclidean": 0.9062269621782642, | |
| "eval_sts-test_pearson_manhattan": 0.9054142728411033, | |
| "eval_sts-test_pearson_max": 0.9062269621782642, | |
| "eval_sts-test_spearman_cosine": 0.9025523452427622, | |
| "eval_sts-test_spearman_dot": 0.8649809762289989, | |
| "eval_sts-test_spearman_euclidean": 0.9027271436697569, | |
| "eval_sts-test_spearman_manhattan": 0.9021333391823545, | |
| "eval_sts-test_spearman_max": 0.9027271436697569, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.9412915851272016, | |
| "grad_norm": 3.7038323879241943, | |
| "learning_rate": 6.884113130561043e-07, | |
| "loss": 0.1676, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.9432485322896281, | |
| "grad_norm": 3.2897257804870605, | |
| "learning_rate": 6.442555945561923e-07, | |
| "loss": 0.1449, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.9452054794520548, | |
| "grad_norm": 3.172556161880493, | |
| "learning_rate": 6.015161836552764e-07, | |
| "loss": 0.1234, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.9471624266144814, | |
| "grad_norm": 2.7821803092956543, | |
| "learning_rate": 5.601995504053193e-07, | |
| "loss": 0.076, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.949119373776908, | |
| "grad_norm": 3.796891927719116, | |
| "learning_rate": 5.203119494728826e-07, | |
| "loss": 0.1369, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.949119373776908, | |
| "eval_loss": 0.09407136589288712, | |
| "eval_runtime": 107.5103, | |
| "eval_samples_per_second": 28.388, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.8775371989941387, | |
| "eval_sts-test_pearson_dot": 0.8591526007541124, | |
| "eval_sts-test_pearson_euclidean": 0.9062640446951766, | |
| "eval_sts-test_pearson_manhattan": 0.905456867436393, | |
| "eval_sts-test_pearson_max": 0.9062640446951766, | |
| "eval_sts-test_spearman_cosine": 0.902595753966312, | |
| "eval_sts-test_spearman_dot": 0.8649717127178908, | |
| "eval_sts-test_spearman_euclidean": 0.9027548894518198, | |
| "eval_sts-test_spearman_manhattan": 0.9021964384609165, | |
| "eval_sts-test_spearman_max": 0.9027548894518198, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.9510763209393346, | |
| "grad_norm": 3.227461576461792, | |
| "learning_rate": 4.818594191922577e-07, | |
| "loss": 0.144, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.9530332681017613, | |
| "grad_norm": 2.9072837829589844, | |
| "learning_rate": 4.448477806513729e-07, | |
| "loss": 0.0874, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.9549902152641878, | |
| "grad_norm": 4.316675186157227, | |
| "learning_rate": 4.0928263681057845e-07, | |
| "loss": 0.195, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.9569471624266145, | |
| "grad_norm": 3.2540643215179443, | |
| "learning_rate": 3.7516937165444025e-07, | |
| "loss": 0.1585, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.958904109589041, | |
| "grad_norm": 3.0417003631591797, | |
| "learning_rate": 3.4251314937669313e-07, | |
| "loss": 0.1152, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.958904109589041, | |
| "eval_loss": 0.09405405074357986, | |
| "eval_runtime": 107.5662, | |
| "eval_samples_per_second": 28.373, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.8775483547748348, | |
| "eval_sts-test_pearson_dot": 0.8591693308199357, | |
| "eval_sts-test_pearson_euclidean": 0.9062789492300676, | |
| "eval_sts-test_pearson_manhattan": 0.9054771724847893, | |
| "eval_sts-test_pearson_max": 0.9062789492300676, | |
| "eval_sts-test_spearman_cosine": 0.9026681615072432, | |
| "eval_sts-test_spearman_dot": 0.8650296656013927, | |
| "eval_sts-test_spearman_euclidean": 0.9028729432793703, | |
| "eval_sts-test_spearman_manhattan": 0.9022379228802262, | |
| "eval_sts-test_spearman_max": 0.9028729432793703, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.9608610567514677, | |
| "grad_norm": 2.9117610454559326, | |
| "learning_rate": 3.1131891359847397e-07, | |
| "loss": 0.0862, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.9628180039138943, | |
| "grad_norm": 4.31304407119751, | |
| "learning_rate": 2.8159138661992824e-07, | |
| "loss": 0.2244, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.9647749510763209, | |
| "grad_norm": 3.088024854660034, | |
| "learning_rate": 2.5333506870533484e-07, | |
| "loss": 0.0987, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.9667318982387475, | |
| "grad_norm": 3.0071840286254883, | |
| "learning_rate": 2.2655423740183925e-07, | |
| "loss": 0.105, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.9686888454011742, | |
| "grad_norm": 3.72670316696167, | |
| "learning_rate": 2.0125294689190555e-07, | |
| "loss": 0.1777, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.9686888454011742, | |
| "eval_loss": 0.09404183179140091, | |
| "eval_runtime": 107.5451, | |
| "eval_samples_per_second": 28.379, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.8775779600592797, | |
| "eval_sts-test_pearson_dot": 0.8592464798748846, | |
| "eval_sts-test_pearson_euclidean": 0.9062982054856592, | |
| "eval_sts-test_pearson_manhattan": 0.905493676973923, | |
| "eval_sts-test_pearson_max": 0.9062982054856592, | |
| "eval_sts-test_spearman_cosine": 0.9026481576934011, | |
| "eval_sts-test_spearman_dot": 0.8650861416932687, | |
| "eval_sts-test_spearman_euclidean": 0.9028581753631112, | |
| "eval_sts-test_spearman_manhattan": 0.9022116538897895, | |
| "eval_sts-test_spearman_max": 0.9028581753631112, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.9706457925636007, | |
| "grad_norm": 3.807492256164551, | |
| "learning_rate": 1.7743502737957107e-07, | |
| "loss": 0.1838, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.9726027397260274, | |
| "grad_norm": 3.7314093112945557, | |
| "learning_rate": 1.5510408451062552e-07, | |
| "loss": 0.1813, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.974559686888454, | |
| "grad_norm": 3.0683677196502686, | |
| "learning_rate": 1.3426349882676326e-07, | |
| "loss": 0.0994, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.9765166340508806, | |
| "grad_norm": 3.496849298477173, | |
| "learning_rate": 1.1491642525383595e-07, | |
| "loss": 0.1576, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.9784735812133072, | |
| "grad_norm": 3.3572986125946045, | |
| "learning_rate": 9.706579262424243e-08, | |
| "loss": 0.1298, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.9784735812133072, | |
| "eval_loss": 0.09404946863651276, | |
| "eval_runtime": 107.5442, | |
| "eval_samples_per_second": 28.379, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.8775676935231487, | |
| "eval_sts-test_pearson_dot": 0.8592566533355803, | |
| "eval_sts-test_pearson_euclidean": 0.9062872900289646, | |
| "eval_sts-test_pearson_manhattan": 0.9054889721563202, | |
| "eval_sts-test_pearson_max": 0.9062872900289646, | |
| "eval_sts-test_spearman_cosine": 0.9026514245354827, | |
| "eval_sts-test_spearman_dot": 0.8649815132441356, | |
| "eval_sts-test_spearman_euclidean": 0.9028325328903337, | |
| "eval_sts-test_spearman_manhattan": 0.9022372963625668, | |
| "eval_sts-test_spearman_max": 0.9028325328903337, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.9804305283757339, | |
| "grad_norm": 3.594297170639038, | |
| "learning_rate": 8.071430323354778e-08, | |
| "loss": 0.1884, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.9823874755381604, | |
| "grad_norm": 2.890631914138794, | |
| "learning_rate": 6.586443243140839e-08, | |
| "loss": 0.1032, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.9843444227005871, | |
| "grad_norm": 2.5980706214904785, | |
| "learning_rate": 5.251842824683717e-08, | |
| "loss": 0.1164, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.9863013698630136, | |
| "grad_norm": 3.6865243911743164, | |
| "learning_rate": 4.067831104789033e-08, | |
| "loss": 0.1466, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.9882583170254403, | |
| "grad_norm": 4.4932637214660645, | |
| "learning_rate": 3.034587323581639e-08, | |
| "loss": 0.2192, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.9882583170254403, | |
| "eval_loss": 0.09403973817825317, | |
| "eval_runtime": 107.6061, | |
| "eval_samples_per_second": 28.363, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.8775751164978143, | |
| "eval_sts-test_pearson_dot": 0.859260290269255, | |
| "eval_sts-test_pearson_euclidean": 0.906298388123929, | |
| "eval_sts-test_pearson_manhattan": 0.9055001359483166, | |
| "eval_sts-test_pearson_max": 0.906298388123929, | |
| "eval_sts-test_spearman_cosine": 0.9026775592721354, | |
| "eval_sts-test_spearman_dot": 0.8650394661276374, | |
| "eval_sts-test_spearman_euclidean": 0.9028324881390722, | |
| "eval_sts-test_spearman_manhattan": 0.9022903713585769, | |
| "eval_sts-test_spearman_max": 0.9028324881390722, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.9902152641878669, | |
| "grad_norm": 2.892927408218384, | |
| "learning_rate": 2.1522678973718848e-08, | |
| "loss": 0.1302, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.9921722113502935, | |
| "grad_norm": 3.516493320465088, | |
| "learning_rate": 1.421006394976221e-08, | |
| "loss": 0.1371, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.9941291585127201, | |
| "grad_norm": 2.9676320552825928, | |
| "learning_rate": 8.40913517497377e-09, | |
| "loss": 0.1543, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.9960861056751468, | |
      "grad_norm": null,
| "learning_rate": 8.40913517497377e-09, | |
| "loss": 0.1084, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.9980430528375733, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.120770815659869e-09, | |
| "loss": 0.0, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.9980430528375733, | |
| "eval_loss": 0.09404201060533524, | |
| "eval_runtime": 107.6151, | |
| "eval_samples_per_second": 28.36, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.8775732893584032, | |
| "eval_sts-test_pearson_dot": 0.8592601719833955, | |
| "eval_sts-test_pearson_euclidean": 0.9062985160748658, | |
| "eval_sts-test_pearson_manhattan": 0.9054996976916896, | |
| "eval_sts-test_pearson_max": 0.9062985160748658, | |
| "eval_sts-test_spearman_cosine": 0.9026670874769698, | |
| "eval_sts-test_spearman_dot": 0.8650449257815273, | |
| "eval_sts-test_spearman_euclidean": 0.9028166014412784, | |
| "eval_sts-test_spearman_manhattan": 0.9022697410270756, | |
| "eval_sts-test_spearman_max": 0.9028166014412784, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.345620060465569e-09, | |
| "loss": 0.0, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 1.0019569471624266, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.410302209660437e-11, | |
| "loss": 0.0, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 1.0039138943248533, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.999966358932628e-05, | |
| "loss": 0.0, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 1.00587084148728, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9997897495179932e-05, | |
| "loss": 0.0, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 1.0078277886497065, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.999461788189681e-05, | |
| "loss": 0.0, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 1.0078277886497065, | |
| "eval_loss": 0.09377636015415192, | |
| "eval_runtime": 107.6229, | |
| "eval_samples_per_second": 28.358, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.8778303207829065, | |
| "eval_sts-test_pearson_dot": 0.8593106541530915, | |
| "eval_sts-test_pearson_euclidean": 0.9066522007596656, | |
| "eval_sts-test_pearson_manhattan": 0.9058577790072662, | |
| "eval_sts-test_pearson_max": 0.9066522007596656, | |
| "eval_sts-test_spearman_cosine": 0.9030437588441012, | |
| "eval_sts-test_spearman_dot": 0.865145302860828, | |
| "eval_sts-test_spearman_euclidean": 0.9032262992393169, | |
| "eval_sts-test_spearman_manhattan": 0.9024990017391836, | |
| "eval_sts-test_spearman_max": 0.9032262992393169, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 1.009784735812133, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9989825245957038e-05, | |
| "loss": 0.0, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 1.0117416829745598, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9983520312887785e-05, | |
| "loss": 0.0, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 1.0136986301369864, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.997570403715341e-05, | |
| "loss": 0.0, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 1.015655577299413, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9966377602010984e-05, | |
| "loss": 0.0, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 1.0176125244618395, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9955542419331162e-05, | |
| "loss": 0.0, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.0176125244618395, | |
| "eval_loss": 0.09346973896026611, | |
| "eval_runtime": 107.7416, | |
| "eval_samples_per_second": 28.327, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.878189992162205, | |
| "eval_sts-test_pearson_dot": 0.859272710846145, | |
| "eval_sts-test_pearson_euclidean": 0.9071873551110472, | |
| "eval_sts-test_pearson_manhattan": 0.9063997831012797, | |
| "eval_sts-test_pearson_max": 0.9071873551110472, | |
| "eval_sts-test_spearman_cosine": 0.9037371348880996, | |
| "eval_sts-test_spearman_dot": 0.8649724734893345, | |
| "eval_sts-test_spearman_euclidean": 0.9038065440945177, | |
| "eval_sts-test_spearman_manhattan": 0.9030390152103936, | |
| "eval_sts-test_spearman_max": 0.9038065440945177, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.0195694716242663, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9943200129384444e-05, | |
| "loss": 0.0, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 1.0215264187866928, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.992935260059287e-05, | |
| "loss": 0.0, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 1.0234833659491194, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.991400192924717e-05, | |
| "loss": 0.0, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 1.025440313111546, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.989715043918941e-05, | |
| "loss": 0.0, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 1.0273972602739727, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9878800681461222e-05, | |
| "loss": 0.0, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 1.0273972602739727, | |
| "eval_loss": 0.09335841238498688, | |
| "eval_runtime": 107.4591, | |
| "eval_samples_per_second": 28.402, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.8783446381509361, | |
| "eval_sts-test_pearson_dot": 0.8591028399574137, | |
| "eval_sts-test_pearson_euclidean": 0.9074724220606889, | |
| "eval_sts-test_pearson_manhattan": 0.9066928402352433, | |
| "eval_sts-test_pearson_max": 0.9074724220606889, | |
| "eval_sts-test_spearman_cosine": 0.9039463022838429, | |
| "eval_sts-test_spearman_dot": 0.8650437622487309, | |
| "eval_sts-test_spearman_euclidean": 0.904165270205831, | |
| "eval_sts-test_spearman_manhattan": 0.9033685187480186, | |
| "eval_sts-test_spearman_max": 0.904165270205831, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 1.0293542074363993, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9858955433917602e-05, | |
| "loss": 0.0, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 1.0313111545988258, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9837617700806385e-05, | |
| "loss": 0.0, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 1.0332681017612524, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9814790712313456e-05, | |
| "loss": 0.0, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 1.0352250489236792, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.979047792407376e-05, | |
| "loss": 0.0, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 1.0371819960861057, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9764683016648156e-05, | |
| "loss": 0.0, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.0371819960861057, | |
| "eval_loss": 0.09333890676498413, | |
| "eval_runtime": 107.5188, | |
| "eval_samples_per_second": 28.386, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.8784118572563219, | |
| "eval_sts-test_pearson_dot": 0.8589820845942575, | |
| "eval_sts-test_pearson_euclidean": 0.9076113916771168, | |
| "eval_sts-test_pearson_manhattan": 0.906847708354887, | |
| "eval_sts-test_pearson_max": 0.9076113916771168, | |
| "eval_sts-test_spearman_cosine": 0.9041257100907611, | |
| "eval_sts-test_spearman_dot": 0.8648521373474531, | |
| "eval_sts-test_spearman_euclidean": 0.9043787337226681, | |
| "eval_sts-test_spearman_manhattan": 0.9034415528066093, | |
| "eval_sts-test_spearman_max": 0.9043787337226681, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.0391389432485323, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.973740989496627e-05, | |
| "loss": 0.0, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 1.0410958904109588, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9708662687735316e-05, | |
| "loss": 0.0, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 1.0430528375733856, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9678445746815103e-05, | |
| "loss": 0.0, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 1.0450097847358122, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9646763646559234e-05, | |
| "loss": 0.0, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 1.0469667318982387, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.961362118312259e-05, | |
| "loss": 0.0, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 1.0469667318982387, | |
| "eval_loss": 0.09334058314561844, | |
| "eval_runtime": 107.4766, | |
| "eval_samples_per_second": 28.397, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.8784564641027457, | |
| "eval_sts-test_pearson_dot": 0.8589118480822063, | |
| "eval_sts-test_pearson_euclidean": 0.9077060194361904, | |
| "eval_sts-test_pearson_manhattan": 0.9069470084717334, | |
| "eval_sts-test_pearson_max": 0.9077060194361904, | |
| "eval_sts-test_spearman_cosine": 0.9041882276029249, | |
| "eval_sts-test_spearman_dot": 0.8647786557762484, | |
| "eval_sts-test_spearman_euclidean": 0.9044847942121657, | |
| "eval_sts-test_spearman_manhattan": 0.9035779099000689, | |
| "eval_sts-test_spearman_max": 0.9044847942121657, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 1.0489236790606653, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.957902337373532e-05, | |
| "loss": 0.0, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 1.050880626223092, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9542975455943284e-05, | |
| "loss": 0.0, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 1.0528375733855186, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9505482886815167e-05, | |
| "loss": 0.0, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 1.0547945205479452, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.946655134211639e-05, | |
| "loss": 0.0, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 1.0567514677103718, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.942618671544988e-05, | |
| "loss": 0.0, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.0567514677103718, | |
| "eval_loss": 0.09333459287881851, | |
| "eval_runtime": 107.4198, | |
| "eval_samples_per_second": 28.412, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.8784862573563277, | |
| "eval_sts-test_pearson_dot": 0.8588656254832566, | |
| "eval_sts-test_pearson_euclidean": 0.9077645108700259, | |
| "eval_sts-test_pearson_manhattan": 0.9070039494171251, | |
| "eval_sts-test_pearson_max": 0.9077645108700259, | |
| "eval_sts-test_spearman_cosine": 0.9042475230242684, | |
| "eval_sts-test_spearman_dot": 0.8648472147086999, | |
| "eval_sts-test_spearman_euclidean": 0.9046143043626323, | |
| "eval_sts-test_spearman_manhattan": 0.9038324550748632, | |
| "eval_sts-test_spearman_max": 0.9046143043626323, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.0587084148727985, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.938439511736388e-05, | |
| "loss": 0.0, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 1.060665362035225, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.934118287442689e-05, | |
| "loss": 0.0, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 1.0626223091976517, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9296556528269954e-05, | |
| "loss": 0.0, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 1.0645792563600782, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.925052283459636e-05, | |
| "loss": 0.0, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 1.066536203522505, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9203088762158917e-05, | |
| "loss": 0.0, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 1.066536203522505, | |
| "eval_loss": 0.09334129840135574, | |
| "eval_runtime": 107.3473, | |
| "eval_samples_per_second": 28.431, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8785021239198606, | |
| "eval_sts-test_pearson_dot": 0.858837811340809, | |
| "eval_sts-test_pearson_euclidean": 0.9077852597668379, | |
| "eval_sts-test_pearson_manhattan": 0.9070244989378426, | |
| "eval_sts-test_pearson_max": 0.9077852597668379, | |
| "eval_sts-test_spearman_cosine": 0.9042259976675391, | |
| "eval_sts-test_spearman_dot": 0.8647955717530543, | |
| "eval_sts-test_spearman_euclidean": 0.904697944470173, | |
| "eval_sts-test_spearman_manhattan": 0.9037859137630162, | |
| "eval_sts-test_spearman_max": 0.904697944470173, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 1.0684931506849316, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.915426149170502e-05, | |
| "loss": 0.0, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 1.0704500978473581, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.910404841488959e-05, | |
| "loss": 0.0, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 1.0724070450097847, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9052457133156107e-05, | |
| "loss": 0.0, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 1.0743639921722115, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.899949545658585e-05, | |
| "loss": 0.0, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 1.076320939334638, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.8945171402715628e-05, | |
| "loss": 0.0, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.076320939334638, | |
| "eval_loss": 0.09333806484937668, | |
| "eval_runtime": 107.4025, | |
| "eval_samples_per_second": 28.416, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.8784964957818725, | |
| "eval_sts-test_pearson_dot": 0.8588087778719404, | |
| "eval_sts-test_pearson_euclidean": 0.907796603890586, | |
| "eval_sts-test_pearson_manhattan": 0.9070375894901823, | |
| "eval_sts-test_pearson_max": 0.907796603890586, | |
| "eval_sts-test_spearman_cosine": 0.9041786060817257, | |
| "eval_sts-test_spearman_dot": 0.8647905148605171, | |
| "eval_sts-test_spearman_euclidean": 0.9046940958616932, | |
| "eval_sts-test_spearman_manhattan": 0.9038211330057312, | |
| "eval_sts-test_spearman_max": 0.9046940958616932, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.0782778864970646, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.8889493195324e-05, | |
| "loss": 0.0, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 1.0802348336594911, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.8832469263186352e-05, | |
| "loss": 0.0, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 1.082191780821918, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.8774108238798932e-05, | |
| "loss": 0.0, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 1.0841487279843445, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.8714418957072008e-05, | |
| "loss": 0.0, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 1.086105675146771, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.8653410453992415e-05, | |
| "loss": 0.0, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 1.086105675146771, | |
| "eval_loss": 0.09333912283182144, | |
| "eval_runtime": 107.2368, | |
| "eval_samples_per_second": 28.46, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8784962428161289, | |
| "eval_sts-test_pearson_dot": 0.8587781277415185, | |
| "eval_sts-test_pearson_euclidean": 0.9078067676515431, | |
| "eval_sts-test_pearson_manhattan": 0.9070510578145549, | |
| "eval_sts-test_pearson_max": 0.9078067676515431, | |
| "eval_sts-test_spearman_cosine": 0.9042436744157888, | |
| "eval_sts-test_spearman_dot": 0.8647337702610728, | |
| "eval_sts-test_spearman_euclidean": 0.9047590299419722, | |
| "eval_sts-test_spearman_manhattan": 0.9037811253780472, | |
| "eval_sts-test_spearman_max": 0.9047590299419722, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 1.0880626223091976, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.8591091965255654e-05, | |
| "loss": 0.0, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 1.0900195694716244, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.852747292486776e-05, | |
| "loss": 0.0, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 1.091976516634051, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.8462562963717134e-05, | |
| "loss": 0.0, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 1.0939334637964775, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.839637190811661e-05, | |
| "loss": 0.0, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 1.095890410958904, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.8328909778315876e-05, | |
| "loss": 0.0, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.095890410958904, | |
| "eval_loss": 0.0933486595749855, | |
| "eval_runtime": 107.2061, | |
| "eval_samples_per_second": 28.469, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8785057707401351, | |
| "eval_sts-test_pearson_dot": 0.8587863599514347, | |
| "eval_sts-test_pearson_euclidean": 0.9078183009121663, | |
| "eval_sts-test_pearson_manhattan": 0.9070598319920766, | |
| "eval_sts-test_pearson_max": 0.9078183009121663, | |
| "eval_sts-test_spearman_cosine": 0.9041971331039418, | |
| "eval_sts-test_spearman_dot": 0.8647677812197302, | |
| "eval_sts-test_spearman_euclidean": 0.9047771542028356, | |
| "eval_sts-test_spearman_manhattan": 0.9038047092928005, | |
| "eval_sts-test_spearman_max": 0.9047771542028356, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.0978473581213308, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.8260186786984603e-05, | |
| "loss": 0.0, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 1.0998043052837574, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.8190213337666384e-05, | |
| "loss": 0.0, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 1.101761252446184, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.8119000023203838e-05, | |
| "loss": 0.0, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 1.1037181996086105, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.8046557624134994e-05, | |
| "loss": 0.0, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 1.1056751467710373, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.797289710706133e-05, | |
| "loss": 0.0, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 1.1056751467710373, | |
| "eval_loss": 0.09335649758577347, | |
| "eval_runtime": 107.3103, | |
| "eval_samples_per_second": 28.441, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8784986781606445, | |
| "eval_sts-test_pearson_dot": 0.8587650367829168, | |
| "eval_sts-test_pearson_euclidean": 0.9078184764952983, | |
| "eval_sts-test_pearson_manhattan": 0.9070615638026146, | |
| "eval_sts-test_pearson_max": 0.9078184764952983, | |
| "eval_sts-test_spearman_cosine": 0.9042330236155777, | |
| "eval_sts-test_spearman_dot": 0.8647168542842668, | |
| "eval_sts-test_spearman_euclidean": 0.9047300311245905, | |
| "eval_sts-test_spearman_manhattan": 0.9038148678291363, | |
| "eval_sts-test_spearman_max": 0.9047300311245905, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 1.1076320939334638, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.7898029622987555e-05, | |
| "loss": 0.0, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 1.1095890410958904, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.7821966505633587e-05, | |
| "loss": 0.0, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 1.111545988258317, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.774471926971877e-05, | |
| "loss": 0.0, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 1.1135029354207437, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.7666299609218748e-05, | |
| "loss": 0.0, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 1.1154598825831703, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.758671939559519e-05, | |
| "loss": 0.0, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.1154598825831703, | |
| "eval_loss": 0.09335120022296906, | |
| "eval_runtime": 107.405, | |
| "eval_samples_per_second": 28.416, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.8785052890221882, | |
| "eval_sts-test_pearson_dot": 0.8587803203866682, | |
| "eval_sts-test_pearson_euclidean": 0.9078233268224812, | |
| "eval_sts-test_pearson_manhattan": 0.9070679561119817, | |
| "eval_sts-test_pearson_max": 0.9078233268224812, | |
| "eval_sts-test_spearman_cosine": 0.9042315020726903, | |
| "eval_sts-test_spearman_dot": 0.864802866208661, | |
| "eval_sts-test_spearman_euclidean": 0.9047536150393436, | |
| "eval_sts-test_spearman_manhattan": 0.9037862717731073, | |
| "eval_sts-test_spearman_max": 0.9047536150393436, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.1174168297455969, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.750599067599863e-05, | |
| "loss": 0.0, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 1.1193737769080234, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.742412567144476e-05, | |
| "loss": 0.0, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 1.1213307240704502, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.734113677496431e-05, | |
| "loss": 0.0, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 1.1232876712328768, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.7257036549726984e-05, | |
| "loss": 0.0, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 1.1252446183953033, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.7171837727139613e-05, | |
| "loss": 0.0, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 1.1252446183953033, | |
| "eval_loss": 0.09334684163331985, | |
| "eval_runtime": 107.3077, | |
| "eval_samples_per_second": 28.442, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.878500870133716, | |
| "eval_sts-test_pearson_dot": 0.8587683133660149, | |
| "eval_sts-test_pearson_euclidean": 0.9078186307509195, | |
| "eval_sts-test_pearson_manhattan": 0.9070596363407782, | |
| "eval_sts-test_pearson_max": 0.9078186307509195, | |
| "eval_sts-test_spearman_cosine": 0.9042276087129493, | |
| "eval_sts-test_spearman_dot": 0.8647620978095335, | |
| "eval_sts-test_spearman_euclidean": 0.9047204543546528, | |
| "eval_sts-test_spearman_manhattan": 0.9037996076490019, | |
| "eval_sts-test_spearman_max": 0.9047204543546528, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 1.1272015655577299, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.708555320491878e-05, | |
| "loss": 0.0, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 1.1291585127201567, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.6998196045138354e-05, | |
| "loss": 0.0, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 1.1311154598825832, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.6909779472252084e-05, | |
| "loss": 0.0, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 1.1330724070450098, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.682031687109165e-05, | |
| "loss": 0.0, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 1.1350293542074363, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.67298217848404e-05, | |
| "loss": 0.0, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.1350293542074363, | |
| "eval_loss": 0.09334247559309006, | |
| "eval_runtime": 107.379, | |
| "eval_samples_per_second": 28.423, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8785055163668648, | |
| "eval_sts-test_pearson_dot": 0.8587721647379715, | |
| "eval_sts-test_pearson_euclidean": 0.907824824275582, | |
| "eval_sts-test_pearson_manhattan": 0.9070711811795367, | |
| "eval_sts-test_pearson_max": 0.907824824275582, | |
| "eval_sts-test_spearman_cosine": 0.9042655577826093, | |
| "eval_sts-test_spearman_dot": 0.8648062225532656, | |
| "eval_sts-test_spearman_euclidean": 0.9047673089253296, | |
| "eval_sts-test_spearman_manhattan": 0.9038550097106045, | |
| "eval_sts-test_spearman_max": 0.9047673089253296, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.1369863013698631, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.663830791298314e-05, | |
| "loss": 0.0, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 1.1389432485322897, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.6545789109232247e-05, | |
| "loss": 0.0, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 1.1409001956947162, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.6452279379430463e-05, | |
| "loss": 0.0, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 1.1428571428571428, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.6357792879430615e-05, | |
| "loss": 0.0, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 1.1448140900195696, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.6262343912952656e-05, | |
| "loss": 0.0, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 1.1448140900195696, | |
| "eval_loss": 0.09335017949342728, | |
| "eval_runtime": 107.4769, | |
| "eval_samples_per_second": 28.397, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.8784989682942236, | |
| "eval_sts-test_pearson_dot": 0.8587666674437093, | |
| "eval_sts-test_pearson_euclidean": 0.9078185933512712, | |
| "eval_sts-test_pearson_manhattan": 0.9070612186504049, | |
| "eval_sts-test_pearson_max": 0.9078185933512712, | |
| "eval_sts-test_spearman_cosine": 0.9041901519071646, | |
| "eval_sts-test_spearman_dot": 0.8647350232963916, | |
| "eval_sts-test_spearman_euclidean": 0.904776617187699, | |
| "eval_sts-test_spearman_manhattan": 0.9038217147721292, | |
| "eval_sts-test_spearman_max": 0.904776617187699, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 1.1467710371819961, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.6165946929418322e-05, | |
| "loss": 0.0, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 1.1487279843444227, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.606861652176371e-05, | |
| "loss": 0.0, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 1.1506849315068493, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.597036742423016e-05, | |
| "loss": 0.0, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 1.152641878669276, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.5871214510133727e-05, | |
| "loss": 0.0, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 1.1545988258317026, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.57711727896136e-05, | |
| "loss": 0.0, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.1545988258317026, | |
| "eval_loss": 0.09335111826658249, | |
| "eval_runtime": 107.7179, | |
| "eval_samples_per_second": 28.333, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.8784866577367805, | |
| "eval_sts-test_pearson_dot": 0.8587507291698867, | |
| "eval_sts-test_pearson_euclidean": 0.9078058047445436, | |
| "eval_sts-test_pearson_manhattan": 0.9070496706784236, | |
| "eval_sts-test_pearson_max": 0.9078058047445436, | |
| "eval_sts-test_spearman_cosine": 0.9041966408400663, | |
| "eval_sts-test_spearman_dot": 0.8647644696263873, | |
| "eval_sts-test_spearman_euclidean": 0.9047290018455785, | |
| "eval_sts-test_spearman_manhattan": 0.9038172396459901, | |
| "eval_sts-test_spearman_max": 0.9047290018455785, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.1565557729941291, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.567025740735979e-05, | |
| "loss": 0.0, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 1.1585127201565557, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.556848364032052e-05, | |
| "loss": 0.0, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 1.1604696673189825, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.5465866895389493e-05, | |
| "loss": 0.0, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 1.162426614481409, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.5362422707073577e-05, | |
| "loss": 0.0, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 1.1643835616438356, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.5258166735141095e-05, | |
| "loss": 0.0, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 1.1643835616438356, | |
| "eval_loss": 0.09335541725158691, | |
| "eval_runtime": 107.3213, | |
| "eval_samples_per_second": 28.438, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8784989040319251, | |
| "eval_sts-test_pearson_dot": 0.858769906655404, | |
| "eval_sts-test_pearson_euclidean": 0.9078191195624842, | |
| "eval_sts-test_pearson_manhattan": 0.9070606696278369, | |
| "eval_sts-test_pearson_max": 0.9078191195624842, | |
| "eval_sts-test_spearman_cosine": 0.9041829917053421, | |
| "eval_sts-test_spearman_dot": 0.8647956165043157, | |
| "eval_sts-test_spearman_euclidean": 0.9047499006846482, | |
| "eval_sts-test_spearman_manhattan": 0.9037852424940954, | |
| "eval_sts-test_spearman_max": 0.9047499006846482, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 1.1663405088062622, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.5153114762251221e-05, | |
| "loss": 0.0, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 1.168297455968689, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.5047282691564749e-05, | |
| "loss": 0.0, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 1.1702544031311155, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.4940686544336573e-05, | |
| "loss": 0.0, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 1.172211350293542, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.4833342457490358e-05, | |
| "loss": 0.0, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 1.1741682974559686, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.4725266681175685e-05, | |
| "loss": 0.0, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.1741682974559686, | |
| "eval_loss": 0.09334863722324371, | |
| "eval_runtime": 107.3237, | |
| "eval_samples_per_second": 28.437, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8785061318088844, | |
| "eval_sts-test_pearson_dot": 0.8587713502140157, | |
| "eval_sts-test_pearson_euclidean": 0.907819300380693, | |
| "eval_sts-test_pearson_manhattan": 0.9070639883382074, | |
| "eval_sts-test_pearson_max": 0.907819300380693, | |
| "eval_sts-test_spearman_cosine": 0.9042770588567869, | |
| "eval_sts-test_spearman_dot": 0.8647896198352892, | |
| "eval_sts-test_spearman_euclidean": 0.9047371913264132, | |
| "eval_sts-test_spearman_manhattan": 0.903829680496657, | |
| "eval_sts-test_spearman_max": 0.9047371913264132, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.1761252446183954, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.4616475576308005e-05, | |
| "loss": 0.0, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 1.178082191780822, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.4506985612091885e-05, | |
| "loss": 0.0, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 1.1800391389432485, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.439681336352785e-05, | |
| "loss": 0.0, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 1.181996086105675, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.4285975508903163e-05, | |
| "loss": 0.0, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 1.1839530332681019, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.4174488827267032e-05, | |
| "loss": 0.0, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 1.1839530332681019, | |
| "eval_loss": 0.09334637224674225, | |
| "eval_runtime": 107.3977, | |
| "eval_samples_per_second": 28.418, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.8785067013529959, | |
| "eval_sts-test_pearson_dot": 0.8587667557271453, | |
| "eval_sts-test_pearson_euclidean": 0.9078207514195331, | |
| "eval_sts-test_pearson_manhattan": 0.907064973531389, | |
| "eval_sts-test_pearson_max": 0.9078207514195331, | |
| "eval_sts-test_spearman_cosine": 0.9042189717195008, | |
| "eval_sts-test_spearman_dot": 0.8647535950698692, | |
| "eval_sts-test_spearman_euclidean": 0.9047399659046194, | |
| "eval_sts-test_spearman_manhattan": 0.9038043512827094, | |
| "eval_sts-test_spearman_max": 0.9047399659046194, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 1.1859099804305284, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.4062370195890534e-05, | |
| "loss": 0.0, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 1.187866927592955, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.3949636587711643e-05, | |
| "loss": 0.0, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 1.1898238747553815, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.383630506876585e-05, | |
| "loss": 0.0, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 1.1917808219178083, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.3722392795602595e-05, | |
| "loss": 0.0, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 1.1937377690802349, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.360791701268806e-05, | |
| "loss": 0.0, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.1937377690802349, | |
| "eval_loss": 0.093353271484375, | |
| "eval_runtime": 107.3333, | |
| "eval_samples_per_second": 28.435, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.878490183485023, | |
| "eval_sts-test_pearson_dot": 0.8587609085059802, | |
| "eval_sts-test_pearson_euclidean": 0.9078096976829267, | |
| "eval_sts-test_pearson_manhattan": 0.907053408419778, | |
| "eval_sts-test_pearson_max": 0.9078096976829267, | |
| "eval_sts-test_spearman_cosine": 0.9042677058431561, | |
| "eval_sts-test_spearman_dot": 0.8647269680693414, | |
| "eval_sts-test_spearman_euclidean": 0.9047321791851373, | |
| "eval_sts-test_spearman_manhattan": 0.9038006369280138, | |
| "eval_sts-test_spearman_max": 0.9047321791851373, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.1956947162426614, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.349289504979467e-05, | |
| "loss": 0.0, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 1.197651663405088, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.3377344319377588e-05, | |
| "loss": 0.0, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 1.1996086105675148, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.3261282313938793e-05, | |
| "loss": 0.0, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 1.2015655577299413, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.3144726603379003e-05, | |
| "loss": 0.0, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 1.203522504892368, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.3027694832337858e-05, | |
| "loss": 0.0, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 1.203522504892368, | |
| "eval_loss": 0.09335704892873764, | |
| "eval_runtime": 107.4116, | |
| "eval_samples_per_second": 28.414, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.878500401803376, | |
| "eval_sts-test_pearson_dot": 0.8587685092468204, | |
| "eval_sts-test_pearson_euclidean": 0.9078178231946414, | |
| "eval_sts-test_pearson_manhattan": 0.9070609295572959, | |
| "eval_sts-test_pearson_max": 0.9078178231946414, | |
| "eval_sts-test_spearman_cosine": 0.9042342766508965, | |
| "eval_sts-test_spearman_dot": 0.8647266548105116, | |
| "eval_sts-test_spearman_euclidean": 0.9047684724581257, | |
| "eval_sts-test_spearman_manhattan": 0.9038073943684841, | |
| "eval_sts-test_spearman_max": 0.9047684724581257, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 1.2054794520547945, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.2910204717522805e-05, | |
| "loss": 0.0, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 1.2074363992172212, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.2792274045027092e-05, | |
| "loss": 0.0, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 1.2093933463796478, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.2673920667637241e-05, | |
| "loss": 0.0, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 1.2113502935420744, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.2555162502130436e-05, | |
| "loss": 0.0, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 1.213307240704501, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.243601752656217e-05, | |
| "loss": 0.0, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.213307240704501, | |
| "eval_loss": 0.09336086362600327, | |
| "eval_runtime": 107.5193, | |
| "eval_samples_per_second": 28.386, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.8784938485004923, | |
| "eval_sts-test_pearson_dot": 0.8587639304102439, | |
| "eval_sts-test_pearson_euclidean": 0.9078127749947285, | |
| "eval_sts-test_pearson_manhattan": 0.9070537756129862, | |
| "eval_sts-test_pearson_max": 0.9078127749947285, | |
| "eval_sts-test_spearman_cosine": 0.9042542357134773, | |
| "eval_sts-test_spearman_dot": 0.8647234327196913, | |
| "eval_sts-test_spearman_euclidean": 0.9047827033592482, | |
| "eval_sts-test_spearman_manhattan": 0.9038551439643886, | |
| "eval_sts-test_spearman_max": 0.9047827033592482, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.2152641878669277, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.23165037775447e-05, | |
| "loss": 0.0, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 1.2172211350293543, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.2196639347516613e-05, | |
| "loss": 0.0, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 1.2191780821917808, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.2076442382003872e-05, | |
| "loss": 0.0, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 1.2211350293542074, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.195593107687291e-05, | |
| "loss": 0.0, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 1.2230919765166341, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.1835123675576088e-05, | |
| "loss": 0.0, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 1.2230919765166341, | |
| "eval_loss": 0.09335645288228989, | |
| "eval_runtime": 107.4993, | |
| "eval_samples_per_second": 28.391, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.8784903528227259, | |
| "eval_sts-test_pearson_dot": 0.8587580776062302, | |
| "eval_sts-test_pearson_euclidean": 0.9078094778464928, | |
| "eval_sts-test_pearson_manhattan": 0.9070520930017617, | |
| "eval_sts-test_pearson_max": 0.9078094778464928, | |
| "eval_sts-test_spearman_cosine": 0.9042383937669446, | |
| "eval_sts-test_spearman_dot": 0.864728310607183, | |
| "eval_sts-test_spearman_euclidean": 0.9047238106992572, | |
| "eval_sts-test_spearman_manhattan": 0.9038257423856545, | |
| "eval_sts-test_spearman_max": 0.9047238106992572, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 1.2250489236790607, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.1714038466389892e-05, | |
| "loss": 0.0, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 1.2270058708414873, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.1592693779646405e-05, | |
| "loss": 0.0, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 1.2289628180039138, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.1471107984958405e-05, | |
| "loss": 0.0, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 1.2309197651663406, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.1349299488438488e-05, | |
| "loss": 0.0, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 1.2328767123287672, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.1227286729912685e-05, | |
| "loss": 0.0, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.2328767123287672, | |
| "eval_loss": 0.09335894882678986, | |
| "eval_runtime": 107.3317, | |
| "eval_samples_per_second": 28.435, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8784908843908168, | |
| "eval_sts-test_pearson_dot": 0.8587574658056576, | |
| "eval_sts-test_pearson_euclidean": 0.907809130040599, | |
| "eval_sts-test_pearson_manhattan": 0.9070528501257922, | |
| "eval_sts-test_pearson_max": 0.907809130040599, | |
| "eval_sts-test_spearman_cosine": 0.9042321733416113, | |
| "eval_sts-test_spearman_dot": 0.8647282211046603, | |
| "eval_sts-test_spearman_euclidean": 0.9047467680963509, | |
| "eval_sts-test_spearman_manhattan": 0.9038159866106711, | |
| "eval_sts-test_spearman_max": 0.9047467680963509, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.2348336594911937, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.110508818012898e-05, | |
| "loss": 0.0, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 1.2367906066536203, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.0982722337961116e-05, | |
| "loss": 0.0, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 1.238747553816047, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.0860207727608212e-05, | |
| "loss": 0.0, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 1.2407045009784736, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.0737562895790447e-05, | |
| "loss": 0.0, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 1.2426614481409002, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.0614806408941419e-05, | |
| "loss": 0.0, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 1.2426614481409002, | |
| "eval_loss": 0.09335716813802719, | |
| "eval_runtime": 107.3162, | |
| "eval_samples_per_second": 28.439, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8784912377743683, | |
| "eval_sts-test_pearson_dot": 0.858758170898035, | |
| "eval_sts-test_pearson_euclidean": 0.9078096910450107, | |
| "eval_sts-test_pearson_manhattan": 0.9070530313953442, | |
| "eval_sts-test_pearson_max": 0.9078096910450107, | |
| "eval_sts-test_spearman_cosine": 0.9042182109480572, | |
| "eval_sts-test_spearman_dot": 0.8647310851853892, | |
| "eval_sts-test_spearman_euclidean": 0.904759790713416, | |
| "eval_sts-test_spearman_manhattan": 0.9038005474254912, | |
| "eval_sts-test_spearman_max": 0.904759790713416, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 1.2446183953033267, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.0491956850397496e-05, | |
| "loss": 0.0, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 1.2465753424657535, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.0369032817584565e-05, | |
| "loss": 0.0, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 1.24853228962818, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.0246052919202713e-05, | |
| "loss": 0.0, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 1.2504892367906066, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.0123035772409182e-05, | |
| "loss": 0.0, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 1.2524461839530332, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1e-05, | |
| "loss": 0.0, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.2524461839530332, | |
| "eval_loss": 0.09335613995790482, | |
| "eval_runtime": 107.2102, | |
| "eval_samples_per_second": 28.467, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8784927350695535, | |
| "eval_sts-test_pearson_dot": 0.8587596772706618, | |
| "eval_sts-test_pearson_euclidean": 0.9078113282999147, | |
| "eval_sts-test_pearson_manhattan": 0.907054058345235, | |
| "eval_sts-test_pearson_max": 0.9078113282999147, | |
| "eval_sts-test_spearman_cosine": 0.9042182109480572, | |
| "eval_sts-test_spearman_dot": 0.8647310851853892, | |
| "eval_sts-test_spearman_euclidean": 0.904759790713416, | |
| "eval_sts-test_spearman_manhattan": 0.9038005474254912, | |
| "eval_sts-test_spearman_max": 0.904759790713416, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.25440313111546, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.876964227590821e-06, | |
| "loss": 0.0, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 1.2563600782778865, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.75394708079729e-06, | |
| "loss": 0.0, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 1.258317025440313, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.630967182415446e-06, | |
| "loss": 0.0, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 1.2602739726027397, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.508043149602506e-06, | |
| "loss": 0.0, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 1.2622309197651664, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.385193591058586e-06, | |
| "loss": 0.0, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 1.2622309197651664, | |
| "eval_loss": 0.09335590153932571, | |
| "eval_runtime": 107.3543, | |
| "eval_samples_per_second": 28.429, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8784930614535755, | |
| "eval_sts-test_pearson_dot": 0.8587600228422643, | |
| "eval_sts-test_pearson_euclidean": 0.9078118367014806, | |
| "eval_sts-test_pearson_manhattan": 0.9070545805039607, | |
| "eval_sts-test_pearson_max": 0.9078118367014806, | |
| "eval_sts-test_spearman_cosine": 0.9042182109480572, | |
| "eval_sts-test_spearman_dot": 0.8647310851853892, | |
| "eval_sts-test_spearman_euclidean": 0.904759790713416, | |
| "eval_sts-test_spearman_manhattan": 0.9038005474254912, | |
| "eval_sts-test_spearman_max": 0.904759790713416, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 1.264187866927593, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.262437104209548e-06, | |
| "loss": 0.0, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 1.2661448140900196, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.139792272391791e-06, | |
| "loss": 0.0, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 1.2681017612524461, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.017277662038881e-06, | |
| "loss": 0.0, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 1.270058708414873, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.894911819871023e-06, | |
| "loss": 0.0, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 1.2720156555772995, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.772713270087325e-06, | |
| "loss": 0.0, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.2720156555772995, | |
| "eval_loss": 0.09335590898990631, | |
| "eval_runtime": 107.482, | |
| "eval_samples_per_second": 28.395, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.8784930614535755, | |
| "eval_sts-test_pearson_dot": 0.8587600228422643, | |
| "eval_sts-test_pearson_euclidean": 0.9078118367014806, | |
| "eval_sts-test_pearson_manhattan": 0.9070545805039607, | |
| "eval_sts-test_pearson_max": 0.9078118367014806, | |
| "eval_sts-test_spearman_cosine": 0.9042182109480572, | |
| "eval_sts-test_spearman_dot": 0.8647310851853892, | |
| "eval_sts-test_spearman_euclidean": 0.904759790713416, | |
| "eval_sts-test_spearman_manhattan": 0.9038005474254912, | |
| "eval_sts-test_spearman_max": 0.904759790713416, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.273972602739726, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.650700511561515e-06, | |
| "loss": 0.0, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 1.2759295499021526, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.528892015041598e-06, | |
| "loss": 0.0, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 1.2778864970645794, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.407306220353597e-06, | |
| "loss": 0.0, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 1.279843444227006, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.285961533610111e-06, | |
| "loss": 0.0, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 1.2818003913894325, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.164876324423908e-06, | |
| "loss": 0.0, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 1.2818003913894325, | |
| "eval_loss": 0.09335590898990631, | |
| "eval_runtime": 107.4342, | |
| "eval_samples_per_second": 28.408, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.8784930614535755, | |
| "eval_sts-test_pearson_dot": 0.8587600228422643, | |
| "eval_sts-test_pearson_euclidean": 0.9078118367014806, | |
| "eval_sts-test_pearson_manhattan": 0.9070545805039607, | |
| "eval_sts-test_pearson_max": 0.9078118367014806, | |
| "eval_sts-test_spearman_cosine": 0.9042182109480572, | |
| "eval_sts-test_spearman_dot": 0.8647310851853892, | |
| "eval_sts-test_spearman_euclidean": 0.904759790713416, | |
| "eval_sts-test_spearman_manhattan": 0.9038005474254912, | |
| "eval_sts-test_spearman_max": 0.904759790713416, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 1.283757338551859, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.044068923127091e-06, | |
| "loss": 0.0, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 1.2857142857142858, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.923557617996125e-06, | |
| "loss": 0.0, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 1.2876712328767124, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.803360652483392e-06, | |
| "loss": 0.0, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 1.289628180039139, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.68349622245531e-06, | |
| "loss": 0.0, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 1.2915851272015655, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.563982473437834e-06, | |
| "loss": 0.0, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.2915851272015655, | |
| "eval_loss": 0.09335587918758392, | |
| "eval_runtime": 107.2891, | |
| "eval_samples_per_second": 28.446, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8784930614535755, | |
| "eval_sts-test_pearson_dot": 0.8587600228422643, | |
| "eval_sts-test_pearson_euclidean": 0.9078118367014806, | |
| "eval_sts-test_pearson_manhattan": 0.9070545805039607, | |
| "eval_sts-test_pearson_max": 0.9078118367014806, | |
| "eval_sts-test_spearman_cosine": 0.9042182109480572, | |
| "eval_sts-test_spearman_dot": 0.8647310851853892, | |
| "eval_sts-test_spearman_euclidean": 0.904759790713416, | |
| "eval_sts-test_spearman_manhattan": 0.9038005474254912, | |
| "eval_sts-test_spearman_max": 0.904759790713416, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.2935420743639923, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.444837497869574e-06, | |
| "loss": 0.0, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 1.2954990215264188, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.326079332362755e-06, | |
| "loss": 0.0, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 1.2974559686888454, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.207725954972908e-06, | |
| "loss": 0.0, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 1.299412915851272, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.089795282477199e-06, | |
| "loss": 0.0, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 1.3013698630136987, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.972305167662144e-06, | |
| "loss": 0.0, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 1.3013698630136987, | |
| "eval_loss": 0.09335587918758392, | |
| "eval_runtime": 107.3407, | |
| "eval_samples_per_second": 28.433, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8784930614535755, | |
| "eval_sts-test_pearson_dot": 0.8587600228422643, | |
| "eval_sts-test_pearson_euclidean": 0.9078118367014806, | |
| "eval_sts-test_pearson_manhattan": 0.9070545805039607, | |
| "eval_sts-test_pearson_max": 0.9078118367014806, | |
| "eval_sts-test_spearman_cosine": 0.9042182109480572, | |
| "eval_sts-test_spearman_dot": 0.8647310851853892, | |
| "eval_sts-test_spearman_euclidean": 0.904759790713416, | |
| "eval_sts-test_spearman_manhattan": 0.9038005474254912, | |
| "eval_sts-test_spearman_max": 0.904759790713416, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 1.3033268101761253, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.855273396620999e-06, | |
| "loss": 0.0, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 1.3052837573385518, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.738717686061211e-06, | |
| "loss": 0.0, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 1.3072407045009784, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.6226556806224204e-06, | |
| "loss": 0.0, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 1.3091976516634052, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.507104950205332e-06, | |
| "loss": 0.0, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 1.3111545988258317, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.392082987311944e-06, | |
| "loss": 0.0, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.3111545988258317, | |
| "eval_loss": 0.09335587918758392, | |
| "eval_runtime": 107.3241, | |
| "eval_samples_per_second": 28.437, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8784930614535755, | |
| "eval_sts-test_pearson_dot": 0.8587600228422643, | |
| "eval_sts-test_pearson_euclidean": 0.9078118367014806, | |
| "eval_sts-test_pearson_manhattan": 0.9070545805039607, | |
| "eval_sts-test_pearson_max": 0.9078118367014806, | |
| "eval_sts-test_spearman_cosine": 0.9042182109480572, | |
| "eval_sts-test_spearman_dot": 0.8647310851853892, | |
| "eval_sts-test_spearman_euclidean": 0.904759790713416, | |
| "eval_sts-test_spearman_manhattan": 0.9038005474254912, | |
| "eval_sts-test_spearman_max": 0.904759790713416, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.3131115459882583, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.277607204397403e-06, | |
| "loss": 0.0, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 1.3150684931506849, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.163694931234153e-06, | |
| "loss": 0.0, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 1.3170254403131116, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.050363412288353e-06, | |
| "loss": 0.0, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 1.3189823874755382, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.937629804109468e-06, | |
| "loss": 0.0, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 1.3209393346379648, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.825511172732976e-06, | |
| "loss": 0.0, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 1.3209393346379648, | |
| "eval_loss": 0.09335587918758392, | |
| "eval_runtime": 107.2666, | |
| "eval_samples_per_second": 28.452, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8784930614535755, | |
| "eval_sts-test_pearson_dot": 0.8587600228422643, | |
| "eval_sts-test_pearson_euclidean": 0.9078118367014806, | |
| "eval_sts-test_pearson_manhattan": 0.9070545805039607, | |
| "eval_sts-test_pearson_max": 0.9078118367014806, | |
| "eval_sts-test_spearman_cosine": 0.9042182109480572, | |
| "eval_sts-test_spearman_dot": 0.8647310851853892, | |
| "eval_sts-test_spearman_euclidean": 0.904759790713416, | |
| "eval_sts-test_spearman_manhattan": 0.9038005474254912, | |
| "eval_sts-test_spearman_max": 0.904759790713416, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 1.3228962818003913, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.714024491096841e-06, | |
| "loss": 0.0, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 1.324853228962818, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.603186636472156e-06, | |
| "loss": 0.0, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 1.3268101761252447, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.4930143879081146e-06, | |
| "loss": 0.0, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 1.3287671232876712, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.383524423691999e-06, | |
| "loss": 0.0, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 1.3307240704500978, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.274733318824313e-06, | |
| "loss": 0.0, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.3307240704500978, | |
| "eval_loss": 0.09335587918758392, | |
| "eval_runtime": 107.2724, | |
| "eval_samples_per_second": 28.451, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8784930614535755, | |
| "eval_sts-test_pearson_dot": 0.8587600228422643, | |
| "eval_sts-test_pearson_euclidean": 0.9078118367014806, | |
| "eval_sts-test_pearson_manhattan": 0.9070545805039607, | |
| "eval_sts-test_pearson_max": 0.9078118367014806, | |
| "eval_sts-test_spearman_cosine": 0.9042182109480572, | |
| "eval_sts-test_spearman_dot": 0.8647310851853892, | |
| "eval_sts-test_spearman_euclidean": 0.904759790713416, | |
| "eval_sts-test_spearman_manhattan": 0.9038005474254912, | |
| "eval_sts-test_spearman_max": 0.904759790713416, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.3326810176125246, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.166657542509643e-06, | |
| "loss": 0.0, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 1.3346379647749511, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.059313455663425e-06, | |
| "loss": 0.0, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 1.3365949119373777, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.952717308435254e-06, | |
| "loss": 0.0, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 1.3385518590998042, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.846885237748786e-06, | |
| "loss": 0.0, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 1.340508806262231, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.7418332648589046e-06, | |
| "loss": 0.0, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 1.340508806262231, | |
| "eval_loss": 0.09335587918758392, | |
| "eval_runtime": 107.4095, | |
| "eval_samples_per_second": 28.415, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.8784930614535755, | |
| "eval_sts-test_pearson_dot": 0.8587600228422643, | |
| "eval_sts-test_pearson_euclidean": 0.9078118367014806, | |
| "eval_sts-test_pearson_manhattan": 0.9070545805039607, | |
| "eval_sts-test_pearson_max": 0.9078118367014806, | |
| "eval_sts-test_spearman_cosine": 0.9042182109480572, | |
| "eval_sts-test_spearman_dot": 0.8647310851853892, | |
| "eval_sts-test_spearman_euclidean": 0.904759790713416, | |
| "eval_sts-test_spearman_manhattan": 0.9038005474254912, | |
| "eval_sts-test_spearman_max": 0.904759790713416, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 1.3424657534246576, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.637577292926432e-06, | |
| "loss": 0.0, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 1.3444227005870841, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.534133104610507e-06, | |
| "loss": 0.0, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 1.3463796477495107, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.43151635967948e-06, | |
| "loss": 0.0, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 1.3483365949119375, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.329742592640212e-06, | |
| "loss": 0.0, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 1.350293542074364, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.228827210386404e-06, | |
| "loss": 0.0, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.350293542074364, | |
| "eval_loss": 0.09335587918758392, | |
| "eval_runtime": 107.3373, | |
| "eval_samples_per_second": 28.434, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8784930614535755, | |
| "eval_sts-test_pearson_dot": 0.8587600228422643, | |
| "eval_sts-test_pearson_euclidean": 0.9078118367014806, | |
| "eval_sts-test_pearson_manhattan": 0.9070545805039607, | |
| "eval_sts-test_pearson_max": 0.9078118367014806, | |
| "eval_sts-test_spearman_cosine": 0.9042182109480572, | |
| "eval_sts-test_spearman_dot": 0.8647310851853892, | |
| "eval_sts-test_spearman_euclidean": 0.904759790713416, | |
| "eval_sts-test_spearman_manhattan": 0.9038005474254912, | |
| "eval_sts-test_spearman_max": 0.904759790713416, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.3522504892367906, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.128785489866266e-06, | |
| "loss": 0.0, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 1.3542074363992171, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.029632575769844e-06, | |
| "loss": 0.0, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 1.356164383561644, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.931383478236297e-06, | |
| "loss": 0.0, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 1.3581213307240705, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.834053070581682e-06, | |
| "loss": 0.0, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 1.360078277886497, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.73765608704735e-06, | |
| "loss": 0.0, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 1.360078277886497, | |
| "eval_loss": 0.09335587918758392, | |
| "eval_runtime": 107.6379, | |
| "eval_samples_per_second": 28.354, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.8784930614535755, | |
| "eval_sts-test_pearson_dot": 0.8587600228422643, | |
| "eval_sts-test_pearson_euclidean": 0.9078118367014806, | |
| "eval_sts-test_pearson_manhattan": 0.9070545805039607, | |
| "eval_sts-test_pearson_max": 0.9078118367014806, | |
| "eval_sts-test_spearman_cosine": 0.9042182109480572, | |
| "eval_sts-test_spearman_dot": 0.8647310851853892, | |
| "eval_sts-test_spearman_euclidean": 0.904759790713416, | |
| "eval_sts-test_spearman_manhattan": 0.9038005474254912, | |
| "eval_sts-test_spearman_max": 0.904759790713416, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 1.3620352250489236, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.642207120569383e-06, | |
| "loss": 0.0, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 1.3639921722113504, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.5477206205695392e-06, | |
| "loss": 0.0, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 1.365949119373777, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.4542108907677517e-06, | |
| "loss": 0.0, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 1.3679060665362035, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.3616920870168633e-06, | |
| "loss": 0.0, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 1.36986301369863, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.270178215159607e-06, | |
| "loss": 0.0, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.36986301369863, | |
| "eval_loss": 0.09335587918758392, | |
| "eval_runtime": 107.3353, | |
| "eval_samples_per_second": 28.434, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8784930614535755, | |
| "eval_sts-test_pearson_dot": 0.8587600228422643, | |
| "eval_sts-test_pearson_euclidean": 0.9078118367014806, | |
| "eval_sts-test_pearson_manhattan": 0.9070545805039607, | |
| "eval_sts-test_pearson_max": 0.9078118367014806, | |
| "eval_sts-test_spearman_cosine": 0.9042182109480572, | |
| "eval_sts-test_spearman_dot": 0.8647310851853892, | |
| "eval_sts-test_spearman_euclidean": 0.904759790713416, | |
| "eval_sts-test_spearman_manhattan": 0.9038005474254912, | |
| "eval_sts-test_spearman_max": 0.904759790713416, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.3718199608610568, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.179683128908352e-06, | |
| "loss": 0.0, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 1.3737769080234834, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.090220527747916e-06, | |
| "loss": 0.0, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 1.37573385518591, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.0018039548616497e-06, | |
| "loss": 0.0, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 1.3776908023483365, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.9144467950812237e-06, | |
| "loss": 0.0, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 1.3796477495107633, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.8281622728603862e-06, | |
| "loss": 0.0, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 1.3796477495107633, | |
| "eval_loss": 0.09335587918758392, | |
| "eval_runtime": 107.41, | |
| "eval_samples_per_second": 28.414, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.8784930614535755, | |
| "eval_sts-test_pearson_dot": 0.8587600228422643, | |
| "eval_sts-test_pearson_euclidean": 0.9078118367014806, | |
| "eval_sts-test_pearson_manhattan": 0.9070545805039607, | |
| "eval_sts-test_pearson_max": 0.9078118367014806, | |
| "eval_sts-test_spearman_cosine": 0.9042182109480572, | |
| "eval_sts-test_spearman_dot": 0.8647310851853892, | |
| "eval_sts-test_spearman_euclidean": 0.904759790713416, | |
| "eval_sts-test_spearman_manhattan": 0.9038005474254912, | |
| "eval_sts-test_spearman_max": 0.904759790713416, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 1.3816046966731899, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.7429634502730186e-06, | |
| "loss": 0.0, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 1.3835616438356164, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.6588632250356893e-06, | |
| "loss": 0.0, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 1.385518590998043, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.5758743285552435e-06, | |
| "loss": 0.0, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 1.3874755381604698, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.4940093240013774e-06, | |
| "loss": 0.0, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 1.3894324853228963, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.413280604404814e-06, | |
| "loss": 0.0, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.3894324853228963, | |
| "eval_loss": 0.09335587918758392, | |
| "eval_runtime": 107.3333, | |
| "eval_samples_per_second": 28.435, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8784930614535755, | |
| "eval_sts-test_pearson_dot": 0.8587600228422643, | |
| "eval_sts-test_pearson_euclidean": 0.9078118367014806, | |
| "eval_sts-test_pearson_manhattan": 0.9070545805039607, | |
| "eval_sts-test_pearson_max": 0.9078118367014806, | |
| "eval_sts-test_spearman_cosine": 0.9042182109480572, | |
| "eval_sts-test_spearman_dot": 0.8647310851853892, | |
| "eval_sts-test_spearman_euclidean": 0.904759790713416, | |
| "eval_sts-test_spearman_manhattan": 0.9038005474254912, | |
| "eval_sts-test_spearman_max": 0.904759790713416, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.3913894324853229, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.3337003907812593e-06, | |
| "loss": 0.0, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 1.3933463796477494, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.255280730281233e-06, | |
| "loss": 0.0, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 1.3953033268101762, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.178033494366416e-06, | |
| "loss": 0.0, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 1.3972602739726028, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.1019703770124454e-06, | |
| "loss": 0.0, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 1.3992172211350293, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.027102892938674e-06, | |
| "loss": 0.0, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 1.3992172211350293, | |
| "eval_loss": 0.09335587918758392, | |
| "eval_runtime": 107.3332, | |
| "eval_samples_per_second": 28.435, | |
| "eval_steps_per_second": 0.224, | |
| "eval_sts-test_pearson_cosine": 0.8784930614535755, | |
| "eval_sts-test_pearson_dot": 0.8587600228422643, | |
| "eval_sts-test_pearson_euclidean": 0.9078118367014806, | |
| "eval_sts-test_pearson_manhattan": 0.9070545805039607, | |
| "eval_sts-test_pearson_max": 0.9078118367014806, | |
| "eval_sts-test_spearman_cosine": 0.9042182109480572, | |
| "eval_sts-test_spearman_dot": 0.8647310851853892, | |
| "eval_sts-test_spearman_euclidean": 0.904759790713416, | |
| "eval_sts-test_spearman_manhattan": 0.9038005474254912, | |
| "eval_sts-test_spearman_max": 0.904759790713416, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 1.401174168297456, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9534423758650014e-06, | |
| "loss": 0.0, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 1.4031311154598827, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.8809999767961663e-06, | |
| "loss": 0.0, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 1.4050880626223092, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.8097866623336214e-06, | |
| "loss": 0.0, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 1.4070450097847358, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.739813213015401e-06, | |
| "loss": 0.0, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 1.4090019569471623, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.6710902216841296e-06, | |
| "loss": 0.0, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.4090019569471623, | |
| "eval_loss": 0.09335587918758392, | |
| "eval_runtime": 107.4354, | |
| "eval_samples_per_second": 28.408, | |
| "eval_steps_per_second": 0.223, | |
| "eval_sts-test_pearson_cosine": 0.8784930614535755, | |
| "eval_sts-test_pearson_dot": 0.8587600228422643, | |
| "eval_sts-test_pearson_euclidean": 0.9078118367014806, | |
| "eval_sts-test_pearson_manhattan": 0.9070545805039607, | |
| "eval_sts-test_pearson_max": 0.9078118367014806, | |
| "eval_sts-test_spearman_cosine": 0.9042182109480572, | |
| "eval_sts-test_spearman_dot": 0.8647310851853892, | |
| "eval_sts-test_spearman_euclidean": 0.904759790713416, | |
| "eval_sts-test_spearman_manhattan": 0.9038005474254912, | |
| "eval_sts-test_spearman_max": 0.904759790713416, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.4109589041095891, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.6036280918833903e-06, | |
| "loss": 0.0, | |
| "step": 721 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 1022, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 103, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 320, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |