{ "best_metric": 0.49757373332977295, "best_model_checkpoint": "../../experiments_checkpoints/MAdAiLab/google_t5/t5_small_twitter/checkpoint-200", "epoch": 3.0, "eval_steps": 50, "global_step": 816, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "grad_norm": 0.9986526370048523, "learning_rate": 0.0004938725490196079, "loss": 0.6031, "step": 10 }, { "epoch": 0.07, "grad_norm": 0.9707651734352112, "learning_rate": 0.0004877450980392157, "loss": 0.5921, "step": 20 }, { "epoch": 0.11, "grad_norm": 1.3216536045074463, "learning_rate": 0.00048161764705882356, "loss": 0.5945, "step": 30 }, { "epoch": 0.15, "grad_norm": 0.8769760131835938, "learning_rate": 0.00047549019607843134, "loss": 0.5244, "step": 40 }, { "epoch": 0.18, "grad_norm": 1.3491202592849731, "learning_rate": 0.0004693627450980392, "loss": 0.4779, "step": 50 }, { "epoch": 0.18, "eval_accuracy": 0.7564338235294118, "eval_f1_macro": 0.7177028105371891, "eval_f1_micro": 0.7564338235294118, "eval_loss": 0.5150883793830872, "eval_runtime": 0.9275, "eval_samples_per_second": 1173.066, "eval_steps_per_second": 36.658, "step": 50 }, { "epoch": 0.22, "grad_norm": 2.29738450050354, "learning_rate": 0.0004632352941176471, "loss": 0.4934, "step": 60 }, { "epoch": 0.26, "grad_norm": 3.7542002201080322, "learning_rate": 0.0004571078431372549, "loss": 0.5007, "step": 70 }, { "epoch": 0.29, "grad_norm": 1.893402099609375, "learning_rate": 0.0004509803921568628, "loss": 0.4738, "step": 80 }, { "epoch": 0.33, "grad_norm": 2.0344367027282715, "learning_rate": 0.00044485294117647056, "loss": 0.4594, "step": 90 }, { "epoch": 0.37, "grad_norm": 2.5279059410095215, "learning_rate": 0.00043872549019607844, "loss": 0.5123, "step": 100 }, { "epoch": 0.37, "eval_accuracy": 0.7527573529411765, "eval_f1_macro": 0.6986562303405088, "eval_f1_micro": 0.7527573529411765, "eval_loss": 0.5059758424758911, "eval_runtime": 0.8718, "eval_samples_per_second": 1248.006, "eval_steps_per_second": 39.0, "step": 100 }, { "epoch": 0.4, "grad_norm": 1.4078208208084106, "learning_rate": 0.0004325980392156863, "loss": 0.516, "step": 110 }, { "epoch": 0.44, "grad_norm": 2.224971055984497, "learning_rate": 0.0004264705882352941, "loss": 0.5001, "step": 120 }, { "epoch": 0.48, "grad_norm": 1.39370858669281, "learning_rate": 0.000420343137254902, "loss": 0.4835, "step": 130 }, { "epoch": 0.51, "grad_norm": 0.9512380361557007, "learning_rate": 0.0004142156862745098, "loss": 0.5218, "step": 140 }, { "epoch": 0.55, "grad_norm": 1.359387755393982, "learning_rate": 0.00040808823529411766, "loss": 0.4617, "step": 150 }, { "epoch": 0.55, "eval_accuracy": 0.7270220588235294, "eval_f1_macro": 0.614936097953347, "eval_f1_micro": 0.7270220588235294, "eval_loss": 0.5286622047424316, "eval_runtime": 0.9283, "eval_samples_per_second": 1172.017, "eval_steps_per_second": 36.626, "step": 150 }, { "epoch": 0.59, "grad_norm": 1.1093153953552246, "learning_rate": 0.0004019607843137255, "loss": 0.4961, "step": 160 }, { "epoch": 0.62, "grad_norm": 2.4549243450164795, "learning_rate": 0.0003958333333333333, "loss": 0.5351, "step": 170 }, { "epoch": 0.66, "grad_norm": 1.2358920574188232, "learning_rate": 0.0003897058823529412, "loss": 0.5129, "step": 180 }, { "epoch": 0.7, "grad_norm": 1.4391891956329346, "learning_rate": 0.00038357843137254904, "loss": 0.4945, "step": 190 }, { "epoch": 0.74, "grad_norm": 1.3604400157928467, "learning_rate": 0.0003774509803921569, "loss": 0.4942, "step": 200 }, { "epoch": 0.74, "eval_accuracy": 0.7444852941176471, "eval_f1_macro": 0.6956024668695554, "eval_f1_micro": 0.7444852941176471, "eval_loss": 0.49757373332977295, "eval_runtime": 0.9302, "eval_samples_per_second": 1169.596, "eval_steps_per_second": 36.55, "step": 200 }, { "epoch": 0.77, "grad_norm": 1.0586875677108765, "learning_rate": 0.0003713235294117647, "loss": 0.5041, "step": 210 }, { "epoch": 0.81, "grad_norm": 5.99719762802124, "learning_rate": 0.00036519607843137254, "loss": 0.5088, "step": 220 }, { "epoch": 0.85, "grad_norm": 1.8691691160202026, "learning_rate": 0.0003590686274509804, "loss": 0.4577, "step": 230 }, { "epoch": 0.88, "grad_norm": 0.8101117610931396, "learning_rate": 0.00035294117647058826, "loss": 0.4562, "step": 240 }, { "epoch": 0.92, "grad_norm": 1.3150193691253662, "learning_rate": 0.0003468137254901961, "loss": 0.4783, "step": 250 }, { "epoch": 0.92, "eval_accuracy": 0.7573529411764706, "eval_f1_macro": 0.7124355755253073, "eval_f1_micro": 0.7573529411764706, "eval_loss": 0.49782079458236694, "eval_runtime": 0.8783, "eval_samples_per_second": 1238.797, "eval_steps_per_second": 38.712, "step": 250 }, { "epoch": 0.96, "grad_norm": 1.0855648517608643, "learning_rate": 0.0003406862745098039, "loss": 0.4206, "step": 260 }, { "epoch": 0.99, "grad_norm": 1.4224942922592163, "learning_rate": 0.00033455882352941176, "loss": 0.532, "step": 270 }, { "epoch": 1.03, "grad_norm": 2.6312692165374756, "learning_rate": 0.0003284313725490196, "loss": 0.465, "step": 280 }, { "epoch": 1.07, "grad_norm": 1.7025282382965088, "learning_rate": 0.0003223039215686275, "loss": 0.4146, "step": 290 }, { "epoch": 1.1, "grad_norm": 1.1280652284622192, "learning_rate": 0.0003161764705882353, "loss": 0.4369, "step": 300 }, { "epoch": 1.1, "eval_accuracy": 0.7601102941176471, "eval_f1_macro": 0.7124360123342397, "eval_f1_micro": 0.7601102941176471, "eval_loss": 0.5052167773246765, "eval_runtime": 0.8783, "eval_samples_per_second": 1238.751, "eval_steps_per_second": 38.711, "step": 300 }, { "epoch": 1.14, "grad_norm": 1.285010814666748, "learning_rate": 0.00031004901960784314, "loss": 0.448, "step": 310 }, { "epoch": 1.18, "grad_norm": 0.8714994788169861, "learning_rate": 0.00030392156862745097, "loss": 0.4204, "step": 320 }, { "epoch": 1.21, "grad_norm": 1.7784018516540527, "learning_rate": 0.0002977941176470588, "loss": 0.4939, "step": 330 }, { "epoch": 1.25, "grad_norm": 1.4992843866348267, "learning_rate": 0.0002916666666666667, "loss": 0.4632, "step": 340 }, { "epoch": 1.29, "grad_norm": 1.7776957750320435, "learning_rate": 0.0002855392156862745, "loss": 0.439, "step": 350 }, { "epoch": 1.29, "eval_accuracy": 0.7564338235294118, "eval_f1_macro": 0.7223909884910498, "eval_f1_micro": 0.7564338235294118, "eval_loss": 0.5092098116874695, "eval_runtime": 0.9311, "eval_samples_per_second": 1168.532, "eval_steps_per_second": 36.517, "step": 350 }, { "epoch": 1.32, "grad_norm": 1.2894947528839111, "learning_rate": 0.00027941176470588236, "loss": 0.4339, "step": 360 }, { "epoch": 1.36, "grad_norm": 1.262589693069458, "learning_rate": 0.0002732843137254902, "loss": 0.3804, "step": 370 }, { "epoch": 1.4, "grad_norm": 2.4415295124053955, "learning_rate": 0.000267156862745098, "loss": 0.5278, "step": 380 }, { "epoch": 1.43, "grad_norm": 0.8975765109062195, "learning_rate": 0.0002610294117647059, "loss": 0.4198, "step": 390 }, { "epoch": 1.47, "grad_norm": 1.8955622911453247, "learning_rate": 0.00025490196078431374, "loss": 0.4417, "step": 400 }, { "epoch": 1.47, "eval_accuracy": 0.7545955882352942, "eval_f1_macro": 0.6808490301741896, "eval_f1_micro": 0.7545955882352942, "eval_loss": 0.5228371620178223, "eval_runtime": 0.8846, "eval_samples_per_second": 1229.999, "eval_steps_per_second": 38.437, "step": 400 }, { "epoch": 1.51, "grad_norm": 1.5438319444656372, "learning_rate": 0.00024877450980392157, "loss": 0.4897, "step": 410 }, { "epoch": 1.54, "grad_norm": 2.117488384246826, "learning_rate": 0.0002426470588235294, "loss": 0.4292, "step": 420 }, { "epoch": 1.58, "grad_norm": 1.4254924058914185, "learning_rate": 0.00023651960784313726, "loss": 0.4201, "step": 430 }, { "epoch": 1.62, "grad_norm": 1.8626124858856201, "learning_rate": 0.0002303921568627451, "loss": 0.4013, "step": 440 }, { "epoch": 1.65, "grad_norm": 1.8252973556518555, "learning_rate": 0.00022426470588235296, "loss": 0.47, "step": 450 }, { "epoch": 1.65, "eval_accuracy": 0.7693014705882353, "eval_f1_macro": 0.7234537896394413, "eval_f1_micro": 0.7693014705882353, "eval_loss": 0.5087092518806458, "eval_runtime": 0.9392, "eval_samples_per_second": 1158.396, "eval_steps_per_second": 36.2, "step": 450 }, { "epoch": 1.69, "grad_norm": 1.173530101776123, "learning_rate": 0.0002181372549019608, "loss": 0.4267, "step": 460 }, { "epoch": 1.73, "grad_norm": 1.2974258661270142, "learning_rate": 0.00021200980392156862, "loss": 0.429, "step": 470 }, { "epoch": 1.76, "grad_norm": 1.577986717224121, "learning_rate": 0.00020588235294117645, "loss": 0.3808, "step": 480 }, { "epoch": 1.8, "grad_norm": 1.0223047733306885, "learning_rate": 0.00019975490196078434, "loss": 0.4225, "step": 490 }, { "epoch": 1.84, "grad_norm": 1.9677762985229492, "learning_rate": 0.00019362745098039217, "loss": 0.4415, "step": 500 }, { "epoch": 1.84, "eval_accuracy": 0.7647058823529411, "eval_f1_macro": 0.7262032085561497, "eval_f1_micro": 0.7647058823529411, "eval_loss": 0.510553240776062, "eval_runtime": 0.9392, "eval_samples_per_second": 1158.407, "eval_steps_per_second": 36.2, "step": 500 }, { "epoch": 1.88, "grad_norm": 1.1366698741912842, "learning_rate": 0.0001875, "loss": 0.4622, "step": 510 }, { "epoch": 1.91, "grad_norm": 1.5312350988388062, "learning_rate": 0.00018137254901960784, "loss": 0.4507, "step": 520 }, { "epoch": 1.95, "grad_norm": 1.2124278545379639, "learning_rate": 0.00017524509803921567, "loss": 0.4495, "step": 530 }, { "epoch": 1.99, "grad_norm": 1.3845272064208984, "learning_rate": 0.00016911764705882356, "loss": 0.4161, "step": 540 }, { "epoch": 2.02, "grad_norm": 1.2308210134506226, "learning_rate": 0.0001629901960784314, "loss": 0.4297, "step": 550 }, { "epoch": 2.02, "eval_accuracy": 0.7628676470588235, "eval_f1_macro": 0.7291254940711462, "eval_f1_micro": 0.7628676470588235, "eval_loss": 0.5022936463356018, "eval_runtime": 0.9417, "eval_samples_per_second": 1155.3, "eval_steps_per_second": 36.103, "step": 550 }, { "epoch": 2.06, "grad_norm": 1.803513765335083, "learning_rate": 0.00015686274509803922, "loss": 0.4231, "step": 560 }, { "epoch": 2.1, "grad_norm": 1.3074411153793335, "learning_rate": 0.00015073529411764705, "loss": 0.3723, "step": 570 }, { "epoch": 2.13, "grad_norm": 1.8602393865585327, "learning_rate": 0.0001446078431372549, "loss": 0.4003, "step": 580 }, { "epoch": 2.17, "grad_norm": 1.486925721168518, "learning_rate": 0.00013848039215686274, "loss": 0.3796, "step": 590 }, { "epoch": 2.21, "grad_norm": 1.5388500690460205, "learning_rate": 0.0001323529411764706, "loss": 0.4366, "step": 600 }, { "epoch": 2.21, "eval_accuracy": 0.7555147058823529, "eval_f1_macro": 0.7127019171206659, "eval_f1_micro": 0.7555147058823529, "eval_loss": 0.5225424766540527, "eval_runtime": 0.9394, "eval_samples_per_second": 1158.157, "eval_steps_per_second": 36.192, "step": 600 }, { "epoch": 2.24, "grad_norm": 1.38102388381958, "learning_rate": 0.00012622549019607844, "loss": 0.4028, "step": 610 }, { "epoch": 2.28, "grad_norm": 1.0879701375961304, "learning_rate": 0.00012009803921568628, "loss": 0.3619, "step": 620 }, { "epoch": 2.32, "grad_norm": 2.56364107131958, "learning_rate": 0.00011397058823529411, "loss": 0.3883, "step": 630 }, { "epoch": 2.35, "grad_norm": 1.285326600074768, "learning_rate": 0.00010784313725490197, "loss": 0.4058, "step": 640 }, { "epoch": 2.39, "grad_norm": 1.1956150531768799, "learning_rate": 0.0001017156862745098, "loss": 0.3623, "step": 650 }, { "epoch": 2.39, "eval_accuracy": 0.7582720588235294, "eval_f1_macro": 0.7157047718685389, "eval_f1_micro": 0.7582720588235294, "eval_loss": 0.522619366645813, "eval_runtime": 0.9391, "eval_samples_per_second": 1158.576, "eval_steps_per_second": 36.206, "step": 650 }, { "epoch": 2.43, "grad_norm": 1.5235141515731812, "learning_rate": 9.558823529411764e-05, "loss": 0.4147, "step": 660 }, { "epoch": 2.46, "grad_norm": 1.760062575340271, "learning_rate": 8.94607843137255e-05, "loss": 0.4255, "step": 670 }, { "epoch": 2.5, "grad_norm": 1.636400580406189, "learning_rate": 8.333333333333333e-05, "loss": 0.3687, "step": 680 }, { "epoch": 2.54, "grad_norm": 1.4576902389526367, "learning_rate": 7.720588235294118e-05, "loss": 0.3764, "step": 690 }, { "epoch": 2.57, "grad_norm": 1.5375081300735474, "learning_rate": 7.107843137254902e-05, "loss": 0.3337, "step": 700 }, { "epoch": 2.57, "eval_accuracy": 0.7573529411764706, "eval_f1_macro": 0.7143845832587554, "eval_f1_micro": 0.7573529411764706, "eval_loss": 0.5312902927398682, "eval_runtime": 0.9369, "eval_samples_per_second": 1161.294, "eval_steps_per_second": 36.29, "step": 700 }, { "epoch": 2.61, "grad_norm": 1.7449744939804077, "learning_rate": 6.495098039215687e-05, "loss": 0.3967, "step": 710 }, { "epoch": 2.65, "grad_norm": 1.1091108322143555, "learning_rate": 5.882352941176471e-05, "loss": 0.3403, "step": 720 }, { "epoch": 2.68, "grad_norm": 1.650840163230896, "learning_rate": 5.2696078431372553e-05, "loss": 0.3709, "step": 730 }, { "epoch": 2.72, "grad_norm": 1.3335641622543335, "learning_rate": 4.656862745098039e-05, "loss": 0.3852, "step": 740 }, { "epoch": 2.76, "grad_norm": 1.7249501943588257, "learning_rate": 4.044117647058824e-05, "loss": 0.4158, "step": 750 }, { "epoch": 2.76, "eval_accuracy": 0.7601102941176471, "eval_f1_macro": 0.7210790741531978, "eval_f1_micro": 0.7601102941176471, "eval_loss": 0.5384809374809265, "eval_runtime": 0.9393, "eval_samples_per_second": 1158.291, "eval_steps_per_second": 36.197, "step": 750 }, { "epoch": 2.79, "grad_norm": 1.5633273124694824, "learning_rate": 3.4313725490196084e-05, "loss": 0.3583, "step": 760 }, { "epoch": 2.83, "grad_norm": 1.3377939462661743, "learning_rate": 2.8186274509803924e-05, "loss": 0.3841, "step": 770 }, { "epoch": 2.87, "grad_norm": 1.5654672384262085, "learning_rate": 2.2058823529411766e-05, "loss": 0.3559, "step": 780 }, { "epoch": 2.9, "grad_norm": 1.3034334182739258, "learning_rate": 1.593137254901961e-05, "loss": 0.3877, "step": 790 }, { "epoch": 2.94, "grad_norm": 1.5207332372665405, "learning_rate": 9.803921568627451e-06, "loss": 0.4003, "step": 800 }, { "epoch": 2.94, "eval_accuracy": 0.7545955882352942, "eval_f1_macro": 0.7058253459511188, "eval_f1_micro": 0.7545955882352942, "eval_loss": 0.5349971055984497, "eval_runtime": 0.9377, "eval_samples_per_second": 1160.255, "eval_steps_per_second": 36.258, "step": 800 }, { "epoch": 2.98, "grad_norm": 2.046905279159546, "learning_rate": 3.6764705882352942e-06, "loss": 0.3939, "step": 810 }, { "epoch": 3.0, "step": 816, "total_flos": 888799123472384.0, "train_loss": 0.44335933526357013, "train_runtime": 92.5, "train_samples_per_second": 282.162, "train_steps_per_second": 8.822 } ], "logging_steps": 10, "max_steps": 816, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 50, "total_flos": 888799123472384.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }