{ "best_metric": 0.49125435948371887, "best_model_checkpoint": "../../experiments_checkpoints/MAdAiLab/google_t5/t5_base_twitter/checkpoint-250", "epoch": 3.0, "eval_steps": 50, "global_step": 816, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "grad_norm": 1.5012608766555786, "learning_rate": 0.0004938725490196079, "loss": 0.6085, "step": 10 }, { "epoch": 0.07, "grad_norm": 1.0885683298110962, "learning_rate": 0.0004877450980392157, "loss": 0.5402, "step": 20 }, { "epoch": 0.11, "grad_norm": 3.7890007495880127, "learning_rate": 0.00048161764705882356, "loss": 0.5591, "step": 30 }, { "epoch": 0.15, "grad_norm": 1.2080425024032593, "learning_rate": 0.00047549019607843134, "loss": 0.5687, "step": 40 }, { "epoch": 0.18, "grad_norm": 0.9302119612693787, "learning_rate": 0.0004693627450980392, "loss": 0.4808, "step": 50 }, { "epoch": 0.18, "eval_accuracy": 0.7444852941176471, "eval_f1_macro": 0.6739663773514942, "eval_f1_micro": 0.7444852941176471, "eval_loss": 0.5169704556465149, "eval_runtime": 2.6255, "eval_samples_per_second": 414.395, "eval_steps_per_second": 12.95, "step": 50 }, { "epoch": 0.22, "grad_norm": 2.9698123931884766, "learning_rate": 0.0004632352941176471, "loss": 0.4972, "step": 60 }, { "epoch": 0.26, "grad_norm": 1.0578800439834595, "learning_rate": 0.0004571078431372549, "loss": 0.4967, "step": 70 }, { "epoch": 0.29, "grad_norm": 2.512136697769165, "learning_rate": 0.0004509803921568628, "loss": 0.4987, "step": 80 }, { "epoch": 0.33, "grad_norm": 1.629062533378601, "learning_rate": 0.00044485294117647056, "loss": 0.4347, "step": 90 }, { "epoch": 0.37, "grad_norm": 1.581597089767456, "learning_rate": 0.00043872549019607844, "loss": 0.5169, "step": 100 }, { "epoch": 0.37, "eval_accuracy": 0.7555147058823529, "eval_f1_macro": 0.7269323671497585, "eval_f1_micro": 0.7555147058823529, "eval_loss": 0.5100224018096924, "eval_runtime": 2.6367, "eval_samples_per_second": 412.631, "eval_steps_per_second": 12.895, "step": 100 }, { "epoch": 0.4, "grad_norm": 1.149016261100769, "learning_rate": 0.0004325980392156863, "loss": 0.4898, "step": 110 }, { "epoch": 0.44, "grad_norm": 1.1472299098968506, "learning_rate": 0.0004264705882352941, "loss": 0.497, "step": 120 }, { "epoch": 0.48, "grad_norm": 1.7374740839004517, "learning_rate": 0.000420343137254902, "loss": 0.4811, "step": 130 }, { "epoch": 0.51, "grad_norm": 1.1296411752700806, "learning_rate": 0.0004142156862745098, "loss": 0.5066, "step": 140 }, { "epoch": 0.55, "grad_norm": 0.9138337969779968, "learning_rate": 0.00040808823529411766, "loss": 0.4548, "step": 150 }, { "epoch": 0.55, "eval_accuracy": 0.7647058823529411, "eval_f1_macro": 0.7017083911650301, "eval_f1_micro": 0.7647058823529411, "eval_loss": 0.49222531914711, "eval_runtime": 2.6971, "eval_samples_per_second": 403.396, "eval_steps_per_second": 12.606, "step": 150 }, { "epoch": 0.59, "grad_norm": 1.1084121465682983, "learning_rate": 0.0004019607843137255, "loss": 0.4683, "step": 160 }, { "epoch": 0.62, "grad_norm": 2.0122885704040527, "learning_rate": 0.0003958333333333333, "loss": 0.5217, "step": 170 }, { "epoch": 0.66, "grad_norm": 2.2070980072021484, "learning_rate": 0.0003897058823529412, "loss": 0.524, "step": 180 }, { "epoch": 0.7, "grad_norm": 1.27259361743927, "learning_rate": 0.00038357843137254904, "loss": 0.4955, "step": 190 }, { "epoch": 0.74, "grad_norm": 0.8282164931297302, "learning_rate": 0.0003774509803921569, "loss": 0.498, "step": 200 }, { "epoch": 0.74, "eval_accuracy": 0.7518382352941176, "eval_f1_macro": 0.6776331672629108, "eval_f1_micro": 0.7518382352941176, "eval_loss": 0.5056816935539246, "eval_runtime": 2.6992, "eval_samples_per_second": 403.083, "eval_steps_per_second": 12.596, "step": 200 }, { "epoch": 0.77, "grad_norm": 1.8028568029403687, "learning_rate": 0.0003713235294117647, "loss": 0.4926, "step": 210 }, { "epoch": 0.81, "grad_norm": 2.6962406635284424, "learning_rate": 0.00036519607843137254, "loss": 0.5134, "step": 220 }, { "epoch": 0.85, "grad_norm": 0.7592838406562805, "learning_rate": 0.0003590686274509804, "loss": 0.469, "step": 230 }, { "epoch": 0.88, "grad_norm": 0.637353777885437, "learning_rate": 0.00035294117647058826, "loss": 0.4381, "step": 240 }, { "epoch": 0.92, "grad_norm": 0.9144633412361145, "learning_rate": 0.0003468137254901961, "loss": 0.4844, "step": 250 }, { "epoch": 0.92, "eval_accuracy": 0.765625, "eval_f1_macro": 0.7266124240384777, "eval_f1_micro": 0.765625, "eval_loss": 0.49125435948371887, "eval_runtime": 2.6614, "eval_samples_per_second": 408.809, "eval_steps_per_second": 12.775, "step": 250 }, { "epoch": 0.96, "grad_norm": 0.6250616312026978, "learning_rate": 0.0003406862745098039, "loss": 0.424, "step": 260 }, { "epoch": 0.99, "grad_norm": 0.6987557411193848, "learning_rate": 0.00033455882352941176, "loss": 0.5254, "step": 270 }, { "epoch": 1.03, "grad_norm": 1.9285285472869873, "learning_rate": 0.0003284313725490196, "loss": 0.4381, "step": 280 }, { "epoch": 1.07, "grad_norm": 1.1627569198608398, "learning_rate": 0.0003223039215686275, "loss": 0.3653, "step": 290 }, { "epoch": 1.1, "grad_norm": 0.7716933488845825, "learning_rate": 0.0003161764705882353, "loss": 0.3949, "step": 300 }, { "epoch": 1.1, "eval_accuracy": 0.7481617647058824, "eval_f1_macro": 0.6885478829212845, "eval_f1_micro": 0.7481617647058824, "eval_loss": 0.5400605201721191, "eval_runtime": 2.7023, "eval_samples_per_second": 402.615, "eval_steps_per_second": 12.582, "step": 300 }, { "epoch": 1.14, "grad_norm": 1.0258417129516602, "learning_rate": 0.00031004901960784314, "loss": 0.3679, "step": 310 }, { "epoch": 1.18, "grad_norm": 1.483994722366333, "learning_rate": 0.00030392156862745097, "loss": 0.3685, "step": 320 }, { "epoch": 1.21, "grad_norm": 0.6842905282974243, "learning_rate": 0.0002977941176470588, "loss": 0.4533, "step": 330 }, { "epoch": 1.25, "grad_norm": 1.1023057699203491, "learning_rate": 0.0002916666666666667, "loss": 0.3752, "step": 340 }, { "epoch": 1.29, "grad_norm": 1.9032913446426392, "learning_rate": 0.0002855392156862745, "loss": 0.4028, "step": 350 }, { "epoch": 1.29, "eval_accuracy": 0.7481617647058824, "eval_f1_macro": 0.7209065444359561, "eval_f1_micro": 0.7481617647058824, "eval_loss": 0.5463477373123169, "eval_runtime": 2.705, "eval_samples_per_second": 402.218, "eval_steps_per_second": 12.569, "step": 350 }, { "epoch": 1.32, "grad_norm": 1.553853988647461, "learning_rate": 0.00027941176470588236, "loss": 0.4068, "step": 360 }, { "epoch": 1.36, "grad_norm": 1.6380698680877686, "learning_rate": 0.0002732843137254902, "loss": 0.3824, "step": 370 }, { "epoch": 1.4, "grad_norm": 0.9034827947616577, "learning_rate": 0.000267156862745098, "loss": 0.4318, "step": 380 }, { "epoch": 1.43, "grad_norm": 1.1275397539138794, "learning_rate": 0.0002610294117647059, "loss": 0.3843, "step": 390 }, { "epoch": 1.47, "grad_norm": 1.784464716911316, "learning_rate": 0.00025490196078431374, "loss": 0.3778, "step": 400 }, { "epoch": 1.47, "eval_accuracy": 0.7555147058823529, "eval_f1_macro": 0.7087419287313013, "eval_f1_micro": 0.7555147058823529, "eval_loss": 0.5437767505645752, "eval_runtime": 2.7056, "eval_samples_per_second": 402.135, "eval_steps_per_second": 12.567, "step": 400 }, { "epoch": 1.51, "grad_norm": 1.8430761098861694, "learning_rate": 0.00024877450980392157, "loss": 0.4424, "step": 410 }, { "epoch": 1.54, "grad_norm": 2.5702669620513916, "learning_rate": 0.0002426470588235294, "loss": 0.3864, "step": 420 }, { "epoch": 1.58, "grad_norm": 1.207284927368164, "learning_rate": 0.00023651960784313726, "loss": 0.4018, "step": 430 }, { "epoch": 1.62, "grad_norm": 2.1779820919036865, "learning_rate": 0.0002303921568627451, "loss": 0.3279, "step": 440 }, { "epoch": 1.65, "grad_norm": 1.1213287115097046, "learning_rate": 0.00022426470588235296, "loss": 0.4383, "step": 450 }, { "epoch": 1.65, "eval_accuracy": 0.7380514705882353, "eval_f1_macro": 0.7095168638163571, "eval_f1_micro": 0.7380514705882353, "eval_loss": 0.5411675572395325, "eval_runtime": 2.7065, "eval_samples_per_second": 402.001, "eval_steps_per_second": 12.563, "step": 450 }, { "epoch": 1.69, "grad_norm": 1.428109049797058, "learning_rate": 0.0002181372549019608, "loss": 0.3877, "step": 460 }, { "epoch": 1.73, "grad_norm": 1.0778878927230835, "learning_rate": 0.00021200980392156862, "loss": 0.4038, "step": 470 }, { "epoch": 1.76, "grad_norm": 0.8310728073120117, "learning_rate": 0.00020588235294117645, "loss": 0.3548, "step": 480 }, { "epoch": 1.8, "grad_norm": 1.4109036922454834, "learning_rate": 0.00019975490196078434, "loss": 0.3681, "step": 490 }, { "epoch": 1.84, "grad_norm": 1.2154396772384644, "learning_rate": 0.00019362745098039217, "loss": 0.3984, "step": 500 }, { "epoch": 1.84, "eval_accuracy": 0.7555147058823529, "eval_f1_macro": 0.7239316239316239, "eval_f1_micro": 0.7555147058823529, "eval_loss": 0.529325008392334, "eval_runtime": 2.7052, "eval_samples_per_second": 402.188, "eval_steps_per_second": 12.568, "step": 500 }, { "epoch": 1.88, "grad_norm": 0.6760998964309692, "learning_rate": 0.0001875, "loss": 0.4037, "step": 510 }, { "epoch": 1.91, "grad_norm": 1.0472333431243896, "learning_rate": 0.00018137254901960784, "loss": 0.4015, "step": 520 }, { "epoch": 1.95, "grad_norm": 1.1614878177642822, "learning_rate": 0.00017524509803921567, "loss": 0.4191, "step": 530 }, { "epoch": 1.99, "grad_norm": 1.12571382522583, "learning_rate": 0.00016911764705882356, "loss": 0.3741, "step": 540 }, { "epoch": 2.02, "grad_norm": 0.934890866279602, "learning_rate": 0.0001629901960784314, "loss": 0.3122, "step": 550 }, { "epoch": 2.02, "eval_accuracy": 0.7564338235294118, "eval_f1_macro": 0.7211537996566648, "eval_f1_micro": 0.7564338235294118, "eval_loss": 0.5271954536437988, "eval_runtime": 2.6516, "eval_samples_per_second": 410.319, "eval_steps_per_second": 12.822, "step": 550 }, { "epoch": 2.06, "grad_norm": 1.299796462059021, "learning_rate": 0.00015686274509803922, "loss": 0.3406, "step": 560 }, { "epoch": 2.1, "grad_norm": 0.8316746950149536, "learning_rate": 0.00015073529411764705, "loss": 0.2448, "step": 570 }, { "epoch": 2.13, "grad_norm": 1.9803074598312378, "learning_rate": 0.0001446078431372549, "loss": 0.2767, "step": 580 }, { "epoch": 2.17, "grad_norm": 1.0188449621200562, "learning_rate": 0.00013848039215686274, "loss": 0.2699, "step": 590 }, { "epoch": 2.21, "grad_norm": 0.744954526424408, "learning_rate": 0.0001323529411764706, "loss": 0.2764, "step": 600 }, { "epoch": 2.21, "eval_accuracy": 0.7463235294117647, "eval_f1_macro": 0.704812834224599, "eval_f1_micro": 0.7463235294117647, "eval_loss": 0.5961496233940125, "eval_runtime": 2.6549, "eval_samples_per_second": 409.802, "eval_steps_per_second": 12.806, "step": 600 }, { "epoch": 2.24, "grad_norm": 1.4756693840026855, "learning_rate": 0.00012622549019607844, "loss": 0.2649, "step": 610 }, { "epoch": 2.28, "grad_norm": 1.1534539461135864, "learning_rate": 0.00012009803921568628, "loss": 0.2314, "step": 620 }, { "epoch": 2.32, "grad_norm": 1.0833938121795654, "learning_rate": 0.00011397058823529411, "loss": 0.2612, "step": 630 }, { "epoch": 2.35, "grad_norm": 0.9921144843101501, "learning_rate": 0.00010784313725490197, "loss": 0.2903, "step": 640 }, { "epoch": 2.39, "grad_norm": 1.7594339847564697, "learning_rate": 0.0001017156862745098, "loss": 0.236, "step": 650 }, { "epoch": 2.39, "eval_accuracy": 0.7454044117647058, "eval_f1_macro": 0.6995610690257088, "eval_f1_micro": 0.7454044117647058, "eval_loss": 0.6630216240882874, "eval_runtime": 2.7035, "eval_samples_per_second": 402.447, "eval_steps_per_second": 12.576, "step": 650 }, { "epoch": 2.43, "grad_norm": 1.5883187055587769, "learning_rate": 9.558823529411764e-05, "loss": 0.3054, "step": 660 }, { "epoch": 2.46, "grad_norm": 1.9597811698913574, "learning_rate": 8.94607843137255e-05, "loss": 0.2728, "step": 670 }, { "epoch": 2.5, "grad_norm": 0.9544721245765686, "learning_rate": 8.333333333333333e-05, "loss": 0.269, "step": 680 }, { "epoch": 2.54, "grad_norm": 2.1644415855407715, "learning_rate": 7.720588235294118e-05, "loss": 0.2378, "step": 690 }, { "epoch": 2.57, "grad_norm": 1.3767701387405396, "learning_rate": 7.107843137254902e-05, "loss": 0.1996, "step": 700 }, { "epoch": 2.57, "eval_accuracy": 0.7481617647058824, "eval_f1_macro": 0.6967447916666667, "eval_f1_micro": 0.7481617647058824, "eval_loss": 0.7069829702377319, "eval_runtime": 2.6525, "eval_samples_per_second": 410.183, "eval_steps_per_second": 12.818, "step": 700 }, { "epoch": 2.61, "grad_norm": 2.532257556915283, "learning_rate": 6.495098039215687e-05, "loss": 0.2777, "step": 710 }, { "epoch": 2.65, "grad_norm": 1.5882879495620728, "learning_rate": 5.882352941176471e-05, "loss": 0.2157, "step": 720 }, { "epoch": 2.68, "grad_norm": 1.5534424781799316, "learning_rate": 5.2696078431372553e-05, "loss": 0.223, "step": 730 }, { "epoch": 2.72, "grad_norm": 1.6244162321090698, "learning_rate": 4.656862745098039e-05, "loss": 0.2647, "step": 740 }, { "epoch": 2.76, "grad_norm": 1.8495365381240845, "learning_rate": 4.044117647058824e-05, "loss": 0.2245, "step": 750 }, { "epoch": 2.76, "eval_accuracy": 0.7454044117647058, "eval_f1_macro": 0.7015650761743022, "eval_f1_micro": 0.7454044117647058, "eval_loss": 0.6734161972999573, "eval_runtime": 2.6532, "eval_samples_per_second": 410.075, "eval_steps_per_second": 12.815, "step": 750 }, { "epoch": 2.79, "grad_norm": 1.7572370767593384, "learning_rate": 3.4313725490196084e-05, "loss": 0.2363, "step": 760 }, { "epoch": 2.83, "grad_norm": 3.104624032974243, "learning_rate": 2.8186274509803924e-05, "loss": 0.2159, "step": 770 }, { "epoch": 2.87, "grad_norm": 2.204385757446289, "learning_rate": 2.2058823529411766e-05, "loss": 0.2228, "step": 780 }, { "epoch": 2.9, "grad_norm": 0.7112906575202942, "learning_rate": 1.593137254901961e-05, "loss": 0.3132, "step": 790 }, { "epoch": 2.94, "grad_norm": 1.3646917343139648, "learning_rate": 9.803921568627451e-06, "loss": 0.2903, "step": 800 }, { "epoch": 2.94, "eval_accuracy": 0.7454044117647058, "eval_f1_macro": 0.6953530052170152, "eval_f1_micro": 0.7454044117647058, "eval_loss": 0.6759808659553528, "eval_runtime": 2.704, "eval_samples_per_second": 402.361, "eval_steps_per_second": 12.574, "step": 800 }, { "epoch": 2.98, "grad_norm": 1.2018649578094482, "learning_rate": 3.6764705882352942e-06, "loss": 0.2706, "step": 810 }, { "epoch": 3.0, "step": 816, "total_flos": 3987151653961728.0, "train_loss": 0.3840603100902894, "train_runtime": 309.7161, "train_samples_per_second": 84.271, "train_steps_per_second": 2.635 } ], "logging_steps": 10, "max_steps": 816, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 50, "total_flos": 3987151653961728.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }