{
"best_metric": 0.4901912808418274, | |
"best_model_checkpoint": "../experiments_checkpoints/LoRA/Qwen/Qwen1.5_7B_LoRA_MAdAiLab/twitter_disaster/checkpoint-250", | |
"epoch": 3.0, | |
"eval_steps": 50, | |
"global_step": 816, | |
"is_hyper_param_search": false, | |
"is_local_process_zero": true, | |
"is_world_process_zero": true, | |
"log_history": [ | |
    {
      "epoch": 0.04,
      "grad_norm": 24.977563858032227,
      "learning_rate": 4.9387254901960786e-05,
      "loss": 1.7697,
      "step": 10
    },
    {
      "epoch": 0.07,
      "grad_norm": 142.99278259277344,
      "learning_rate": 4.877450980392157e-05,
      "loss": 1.666,
      "step": 20
    },
    {
      "epoch": 0.11,
      "grad_norm": 87.81151580810547,
      "learning_rate": 4.816176470588236e-05,
      "loss": 1.0546,
      "step": 30
    },
    {
      "epoch": 0.15,
      "grad_norm": 50.04526901245117,
      "learning_rate": 4.7549019607843135e-05,
      "loss": 0.813,
      "step": 40
    },
    {
      "epoch": 0.18,
      "grad_norm": 22.704946517944336,
      "learning_rate": 4.6936274509803925e-05,
      "loss": 0.8422,
      "step": 50
    },
    {
      "epoch": 0.18,
      "eval_accuracy": 0.7178308823529411,
      "eval_f1_macro": 0.6371659017461913,
      "eval_f1_micro": 0.7178308823529411,
      "eval_loss": 0.6453067660331726,
      "eval_runtime": 13.5072,
      "eval_samples_per_second": 80.55,
      "eval_steps_per_second": 2.517,
      "step": 50
    },
    {
      "epoch": 0.22,
      "grad_norm": 20.923370361328125,
      "learning_rate": 4.632352941176471e-05,
      "loss": 0.5966,
      "step": 60
    },
    {
      "epoch": 0.26,
      "grad_norm": 66.72351837158203,
      "learning_rate": 4.571078431372549e-05,
      "loss": 0.7947,
      "step": 70
    },
    {
      "epoch": 0.29,
      "grad_norm": 157.05340576171875,
      "learning_rate": 4.5098039215686275e-05,
      "loss": 0.8614,
      "step": 80
    },
    {
      "epoch": 0.33,
      "grad_norm": 68.06228637695312,
      "learning_rate": 4.448529411764706e-05,
      "loss": 0.6034,
      "step": 90
    },
    {
      "epoch": 0.37,
      "grad_norm": 15.797639846801758,
      "learning_rate": 4.387254901960784e-05,
      "loss": 0.6082,
      "step": 100
    },
    {
      "epoch": 0.37,
      "eval_accuracy": 0.7472426470588235,
      "eval_f1_macro": 0.7123361820896584,
      "eval_f1_micro": 0.7472426470588235,
      "eval_loss": 0.5489143133163452,
      "eval_runtime": 13.5318,
      "eval_samples_per_second": 80.403,
      "eval_steps_per_second": 2.513,
      "step": 100
    },
    {
      "epoch": 0.4,
      "grad_norm": 45.799476623535156,
      "learning_rate": 4.325980392156863e-05,
      "loss": 0.5511,
      "step": 110
    },
    {
      "epoch": 0.44,
      "grad_norm": 49.33269500732422,
      "learning_rate": 4.2647058823529415e-05,
      "loss": 0.4801,
      "step": 120
    },
    {
      "epoch": 0.48,
      "grad_norm": 36.33636474609375,
      "learning_rate": 4.20343137254902e-05,
      "loss": 0.4603,
      "step": 130
    },
    {
      "epoch": 0.51,
      "grad_norm": 82.08908081054688,
      "learning_rate": 4.142156862745099e-05,
      "loss": 0.5596,
      "step": 140
    },
    {
      "epoch": 0.55,
      "grad_norm": 13.81618595123291,
      "learning_rate": 4.0808823529411765e-05,
      "loss": 0.4305,
      "step": 150
    },
    {
      "epoch": 0.55,
      "eval_accuracy": 0.7251838235294118,
      "eval_f1_macro": 0.5776786815440837,
      "eval_f1_micro": 0.7251838235294118,
      "eval_loss": 0.5571720004081726,
      "eval_runtime": 13.5624,
      "eval_samples_per_second": 80.222,
      "eval_steps_per_second": 2.507,
      "step": 150
    },
    {
      "epoch": 0.59,
      "grad_norm": 57.01852035522461,
      "learning_rate": 4.0196078431372555e-05,
      "loss": 0.5056,
      "step": 160
    },
    {
      "epoch": 0.62,
      "grad_norm": 53.905147552490234,
      "learning_rate": 3.958333333333333e-05,
      "loss": 0.567,
      "step": 170
    },
    {
      "epoch": 0.66,
      "grad_norm": 47.106292724609375,
      "learning_rate": 3.897058823529412e-05,
      "loss": 0.5069,
      "step": 180
    },
    {
      "epoch": 0.7,
      "grad_norm": 8.995195388793945,
      "learning_rate": 3.8357843137254904e-05,
      "loss": 0.5449,
      "step": 190
    },
    {
      "epoch": 0.74,
      "grad_norm": 79.07156372070312,
      "learning_rate": 3.774509803921569e-05,
      "loss": 0.5021,
      "step": 200
    },
    {
      "epoch": 0.74,
      "eval_accuracy": 0.7720588235294118,
      "eval_f1_macro": 0.7436838605490643,
      "eval_f1_micro": 0.7720588235294118,
      "eval_loss": 0.49997127056121826,
      "eval_runtime": 13.6431,
      "eval_samples_per_second": 79.747,
      "eval_steps_per_second": 2.492,
      "step": 200
    },
    {
      "epoch": 0.77,
      "grad_norm": 15.758883476257324,
      "learning_rate": 3.713235294117647e-05,
      "loss": 0.5018,
      "step": 210
    },
    {
      "epoch": 0.81,
      "grad_norm": 183.47061157226562,
      "learning_rate": 3.6519607843137254e-05,
      "loss": 0.616,
      "step": 220
    },
    {
      "epoch": 0.85,
      "grad_norm": 48.367374420166016,
      "learning_rate": 3.5906862745098044e-05,
      "loss": 0.4927,
      "step": 230
    },
    {
      "epoch": 0.88,
      "grad_norm": 5.8350114822387695,
      "learning_rate": 3.529411764705883e-05,
      "loss": 0.4508,
      "step": 240
    },
    {
      "epoch": 0.92,
      "grad_norm": 15.554094314575195,
      "learning_rate": 3.468137254901961e-05,
      "loss": 0.4715,
      "step": 250
    },
    {
      "epoch": 0.92,
      "eval_accuracy": 0.7766544117647058,
      "eval_f1_macro": 0.7450627015924902,
      "eval_f1_micro": 0.7766544117647058,
      "eval_loss": 0.4901912808418274,
      "eval_runtime": 13.5595,
      "eval_samples_per_second": 80.239,
      "eval_steps_per_second": 2.507,
      "step": 250
    },
    {
      "epoch": 0.96,
      "grad_norm": 37.8280029296875,
      "learning_rate": 3.4068627450980394e-05,
      "loss": 0.4188,
      "step": 260
    },
    {
      "epoch": 0.99,
      "grad_norm": 20.624736785888672,
      "learning_rate": 3.345588235294118e-05,
      "loss": 0.5049,
      "step": 270
    },
    {
      "epoch": 1.03,
      "grad_norm": 60.454341888427734,
      "learning_rate": 3.284313725490196e-05,
      "loss": 0.4536,
      "step": 280
    },
    {
      "epoch": 1.07,
      "grad_norm": 53.48725509643555,
      "learning_rate": 3.223039215686275e-05,
      "loss": 0.4097,
      "step": 290
    },
    {
      "epoch": 1.1,
      "grad_norm": 18.42328453063965,
      "learning_rate": 3.161764705882353e-05,
      "loss": 0.3937,
      "step": 300
    },
    {
      "epoch": 1.1,
      "eval_accuracy": 0.7601102941176471,
      "eval_f1_macro": 0.7018342410563818,
      "eval_f1_micro": 0.7601102941176471,
      "eval_loss": 0.5194450616836548,
      "eval_runtime": 13.566,
      "eval_samples_per_second": 80.2,
      "eval_steps_per_second": 2.506,
      "step": 300
    },
    {
      "epoch": 1.14,
      "grad_norm": 42.46508026123047,
      "learning_rate": 3.100490196078432e-05,
      "loss": 0.4045,
      "step": 310
    },
    {
      "epoch": 1.18,
      "grad_norm": 65.97752380371094,
      "learning_rate": 3.0392156862745097e-05,
      "loss": 0.3829,
      "step": 320
    },
    {
      "epoch": 1.21,
      "grad_norm": 4.754347801208496,
      "learning_rate": 2.9779411764705883e-05,
      "loss": 0.4535,
      "step": 330
    },
    {
      "epoch": 1.25,
      "grad_norm": 86.20097351074219,
      "learning_rate": 2.916666666666667e-05,
      "loss": 0.4082,
      "step": 340
    },
    {
      "epoch": 1.29,
      "grad_norm": 17.790315628051758,
      "learning_rate": 2.855392156862745e-05,
      "loss": 0.4219,
      "step": 350
    },
    {
      "epoch": 1.29,
      "eval_accuracy": 0.7665441176470589,
      "eval_f1_macro": 0.7228498074454428,
      "eval_f1_micro": 0.7665441176470589,
      "eval_loss": 0.5227769017219543,
      "eval_runtime": 13.5702,
      "eval_samples_per_second": 80.176,
      "eval_steps_per_second": 2.505,
      "step": 350
    },
    {
      "epoch": 1.32,
      "grad_norm": 47.08971405029297,
      "learning_rate": 2.7941176470588236e-05,
      "loss": 0.4235,
      "step": 360
    },
    {
      "epoch": 1.36,
      "grad_norm": 13.4403715133667,
      "learning_rate": 2.732843137254902e-05,
      "loss": 0.3631,
      "step": 370
    },
    {
      "epoch": 1.4,
      "grad_norm": 50.05192184448242,
      "learning_rate": 2.6715686274509806e-05,
      "loss": 0.5085,
      "step": 380
    },
    {
      "epoch": 1.43,
      "grad_norm": 97.1346206665039,
      "learning_rate": 2.6102941176470593e-05,
      "loss": 0.4432,
      "step": 390
    },
    {
      "epoch": 1.47,
      "grad_norm": 52.068641662597656,
      "learning_rate": 2.5490196078431373e-05,
      "loss": 0.4315,
      "step": 400
    },
    {
      "epoch": 1.47,
      "eval_accuracy": 0.7555147058823529,
      "eval_f1_macro": 0.6900563751949143,
      "eval_f1_micro": 0.7555147058823529,
      "eval_loss": 0.5791015625,
      "eval_runtime": 13.6465,
      "eval_samples_per_second": 79.728,
      "eval_steps_per_second": 2.491,
      "step": 400
    },
    {
      "epoch": 1.51,
      "grad_norm": 4.731142997741699,
      "learning_rate": 2.487745098039216e-05,
      "loss": 0.4396,
      "step": 410
    },
    {
      "epoch": 1.54,
      "grad_norm": 22.810226440429688,
      "learning_rate": 2.4264705882352942e-05,
      "loss": 0.4104,
      "step": 420
    },
    {
      "epoch": 1.58,
      "grad_norm": 14.011224746704102,
      "learning_rate": 2.3651960784313726e-05,
      "loss": 0.3847,
      "step": 430
    },
    {
      "epoch": 1.62,
      "grad_norm": 27.048315048217773,
      "learning_rate": 2.303921568627451e-05,
      "loss": 0.3681,
      "step": 440
    },
    {
      "epoch": 1.65,
      "grad_norm": 6.750571250915527,
      "learning_rate": 2.2426470588235296e-05,
      "loss": 0.4134,
      "step": 450
    },
    {
      "epoch": 1.65,
      "eval_accuracy": 0.7389705882352942,
      "eval_f1_macro": 0.719594754017431,
      "eval_f1_micro": 0.7389705882352942,
      "eval_loss": 0.6182358860969543,
      "eval_runtime": 13.5558,
      "eval_samples_per_second": 80.261,
      "eval_steps_per_second": 2.508,
      "step": 450
    },
    {
      "epoch": 1.69,
      "grad_norm": 51.78306579589844,
      "learning_rate": 2.181372549019608e-05,
      "loss": 0.4691,
      "step": 460
    },
    {
      "epoch": 1.73,
      "grad_norm": 56.603797912597656,
      "learning_rate": 2.1200980392156862e-05,
      "loss": 0.4194,
      "step": 470
    },
    {
      "epoch": 1.76,
      "grad_norm": 24.38219451904297,
      "learning_rate": 2.058823529411765e-05,
      "loss": 0.3631,
      "step": 480
    },
    {
      "epoch": 1.8,
      "grad_norm": 44.108612060546875,
      "learning_rate": 1.9975490196078432e-05,
      "loss": 0.3859,
      "step": 490
    },
    {
      "epoch": 1.84,
      "grad_norm": 68.38048553466797,
      "learning_rate": 1.936274509803922e-05,
      "loss": 0.4173,
      "step": 500
    },
    {
      "epoch": 1.84,
      "eval_accuracy": 0.7637867647058824,
      "eval_f1_macro": 0.7115502256608639,
      "eval_f1_micro": 0.7637867647058824,
      "eval_loss": 0.5453814268112183,
      "eval_runtime": 13.5612,
      "eval_samples_per_second": 80.229,
      "eval_steps_per_second": 2.507,
      "step": 500
    },
    {
      "epoch": 1.88,
      "grad_norm": 18.69806480407715,
      "learning_rate": 1.8750000000000002e-05,
      "loss": 0.4323,
      "step": 510
    },
    {
      "epoch": 1.91,
      "grad_norm": 15.339709281921387,
      "learning_rate": 1.8137254901960785e-05,
      "loss": 0.4529,
      "step": 520
    },
    {
      "epoch": 1.95,
      "grad_norm": 85.60708618164062,
      "learning_rate": 1.7524509803921568e-05,
      "loss": 0.4462,
      "step": 530
    },
    {
      "epoch": 1.99,
      "grad_norm": 42.06242752075195,
      "learning_rate": 1.6911764705882355e-05,
      "loss": 0.3753,
      "step": 540
    },
    {
      "epoch": 2.02,
      "grad_norm": 43.271724700927734,
      "learning_rate": 1.6299019607843138e-05,
      "loss": 0.3278,
      "step": 550
    },
    {
      "epoch": 2.02,
      "eval_accuracy": 0.7720588235294118,
      "eval_f1_macro": 0.7219169329073483,
      "eval_f1_micro": 0.7720588235294118,
      "eval_loss": 0.5476648807525635,
      "eval_runtime": 13.5763,
      "eval_samples_per_second": 80.14,
      "eval_steps_per_second": 2.504,
      "step": 550
    },
    {
      "epoch": 2.06,
      "grad_norm": 63.24125289916992,
      "learning_rate": 1.568627450980392e-05,
      "loss": 0.3151,
      "step": 560
    },
    {
      "epoch": 2.1,
      "grad_norm": 18.521787643432617,
      "learning_rate": 1.5073529411764706e-05,
      "loss": 0.2909,
      "step": 570
    },
    {
      "epoch": 2.13,
      "grad_norm": 21.641969680786133,
      "learning_rate": 1.4460784313725493e-05,
      "loss": 0.2893,
      "step": 580
    },
    {
      "epoch": 2.17,
      "grad_norm": 26.12430763244629,
      "learning_rate": 1.3848039215686276e-05,
      "loss": 0.2642,
      "step": 590
    },
    {
      "epoch": 2.21,
      "grad_norm": 5.281397819519043,
      "learning_rate": 1.323529411764706e-05,
      "loss": 0.2641,
      "step": 600
    },
    {
      "epoch": 2.21,
      "eval_accuracy": 0.7527573529411765,
      "eval_f1_macro": 0.7152217678178568,
      "eval_f1_micro": 0.7527573529411765,
      "eval_loss": 0.6011173129081726,
      "eval_runtime": 13.6441,
      "eval_samples_per_second": 79.741,
      "eval_steps_per_second": 2.492,
      "step": 600
    },
    {
      "epoch": 2.24,
      "grad_norm": 32.14168167114258,
      "learning_rate": 1.2622549019607843e-05,
      "loss": 0.2578,
      "step": 610
    },
    {
      "epoch": 2.28,
      "grad_norm": 8.231173515319824,
      "learning_rate": 1.200980392156863e-05,
      "loss": 0.2012,
      "step": 620
    },
    {
      "epoch": 2.32,
      "grad_norm": 46.39328384399414,
      "learning_rate": 1.1397058823529412e-05,
      "loss": 0.2582,
      "step": 630
    },
    {
      "epoch": 2.35,
      "grad_norm": 52.687957763671875,
      "learning_rate": 1.0784313725490197e-05,
      "loss": 0.289,
      "step": 640
    },
    {
      "epoch": 2.39,
      "grad_norm": 22.3091983795166,
      "learning_rate": 1.017156862745098e-05,
      "loss": 0.2256,
      "step": 650
    },
    {
      "epoch": 2.39,
      "eval_accuracy": 0.7601102941176471,
      "eval_f1_macro": 0.6962074067417461,
      "eval_f1_micro": 0.7601102941176471,
      "eval_loss": 0.6484518647193909,
      "eval_runtime": 13.5588,
      "eval_samples_per_second": 80.243,
      "eval_steps_per_second": 2.508,
      "step": 650
    },
    {
      "epoch": 2.43,
      "grad_norm": 16.085580825805664,
      "learning_rate": 9.558823529411764e-06,
      "loss": 0.2501,
      "step": 660
    },
    {
      "epoch": 2.46,
      "grad_norm": 20.8741455078125,
      "learning_rate": 8.946078431372549e-06,
      "loss": 0.3018,
      "step": 670
    },
    {
      "epoch": 2.5,
      "grad_norm": 25.310302734375,
      "learning_rate": 8.333333333333334e-06,
      "loss": 0.2451,
      "step": 680
    },
    {
      "epoch": 2.54,
      "grad_norm": 17.985490798950195,
      "learning_rate": 7.720588235294119e-06,
      "loss": 0.232,
      "step": 690
    },
    {
      "epoch": 2.57,
      "grad_norm": 7.154005527496338,
      "learning_rate": 7.107843137254902e-06,
      "loss": 0.2544,
      "step": 700
    },
    {
      "epoch": 2.57,
      "eval_accuracy": 0.7628676470588235,
      "eval_f1_macro": 0.7165018421562924,
      "eval_f1_micro": 0.7628676470588235,
      "eval_loss": 0.6459027528762817,
      "eval_runtime": 13.5807,
      "eval_samples_per_second": 80.114,
      "eval_steps_per_second": 2.504,
      "step": 700
    },
    {
      "epoch": 2.61,
      "grad_norm": 9.307502746582031,
      "learning_rate": 6.495098039215687e-06,
      "loss": 0.3067,
      "step": 710
    },
    {
      "epoch": 2.65,
      "grad_norm": 9.009349822998047,
      "learning_rate": 5.882352941176471e-06,
      "loss": 0.2225,
      "step": 720
    },
    {
      "epoch": 2.68,
      "grad_norm": 14.490361213684082,
      "learning_rate": 5.269607843137255e-06,
      "loss": 0.2576,
      "step": 730
    },
    {
      "epoch": 2.72,
      "grad_norm": 24.25351333618164,
      "learning_rate": 4.65686274509804e-06,
      "loss": 0.2805,
      "step": 740
    },
    {
      "epoch": 2.76,
      "grad_norm": 14.948960304260254,
      "learning_rate": 4.044117647058824e-06,
      "loss": 0.2839,
      "step": 750
    },
    {
      "epoch": 2.76,
      "eval_accuracy": 0.765625,
      "eval_f1_macro": 0.7252674888969208,
      "eval_f1_micro": 0.765625,
      "eval_loss": 0.5921774506568909,
      "eval_runtime": 13.5697,
      "eval_samples_per_second": 80.178,
      "eval_steps_per_second": 2.506,
      "step": 750
    },
    {
      "epoch": 2.79,
      "grad_norm": 43.209163665771484,
      "learning_rate": 3.431372549019608e-06,
      "loss": 0.2235,
      "step": 760
    },
    {
      "epoch": 2.83,
      "grad_norm": 7.3651604652404785,
      "learning_rate": 2.818627450980392e-06,
      "loss": 0.2487,
      "step": 770
    },
    {
      "epoch": 2.87,
      "grad_norm": 21.250938415527344,
      "learning_rate": 2.2058823529411767e-06,
      "loss": 0.2038,
      "step": 780
    },
    {
      "epoch": 2.9,
      "grad_norm": 21.40656852722168,
      "learning_rate": 1.5931372549019608e-06,
      "loss": 0.27,
      "step": 790
    },
    {
      "epoch": 2.94,
      "grad_norm": 32.97100830078125,
      "learning_rate": 9.80392156862745e-07,
      "loss": 0.2634,
      "step": 800
    },
    {
      "epoch": 2.94,
      "eval_accuracy": 0.7637867647058824,
      "eval_f1_macro": 0.7075804081718023,
      "eval_f1_micro": 0.7637867647058824,
      "eval_loss": 0.6311897039413452,
      "eval_runtime": 13.5748,
      "eval_samples_per_second": 80.148,
      "eval_steps_per_second": 2.505,
      "step": 800
    },
    {
      "epoch": 2.98,
      "grad_norm": 8.623468399047852,
      "learning_rate": 3.6764705882352943e-07,
      "loss": 0.2572,
      "step": 810
    },
    {
      "epoch": 3.0,
      "step": 816,
      "total_flos": 1.3629706293215232e+17,
      "train_loss": 0.4493609409706265,
      "train_runtime": 1238.3624,
      "train_samples_per_second": 21.076,
      "train_steps_per_second": 0.659
    }
  ],
  "logging_steps": 10,
  "max_steps": 816,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 50,
  "total_flos": 1.3629706293215232e+17,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}