SLM_vs_LLM_experiments/max_seq_length_128_experiments/Qwen/Qwen1.5_1.8B_twitter/trainer_state.json
{
  "best_metric": 0.5038631558418274,
  "best_model_checkpoint": "../../experiments_checkpoints/MAdAiLab/Qwen/Qwen1.5_1.8B_twitter/checkpoint-250",
  "epoch": 3.0,
  "eval_steps": 50,
  "global_step": 816,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.04,
      "grad_norm": 109.95573425292969,
      "learning_rate": 4.938725490196079e-06,
      "loss": 1.1177,
      "step": 10
    },
    {
      "epoch": 0.07,
      "grad_norm": 117.97087860107422,
      "learning_rate": 4.8774509803921576e-06,
      "loss": 0.8448,
      "step": 20
    },
    {
      "epoch": 0.11,
      "grad_norm": 46.05353546142578,
      "learning_rate": 4.816176470588236e-06,
      "loss": 0.8301,
      "step": 30
    },
    {
      "epoch": 0.15,
      "grad_norm": 41.749080657958984,
      "learning_rate": 4.754901960784314e-06,
      "loss": 0.7069,
      "step": 40
    },
    {
      "epoch": 0.18,
      "grad_norm": 26.182971954345703,
      "learning_rate": 4.693627450980393e-06,
      "loss": 0.6585,
      "step": 50
    },
    {
      "epoch": 0.18,
      "eval_accuracy": 0.7123161764705882,
      "eval_f1_macro": 0.5810560943233977,
      "eval_f1_micro": 0.7123161764705882,
      "eval_loss": 0.6434972286224365,
      "eval_runtime": 2.8155,
      "eval_samples_per_second": 386.429,
      "eval_steps_per_second": 12.076,
      "step": 50
    },
    {
      "epoch": 0.22,
      "grad_norm": 118.7502212524414,
      "learning_rate": 4.632352941176471e-06,
      "loss": 0.6179,
      "step": 60
    },
    {
      "epoch": 0.26,
      "grad_norm": 34.24118423461914,
      "learning_rate": 4.571078431372549e-06,
      "loss": 0.6324,
      "step": 70
    },
    {
      "epoch": 0.29,
      "grad_norm": 92.65385437011719,
      "learning_rate": 4.509803921568628e-06,
      "loss": 0.5743,
      "step": 80
    },
    {
      "epoch": 0.33,
      "grad_norm": 44.45353317260742,
      "learning_rate": 4.448529411764706e-06,
      "loss": 0.4798,
      "step": 90
    },
    {
      "epoch": 0.37,
      "grad_norm": 35.91661071777344,
      "learning_rate": 4.3872549019607845e-06,
      "loss": 0.6396,
      "step": 100
    },
    {
      "epoch": 0.37,
      "eval_accuracy": 0.7297794117647058,
      "eval_f1_macro": 0.6997657235537935,
      "eval_f1_micro": 0.7297794117647058,
      "eval_loss": 0.6015912294387817,
      "eval_runtime": 2.832,
      "eval_samples_per_second": 384.18,
      "eval_steps_per_second": 12.006,
      "step": 100
    },
    {
      "epoch": 0.4,
      "grad_norm": 56.78458786010742,
      "learning_rate": 4.3259803921568635e-06,
      "loss": 0.5814,
      "step": 110
    },
    {
      "epoch": 0.44,
      "grad_norm": 53.924827575683594,
      "learning_rate": 4.264705882352942e-06,
      "loss": 0.5131,
      "step": 120
    },
    {
      "epoch": 0.48,
      "grad_norm": 74.93573760986328,
      "learning_rate": 4.20343137254902e-06,
      "loss": 0.5242,
      "step": 130
    },
    {
      "epoch": 0.51,
      "grad_norm": 26.097169876098633,
      "learning_rate": 4.142156862745099e-06,
      "loss": 0.5529,
      "step": 140
    },
    {
      "epoch": 0.55,
      "grad_norm": 78.16165161132812,
      "learning_rate": 4.080882352941177e-06,
      "loss": 0.5108,
      "step": 150
    },
    {
      "epoch": 0.55,
      "eval_accuracy": 0.7527573529411765,
      "eval_f1_macro": 0.6963299829940972,
      "eval_f1_micro": 0.7527573529411765,
      "eval_loss": 0.5226907134056091,
      "eval_runtime": 2.826,
      "eval_samples_per_second": 384.993,
      "eval_steps_per_second": 12.031,
      "step": 150
    },
    {
      "epoch": 0.59,
      "grad_norm": 19.04606056213379,
      "learning_rate": 4.019607843137255e-06,
      "loss": 0.5012,
      "step": 160
    },
    {
      "epoch": 0.62,
      "grad_norm": 127.43460845947266,
      "learning_rate": 3.958333333333333e-06,
      "loss": 0.5601,
      "step": 170
    },
    {
      "epoch": 0.66,
      "grad_norm": 73.45539093017578,
      "learning_rate": 3.897058823529412e-06,
      "loss": 0.6134,
      "step": 180
    },
    {
      "epoch": 0.7,
      "grad_norm": 59.42979049682617,
      "learning_rate": 3.8357843137254904e-06,
      "loss": 0.5447,
      "step": 190
    },
    {
      "epoch": 0.74,
      "grad_norm": 22.45537757873535,
      "learning_rate": 3.774509803921569e-06,
      "loss": 0.5065,
      "step": 200
    },
    {
      "epoch": 0.74,
      "eval_accuracy": 0.7417279411764706,
      "eval_f1_macro": 0.6346563132227484,
      "eval_f1_micro": 0.7417279411764706,
      "eval_loss": 0.5502642393112183,
      "eval_runtime": 2.8852,
      "eval_samples_per_second": 377.094,
      "eval_steps_per_second": 11.784,
      "step": 200
    },
    {
      "epoch": 0.77,
      "grad_norm": 50.938880920410156,
      "learning_rate": 3.7132352941176476e-06,
      "loss": 0.4589,
      "step": 210
    },
    {
      "epoch": 0.81,
      "grad_norm": 84.68132781982422,
      "learning_rate": 3.6519607843137257e-06,
      "loss": 0.5403,
      "step": 220
    },
    {
      "epoch": 0.85,
      "grad_norm": 27.052024841308594,
      "learning_rate": 3.5906862745098043e-06,
      "loss": 0.4618,
      "step": 230
    },
    {
      "epoch": 0.88,
      "grad_norm": 23.497787475585938,
      "learning_rate": 3.529411764705883e-06,
      "loss": 0.452,
      "step": 240
    },
    {
      "epoch": 0.92,
      "grad_norm": 15.185086250305176,
      "learning_rate": 3.468137254901961e-06,
      "loss": 0.4883,
      "step": 250
    },
    {
      "epoch": 0.92,
      "eval_accuracy": 0.7775735294117647,
      "eval_f1_macro": 0.7420002194942226,
      "eval_f1_micro": 0.7775735294117647,
      "eval_loss": 0.5038631558418274,
      "eval_runtime": 2.8371,
      "eval_samples_per_second": 383.494,
      "eval_steps_per_second": 11.984,
      "step": 250
    },
    {
      "epoch": 0.96,
      "grad_norm": 24.239835739135742,
      "learning_rate": 3.4068627450980396e-06,
      "loss": 0.4325,
      "step": 260
    },
    {
      "epoch": 0.99,
      "grad_norm": 13.703453063964844,
      "learning_rate": 3.3455882352941178e-06,
      "loss": 0.5193,
      "step": 270
    },
    {
      "epoch": 1.03,
      "grad_norm": 92.7187728881836,
      "learning_rate": 3.2843137254901964e-06,
      "loss": 0.4147,
      "step": 280
    },
    {
      "epoch": 1.07,
      "grad_norm": 38.43824768066406,
      "learning_rate": 3.223039215686275e-06,
      "loss": 0.3275,
      "step": 290
    },
    {
      "epoch": 1.1,
      "grad_norm": 11.464463233947754,
      "learning_rate": 3.161764705882353e-06,
      "loss": 0.3296,
      "step": 300
    },
    {
      "epoch": 1.1,
      "eval_accuracy": 0.7729779411764706,
      "eval_f1_macro": 0.730720863982653,
      "eval_f1_micro": 0.7729779411764706,
      "eval_loss": 0.5249598026275635,
      "eval_runtime": 2.8316,
      "eval_samples_per_second": 384.236,
      "eval_steps_per_second": 12.007,
      "step": 300
    },
    {
      "epoch": 1.14,
      "grad_norm": 18.103742599487305,
      "learning_rate": 3.1004901960784317e-06,
      "loss": 0.32,
      "step": 310
    },
    {
      "epoch": 1.18,
      "grad_norm": 48.70182800292969,
      "learning_rate": 3.03921568627451e-06,
      "loss": 0.3033,
      "step": 320
    },
    {
      "epoch": 1.21,
      "grad_norm": 19.211103439331055,
      "learning_rate": 2.9779411764705884e-06,
      "loss": 0.371,
      "step": 330
    },
    {
      "epoch": 1.25,
      "grad_norm": 30.80494499206543,
      "learning_rate": 2.916666666666667e-06,
      "loss": 0.3158,
      "step": 340
    },
    {
      "epoch": 1.29,
      "grad_norm": 22.932558059692383,
      "learning_rate": 2.855392156862745e-06,
      "loss": 0.322,
      "step": 350
    },
    {
      "epoch": 1.29,
      "eval_accuracy": 0.7720588235294118,
      "eval_f1_macro": 0.7422500391653388,
      "eval_f1_micro": 0.7720588235294118,
      "eval_loss": 0.5509535670280457,
      "eval_runtime": 2.8424,
      "eval_samples_per_second": 382.778,
      "eval_steps_per_second": 11.962,
      "step": 350
    },
    {
      "epoch": 1.32,
      "grad_norm": 12.943411827087402,
      "learning_rate": 2.7941176470588237e-06,
      "loss": 0.3103,
      "step": 360
    },
    {
      "epoch": 1.36,
      "grad_norm": 35.10004425048828,
      "learning_rate": 2.732843137254902e-06,
      "loss": 0.2967,
      "step": 370
    },
    {
      "epoch": 1.4,
      "grad_norm": 21.274372100830078,
      "learning_rate": 2.6715686274509804e-06,
      "loss": 0.3739,
      "step": 380
    },
    {
      "epoch": 1.43,
      "grad_norm": 33.91293716430664,
      "learning_rate": 2.610294117647059e-06,
      "loss": 0.3583,
      "step": 390
    },
    {
      "epoch": 1.47,
      "grad_norm": 33.361366271972656,
      "learning_rate": 2.549019607843137e-06,
      "loss": 0.3287,
      "step": 400
    },
    {
      "epoch": 1.47,
      "eval_accuracy": 0.7582720588235294,
      "eval_f1_macro": 0.6932231064719864,
      "eval_f1_micro": 0.7582720588235294,
      "eval_loss": 0.539191722869873,
      "eval_runtime": 2.8418,
      "eval_samples_per_second": 382.862,
      "eval_steps_per_second": 11.964,
      "step": 400
    },
    {
      "epoch": 1.51,
      "grad_norm": 41.59482192993164,
      "learning_rate": 2.4877450980392158e-06,
      "loss": 0.2896,
      "step": 410
    },
    {
      "epoch": 1.54,
      "grad_norm": 57.865882873535156,
      "learning_rate": 2.4264705882352943e-06,
      "loss": 0.3092,
      "step": 420
    },
    {
      "epoch": 1.58,
      "grad_norm": 37.957183837890625,
      "learning_rate": 2.3651960784313725e-06,
      "loss": 0.3175,
      "step": 430
    },
    {
      "epoch": 1.62,
      "grad_norm": 30.82972526550293,
      "learning_rate": 2.303921568627451e-06,
      "loss": 0.3005,
      "step": 440
    },
    {
      "epoch": 1.65,
      "grad_norm": 34.46055603027344,
      "learning_rate": 2.2426470588235296e-06,
      "loss": 0.3097,
      "step": 450
    },
    {
      "epoch": 1.65,
      "eval_accuracy": 0.7628676470588235,
      "eval_f1_macro": 0.7222644376899696,
      "eval_f1_micro": 0.7628676470588235,
      "eval_loss": 0.5631462335586548,
      "eval_runtime": 2.8377,
      "eval_samples_per_second": 383.405,
      "eval_steps_per_second": 11.981,
      "step": 450
    },
    {
      "epoch": 1.69,
      "grad_norm": 25.395221710205078,
      "learning_rate": 2.1813725490196082e-06,
      "loss": 0.3284,
      "step": 460
    },
    {
      "epoch": 1.73,
      "grad_norm": 13.454970359802246,
      "learning_rate": 2.1200980392156864e-06,
      "loss": 0.3399,
      "step": 470
    },
    {
      "epoch": 1.76,
      "grad_norm": 14.682934761047363,
      "learning_rate": 2.058823529411765e-06,
      "loss": 0.2575,
      "step": 480
    },
    {
      "epoch": 1.8,
      "grad_norm": 8.564711570739746,
      "learning_rate": 1.9975490196078435e-06,
      "loss": 0.3048,
      "step": 490
    },
    {
      "epoch": 1.84,
      "grad_norm": 30.56090545654297,
      "learning_rate": 1.9362745098039217e-06,
      "loss": 0.3397,
      "step": 500
    },
    {
      "epoch": 1.84,
      "eval_accuracy": 0.7674632352941176,
      "eval_f1_macro": 0.7333806007808814,
      "eval_f1_micro": 0.7674632352941176,
      "eval_loss": 0.5669376254081726,
      "eval_runtime": 2.8353,
      "eval_samples_per_second": 383.737,
      "eval_steps_per_second": 11.992,
      "step": 500
    },
    {
      "epoch": 1.88,
      "grad_norm": 50.637630462646484,
      "learning_rate": 1.8750000000000003e-06,
      "loss": 0.3389,
      "step": 510
    },
    {
      "epoch": 1.91,
      "grad_norm": 69.52845001220703,
      "learning_rate": 1.8137254901960786e-06,
      "loss": 0.365,
      "step": 520
    },
    {
      "epoch": 1.95,
      "grad_norm": 19.709930419921875,
      "learning_rate": 1.752450980392157e-06,
      "loss": 0.3561,
      "step": 530
    },
    {
      "epoch": 1.99,
      "grad_norm": 22.103239059448242,
      "learning_rate": 1.6911764705882356e-06,
      "loss": 0.3147,
      "step": 540
    },
    {
      "epoch": 2.02,
      "grad_norm": 18.630016326904297,
      "learning_rate": 1.629901960784314e-06,
      "loss": 0.2618,
      "step": 550
    },
    {
      "epoch": 2.02,
      "eval_accuracy": 0.75,
      "eval_f1_macro": 0.6870389170896785,
      "eval_f1_micro": 0.75,
      "eval_loss": 0.5891400575637817,
      "eval_runtime": 3.0028,
      "eval_samples_per_second": 362.323,
      "eval_steps_per_second": 11.323,
      "step": 550
    },
    {
      "epoch": 2.06,
      "grad_norm": 20.225698471069336,
      "learning_rate": 1.5686274509803923e-06,
      "loss": 0.2155,
      "step": 560
    },
    {
      "epoch": 2.1,
      "grad_norm": 20.72734832763672,
      "learning_rate": 1.5073529411764707e-06,
      "loss": 0.1837,
      "step": 570
    },
    {
      "epoch": 2.13,
      "grad_norm": 12.758101463317871,
      "learning_rate": 1.4460784313725492e-06,
      "loss": 0.1936,
      "step": 580
    },
    {
      "epoch": 2.17,
      "grad_norm": 20.31243896484375,
      "learning_rate": 1.3848039215686276e-06,
      "loss": 0.1668,
      "step": 590
    },
    {
      "epoch": 2.21,
      "grad_norm": 6.187927722930908,
      "learning_rate": 1.323529411764706e-06,
      "loss": 0.1745,
      "step": 600
    },
    {
      "epoch": 2.21,
      "eval_accuracy": 0.7582720588235294,
      "eval_f1_macro": 0.7122790012056079,
      "eval_f1_micro": 0.7582720588235294,
      "eval_loss": 0.6399500370025635,
      "eval_runtime": 2.8351,
      "eval_samples_per_second": 383.754,
      "eval_steps_per_second": 11.992,
      "step": 600
    },
    {
      "epoch": 2.24,
      "grad_norm": 16.426952362060547,
      "learning_rate": 1.2622549019607843e-06,
      "loss": 0.1873,
      "step": 610
    },
    {
      "epoch": 2.28,
      "grad_norm": 14.441506385803223,
      "learning_rate": 1.200980392156863e-06,
      "loss": 0.1318,
      "step": 620
    },
    {
      "epoch": 2.32,
      "grad_norm": 25.300006866455078,
      "learning_rate": 1.1397058823529413e-06,
      "loss": 0.1468,
      "step": 630
    },
    {
      "epoch": 2.35,
      "grad_norm": 5.833555221557617,
      "learning_rate": 1.0784313725490197e-06,
      "loss": 0.1969,
      "step": 640
    },
    {
      "epoch": 2.39,
      "grad_norm": 14.262299537658691,
      "learning_rate": 1.017156862745098e-06,
      "loss": 0.1572,
      "step": 650
    },
    {
      "epoch": 2.39,
      "eval_accuracy": 0.7518382352941176,
      "eval_f1_macro": 0.6966693100713719,
      "eval_f1_micro": 0.7518382352941176,
      "eval_loss": 0.6694048643112183,
      "eval_runtime": 2.8384,
      "eval_samples_per_second": 383.315,
      "eval_steps_per_second": 11.979,
      "step": 650
    },
    {
      "epoch": 2.43,
      "grad_norm": 9.928728103637695,
      "learning_rate": 9.558823529411764e-07,
      "loss": 0.1654,
      "step": 660
    },
    {
      "epoch": 2.46,
      "grad_norm": 36.1452522277832,
      "learning_rate": 8.94607843137255e-07,
      "loss": 0.181,
      "step": 670
    },
    {
      "epoch": 2.5,
      "grad_norm": 14.285146713256836,
      "learning_rate": 8.333333333333333e-07,
      "loss": 0.1702,
      "step": 680
    },
    {
      "epoch": 2.54,
      "grad_norm": 22.802335739135742,
      "learning_rate": 7.720588235294119e-07,
      "loss": 0.1375,
      "step": 690
    },
    {
      "epoch": 2.57,
      "grad_norm": 25.91615867614746,
      "learning_rate": 7.107843137254903e-07,
      "loss": 0.1804,
      "step": 700
    },
    {
      "epoch": 2.57,
      "eval_accuracy": 0.7610294117647058,
      "eval_f1_macro": 0.7172783241384825,
      "eval_f1_micro": 0.7610294117647058,
      "eval_loss": 0.6869542598724365,
      "eval_runtime": 2.8367,
      "eval_samples_per_second": 383.549,
      "eval_steps_per_second": 11.986,
      "step": 700
    },
    {
      "epoch": 2.61,
      "grad_norm": 21.840639114379883,
      "learning_rate": 6.495098039215687e-07,
      "loss": 0.1811,
      "step": 710
    },
    {
      "epoch": 2.65,
      "grad_norm": 12.820947647094727,
      "learning_rate": 5.882352941176471e-07,
      "loss": 0.1636,
      "step": 720
    },
    {
      "epoch": 2.68,
      "grad_norm": 14.430222511291504,
      "learning_rate": 5.269607843137256e-07,
      "loss": 0.1478,
      "step": 730
    },
    {
      "epoch": 2.72,
      "grad_norm": 15.748844146728516,
      "learning_rate": 4.6568627450980395e-07,
      "loss": 0.1738,
      "step": 740
    },
    {
      "epoch": 2.76,
      "grad_norm": 21.588491439819336,
      "learning_rate": 4.044117647058824e-07,
      "loss": 0.1817,
      "step": 750
    },
    {
      "epoch": 2.76,
      "eval_accuracy": 0.7536764705882353,
      "eval_f1_macro": 0.7044581029142274,
      "eval_f1_micro": 0.7536764705882353,
      "eval_loss": 0.6656494140625,
      "eval_runtime": 2.8877,
      "eval_samples_per_second": 376.77,
      "eval_steps_per_second": 11.774,
      "step": 750
    },
    {
      "epoch": 2.79,
      "grad_norm": 34.946441650390625,
      "learning_rate": 3.4313725490196084e-07,
      "loss": 0.1558,
      "step": 760
    },
    {
      "epoch": 2.83,
      "grad_norm": 14.45042610168457,
      "learning_rate": 2.8186274509803926e-07,
      "loss": 0.1671,
      "step": 770
    },
    {
      "epoch": 2.87,
      "grad_norm": 22.017681121826172,
      "learning_rate": 2.2058823529411768e-07,
      "loss": 0.16,
      "step": 780
    },
    {
      "epoch": 2.9,
      "grad_norm": 40.37516403198242,
      "learning_rate": 1.5931372549019607e-07,
      "loss": 0.2156,
      "step": 790
    },
    {
      "epoch": 2.94,
      "grad_norm": 21.41912841796875,
      "learning_rate": 9.803921568627452e-08,
      "loss": 0.1984,
      "step": 800
    },
    {
      "epoch": 2.94,
      "eval_accuracy": 0.7518382352941176,
      "eval_f1_macro": 0.6949038473522191,
      "eval_f1_micro": 0.7518382352941176,
      "eval_loss": 0.6783303618431091,
      "eval_runtime": 2.8354,
      "eval_samples_per_second": 383.725,
      "eval_steps_per_second": 11.991,
      "step": 800
    },
    {
      "epoch": 2.98,
      "grad_norm": 22.50458335876465,
      "learning_rate": 3.6764705882352945e-08,
      "loss": 0.1742,
      "step": 810
    },
    {
      "epoch": 3.0,
      "step": 816,
      "total_flos": 2.435566082929459e+16,
      "train_loss": 0.3625207788803998,
      "train_runtime": 885.2091,
      "train_samples_per_second": 29.485,
      "train_steps_per_second": 0.922
    }
  ],
  "logging_steps": 10,
  "max_steps": 816,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 50,
  "total_flos": 2.435566082929459e+16,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}