{ "best_metric": null, "best_model_checkpoint": null, "epoch": 35.0, "global_step": 33600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 1.535132032339264e-05, "loss": 1.7652, "step": 960 }, { "epoch": 1.0, "eval_accuracy": 0.66545764302116, "eval_loss": 1.7168171405792236, "eval_runtime": 51.9578, "eval_samples_per_second": 146.35, "eval_steps_per_second": 0.924, "step": 960 }, { "epoch": 2.0, "learning_rate": 1.6900880215595094e-05, "loss": 1.5201, "step": 1920 }, { "epoch": 2.0, "eval_accuracy": 0.6816024828889072, "eval_loss": 1.6037945747375488, "eval_runtime": 51.6258, "eval_samples_per_second": 147.291, "eval_steps_per_second": 0.93, "step": 1920 }, { "epoch": 3.0, "learning_rate": 1.7807314645155048e-05, "loss": 1.4366, "step": 2880 }, { "epoch": 3.0, "eval_accuracy": 0.6859236059235067, "eval_loss": 1.5763635635375977, "eval_runtime": 52.5239, "eval_samples_per_second": 144.772, "eval_steps_per_second": 0.914, "step": 2880 }, { "epoch": 4.0, "learning_rate": 1.8450440107797548e-05, "loss": 1.3831, "step": 3840 }, { "epoch": 4.0, "eval_accuracy": 0.6914059105006866, "eval_loss": 1.5295616388320923, "eval_runtime": 51.6883, "eval_samples_per_second": 147.113, "eval_steps_per_second": 0.929, "step": 3840 }, { "epoch": 5.0, "learning_rate": 1.894928697180815e-05, "loss": 1.3447, "step": 4800 }, { "epoch": 5.0, "eval_accuracy": 0.6934798357287497, "eval_loss": 1.5127382278442383, "eval_runtime": 51.7137, "eval_samples_per_second": 147.04, "eval_steps_per_second": 0.928, "step": 4800 }, { "epoch": 6.0, "learning_rate": 1.93568745373575e-05, "loss": 1.314, "step": 5760 }, { "epoch": 6.0, "eval_accuracy": 0.6982260680110423, "eval_loss": 1.4813944101333618, "eval_runtime": 51.6818, "eval_samples_per_second": 147.131, "eval_steps_per_second": 0.929, "step": 5760 }, { "epoch": 7.0, "learning_rate": 1.9701484913790247e-05, "loss": 1.29, "step": 6720 }, { "epoch": 7.0, "eval_accuracy": 0.6991250528199647, "eval_loss": 1.4718950986862183, "eval_runtime": 52.9344, "eval_samples_per_second": 143.649, "eval_steps_per_second": 0.907, "step": 6720 }, { "epoch": 8.0, "learning_rate": 2e-05, "loss": 1.2669, "step": 7680 }, { "epoch": 8.0, "eval_accuracy": 0.7032825019384854, "eval_loss": 1.449450135231018, "eval_runtime": 50.9613, "eval_samples_per_second": 149.211, "eval_steps_per_second": 0.942, "step": 7680 }, { "epoch": 9.0, "learning_rate": 2e-05, "loss": 1.2461, "step": 8640 }, { "epoch": 9.0, "eval_accuracy": 0.7032601118345916, "eval_loss": 1.4466781616210938, "eval_runtime": 50.8922, "eval_samples_per_second": 149.414, "eval_steps_per_second": 0.943, "step": 8640 }, { "epoch": 10.0, "learning_rate": 2e-05, "loss": 1.2323, "step": 9600 }, { "epoch": 10.0, "eval_accuracy": 0.7045017515092793, "eval_loss": 1.4342981576919556, "eval_runtime": 51.2867, "eval_samples_per_second": 148.264, "eval_steps_per_second": 0.936, "step": 9600 }, { "epoch": 11.0, "learning_rate": 2e-05, "loss": 1.2191, "step": 10560 }, { "epoch": 11.0, "eval_accuracy": 0.7057310891893632, "eval_loss": 1.4175918102264404, "eval_runtime": 51.4742, "eval_samples_per_second": 147.725, "eval_steps_per_second": 0.933, "step": 10560 }, { "epoch": 12.0, "learning_rate": 2e-05, "loss": 1.2046, "step": 11520 }, { "epoch": 12.0, "eval_accuracy": 0.7111463874637353, "eval_loss": 1.3918827772140503, "eval_runtime": 51.2424, "eval_samples_per_second": 148.393, "eval_steps_per_second": 0.937, "step": 11520 }, { "epoch": 13.0, "learning_rate": 2e-05, "loss": 1.1944, "step": 12480 }, { "epoch": 13.0, "eval_accuracy": 0.7096209003462108, "eval_loss": 1.397229790687561, "eval_runtime": 51.3927, "eval_samples_per_second": 147.959, "eval_steps_per_second": 0.934, "step": 12480 }, { "epoch": 14.0, "learning_rate": 2e-05, "loss": 1.1821, "step": 13440 }, { "epoch": 14.0, "eval_accuracy": 0.7115298351243047, "eval_loss": 1.386526107788086, "eval_runtime": 51.5062, "eval_samples_per_second": 147.633, "eval_steps_per_second": 0.932, "step": 13440 }, { "epoch": 15.0, "learning_rate": 2e-05, "loss": 1.1716, "step": 14400 }, { "epoch": 15.0, "eval_accuracy": 0.7124512834797834, "eval_loss": 1.383684515953064, "eval_runtime": 51.5334, "eval_samples_per_second": 147.555, "eval_steps_per_second": 0.931, "step": 14400 }, { "epoch": 16.0, "learning_rate": 2e-05, "loss": 1.1627, "step": 15360 }, { "epoch": 16.0, "eval_accuracy": 0.7130167108614363, "eval_loss": 1.3739854097366333, "eval_runtime": 51.1674, "eval_samples_per_second": 148.61, "eval_steps_per_second": 0.938, "step": 15360 }, { "epoch": 17.0, "learning_rate": 2e-05, "loss": 1.1535, "step": 16320 }, { "epoch": 17.0, "eval_accuracy": 0.7154258294693137, "eval_loss": 1.3581925630569458, "eval_runtime": 50.9905, "eval_samples_per_second": 149.126, "eval_steps_per_second": 0.941, "step": 16320 }, { "epoch": 18.0, "learning_rate": 2e-05, "loss": 1.1459, "step": 17280 }, { "epoch": 18.0, "eval_accuracy": 0.7157034442498541, "eval_loss": 1.3612279891967773, "eval_runtime": 51.0142, "eval_samples_per_second": 149.057, "eval_steps_per_second": 0.941, "step": 17280 }, { "epoch": 19.0, "learning_rate": 2e-05, "loss": 1.1381, "step": 18240 }, { "epoch": 19.0, "eval_accuracy": 0.7173944953215458, "eval_loss": 1.3571031093597412, "eval_runtime": 51.233, "eval_samples_per_second": 148.42, "eval_steps_per_second": 0.937, "step": 18240 }, { "epoch": 20.0, "learning_rate": 2e-05, "loss": 1.1314, "step": 19200 }, { "epoch": 20.0, "eval_accuracy": 0.7164889608500034, "eval_loss": 1.3498369455337524, "eval_runtime": 51.475, "eval_samples_per_second": 147.722, "eval_steps_per_second": 0.932, "step": 19200 }, { "epoch": 21.0, "learning_rate": 2e-05, "loss": 1.1216, "step": 20160 }, { "epoch": 21.0, "eval_accuracy": 0.7190562640400042, "eval_loss": 1.3336502313613892, "eval_runtime": 51.2931, "eval_samples_per_second": 148.246, "eval_steps_per_second": 0.936, "step": 20160 }, { "epoch": 22.0, "learning_rate": 2e-05, "loss": 1.1187, "step": 21120 }, { "epoch": 22.0, "eval_accuracy": 0.7189180295993838, "eval_loss": 1.3376628160476685, "eval_runtime": 51.22, "eval_samples_per_second": 148.458, "eval_steps_per_second": 0.937, "step": 21120 }, { "epoch": 23.0, "learning_rate": 2e-05, "loss": 1.1128, "step": 22080 }, { "epoch": 23.0, "eval_accuracy": 0.7185502647071909, "eval_loss": 1.3377180099487305, "eval_runtime": 50.9033, "eval_samples_per_second": 149.381, "eval_steps_per_second": 0.943, "step": 22080 }, { "epoch": 24.0, "learning_rate": 2e-05, "loss": 1.1052, "step": 23040 }, { "epoch": 24.0, "eval_accuracy": 0.7208811149658572, "eval_loss": 1.3223472833633423, "eval_runtime": 51.1878, "eval_samples_per_second": 148.551, "eval_steps_per_second": 0.938, "step": 23040 }, { "epoch": 25.0, "learning_rate": 2e-05, "loss": 1.0996, "step": 24000 }, { "epoch": 25.0, "eval_accuracy": 0.7216902223097578, "eval_loss": 1.3264613151550293, "eval_runtime": 50.8905, "eval_samples_per_second": 149.419, "eval_steps_per_second": 0.943, "step": 24000 }, { "epoch": 26.0, "learning_rate": 2e-05, "loss": 1.0961, "step": 24960 }, { "epoch": 26.0, "eval_accuracy": 0.7211948506436695, "eval_loss": 1.320527195930481, "eval_runtime": 50.9638, "eval_samples_per_second": 149.204, "eval_steps_per_second": 0.942, "step": 24960 }, { "epoch": 27.0, "learning_rate": 2e-05, "loss": 1.0902, "step": 25920 }, { "epoch": 27.0, "eval_accuracy": 0.7213838036019521, "eval_loss": 1.321337342262268, "eval_runtime": 52.1531, "eval_samples_per_second": 145.802, "eval_steps_per_second": 0.92, "step": 25920 }, { "epoch": 28.0, "learning_rate": 2e-05, "loss": 1.0835, "step": 26880 }, { "epoch": 28.0, "eval_accuracy": 0.7239870388684853, "eval_loss": 1.3021934032440186, "eval_runtime": 51.6703, "eval_samples_per_second": 147.164, "eval_steps_per_second": 0.929, "step": 26880 }, { "epoch": 29.0, "learning_rate": 2e-05, "loss": 1.0796, "step": 27840 }, { "epoch": 29.0, "eval_accuracy": 0.7225473777214267, "eval_loss": 1.3106894493103027, "eval_runtime": 51.4692, "eval_samples_per_second": 147.739, "eval_steps_per_second": 0.933, "step": 27840 }, { "epoch": 30.0, "learning_rate": 2e-05, "loss": 1.076, "step": 28800 }, { "epoch": 30.0, "eval_accuracy": 0.7246294369637408, "eval_loss": 1.3005998134613037, "eval_runtime": 51.1849, "eval_samples_per_second": 148.56, "eval_steps_per_second": 0.938, "step": 28800 }, { "epoch": 31.0, "learning_rate": 2e-05, "loss": 1.0713, "step": 29760 }, { "epoch": 31.0, "eval_accuracy": 0.723824419552894, "eval_loss": 1.3021259307861328, "eval_runtime": 51.1204, "eval_samples_per_second": 148.747, "eval_steps_per_second": 0.939, "step": 29760 }, { "epoch": 32.0, "learning_rate": 2e-05, "loss": 1.0679, "step": 30720 }, { "epoch": 32.0, "eval_accuracy": 0.7239979316606835, "eval_loss": 1.3063867092132568, "eval_runtime": 50.892, "eval_samples_per_second": 149.415, "eval_steps_per_second": 0.943, "step": 30720 }, { "epoch": 33.0, "learning_rate": 2e-05, "loss": 1.0638, "step": 31680 }, { "epoch": 33.0, "eval_accuracy": 0.7261307848236347, "eval_loss": 1.2864927053451538, "eval_runtime": 50.9275, "eval_samples_per_second": 149.31, "eval_steps_per_second": 0.943, "step": 31680 }, { "epoch": 34.0, "learning_rate": 2e-05, "loss": 1.058, "step": 32640 }, { "epoch": 34.0, "eval_accuracy": 0.7236361200229268, "eval_loss": 1.3007187843322754, "eval_runtime": 51.9944, "eval_samples_per_second": 146.246, "eval_steps_per_second": 0.923, "step": 32640 }, { "epoch": 35.0, "learning_rate": 2e-05, "loss": 1.0548, "step": 33600 }, { "epoch": 35.0, "eval_accuracy": 0.7257316118449666, "eval_loss": 1.2933671474456787, "eval_runtime": 51.2859, "eval_samples_per_second": 148.267, "eval_steps_per_second": 0.936, "step": 33600 } ], "max_steps": 38400, "num_train_epochs": 40, "total_flos": 2041797358387200.0, "trial_name": null, "trial_params": null }