{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 10.0,
  "eval_steps": 1,
  "global_step": 50,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.2,
      "grad_norm": 68.95748138427734,
      "learning_rate": 2.5e-05,
      "loss": 1.2539,
      "step": 1
    },
    {
      "epoch": 0.2,
      "eval_accuracy": 0.55,
      "eval_loss": 1.1243222951889038,
      "eval_runtime": 1.4386,
      "eval_samples_per_second": 69.513,
      "eval_steps_per_second": 2.085,
      "step": 1
    },
    {
      "epoch": 0.4,
      "grad_norm": 66.90943908691406,
      "learning_rate": 5e-05,
      "loss": 1.3101,
      "step": 2
    },
    {
      "epoch": 0.4,
      "eval_accuracy": 0.56,
      "eval_loss": 0.8135351538658142,
      "eval_runtime": 1.948,
      "eval_samples_per_second": 51.335,
      "eval_steps_per_second": 1.54,
      "step": 2
    },
    {
      "epoch": 0.6,
      "grad_norm": 51.475852966308594,
      "learning_rate": 4.8958333333333335e-05,
      "loss": 0.96,
      "step": 3
    },
    {
      "epoch": 0.6,
      "eval_accuracy": 0.45,
      "eval_loss": 1.159423828125,
      "eval_runtime": 1.9698,
      "eval_samples_per_second": 50.768,
      "eval_steps_per_second": 1.523,
      "step": 3
    },
    {
      "epoch": 0.8,
      "grad_norm": 62.35882568359375,
      "learning_rate": 4.791666666666667e-05,
      "loss": 1.0992,
      "step": 4
    },
    {
      "epoch": 0.8,
      "eval_accuracy": 0.45,
      "eval_loss": 1.4467943906784058,
      "eval_runtime": 1.6208,
      "eval_samples_per_second": 61.7,
      "eval_steps_per_second": 1.851,
      "step": 4
    },
    {
      "epoch": 1.0,
      "grad_norm": 63.49054718017578,
      "learning_rate": 4.6875e-05,
      "loss": 1.1371,
      "step": 5
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.45,
      "eval_loss": 1.2584083080291748,
      "eval_runtime": 1.7421,
      "eval_samples_per_second": 57.401,
      "eval_steps_per_second": 1.722,
      "step": 5
    },
    {
      "epoch": 1.2,
      "grad_norm": 80.31912994384766,
      "learning_rate": 4.5833333333333334e-05,
      "loss": 1.3249,
      "step": 6
    },
    {
      "epoch": 1.2,
      "eval_accuracy": 0.46,
      "eval_loss": 0.9616796970367432,
      "eval_runtime": 1.85,
      "eval_samples_per_second": 54.054,
      "eval_steps_per_second": 1.622,
      "step": 6
    },
    {
      "epoch": 1.4,
      "grad_norm": 52.1851806640625,
      "learning_rate": 4.4791666666666673e-05,
      "loss": 0.8871,
      "step": 7
    },
    {
      "epoch": 1.4,
      "eval_accuracy": 0.44,
      "eval_loss": 0.7503124475479126,
      "eval_runtime": 1.6478,
      "eval_samples_per_second": 60.686,
      "eval_steps_per_second": 1.821,
      "step": 7
    },
    {
      "epoch": 1.6,
      "grad_norm": 16.447166442871094,
      "learning_rate": 4.375e-05,
      "loss": 0.7149,
      "step": 8
    },
    {
      "epoch": 1.6,
      "eval_accuracy": 0.47,
      "eval_loss": 0.7278904914855957,
      "eval_runtime": 1.6203,
      "eval_samples_per_second": 61.715,
      "eval_steps_per_second": 1.851,
      "step": 8
    },
    {
      "epoch": 1.8,
      "grad_norm": 7.962558746337891,
      "learning_rate": 4.270833333333333e-05,
      "loss": 0.698,
      "step": 9
    },
    {
      "epoch": 1.8,
      "eval_accuracy": 0.44,
      "eval_loss": 0.7295898199081421,
      "eval_runtime": 1.7356,
      "eval_samples_per_second": 57.618,
      "eval_steps_per_second": 1.729,
      "step": 9
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.8986926078796387,
      "learning_rate": 4.166666666666667e-05,
      "loss": 0.7392,
      "step": 10
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.45,
      "eval_loss": 0.7465624213218689,
      "eval_runtime": 1.7741,
      "eval_samples_per_second": 56.367,
      "eval_steps_per_second": 1.691,
      "step": 10
    },
    {
      "epoch": 2.2,
      "grad_norm": 15.509366035461426,
      "learning_rate": 4.0625000000000005e-05,
      "loss": 0.6846,
      "step": 11
    },
    {
      "epoch": 2.2,
      "eval_accuracy": 0.45,
      "eval_loss": 0.7387697100639343,
      "eval_runtime": 1.6538,
      "eval_samples_per_second": 60.465,
      "eval_steps_per_second": 1.814,
      "step": 11
    },
    {
      "epoch": 2.4,
      "grad_norm": 1.7213455438613892,
      "learning_rate": 3.958333333333333e-05,
      "loss": 0.7069,
      "step": 12
    },
    {
      "epoch": 2.4,
      "eval_accuracy": 0.46,
      "eval_loss": 0.7291209697723389,
      "eval_runtime": 1.7159,
      "eval_samples_per_second": 58.278,
      "eval_steps_per_second": 1.748,
      "step": 12
    },
    {
      "epoch": 2.6,
      "grad_norm": 6.513551235198975,
      "learning_rate": 3.854166666666667e-05,
      "loss": 0.6868,
      "step": 13
    },
    {
      "epoch": 2.6,
      "eval_accuracy": 0.47,
      "eval_loss": 0.7140040397644043,
      "eval_runtime": 1.8804,
      "eval_samples_per_second": 53.179,
      "eval_steps_per_second": 1.595,
      "step": 13
    },
    {
      "epoch": 2.8,
      "grad_norm": 7.754150867462158,
      "learning_rate": 3.7500000000000003e-05,
      "loss": 0.7081,
      "step": 14
    },
    {
      "epoch": 2.8,
      "eval_accuracy": 0.48,
      "eval_loss": 0.7086718082427979,
      "eval_runtime": 1.6009,
      "eval_samples_per_second": 62.464,
      "eval_steps_per_second": 1.874,
      "step": 14
    },
    {
      "epoch": 3.0,
      "grad_norm": 4.853301048278809,
      "learning_rate": 3.6458333333333336e-05,
      "loss": 0.6794,
      "step": 15
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.47,
      "eval_loss": 0.7157617807388306,
      "eval_runtime": 1.6674,
      "eval_samples_per_second": 59.975,
      "eval_steps_per_second": 1.799,
      "step": 15
    },
    {
      "epoch": 3.2,
      "grad_norm": 15.275257110595703,
      "learning_rate": 3.541666666666667e-05,
      "loss": 0.6793,
      "step": 16
    },
    {
      "epoch": 3.2,
      "eval_accuracy": 0.43,
      "eval_loss": 0.7122849822044373,
      "eval_runtime": 1.7474,
      "eval_samples_per_second": 57.229,
      "eval_steps_per_second": 1.717,
      "step": 16
    },
    {
      "epoch": 3.4,
      "grad_norm": 14.405172348022461,
      "learning_rate": 3.4375e-05,
      "loss": 0.6398,
      "step": 17
    },
    {
      "epoch": 3.4,
      "eval_accuracy": 0.45,
      "eval_loss": 0.7217579483985901,
      "eval_runtime": 1.6936,
      "eval_samples_per_second": 59.045,
      "eval_steps_per_second": 1.771,
      "step": 17
    },
    {
      "epoch": 3.6,
      "grad_norm": 11.8975830078125,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 0.6672,
      "step": 18
    },
    {
      "epoch": 3.6,
      "eval_accuracy": 0.48,
      "eval_loss": 0.7577735781669617,
      "eval_runtime": 1.6354,
      "eval_samples_per_second": 61.148,
      "eval_steps_per_second": 1.834,
      "step": 18
    },
    {
      "epoch": 3.8,
      "grad_norm": 22.628250122070312,
      "learning_rate": 3.229166666666667e-05,
      "loss": 0.6805,
      "step": 19
    },
    {
      "epoch": 3.8,
      "eval_accuracy": 0.47,
      "eval_loss": 0.7708593606948853,
      "eval_runtime": 1.738,
      "eval_samples_per_second": 57.539,
      "eval_steps_per_second": 1.726,
      "step": 19
    },
    {
      "epoch": 4.0,
      "grad_norm": 26.391395568847656,
      "learning_rate": 3.125e-05,
      "loss": 0.7072,
      "step": 20
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.52,
      "eval_loss": 0.7524511814117432,
      "eval_runtime": 1.8066,
      "eval_samples_per_second": 55.352,
      "eval_steps_per_second": 1.661,
      "step": 20
    },
    {
      "epoch": 4.2,
      "grad_norm": 28.18121910095215,
      "learning_rate": 3.0208333333333334e-05,
      "loss": 0.671,
      "step": 21
    },
    {
      "epoch": 4.2,
      "eval_accuracy": 0.42,
      "eval_loss": 0.7227538824081421,
      "eval_runtime": 1.8959,
      "eval_samples_per_second": 52.745,
      "eval_steps_per_second": 1.582,
      "step": 21
    },
    {
      "epoch": 4.4,
      "grad_norm": 2.9892055988311768,
      "learning_rate": 2.916666666666667e-05,
      "loss": 0.6591,
      "step": 22
    },
    {
      "epoch": 4.4,
      "eval_accuracy": 0.44,
      "eval_loss": 0.7137499451637268,
      "eval_runtime": 1.8291,
      "eval_samples_per_second": 54.67,
      "eval_steps_per_second": 1.64,
      "step": 22
    },
    {
      "epoch": 4.6,
      "grad_norm": 2.557835102081299,
      "learning_rate": 2.8125000000000003e-05,
      "loss": 0.646,
      "step": 23
    },
    {
      "epoch": 4.6,
      "eval_accuracy": 0.43,
      "eval_loss": 0.7131056189537048,
      "eval_runtime": 1.8972,
      "eval_samples_per_second": 52.709,
      "eval_steps_per_second": 1.581,
      "step": 23
    },
    {
      "epoch": 4.8,
      "grad_norm": 2.620720624923706,
      "learning_rate": 2.7083333333333332e-05,
      "loss": 0.6408,
      "step": 24
    },
    {
      "epoch": 4.8,
      "eval_accuracy": 0.46,
      "eval_loss": 0.7070116400718689,
      "eval_runtime": 1.7017,
      "eval_samples_per_second": 58.766,
      "eval_steps_per_second": 1.763,
      "step": 24
    },
    {
      "epoch": 5.0,
      "grad_norm": 11.309135437011719,
      "learning_rate": 2.604166666666667e-05,
      "loss": 0.6172,
      "step": 25
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.45,
      "eval_loss": 0.7086328864097595,
      "eval_runtime": 1.7745,
      "eval_samples_per_second": 56.354,
      "eval_steps_per_second": 1.691,
      "step": 25
    },
    {
      "epoch": 5.2,
      "grad_norm": 6.028090953826904,
      "learning_rate": 2.5e-05,
      "loss": 0.6046,
      "step": 26
    },
    {
      "epoch": 5.2,
      "eval_accuracy": 0.47,
      "eval_loss": 0.7231249809265137,
      "eval_runtime": 1.7703,
      "eval_samples_per_second": 56.487,
      "eval_steps_per_second": 1.695,
      "step": 26
    },
    {
      "epoch": 5.4,
      "grad_norm": 7.73970890045166,
      "learning_rate": 2.3958333333333334e-05,
      "loss": 0.593,
      "step": 27
    },
    {
      "epoch": 5.4,
      "eval_accuracy": 0.47,
      "eval_loss": 0.7212988138198853,
      "eval_runtime": 1.8912,
      "eval_samples_per_second": 52.876,
      "eval_steps_per_second": 1.586,
      "step": 27
    },
    {
      "epoch": 5.6,
      "grad_norm": 4.664271831512451,
      "learning_rate": 2.2916666666666667e-05,
      "loss": 0.5987,
      "step": 28
    },
    {
      "epoch": 5.6,
      "eval_accuracy": 0.5,
      "eval_loss": 0.7355861067771912,
      "eval_runtime": 1.7681,
      "eval_samples_per_second": 56.559,
      "eval_steps_per_second": 1.697,
      "step": 28
    },
    {
      "epoch": 5.8,
      "grad_norm": 18.360387802124023,
      "learning_rate": 2.1875e-05,
      "loss": 0.6062,
      "step": 29
    },
    {
      "epoch": 5.8,
      "eval_accuracy": 0.47,
      "eval_loss": 0.7342479825019836,
      "eval_runtime": 1.8077,
      "eval_samples_per_second": 55.318,
      "eval_steps_per_second": 1.66,
      "step": 29
    },
    {
      "epoch": 6.0,
      "grad_norm": 22.629905700683594,
      "learning_rate": 2.0833333333333336e-05,
      "loss": 0.6791,
      "step": 30
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.51,
      "eval_loss": 0.7144920229911804,
      "eval_runtime": 1.7475,
      "eval_samples_per_second": 57.225,
      "eval_steps_per_second": 1.717,
      "step": 30
    },
    {
      "epoch": 6.2,
      "grad_norm": 10.081146240234375,
      "learning_rate": 1.9791666666666665e-05,
      "loss": 0.6149,
      "step": 31
    },
    {
      "epoch": 6.2,
      "eval_accuracy": 0.53,
      "eval_loss": 0.6931836009025574,
      "eval_runtime": 1.6643,
      "eval_samples_per_second": 60.086,
      "eval_steps_per_second": 1.803,
      "step": 31
    },
    {
      "epoch": 6.4,
      "grad_norm": 4.46759557723999,
      "learning_rate": 1.8750000000000002e-05,
      "loss": 0.5574,
      "step": 32
    },
    {
      "epoch": 6.4,
      "eval_accuracy": 0.6,
      "eval_loss": 0.6884767413139343,
      "eval_runtime": 1.5894,
      "eval_samples_per_second": 62.917,
      "eval_steps_per_second": 1.888,
      "step": 32
    },
    {
      "epoch": 6.6,
      "grad_norm": 13.157549858093262,
      "learning_rate": 1.7708333333333335e-05,
      "loss": 0.5878,
      "step": 33
    },
    {
      "epoch": 6.6,
      "eval_accuracy": 0.59,
      "eval_loss": 0.6877928972244263,
      "eval_runtime": 1.5704,
      "eval_samples_per_second": 63.676,
      "eval_steps_per_second": 1.91,
      "step": 33
    },
    {
      "epoch": 6.8,
      "grad_norm": 5.638681888580322,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 0.585,
      "step": 34
    },
    {
      "epoch": 6.8,
      "eval_accuracy": 0.57,
      "eval_loss": 0.6898437738418579,
      "eval_runtime": 1.6724,
      "eval_samples_per_second": 59.793,
      "eval_steps_per_second": 1.794,
      "step": 34
    },
    {
      "epoch": 7.0,
      "grad_norm": 10.743139266967773,
      "learning_rate": 1.5625e-05,
      "loss": 0.5654,
      "step": 35
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.52,
      "eval_loss": 0.7054589986801147,
      "eval_runtime": 1.6311,
      "eval_samples_per_second": 61.309,
      "eval_steps_per_second": 1.839,
      "step": 35
    },
    {
      "epoch": 7.2,
      "grad_norm": 7.46209716796875,
      "learning_rate": 1.4583333333333335e-05,
      "loss": 0.5444,
      "step": 36
    },
    {
      "epoch": 7.2,
      "eval_accuracy": 0.52,
      "eval_loss": 0.7182031869888306,
      "eval_runtime": 1.6889,
      "eval_samples_per_second": 59.208,
      "eval_steps_per_second": 1.776,
      "step": 36
    },
    {
      "epoch": 7.4,
      "grad_norm": 2.8762495517730713,
      "learning_rate": 1.3541666666666666e-05,
      "loss": 0.5509,
      "step": 37
    },
    {
      "epoch": 7.4,
      "eval_accuracy": 0.49,
      "eval_loss": 0.737841784954071,
      "eval_runtime": 1.5978,
      "eval_samples_per_second": 62.586,
      "eval_steps_per_second": 1.878,
      "step": 37
    },
    {
      "epoch": 7.6,
      "grad_norm": 14.286393165588379,
      "learning_rate": 1.25e-05,
      "loss": 0.5409,
      "step": 38
    },
    {
      "epoch": 7.6,
      "eval_accuracy": 0.5,
      "eval_loss": 0.741650402545929,
      "eval_runtime": 1.5957,
      "eval_samples_per_second": 62.668,
      "eval_steps_per_second": 1.88,
      "step": 38
    },
    {
      "epoch": 7.8,
      "grad_norm": 19.287935256958008,
      "learning_rate": 1.1458333333333333e-05,
      "loss": 0.6008,
      "step": 39
    },
    {
      "epoch": 7.8,
      "eval_accuracy": 0.51,
      "eval_loss": 0.7236230373382568,
      "eval_runtime": 1.5983,
      "eval_samples_per_second": 62.567,
      "eval_steps_per_second": 1.877,
      "step": 39
    },
    {
      "epoch": 8.0,
      "grad_norm": 31.297225952148438,
      "learning_rate": 1.0416666666666668e-05,
      "loss": 0.5714,
      "step": 40
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.54,
      "eval_loss": 0.6986133456230164,
      "eval_runtime": 1.594,
      "eval_samples_per_second": 62.736,
      "eval_steps_per_second": 1.882,
      "step": 40
    },
    {
      "epoch": 8.2,
      "grad_norm": 14.829645156860352,
      "learning_rate": 9.375000000000001e-06,
      "loss": 0.5191,
      "step": 41
    },
    {
      "epoch": 8.2,
      "eval_accuracy": 0.58,
      "eval_loss": 0.6758496761322021,
      "eval_runtime": 1.5966,
      "eval_samples_per_second": 62.632,
      "eval_steps_per_second": 1.879,
      "step": 41
    },
    {
      "epoch": 8.4,
      "grad_norm": 10.33276081085205,
      "learning_rate": 8.333333333333334e-06,
      "loss": 0.5355,
      "step": 42
    },
    {
      "epoch": 8.4,
      "eval_accuracy": 0.62,
      "eval_loss": 0.6673437356948853,
      "eval_runtime": 1.6879,
      "eval_samples_per_second": 59.245,
      "eval_steps_per_second": 1.777,
      "step": 42
    },
    {
      "epoch": 8.6,
      "grad_norm": 11.735895156860352,
      "learning_rate": 7.2916666666666674e-06,
      "loss": 0.4928,
      "step": 43
    },
    {
      "epoch": 8.6,
      "eval_accuracy": 0.62,
      "eval_loss": 0.6667187213897705,
      "eval_runtime": 1.5808,
      "eval_samples_per_second": 63.259,
      "eval_steps_per_second": 1.898,
      "step": 43
    },
    {
      "epoch": 8.8,
      "grad_norm": 25.970138549804688,
      "learning_rate": 6.25e-06,
      "loss": 0.5404,
      "step": 44
    },
    {
      "epoch": 8.8,
      "eval_accuracy": 0.62,
      "eval_loss": 0.6707518100738525,
      "eval_runtime": 1.5837,
      "eval_samples_per_second": 63.145,
      "eval_steps_per_second": 1.894,
      "step": 44
    },
    {
      "epoch": 9.0,
      "grad_norm": 29.1481990814209,
      "learning_rate": 5.208333333333334e-06,
      "loss": 0.5967,
      "step": 45
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.62,
      "eval_loss": 0.6674315929412842,
      "eval_runtime": 1.5809,
      "eval_samples_per_second": 63.255,
      "eval_steps_per_second": 1.898,
      "step": 45
    },
    {
      "epoch": 9.2,
      "grad_norm": 21.367929458618164,
      "learning_rate": 4.166666666666667e-06,
      "loss": 0.5589,
      "step": 46
    },
    {
      "epoch": 9.2,
      "eval_accuracy": 0.64,
      "eval_loss": 0.663388729095459,
      "eval_runtime": 1.5813,
      "eval_samples_per_second": 63.239,
      "eval_steps_per_second": 1.897,
      "step": 46
    },
    {
      "epoch": 9.4,
      "grad_norm": 20.662874221801758,
      "learning_rate": 3.125e-06,
      "loss": 0.5451,
      "step": 47
    },
    {
      "epoch": 9.4,
      "eval_accuracy": 0.66,
      "eval_loss": 0.6626856327056885,
      "eval_runtime": 1.5818,
      "eval_samples_per_second": 63.218,
      "eval_steps_per_second": 1.897,
      "step": 47
    },
    {
      "epoch": 9.6,
      "grad_norm": 15.55025863647461,
      "learning_rate": 2.0833333333333334e-06,
      "loss": 0.4518,
      "step": 48
    },
    {
      "epoch": 9.6,
      "eval_accuracy": 0.65,
      "eval_loss": 0.6588379144668579,
      "eval_runtime": 1.5812,
      "eval_samples_per_second": 63.244,
      "eval_steps_per_second": 1.897,
      "step": 48
    },
    {
      "epoch": 9.8,
      "grad_norm": 2.4273836612701416,
      "learning_rate": 1.0416666666666667e-06,
      "loss": 0.5133,
      "step": 49
    },
    {
      "epoch": 9.8,
      "eval_accuracy": 0.63,
      "eval_loss": 0.6571874022483826,
      "eval_runtime": 1.5817,
      "eval_samples_per_second": 63.224,
      "eval_steps_per_second": 1.897,
      "step": 49
    },
    {
      "epoch": 10.0,
      "grad_norm": 11.029585838317871,
      "learning_rate": 0.0,
      "loss": 0.4801,
      "step": 50
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.64,
      "eval_loss": 0.6585450172424316,
      "eval_runtime": 1.5799,
      "eval_samples_per_second": 63.295,
      "eval_steps_per_second": 1.899,
      "step": 50
    },
    {
      "epoch": 10.0,
      "step": 50,
      "total_flos": 8927832973508608.0,
      "train_loss": 0.6846528232097626,
      "train_runtime": 233.8542,
      "train_samples_per_second": 17.105,
      "train_steps_per_second": 0.214
    }
  ],
  "logging_steps": 1,
  "max_steps": 50,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 500,
  "total_flos": 8927832973508608.0,
  "train_batch_size": 10,
  "trial_name": null,
  "trial_params": null
}