|
{ |
|
"best_metric": 3.699657678604126, |
|
"best_model_checkpoint": "/scratch/ka2773/project/lm-mem/checkpoints/gpt2_40m_12-768-1024_a_02/checkpoint-31000", |
|
"epoch": 8.481532147742818, |
|
"global_step": 31000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 6e-05, |
|
"loss": 6.6803, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 6.017919540405273, |
|
"eval_runtime": 9.4636, |
|
"eval_samples_per_second": 24.938, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 6e-05, |
|
"loss": 6.0117, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_loss": 5.752170085906982, |
|
"eval_runtime": 9.4619, |
|
"eval_samples_per_second": 24.942, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 6e-05, |
|
"loss": 5.8304, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_loss": 5.598978519439697, |
|
"eval_runtime": 9.4601, |
|
"eval_samples_per_second": 24.947, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 6e-05, |
|
"loss": 5.6932, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_loss": 5.4606218338012695, |
|
"eval_runtime": 9.4514, |
|
"eval_samples_per_second": 24.97, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 6e-05, |
|
"loss": 5.5582, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_loss": 5.324854373931885, |
|
"eval_runtime": 9.4555, |
|
"eval_samples_per_second": 24.959, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 6e-05, |
|
"loss": 5.4194, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_loss": 5.187784194946289, |
|
"eval_runtime": 9.4406, |
|
"eval_samples_per_second": 24.998, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 6e-05, |
|
"loss": 5.2874, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_loss": 5.043916702270508, |
|
"eval_runtime": 9.4594, |
|
"eval_samples_per_second": 24.949, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 6e-05, |
|
"loss": 5.1502, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"eval_loss": 4.9315690994262695, |
|
"eval_runtime": 9.458, |
|
"eval_samples_per_second": 24.952, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 6e-05, |
|
"loss": 5.0351, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"eval_loss": 4.828794479370117, |
|
"eval_runtime": 9.4563, |
|
"eval_samples_per_second": 24.957, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 6e-05, |
|
"loss": 4.9495, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"eval_loss": 4.735019683837891, |
|
"eval_runtime": 9.4648, |
|
"eval_samples_per_second": 24.934, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 6e-05, |
|
"loss": 4.8715, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"eval_loss": 4.6624579429626465, |
|
"eval_runtime": 9.4463, |
|
"eval_samples_per_second": 24.983, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 6e-05, |
|
"loss": 4.7962, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"eval_loss": 4.586391925811768, |
|
"eval_runtime": 9.4563, |
|
"eval_samples_per_second": 24.957, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 6e-05, |
|
"loss": 4.7329, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"eval_loss": 4.520113468170166, |
|
"eval_runtime": 9.4613, |
|
"eval_samples_per_second": 24.944, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 6e-05, |
|
"loss": 4.6692, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"eval_loss": 4.469329357147217, |
|
"eval_runtime": 9.4531, |
|
"eval_samples_per_second": 24.965, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 6e-05, |
|
"loss": 4.603, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"eval_loss": 4.408419132232666, |
|
"eval_runtime": 9.4573, |
|
"eval_samples_per_second": 24.954, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 6e-05, |
|
"loss": 4.5274, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"eval_loss": 4.358486175537109, |
|
"eval_runtime": 9.4556, |
|
"eval_samples_per_second": 24.959, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 6e-05, |
|
"loss": 4.482, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"eval_loss": 4.316709041595459, |
|
"eval_runtime": 9.4572, |
|
"eval_samples_per_second": 24.954, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 6e-05, |
|
"loss": 4.4431, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"eval_loss": 4.261863708496094, |
|
"eval_runtime": 9.4644, |
|
"eval_samples_per_second": 24.935, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 6e-05, |
|
"loss": 4.4003, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"eval_loss": 4.2196125984191895, |
|
"eval_runtime": 9.4647, |
|
"eval_samples_per_second": 24.935, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 6e-05, |
|
"loss": 4.3602, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"eval_loss": 4.1680402755737305, |
|
"eval_runtime": 9.4399, |
|
"eval_samples_per_second": 25.0, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 6e-05, |
|
"loss": 4.3072, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"eval_loss": 4.134276390075684, |
|
"eval_runtime": 9.4305, |
|
"eval_samples_per_second": 25.025, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 6e-05, |
|
"loss": 4.2849, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"eval_loss": 4.093378067016602, |
|
"eval_runtime": 9.4676, |
|
"eval_samples_per_second": 24.927, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 6e-05, |
|
"loss": 4.2092, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"eval_loss": 4.067226886749268, |
|
"eval_runtime": 9.4548, |
|
"eval_samples_per_second": 24.961, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 6e-05, |
|
"loss": 4.1923, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"eval_loss": 4.044227600097656, |
|
"eval_runtime": 9.4733, |
|
"eval_samples_per_second": 24.912, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 6e-05, |
|
"loss": 4.1774, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"eval_loss": 4.027223587036133, |
|
"eval_runtime": 9.4672, |
|
"eval_samples_per_second": 24.928, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 6e-05, |
|
"loss": 4.1547, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"eval_loss": 4.007537841796875, |
|
"eval_runtime": 9.4577, |
|
"eval_samples_per_second": 24.953, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 6e-05, |
|
"loss": 4.141, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"eval_loss": 3.9847309589385986, |
|
"eval_runtime": 9.464, |
|
"eval_samples_per_second": 24.937, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 6e-05, |
|
"loss": 4.1375, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"eval_loss": 3.9671761989593506, |
|
"eval_runtime": 9.4732, |
|
"eval_samples_per_second": 24.912, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 6e-05, |
|
"loss": 4.1064, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"eval_loss": 3.9399986267089844, |
|
"eval_runtime": 9.4647, |
|
"eval_samples_per_second": 24.935, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 6e-05, |
|
"loss": 4.0485, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"eval_loss": 3.932234525680542, |
|
"eval_runtime": 9.4672, |
|
"eval_samples_per_second": 24.928, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 6e-05, |
|
"loss": 4.0335, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"eval_loss": 3.9215309619903564, |
|
"eval_runtime": 9.4597, |
|
"eval_samples_per_second": 24.948, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 6e-05, |
|
"loss": 4.0358, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"eval_loss": 3.898069381713867, |
|
"eval_runtime": 9.4635, |
|
"eval_samples_per_second": 24.938, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 6e-05, |
|
"loss": 4.0187, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"eval_loss": 3.893036127090454, |
|
"eval_runtime": 9.4622, |
|
"eval_samples_per_second": 24.941, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 6e-05, |
|
"loss": 4.0188, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"eval_loss": 3.8782527446746826, |
|
"eval_runtime": 9.459, |
|
"eval_samples_per_second": 24.95, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"learning_rate": 6e-05, |
|
"loss": 4.0176, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"eval_loss": 3.8655033111572266, |
|
"eval_runtime": 9.4578, |
|
"eval_samples_per_second": 24.953, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 6e-05, |
|
"loss": 4.0025, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"eval_loss": 3.8568382263183594, |
|
"eval_runtime": 9.4581, |
|
"eval_samples_per_second": 24.952, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"learning_rate": 6e-05, |
|
"loss": 3.9554, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"eval_loss": 3.849705696105957, |
|
"eval_runtime": 9.4471, |
|
"eval_samples_per_second": 24.981, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 6e-05, |
|
"loss": 3.9252, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"eval_loss": 3.838930130004883, |
|
"eval_runtime": 9.4605, |
|
"eval_samples_per_second": 24.946, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"learning_rate": 6e-05, |
|
"loss": 3.9239, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"eval_loss": 3.8333945274353027, |
|
"eval_runtime": 9.462, |
|
"eval_samples_per_second": 24.942, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 5.47, |
|
"learning_rate": 6e-05, |
|
"loss": 3.9354, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 5.47, |
|
"eval_loss": 3.8130805492401123, |
|
"eval_runtime": 9.4596, |
|
"eval_samples_per_second": 24.948, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 5.61, |
|
"learning_rate": 6e-05, |
|
"loss": 3.9418, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 5.61, |
|
"eval_loss": 3.8175787925720215, |
|
"eval_runtime": 9.4602, |
|
"eval_samples_per_second": 24.947, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"learning_rate": 6e-05, |
|
"loss": 3.9291, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"eval_loss": 3.807772636413574, |
|
"eval_runtime": 9.459, |
|
"eval_samples_per_second": 24.95, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 5.88, |
|
"learning_rate": 6e-05, |
|
"loss": 3.9309, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 5.88, |
|
"eval_loss": 3.7906742095947266, |
|
"eval_runtime": 9.4675, |
|
"eval_samples_per_second": 24.927, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 6e-05, |
|
"loss": 3.9105, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"eval_loss": 3.7875397205352783, |
|
"eval_runtime": 9.4606, |
|
"eval_samples_per_second": 24.946, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 6.16, |
|
"learning_rate": 6e-05, |
|
"loss": 3.8424, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 6.16, |
|
"eval_loss": 3.782013416290283, |
|
"eval_runtime": 9.4639, |
|
"eval_samples_per_second": 24.937, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 6.29, |
|
"learning_rate": 6e-05, |
|
"loss": 3.8579, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 6.29, |
|
"eval_loss": 3.781845808029175, |
|
"eval_runtime": 9.4621, |
|
"eval_samples_per_second": 24.941, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 6.43, |
|
"learning_rate": 6e-05, |
|
"loss": 3.868, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 6.43, |
|
"eval_loss": 3.76729679107666, |
|
"eval_runtime": 9.4657, |
|
"eval_samples_per_second": 24.932, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 6.57, |
|
"learning_rate": 6e-05, |
|
"loss": 3.8631, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 6.57, |
|
"eval_loss": 3.7602250576019287, |
|
"eval_runtime": 9.4649, |
|
"eval_samples_per_second": 24.934, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 6.7, |
|
"learning_rate": 6e-05, |
|
"loss": 3.8635, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 6.7, |
|
"eval_loss": 3.7623238563537598, |
|
"eval_runtime": 9.456, |
|
"eval_samples_per_second": 24.958, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 6.84, |
|
"learning_rate": 6e-05, |
|
"loss": 3.8632, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 6.84, |
|
"eval_loss": 3.7607743740081787, |
|
"eval_runtime": 9.4601, |
|
"eval_samples_per_second": 24.947, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 6.98, |
|
"learning_rate": 6e-05, |
|
"loss": 3.873, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 6.98, |
|
"eval_loss": 3.749258279800415, |
|
"eval_runtime": 9.4598, |
|
"eval_samples_per_second": 24.948, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 7.11, |
|
"learning_rate": 6e-05, |
|
"loss": 3.7911, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 7.11, |
|
"eval_loss": 3.744161367416382, |
|
"eval_runtime": 9.4582, |
|
"eval_samples_per_second": 24.952, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"learning_rate": 6e-05, |
|
"loss": 3.8039, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"eval_loss": 3.739082098007202, |
|
"eval_runtime": 9.4581, |
|
"eval_samples_per_second": 24.952, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 7.39, |
|
"learning_rate": 6e-05, |
|
"loss": 3.7996, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 7.39, |
|
"eval_loss": 3.738431692123413, |
|
"eval_runtime": 9.4714, |
|
"eval_samples_per_second": 24.917, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 7.52, |
|
"learning_rate": 6e-05, |
|
"loss": 3.8157, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 7.52, |
|
"eval_loss": 3.7242279052734375, |
|
"eval_runtime": 9.4626, |
|
"eval_samples_per_second": 24.94, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 7.66, |
|
"learning_rate": 6e-05, |
|
"loss": 3.8224, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 7.66, |
|
"eval_loss": 3.7267138957977295, |
|
"eval_runtime": 9.4527, |
|
"eval_samples_per_second": 24.966, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 7.8, |
|
"learning_rate": 6e-05, |
|
"loss": 3.8246, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 7.8, |
|
"eval_loss": 3.711819887161255, |
|
"eval_runtime": 9.4622, |
|
"eval_samples_per_second": 24.941, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 7.93, |
|
"learning_rate": 6e-05, |
|
"loss": 3.8176, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 7.93, |
|
"eval_loss": 3.707026958465576, |
|
"eval_runtime": 9.4595, |
|
"eval_samples_per_second": 24.948, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"learning_rate": 6e-05, |
|
"loss": 3.7763, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"eval_loss": 3.7134788036346436, |
|
"eval_runtime": 9.4603, |
|
"eval_samples_per_second": 24.946, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 8.21, |
|
"learning_rate": 6e-05, |
|
"loss": 3.7557, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 8.21, |
|
"eval_loss": 3.7067065238952637, |
|
"eval_runtime": 9.4683, |
|
"eval_samples_per_second": 24.925, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 8.34, |
|
"learning_rate": 6e-05, |
|
"loss": 3.7662, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 8.34, |
|
"eval_loss": 3.7023823261260986, |
|
"eval_runtime": 9.4321, |
|
"eval_samples_per_second": 25.021, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 8.48, |
|
"learning_rate": 6e-05, |
|
"loss": 3.7677, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 8.48, |
|
"eval_loss": 3.699657678604126, |
|
"eval_runtime": 9.4485, |
|
"eval_samples_per_second": 24.978, |
|
"step": 31000 |
|
} |
|
], |
|
"max_steps": 36550, |
|
"num_train_epochs": 10, |
|
"total_flos": 3969647640576000.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|