|
{ |
|
"best_metric": 0.7876038551330566, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-50", |
|
"epoch": 2.150537634408602, |
|
"eval_steps": 25, |
|
"global_step": 50, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.043010752688172046, |
|
"grad_norm": 9.35327434539795, |
|
"learning_rate": 5e-05, |
|
"loss": 13.6544, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.043010752688172046, |
|
"eval_loss": 13.57972526550293, |
|
"eval_runtime": 7.9714, |
|
"eval_samples_per_second": 19.695, |
|
"eval_steps_per_second": 2.509, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.08602150537634409, |
|
"grad_norm": 8.438759803771973, |
|
"learning_rate": 0.0001, |
|
"loss": 13.6314, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.12903225806451613, |
|
"grad_norm": 6.360323905944824, |
|
"learning_rate": 9.989294616193017e-05, |
|
"loss": 13.4301, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.17204301075268819, |
|
"grad_norm": 8.016193389892578, |
|
"learning_rate": 9.957224306869053e-05, |
|
"loss": 12.3516, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.21505376344086022, |
|
"grad_norm": 7.792659282684326, |
|
"learning_rate": 9.903926402016153e-05, |
|
"loss": 9.8482, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.25806451612903225, |
|
"grad_norm": 7.131053447723389, |
|
"learning_rate": 9.829629131445342e-05, |
|
"loss": 9.283, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.3010752688172043, |
|
"grad_norm": 11.247384071350098, |
|
"learning_rate": 9.73465064747553e-05, |
|
"loss": 6.194, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.34408602150537637, |
|
"grad_norm": 7.143100261688232, |
|
"learning_rate": 9.619397662556435e-05, |
|
"loss": 4.8235, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.3870967741935484, |
|
"grad_norm": 6.129095077514648, |
|
"learning_rate": 9.484363707663442e-05, |
|
"loss": 5.4014, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.43010752688172044, |
|
"grad_norm": 14.87994384765625, |
|
"learning_rate": 9.330127018922194e-05, |
|
"loss": 5.0606, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.4731182795698925, |
|
"grad_norm": 11.771108627319336, |
|
"learning_rate": 9.157348061512727e-05, |
|
"loss": 4.6802, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.5161290322580645, |
|
"grad_norm": 5.785019874572754, |
|
"learning_rate": 8.966766701456177e-05, |
|
"loss": 4.1953, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.5591397849462365, |
|
"grad_norm": 4.753819942474365, |
|
"learning_rate": 8.759199037394887e-05, |
|
"loss": 2.6155, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.6021505376344086, |
|
"grad_norm": 4.773311614990234, |
|
"learning_rate": 8.535533905932738e-05, |
|
"loss": 3.2332, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.6451612903225806, |
|
"grad_norm": 5.8297810554504395, |
|
"learning_rate": 8.296729075500344e-05, |
|
"loss": 2.3767, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.6881720430107527, |
|
"grad_norm": 6.003252983093262, |
|
"learning_rate": 8.043807145043604e-05, |
|
"loss": 2.3936, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.7311827956989247, |
|
"grad_norm": 4.296295166015625, |
|
"learning_rate": 7.777851165098012e-05, |
|
"loss": 2.2567, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.7741935483870968, |
|
"grad_norm": 2.917107343673706, |
|
"learning_rate": 7.500000000000001e-05, |
|
"loss": 2.0352, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.8172043010752689, |
|
"grad_norm": 3.7247848510742188, |
|
"learning_rate": 7.211443451095007e-05, |
|
"loss": 1.577, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.8602150537634409, |
|
"grad_norm": 3.8060548305511475, |
|
"learning_rate": 6.91341716182545e-05, |
|
"loss": 1.8322, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.9032258064516129, |
|
"grad_norm": 2.8651576042175293, |
|
"learning_rate": 6.607197326515808e-05, |
|
"loss": 1.4222, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.946236559139785, |
|
"grad_norm": 2.706366777420044, |
|
"learning_rate": 6.294095225512603e-05, |
|
"loss": 1.3017, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.989247311827957, |
|
"grad_norm": 2.2514758110046387, |
|
"learning_rate": 5.9754516100806423e-05, |
|
"loss": 1.3554, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 1.032258064516129, |
|
"grad_norm": 8.478227615356445, |
|
"learning_rate": 5.6526309611002594e-05, |
|
"loss": 1.731, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 1.075268817204301, |
|
"grad_norm": 5.380260467529297, |
|
"learning_rate": 5.327015646150716e-05, |
|
"loss": 0.7884, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 1.075268817204301, |
|
"eval_loss": 1.0795084238052368, |
|
"eval_runtime": 7.9611, |
|
"eval_samples_per_second": 19.721, |
|
"eval_steps_per_second": 2.512, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 1.118279569892473, |
|
"grad_norm": 4.564328670501709, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2343, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 1.1612903225806452, |
|
"grad_norm": 3.6037137508392334, |
|
"learning_rate": 4.6729843538492847e-05, |
|
"loss": 1.0317, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 1.2043010752688172, |
|
"grad_norm": 3.2914304733276367, |
|
"learning_rate": 4.347369038899744e-05, |
|
"loss": 0.9329, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 1.2473118279569892, |
|
"grad_norm": 5.4039154052734375, |
|
"learning_rate": 4.0245483899193595e-05, |
|
"loss": 1.3014, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 1.2903225806451613, |
|
"grad_norm": 3.8691208362579346, |
|
"learning_rate": 3.705904774487396e-05, |
|
"loss": 0.9831, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.3333333333333333, |
|
"grad_norm": 4.34861946105957, |
|
"learning_rate": 3.392802673484193e-05, |
|
"loss": 1.0507, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 1.3763440860215055, |
|
"grad_norm": 2.965681314468384, |
|
"learning_rate": 3.086582838174551e-05, |
|
"loss": 1.1723, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 1.4193548387096775, |
|
"grad_norm": 3.439244508743286, |
|
"learning_rate": 2.7885565489049946e-05, |
|
"loss": 0.9571, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 1.4623655913978495, |
|
"grad_norm": 2.797593593597412, |
|
"learning_rate": 2.500000000000001e-05, |
|
"loss": 0.8522, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 1.5053763440860215, |
|
"grad_norm": 2.8328845500946045, |
|
"learning_rate": 2.2221488349019903e-05, |
|
"loss": 1.0963, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 1.5483870967741935, |
|
"grad_norm": 2.290050506591797, |
|
"learning_rate": 1.9561928549563968e-05, |
|
"loss": 0.701, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 1.5913978494623655, |
|
"grad_norm": 3.077988386154175, |
|
"learning_rate": 1.703270924499656e-05, |
|
"loss": 0.8191, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 1.6344086021505375, |
|
"grad_norm": 2.5467429161071777, |
|
"learning_rate": 1.4644660940672627e-05, |
|
"loss": 0.9073, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 1.6774193548387095, |
|
"grad_norm": 2.68570876121521, |
|
"learning_rate": 1.2408009626051137e-05, |
|
"loss": 0.8795, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 1.7204301075268817, |
|
"grad_norm": 2.404557466506958, |
|
"learning_rate": 1.0332332985438248e-05, |
|
"loss": 0.7958, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.7634408602150538, |
|
"grad_norm": 2.3604447841644287, |
|
"learning_rate": 8.426519384872733e-06, |
|
"loss": 0.8523, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 1.8064516129032258, |
|
"grad_norm": 1.8468281030654907, |
|
"learning_rate": 6.698729810778065e-06, |
|
"loss": 0.6095, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 1.849462365591398, |
|
"grad_norm": 3.724862813949585, |
|
"learning_rate": 5.156362923365588e-06, |
|
"loss": 0.7875, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 1.89247311827957, |
|
"grad_norm": 3.9843430519104004, |
|
"learning_rate": 3.8060233744356633e-06, |
|
"loss": 0.906, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 1.935483870967742, |
|
"grad_norm": 1.6732141971588135, |
|
"learning_rate": 2.653493525244721e-06, |
|
"loss": 0.7148, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 1.978494623655914, |
|
"grad_norm": 2.5346105098724365, |
|
"learning_rate": 1.70370868554659e-06, |
|
"loss": 0.8121, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 2.021505376344086, |
|
"grad_norm": 5.31929349899292, |
|
"learning_rate": 9.607359798384785e-07, |
|
"loss": 1.3606, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 2.064516129032258, |
|
"grad_norm": 1.4526803493499756, |
|
"learning_rate": 4.277569313094809e-07, |
|
"loss": 0.6546, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 2.10752688172043, |
|
"grad_norm": 2.4552512168884277, |
|
"learning_rate": 1.0705383806982606e-07, |
|
"loss": 0.819, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 2.150537634408602, |
|
"grad_norm": 1.7610336542129517, |
|
"learning_rate": 0.0, |
|
"loss": 0.816, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 2.150537634408602, |
|
"eval_loss": 0.7876038551330566, |
|
"eval_runtime": 7.9591, |
|
"eval_samples_per_second": 19.726, |
|
"eval_steps_per_second": 2.513, |
|
"step": 50 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 50, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 25, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 1, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.0627374735425536e+18, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|