|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.4, |
|
"eval_steps": 25, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.18923527002334595, |
|
"learning_rate": 0.0001666666666666667, |
|
"loss": 1.1779, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_loss": 0.7435017824172974, |
|
"eval_runtime": 621.1563, |
|
"eval_samples_per_second": 4.356, |
|
"eval_steps_per_second": 0.546, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.16780352592468262, |
|
"learning_rate": 0.00019148936170212768, |
|
"loss": 1.2708, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 0.7384106516838074, |
|
"eval_runtime": 619.3795, |
|
"eval_samples_per_second": 4.369, |
|
"eval_steps_per_second": 0.547, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.1804531365633011, |
|
"learning_rate": 0.00018085106382978726, |
|
"loss": 1.2848, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 0.7341726422309875, |
|
"eval_runtime": 619.225, |
|
"eval_samples_per_second": 4.37, |
|
"eval_steps_per_second": 0.547, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.17204701900482178, |
|
"learning_rate": 0.00017021276595744682, |
|
"loss": 1.3443, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 0.7313376665115356, |
|
"eval_runtime": 619.8363, |
|
"eval_samples_per_second": 4.366, |
|
"eval_steps_per_second": 0.547, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.2668892741203308, |
|
"learning_rate": 0.00015957446808510637, |
|
"loss": 1.2563, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_loss": 0.7282304167747498, |
|
"eval_runtime": 619.962, |
|
"eval_samples_per_second": 4.365, |
|
"eval_steps_per_second": 0.547, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.21089820563793182, |
|
"learning_rate": 0.00014893617021276596, |
|
"loss": 0.7529, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_loss": 0.7184526324272156, |
|
"eval_runtime": 619.2606, |
|
"eval_samples_per_second": 4.37, |
|
"eval_steps_per_second": 0.547, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.158623605966568, |
|
"learning_rate": 0.00013829787234042554, |
|
"loss": 0.7957, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_loss": 0.7136204242706299, |
|
"eval_runtime": 619.6757, |
|
"eval_samples_per_second": 4.367, |
|
"eval_steps_per_second": 0.547, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.16229337453842163, |
|
"learning_rate": 0.00012765957446808513, |
|
"loss": 0.7322, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 0.7111368179321289, |
|
"eval_runtime": 620.1668, |
|
"eval_samples_per_second": 4.363, |
|
"eval_steps_per_second": 0.547, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9223372036854775807, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.0317260660736e+16, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|