juantollo's picture
Upload folder using huggingface_hub
b1faa7a verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 10000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05,
"grad_norm": 0.03134724870324135,
"learning_rate": 0.00095,
"loss": 1.1746,
"step": 500
},
{
"epoch": 0.1,
"grad_norm": 0.01849432662129402,
"learning_rate": 0.0009000000000000001,
"loss": 1.1003,
"step": 1000
},
{
"epoch": 0.15,
"grad_norm": 0.03626084700226784,
"learning_rate": 0.00085,
"loss": 1.0882,
"step": 1500
},
{
"epoch": 0.2,
"grad_norm": 0.03540480509400368,
"learning_rate": 0.0008,
"loss": 1.0733,
"step": 2000
},
{
"epoch": 0.25,
"grad_norm": 0.04738762974739075,
"learning_rate": 0.00075,
"loss": 1.0577,
"step": 2500
},
{
"epoch": 0.3,
"grad_norm": 0.06647106260061264,
"learning_rate": 0.0007,
"loss": 1.059,
"step": 3000
},
{
"epoch": 0.35,
"grad_norm": 0.06651030480861664,
"learning_rate": 0.0006500000000000001,
"loss": 1.0458,
"step": 3500
},
{
"epoch": 0.4,
"grad_norm": 0.07928076386451721,
"learning_rate": 0.0006,
"loss": 1.0317,
"step": 4000
},
{
"epoch": 0.45,
"grad_norm": 0.09158056229352951,
"learning_rate": 0.00055,
"loss": 1.0277,
"step": 4500
},
{
"epoch": 0.5,
"grad_norm": 0.08878821134567261,
"learning_rate": 0.0005,
"loss": 1.0124,
"step": 5000
},
{
"epoch": 0.55,
"grad_norm": 0.13431012630462646,
"learning_rate": 0.00045000000000000004,
"loss": 1.0007,
"step": 5500
},
{
"epoch": 0.6,
"grad_norm": 0.10832180082798004,
"learning_rate": 0.0004,
"loss": 0.9889,
"step": 6000
},
{
"epoch": 0.65,
"grad_norm": 0.10065455734729767,
"learning_rate": 0.00035,
"loss": 0.9858,
"step": 6500
},
{
"epoch": 0.7,
"grad_norm": 0.1227111890912056,
"learning_rate": 0.0003,
"loss": 0.971,
"step": 7000
},
{
"epoch": 0.75,
"grad_norm": 0.13141539692878723,
"learning_rate": 0.00025,
"loss": 0.9733,
"step": 7500
},
{
"epoch": 0.8,
"grad_norm": 0.1325940638780594,
"learning_rate": 0.0002,
"loss": 0.9567,
"step": 8000
},
{
"epoch": 0.85,
"grad_norm": 0.13246145844459534,
"learning_rate": 0.00015,
"loss": 0.9713,
"step": 8500
},
{
"epoch": 0.9,
"grad_norm": 0.12682273983955383,
"learning_rate": 0.0001,
"loss": 0.9513,
"step": 9000
},
{
"epoch": 0.95,
"grad_norm": 0.15320372581481934,
"learning_rate": 5e-05,
"loss": 0.9449,
"step": 9500
},
{
"epoch": 1.0,
"grad_norm": 0.15557877719402313,
"learning_rate": 0.0,
"loss": 0.9535,
"step": 10000
}
],
"logging_steps": 500,
"max_steps": 10000,
"num_input_tokens_seen": 0,
"num_train_epochs": 9223372036854775807,
"save_steps": 2000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.4824605073408e+17,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}