{ "best_metric": 2.5862808227539062, "best_model_checkpoint": "./Qwen2-4B-Chat-hinglish-sft/checkpoint-125", "epoch": 0.49652432969215493, "eval_steps": 25, "global_step": 125, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1, "grad_norm": 1.1830133199691772, "learning_rate": 0.00019999629591162656, "loss": 4.7862, "step": 25 }, { "epoch": 0.1, "eval_loss": 4.098670482635498, "eval_runtime": 4.1063, "eval_samples_per_second": 2.679, "eval_steps_per_second": 2.679, "step": 25 }, { "epoch": 0.2, "grad_norm": 1.0619295835494995, "learning_rate": 0.00019932568492674144, "loss": 2.9161, "step": 50 }, { "epoch": 0.2, "eval_loss": 3.1406357288360596, "eval_runtime": 4.0777, "eval_samples_per_second": 2.698, "eval_steps_per_second": 2.698, "step": 50 }, { "epoch": 0.3, "grad_norm": 1.1028335094451904, "learning_rate": 0.0001975064532257195, "loss": 2.4282, "step": 75 }, { "epoch": 0.3, "eval_loss": 2.8003995418548584, "eval_runtime": 4.076, "eval_samples_per_second": 2.699, "eval_steps_per_second": 2.699, "step": 75 }, { "epoch": 0.4, "grad_norm": 0.9855464100837708, "learning_rate": 0.00019455963874271426, "loss": 2.2815, "step": 100 }, { "epoch": 0.4, "eval_loss": 2.6393470764160156, "eval_runtime": 4.084, "eval_samples_per_second": 2.693, "eval_steps_per_second": 2.693, "step": 100 }, { "epoch": 0.5, "grad_norm": 1.154259204864502, "learning_rate": 0.00019051931898913976, "loss": 2.1485, "step": 125 }, { "epoch": 0.5, "eval_loss": 2.5862808227539062, "eval_runtime": 4.0324, "eval_samples_per_second": 2.728, "eval_steps_per_second": 2.728, "step": 125 } ], "logging_steps": 25, "max_steps": 753, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 25, "total_flos": 905742623738880.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }