{ "best_metric": 0.7474747474747475, "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-11/checkpoint-144", "epoch": 3.0, "eval_steps": 500, "global_step": 144, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1, "grad_norm": 1.2641795873641968, "learning_rate": 4.277342370667828e-06, "loss": 0.702, "step": 5 }, { "epoch": 0.21, "grad_norm": 1.1789261102676392, "learning_rate": 8.554684741335655e-06, "loss": 0.6864, "step": 10 }, { "epoch": 0.31, "grad_norm": 1.3848576545715332, "learning_rate": 1.2832027112003484e-05, "loss": 0.6855, "step": 15 }, { "epoch": 0.42, "grad_norm": 0.953545093536377, "learning_rate": 1.710936948267131e-05, "loss": 0.6668, "step": 20 }, { "epoch": 0.52, "grad_norm": 0.9704298377037048, "learning_rate": 2.138671185333914e-05, "loss": 0.6426, "step": 25 }, { "epoch": 0.62, "grad_norm": 0.7949383854866028, "learning_rate": 2.5664054224006968e-05, "loss": 0.6293, "step": 30 }, { "epoch": 0.73, "grad_norm": 2.390880823135376, "learning_rate": 2.994139659467479e-05, "loss": 0.5631, "step": 35 }, { "epoch": 0.83, "grad_norm": 1.7147374153137207, "learning_rate": 3.421873896534262e-05, "loss": 0.6562, "step": 40 }, { "epoch": 0.94, "grad_norm": 3.123836040496826, "learning_rate": 3.849608133601045e-05, "loss": 0.6243, "step": 45 }, { "epoch": 1.0, "eval_f1": 0.7326732673267327, "eval_loss": 0.7374954223632812, "eval_runtime": 1.3701, "eval_samples_per_second": 46.713, "eval_steps_per_second": 5.839, "step": 48 }, { "epoch": 1.04, "grad_norm": 4.445352077484131, "learning_rate": 4.0872382653048134e-05, "loss": 0.6965, "step": 50 }, { "epoch": 1.15, "grad_norm": 0.8689994215965271, "learning_rate": 4.03971223896406e-05, "loss": 0.6945, "step": 55 }, { "epoch": 1.25, "grad_norm": 1.744273066520691, "learning_rate": 3.992186212623306e-05, "loss": 0.4097, "step": 60 }, { "epoch": 1.35, "grad_norm": 0.6568699479103088, "learning_rate": 3.944660186282552e-05, "loss": 0.5621, "step": 65 }, { "epoch": 1.46, "grad_norm": 1.516032099723816, "learning_rate": 3.8971341599417986e-05, "loss": 0.4826, "step": 70 }, { "epoch": 1.56, "grad_norm": Infinity, "learning_rate": 3.8591133388691955e-05, "loss": 0.7666, "step": 75 }, { "epoch": 1.67, "grad_norm": 1.788261890411377, "learning_rate": 3.811587312528442e-05, "loss": 0.6187, "step": 80 }, { "epoch": 1.77, "grad_norm": 1.7383731603622437, "learning_rate": 3.764061286187688e-05, "loss": 0.5974, "step": 85 }, { "epoch": 1.88, "grad_norm": 1.9014606475830078, "learning_rate": 3.7165352598469344e-05, "loss": 0.5979, "step": 90 }, { "epoch": 1.98, "grad_norm": 2.1836445331573486, "learning_rate": 3.669009233506181e-05, "loss": 0.6104, "step": 95 }, { "epoch": 2.0, "eval_f1": 0.7326732673267327, "eval_loss": 0.6798496246337891, "eval_runtime": 1.3616, "eval_samples_per_second": 47.003, "eval_steps_per_second": 5.875, "step": 96 }, { "epoch": 2.08, "grad_norm": 1.0497145652770996, "learning_rate": 3.6214832071654276e-05, "loss": 0.5777, "step": 100 }, { "epoch": 2.19, "grad_norm": 1.1734980344772339, "learning_rate": 3.573957180824674e-05, "loss": 0.4793, "step": 105 }, { "epoch": 2.29, "grad_norm": 1.6985560655593872, "learning_rate": 3.52643115448392e-05, "loss": 0.5708, "step": 110 }, { "epoch": 2.4, "grad_norm": 1.110007882118225, "learning_rate": 3.4789051281431665e-05, "loss": 0.5446, "step": 115 }, { "epoch": 2.5, "grad_norm": Infinity, "learning_rate": 3.4408843070705634e-05, "loss": 0.5224, "step": 120 }, { "epoch": 2.6, "grad_norm": 
4.383241653442383, "learning_rate": 3.39335828072981e-05, "loss": 0.4492, "step": 125 }, { "epoch": 2.71, "grad_norm": 20.106430053710938, "learning_rate": 3.355337459657207e-05, "loss": 0.6228, "step": 130 }, { "epoch": 2.81, "grad_norm": 3.188138008117676, "learning_rate": 3.3078114333164536e-05, "loss": 0.3671, "step": 135 }, { "epoch": 2.92, "grad_norm": 3.840233087539673, "learning_rate": 3.2602854069757e-05, "loss": 0.3217, "step": 140 }, { "epoch": 3.0, "eval_f1": 0.7474747474747475, "eval_loss": 0.8114051818847656, "eval_runtime": 1.4005, "eval_samples_per_second": 45.699, "eval_steps_per_second": 5.712, "step": 144 } ], "logging_steps": 5, "max_steps": 480, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 2226487719780480.0, "train_batch_size": 4, "trial_name": null, "trial_params": { "learning_rate": 4.1062486758411146e-05, "per_device_train_batch_size": 4 } }