{ "best_metric": 0.2750368118286133, "best_model_checkpoint": "./ryan_model314_3/checkpoint-550", "epoch": 0.88, "eval_steps": 50, "global_step": 550, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08, "grad_norm": 1.5017586946487427, "learning_rate": 0.000192, "loss": 0.4423, "step": 50 }, { "epoch": 0.08, "eval_loss": 0.33861014246940613, "eval_na_accuracy": 0.904, "eval_ordinal_accuracy": 0.4629418472063854, "eval_ordinal_mae": 0.6577621472191316, "eval_runtime": 123.3898, "eval_samples_per_second": 8.104, "eval_steps_per_second": 1.013, "step": 50 }, { "epoch": 0.16, "grad_norm": 0.8501819372177124, "learning_rate": 0.00018400000000000003, "loss": 0.3088, "step": 100 }, { "epoch": 0.16, "eval_loss": 0.3268783390522003, "eval_na_accuracy": 0.928, "eval_ordinal_accuracy": 0.5370581527936146, "eval_ordinal_mae": 0.5969413880658287, "eval_runtime": 43.4997, "eval_samples_per_second": 22.989, "eval_steps_per_second": 2.874, "step": 100 }, { "epoch": 0.24, "grad_norm": 0.5424334406852722, "learning_rate": 0.00017600000000000002, "loss": 0.316, "step": 150 }, { "epoch": 0.24, "eval_loss": 0.3395713269710541, "eval_na_accuracy": 0.902, "eval_ordinal_accuracy": 0.5142531356898518, "eval_ordinal_mae": 0.6323422620227872, "eval_runtime": 43.339, "eval_samples_per_second": 23.074, "eval_steps_per_second": 2.884, "step": 150 }, { "epoch": 0.32, "grad_norm": 1.2484453916549683, "learning_rate": 0.000168, "loss": 0.2821, "step": 200 }, { "epoch": 0.32, "eval_loss": 0.32339948415756226, "eval_na_accuracy": 0.927, "eval_ordinal_accuracy": 0.5131128848346637, "eval_ordinal_mae": 0.6292874569299393, "eval_runtime": 42.0004, "eval_samples_per_second": 23.809, "eval_steps_per_second": 2.976, "step": 200 }, { "epoch": 0.4, "grad_norm": 1.4807660579681396, "learning_rate": 0.00016, "loss": 0.2731, "step": 250 }, { "epoch": 0.4, "eval_loss": 0.3313509225845337, "eval_na_accuracy": 0.925, "eval_ordinal_accuracy": 0.508551881413911, "eval_ordinal_mae": 0.5856009521101041, "eval_runtime": 55.6564, "eval_samples_per_second": 17.967, "eval_steps_per_second": 2.246, "step": 250 }, { "epoch": 0.48, "grad_norm": 1.4179209470748901, "learning_rate": 0.000152, "loss": 0.2975, "step": 300 }, { "epoch": 0.48, "eval_loss": 0.3036611080169678, "eval_na_accuracy": 0.927, "eval_ordinal_accuracy": 0.5963511972633979, "eval_ordinal_mae": 0.5690023564742932, "eval_runtime": 42.7034, "eval_samples_per_second": 23.417, "eval_steps_per_second": 2.927, "step": 300 }, { "epoch": 0.56, "grad_norm": 0.7659221887588501, "learning_rate": 0.000144, "loss": 0.2609, "step": 350 }, { "epoch": 0.56, "eval_loss": 0.3209022283554077, "eval_na_accuracy": 0.928, "eval_ordinal_accuracy": 0.5450399087799316, "eval_ordinal_mae": 0.5764862077817825, "eval_runtime": 43.1206, "eval_samples_per_second": 23.191, "eval_steps_per_second": 2.899, "step": 350 }, { "epoch": 0.64, "grad_norm": 1.0847711563110352, "learning_rate": 0.00013600000000000003, "loss": 0.287, "step": 400 }, { "epoch": 0.64, "eval_loss": 0.29075464606285095, "eval_na_accuracy": 0.931, "eval_ordinal_accuracy": 0.5826681870011402, "eval_ordinal_mae": 0.5458187616535902, "eval_runtime": 42.3269, "eval_samples_per_second": 23.626, "eval_steps_per_second": 2.953, "step": 400 }, { "epoch": 0.72, "grad_norm": 0.9720218181610107, "learning_rate": 0.00012800000000000002, "loss": 0.2905, "step": 450 }, { "epoch": 0.72, "eval_loss": 0.30074238777160645, "eval_na_accuracy": 0.919, "eval_ordinal_accuracy": 0.5986316989737742, "eval_ordinal_mae": 0.548372159519042, "eval_runtime": 76.7524, "eval_samples_per_second": 13.029, "eval_steps_per_second": 1.629, "step": 450 }, { "epoch": 0.8, "grad_norm": 0.8414099216461182, "learning_rate": 0.00012, "loss": 0.2574, "step": 500 }, { "epoch": 0.8, "eval_loss": 0.28344637155532837, "eval_na_accuracy": 0.929, "eval_ordinal_accuracy": 0.6031927023945268, "eval_ordinal_mae": 0.5363022306512, "eval_runtime": 42.8484, "eval_samples_per_second": 23.338, "eval_steps_per_second": 2.917, "step": 500 }, { "epoch": 0.88, "grad_norm": 0.5895617604255676, "learning_rate": 0.00011200000000000001, "loss": 0.2855, "step": 550 }, { "epoch": 0.88, "eval_loss": 0.2750368118286133, "eval_na_accuracy": 0.931, "eval_ordinal_accuracy": 0.6271379703534777, "eval_ordinal_mae": 0.5319093595330124, "eval_runtime": 42.3171, "eval_samples_per_second": 23.631, "eval_steps_per_second": 2.954, "step": 550 }, { "epoch": 0.88, "step": 550, "total_flos": 6.81953956282368e+17, "train_loss": 0.3000895881652832, "train_runtime": 2172.3633, "train_samples_per_second": 9.207, "train_steps_per_second": 0.575 } ], "logging_steps": 50, "max_steps": 1250, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 50, "total_flos": 6.81953956282368e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }