{ "best_metric": 0.6263097524642944, "best_model_checkpoint": "/home1/datahome/villien/project_hub/DinoVdeau/models/Ziboiai-large-2024_10_31-prova_batch-size32_freeze_probs/checkpoint-60", "epoch": 40.0, "eval_steps": 500, "global_step": 80, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_explained_variance": 0.03638218343257904, "eval_loss": 0.7150455713272095, "eval_mae": 0.3848940134048462, "eval_r2": -20.29086685180664, "eval_rmse": 0.40997111797332764, "eval_runtime": 1.32, "eval_samples_per_second": 37.88, "eval_steps_per_second": 1.515, "learning_rate": 0.001, "step": 2 }, { "epoch": 2.0, "eval_explained_variance": 0.0240942370146513, "eval_loss": 0.7314126491546631, "eval_mae": 0.3895121216773987, "eval_r2": -21.218204498291016, "eval_rmse": 0.4163060486316681, "eval_runtime": 0.3125, "eval_samples_per_second": 160.002, "eval_steps_per_second": 6.4, "learning_rate": 0.001, "step": 4 }, { "epoch": 3.0, "eval_explained_variance": -0.04694412648677826, "eval_loss": 0.7726277112960815, "eval_mae": 0.40413352847099304, "eval_r2": -24.822391510009766, "eval_rmse": 0.4320966601371765, "eval_runtime": 0.3275, "eval_samples_per_second": 152.667, "eval_steps_per_second": 6.107, "learning_rate": 0.001, "step": 6 }, { "epoch": 4.0, "eval_explained_variance": -0.06671242415904999, "eval_loss": 0.7917326092720032, "eval_mae": 0.4094983637332916, "eval_r2": -26.581586837768555, "eval_rmse": 0.4379725754261017, "eval_runtime": 0.3153, "eval_samples_per_second": 158.574, "eval_steps_per_second": 6.343, "learning_rate": 0.001, "step": 8 }, { "epoch": 5.0, "eval_explained_variance": -0.13621382415294647, "eval_loss": 0.7852649092674255, "eval_mae": 0.402120441198349, "eval_r2": -26.95589256286621, "eval_rmse": 0.43184274435043335, "eval_runtime": 0.3123, "eval_samples_per_second": 160.086, "eval_steps_per_second": 6.403, "learning_rate": 0.001, "step": 10 }, { "epoch": 6.0, "eval_explained_variance": -0.12974193692207336, "eval_loss": 0.7647674679756165, "eval_mae": 0.3905399441719055, "eval_r2": -24.40153694152832, "eval_rmse": 0.42244094610214233, "eval_runtime": 0.3317, "eval_samples_per_second": 150.759, "eval_steps_per_second": 6.03, "learning_rate": 0.001, "step": 12 }, { "epoch": 7.0, "eval_explained_variance": -0.10977767407894135, "eval_loss": 0.7391812205314636, "eval_mae": 0.376028835773468, "eval_r2": -22.557889938354492, "eval_rmse": 0.41028541326522827, "eval_runtime": 0.3205, "eval_samples_per_second": 155.989, "eval_steps_per_second": 6.24, "learning_rate": 0.001, "step": 14 }, { "epoch": 8.0, "eval_explained_variance": -0.10544480383396149, "eval_loss": 0.7115270495414734, "eval_mae": 0.36385056376457214, "eval_r2": -20.067392349243164, "eval_rmse": 0.39825379848480225, "eval_runtime": 0.3104, "eval_samples_per_second": 161.103, "eval_steps_per_second": 6.444, "learning_rate": 0.0001, "step": 16 }, { "epoch": 9.0, "eval_explained_variance": -0.09249210357666016, "eval_loss": 0.6896975040435791, "eval_mae": 0.35347798466682434, "eval_r2": -18.16646385192871, "eval_rmse": 0.3878582715988159, "eval_runtime": 0.3226, "eval_samples_per_second": 155.006, "eval_steps_per_second": 6.2, "learning_rate": 0.0001, "step": 18 }, { "epoch": 10.0, "eval_explained_variance": -0.10285507887601852, "eval_loss": 0.6777035593986511, "eval_mae": 0.34683120250701904, "eval_r2": -16.94469451904297, "eval_rmse": 0.3818005323410034, "eval_runtime": 0.3016, "eval_samples_per_second": 165.76, "eval_steps_per_second": 6.63, "learning_rate": 0.0001, "step": 20 }, { "epoch": 11.0, "eval_explained_variance": -0.11687294393777847, "eval_loss": 0.6701759099960327, "eval_mae": 0.3423532247543335, "eval_r2": -16.037521362304688, "eval_rmse": 0.3779585659503937, "eval_runtime": 0.3107, "eval_samples_per_second": 160.916, "eval_steps_per_second": 6.437, "learning_rate": 0.0001, "step": 22 }, { "epoch": 12.0, "eval_explained_variance": -0.11208173632621765, "eval_loss": 0.663905918598175, "eval_mae": 0.3388546407222748, "eval_r2": -15.605177879333496, "eval_rmse": 0.37438222765922546, "eval_runtime": 0.3308, "eval_samples_per_second": 151.17, "eval_steps_per_second": 6.047, "learning_rate": 0.0001, "step": 24 }, { "epoch": 13.0, "eval_explained_variance": -0.10647904872894287, "eval_loss": 0.656491219997406, "eval_mae": 0.3345881700515747, "eval_r2": -14.805088996887207, "eval_rmse": 0.3702985942363739, "eval_runtime": 0.3222, "eval_samples_per_second": 155.166, "eval_steps_per_second": 6.207, "learning_rate": 0.0001, "step": 26 }, { "epoch": 14.0, "eval_explained_variance": -0.0958017110824585, "eval_loss": 0.6501385569572449, "eval_mae": 0.33100754022598267, "eval_r2": -14.231175422668457, "eval_rmse": 0.3668138384819031, "eval_runtime": 0.3293, "eval_samples_per_second": 151.853, "eval_steps_per_second": 6.074, "learning_rate": 0.0001, "step": 28 }, { "epoch": 15.0, "eval_explained_variance": -0.08547426015138626, "eval_loss": 0.6467865705490112, "eval_mae": 0.32885220646858215, "eval_r2": -14.07986831665039, "eval_rmse": 0.36475783586502075, "eval_runtime": 0.3253, "eval_samples_per_second": 153.717, "eval_steps_per_second": 6.149, "learning_rate": 0.0001, "step": 30 }, { "epoch": 16.0, "eval_explained_variance": -0.08231981098651886, "eval_loss": 0.6471170783042908, "eval_mae": 0.3288896679878235, "eval_r2": -14.255745887756348, "eval_rmse": 0.3650059998035431, "eval_runtime": 0.305, "eval_samples_per_second": 163.945, "eval_steps_per_second": 6.558, "learning_rate": 0.0001, "step": 32 }, { "epoch": 17.0, "eval_explained_variance": -0.08097466081380844, "eval_loss": 0.6435126662254333, "eval_mae": 0.3268200755119324, "eval_r2": -14.059813499450684, "eval_rmse": 0.36310678720474243, "eval_runtime": 0.3322, "eval_samples_per_second": 150.492, "eval_steps_per_second": 6.02, "learning_rate": 0.0001, "step": 34 }, { "epoch": 18.0, "eval_explained_variance": -0.07994352281093597, "eval_loss": 0.6437923908233643, "eval_mae": 0.3269612491130829, "eval_r2": -14.036934852600098, "eval_rmse": 0.36342939734458923, "eval_runtime": 0.3107, "eval_samples_per_second": 160.922, "eval_steps_per_second": 6.437, "learning_rate": 0.0001, "step": 36 }, { "epoch": 19.0, "eval_explained_variance": -0.08883289247751236, "eval_loss": 0.6399621367454529, "eval_mae": 0.3249860107898712, "eval_r2": -13.81522274017334, "eval_rmse": 0.36136963963508606, "eval_runtime": 0.3104, "eval_samples_per_second": 161.092, "eval_steps_per_second": 6.444, "learning_rate": 0.0001, "step": 38 }, { "epoch": 20.0, "eval_explained_variance": -0.09353505074977875, "eval_loss": 0.6391971707344055, "eval_mae": 0.3246455192565918, "eval_r2": -13.710391998291016, "eval_rmse": 0.3608955144882202, "eval_runtime": 0.3119, "eval_samples_per_second": 160.306, "eval_steps_per_second": 6.412, "learning_rate": 0.0001, "step": 40 }, { "epoch": 21.0, "eval_explained_variance": -0.09930111467838287, "eval_loss": 0.6386714577674866, "eval_mae": 0.32462170720100403, "eval_r2": -13.809860229492188, "eval_rmse": 0.3606450855731964, "eval_runtime": 0.3149, "eval_samples_per_second": 158.8, "eval_steps_per_second": 6.352, "learning_rate": 0.0001, "step": 42 }, { "epoch": 22.0, "eval_explained_variance": -0.10561199486255646, "eval_loss": 0.6388444304466248, "eval_mae": 0.3243348002433777, "eval_r2": -13.849721908569336, "eval_rmse": 0.36056435108184814, "eval_runtime": 0.3094, "eval_samples_per_second": 161.607, "eval_steps_per_second": 6.464, "learning_rate": 0.0001, "step": 44 }, { "epoch": 23.0, "eval_explained_variance": -0.1035044863820076, "eval_loss": 0.6361631155014038, "eval_mae": 0.3227779269218445, "eval_r2": -13.562189102172852, "eval_rmse": 0.35895633697509766, "eval_runtime": 0.3094, "eval_samples_per_second": 161.581, "eval_steps_per_second": 6.463, "learning_rate": 0.0001, "step": 46 }, { "epoch": 24.0, "eval_explained_variance": -0.10584529489278793, "eval_loss": 0.635435163974762, "eval_mae": 0.3223152160644531, "eval_r2": -13.645319938659668, "eval_rmse": 0.35847193002700806, "eval_runtime": 0.3094, "eval_samples_per_second": 161.602, "eval_steps_per_second": 6.464, "learning_rate": 0.0001, "step": 48 }, { "epoch": 25.0, "eval_explained_variance": -0.1035505086183548, "eval_loss": 0.6344550848007202, "eval_mae": 0.32144099473953247, "eval_r2": -13.602314949035645, "eval_rmse": 0.35783687233924866, "eval_runtime": 0.3092, "eval_samples_per_second": 161.704, "eval_steps_per_second": 6.468, "learning_rate": 0.0001, "step": 50 }, { "epoch": 26.0, "eval_explained_variance": -0.11728200316429138, "eval_loss": 0.6348865628242493, "eval_mae": 0.3211889863014221, "eval_r2": -13.630416870117188, "eval_rmse": 0.3580625355243683, "eval_runtime": 0.331, "eval_samples_per_second": 151.064, "eval_steps_per_second": 6.043, "learning_rate": 0.0001, "step": 52 }, { "epoch": 27.0, "eval_explained_variance": -0.11483900249004364, "eval_loss": 0.6332749724388123, "eval_mae": 0.32009246945381165, "eval_r2": -13.561347007751465, "eval_rmse": 0.3570806384086609, "eval_runtime": 0.3173, "eval_samples_per_second": 157.565, "eval_steps_per_second": 6.303, "learning_rate": 0.0001, "step": 54 }, { "epoch": 28.0, "eval_explained_variance": -0.10828801989555359, "eval_loss": 0.6295092701911926, "eval_mae": 0.31767499446868896, "eval_r2": -13.23308277130127, "eval_rmse": 0.35479238629341125, "eval_runtime": 0.3087, "eval_samples_per_second": 161.989, "eval_steps_per_second": 6.48, "learning_rate": 0.0001, "step": 56 }, { "epoch": 29.0, "eval_explained_variance": -0.1047045886516571, "eval_loss": 0.6285346746444702, "eval_mae": 0.3173280954360962, "eval_r2": -13.162256240844727, "eval_rmse": 0.35434553027153015, "eval_runtime": 0.3277, "eval_samples_per_second": 152.596, "eval_steps_per_second": 6.104, "learning_rate": 0.0001, "step": 58 }, { "epoch": 30.0, "eval_explained_variance": -0.09264782071113586, "eval_loss": 0.6263097524642944, "eval_mae": 0.31627562642097473, "eval_r2": -12.713174819946289, "eval_rmse": 0.3532228171825409, "eval_runtime": 0.3523, "eval_samples_per_second": 141.931, "eval_steps_per_second": 5.677, "learning_rate": 0.0001, "step": 60 }, { "epoch": 31.0, "eval_explained_variance": -0.08934260159730911, "eval_loss": 0.6272528767585754, "eval_mae": 0.316723495721817, "eval_r2": -12.873921394348145, "eval_rmse": 0.35376670956611633, "eval_runtime": 0.3073, "eval_samples_per_second": 162.723, "eval_steps_per_second": 6.509, "learning_rate": 0.0001, "step": 62 }, { "epoch": 32.0, "eval_explained_variance": -0.07898036390542984, "eval_loss": 0.6294133067131042, "eval_mae": 0.31807586550712585, "eval_r2": -12.935453414916992, "eval_rmse": 0.3550169765949249, "eval_runtime": 0.3094, "eval_samples_per_second": 161.626, "eval_steps_per_second": 6.465, "learning_rate": 0.0001, "step": 64 }, { "epoch": 33.0, "eval_explained_variance": -0.07519607990980148, "eval_loss": 0.6299176216125488, "eval_mae": 0.3185364603996277, "eval_r2": -12.93520736694336, "eval_rmse": 0.35538923740386963, "eval_runtime": 0.3097, "eval_samples_per_second": 161.472, "eval_steps_per_second": 6.459, "learning_rate": 0.0001, "step": 66 }, { "epoch": 34.0, "eval_explained_variance": -0.07019602507352829, "eval_loss": 0.6320692300796509, "eval_mae": 0.3193182349205017, "eval_r2": -13.267191886901855, "eval_rmse": 0.35644862055778503, "eval_runtime": 0.3161, "eval_samples_per_second": 158.177, "eval_steps_per_second": 6.327, "learning_rate": 0.0001, "step": 68 }, { "epoch": 35.0, "eval_explained_variance": -0.04873532056808472, "eval_loss": 0.6279481649398804, "eval_mae": 0.31752488017082214, "eval_r2": -12.99951171875, "eval_rmse": 0.3541102707386017, "eval_runtime": 0.3124, "eval_samples_per_second": 160.036, "eval_steps_per_second": 6.401, "learning_rate": 0.0001, "step": 70 }, { "epoch": 36.0, "eval_explained_variance": -0.04663123935461044, "eval_loss": 0.6280075907707214, "eval_mae": 0.31736499071121216, "eval_r2": -13.00741195678711, "eval_rmse": 0.35407301783561707, "eval_runtime": 0.3095, "eval_samples_per_second": 161.554, "eval_steps_per_second": 6.462, "learning_rate": 0.0001, "step": 72 }, { "epoch": 37.0, "eval_explained_variance": -0.04936327785253525, "eval_loss": 0.6303659081459045, "eval_mae": 0.3187006115913391, "eval_r2": -13.230977058410645, "eval_rmse": 0.35543760657310486, "eval_runtime": 0.3251, "eval_samples_per_second": 153.806, "eval_steps_per_second": 6.152, "learning_rate": 1e-05, "step": 74 }, { "epoch": 38.0, "eval_explained_variance": -0.04394898936152458, "eval_loss": 0.6297122836112976, "eval_mae": 0.31833118200302124, "eval_r2": -12.983016967773438, "eval_rmse": 0.3550592064857483, "eval_runtime": 0.3087, "eval_samples_per_second": 161.995, "eval_steps_per_second": 6.48, "learning_rate": 1e-05, "step": 76 }, { "epoch": 39.0, "eval_explained_variance": -0.04296223446726799, "eval_loss": 0.630845308303833, "eval_mae": 0.3193325996398926, "eval_r2": -13.159842491149902, "eval_rmse": 0.35580796003341675, "eval_runtime": 0.3097, "eval_samples_per_second": 161.465, "eval_steps_per_second": 6.459, "learning_rate": 1e-05, "step": 78 }, { "epoch": 40.0, "eval_explained_variance": -0.04348618537187576, "eval_loss": 0.6291573643684387, "eval_mae": 0.3182610869407654, "eval_r2": -13.069788932800293, "eval_rmse": 0.3547934889793396, "eval_runtime": 0.3206, "eval_samples_per_second": 155.938, "eval_steps_per_second": 6.238, "learning_rate": 1e-05, "step": 80 }, { "epoch": 40.0, "learning_rate": 1e-05, "step": 80, "total_flos": 2.9601852123168e+17, "train_loss": 0.64580397605896, "train_runtime": 275.9938, "train_samples_per_second": 27.175, "train_steps_per_second": 1.087 } ], "logging_steps": 500, "max_steps": 300, "num_input_tokens_seen": 0, "num_train_epochs": 150, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 10, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.9601852123168e+17, "train_batch_size": 32, "trial_name": null, "trial_params": null }