{ "best_metric": 0.03524893894791603, "best_model_checkpoint": "/kaggle/working/output/checkpoint-84", "epoch": 29.925925925925927, "eval_steps": 500, "global_step": 101, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.8888888888888888, "eval_LCC": -0.11469366908669681, "eval_SROCC": -0.15860744297719087, "eval_loss": 0.4400586783885956, "eval_runtime": 42.4739, "eval_samples_per_second": 1.177, "eval_steps_per_second": 0.047, "step": 3 }, { "epoch": 1.7777777777777777, "eval_LCC": -0.1545349416495121, "eval_SROCC": -0.2675150060024009, "eval_loss": 0.22064876556396484, "eval_runtime": 42.0384, "eval_samples_per_second": 1.189, "eval_steps_per_second": 0.048, "step": 6 }, { "epoch": 2.962962962962963, "grad_norm": 16.114206314086914, "learning_rate": 6.666666666666667e-06, "loss": 0.3084, "step": 10 }, { "epoch": 2.962962962962963, "eval_LCC": -0.1813303002138391, "eval_SROCC": -0.27539015606242495, "eval_loss": 0.1909903734922409, "eval_runtime": 42.3941, "eval_samples_per_second": 1.179, "eval_steps_per_second": 0.047, "step": 10 }, { "epoch": 3.851851851851852, "eval_LCC": -0.1510725671786146, "eval_SROCC": -0.2169987995198079, "eval_loss": 0.23344503343105316, "eval_runtime": 42.0983, "eval_samples_per_second": 1.188, "eval_steps_per_second": 0.048, "step": 13 }, { "epoch": 4.7407407407407405, "eval_LCC": -0.1309558195202113, "eval_SROCC": -0.2, "eval_loss": 0.14843837916851044, "eval_runtime": 42.2346, "eval_samples_per_second": 1.184, "eval_steps_per_second": 0.047, "step": 16 }, { "epoch": 5.925925925925926, "grad_norm": 12.447588920593262, "learning_rate": 9.966191788709716e-06, "loss": 0.0852, "step": 20 }, { "epoch": 5.925925925925926, "eval_LCC": -0.08517980393535173, "eval_SROCC": -0.10213685474189677, "eval_loss": 0.12589265406131744, "eval_runtime": 42.2026, "eval_samples_per_second": 1.185, "eval_steps_per_second": 0.047, "step": 20 }, { "epoch": 6.814814814814815, "eval_LCC": -0.05947795499623739, "eval_SROCC": -0.07092436974789915, "eval_loss": 0.15516823530197144, "eval_runtime": 42.1659, "eval_samples_per_second": 1.186, "eval_steps_per_second": 0.047, "step": 23 }, { "epoch": 8.0, "eval_LCC": -0.0584113029101595, "eval_SROCC": -0.09483793517406962, "eval_loss": 0.09420724213123322, "eval_runtime": 42.5672, "eval_samples_per_second": 1.175, "eval_steps_per_second": 0.047, "step": 27 }, { "epoch": 8.88888888888889, "grad_norm": 6.68361759185791, "learning_rate": 9.698463103929542e-06, "loss": 0.0406, "step": 30 }, { "epoch": 8.88888888888889, "eval_LCC": -0.05502642204622495, "eval_SROCC": -0.04797118847539015, "eval_loss": 0.08413399010896683, "eval_runtime": 42.0478, "eval_samples_per_second": 1.189, "eval_steps_per_second": 0.048, "step": 30 }, { "epoch": 9.777777777777779, "eval_LCC": -0.044815400478992636, "eval_SROCC": -0.0575750300120048, "eval_loss": 0.08856825530529022, "eval_runtime": 42.1399, "eval_samples_per_second": 1.187, "eval_steps_per_second": 0.047, "step": 33 }, { "epoch": 10.962962962962964, "eval_LCC": -0.04737793047406845, "eval_SROCC": -0.07726290516206481, "eval_loss": 0.07205035537481308, "eval_runtime": 42.2456, "eval_samples_per_second": 1.184, "eval_steps_per_second": 0.047, "step": 37 }, { "epoch": 11.851851851851851, "grad_norm": 1.6507503986358643, "learning_rate": 9.177439057064684e-06, "loss": 0.023, "step": 40 }, { "epoch": 11.851851851851851, "eval_LCC": -0.03637477859676811, "eval_SROCC": -0.04460984393757503, "eval_loss": 0.06969437748193741, "eval_runtime": 42.0276, "eval_samples_per_second": 1.19, "eval_steps_per_second": 0.048, "step": 40 }, { "epoch": 12.74074074074074, "eval_LCC": -0.00905185691215031, "eval_SROCC": -0.021656662665066023, "eval_loss": 0.05769188702106476, "eval_runtime": 42.1379, "eval_samples_per_second": 1.187, "eval_steps_per_second": 0.047, "step": 43 }, { "epoch": 13.925925925925926, "eval_LCC": 0.011204733733650302, "eval_SROCC": -0.03135654261704682, "eval_loss": 0.06662916392087936, "eval_runtime": 41.9846, "eval_samples_per_second": 1.191, "eval_steps_per_second": 0.048, "step": 47 }, { "epoch": 14.814814814814815, "grad_norm": 0.8441389799118042, "learning_rate": 8.43120818934367e-06, "loss": 0.0136, "step": 50 }, { "epoch": 14.814814814814815, "eval_LCC": 0.006038695767176178, "eval_SROCC": -0.05008403361344538, "eval_loss": 0.052547648549079895, "eval_runtime": 42.0346, "eval_samples_per_second": 1.189, "eval_steps_per_second": 0.048, "step": 50 }, { "epoch": 16.0, "eval_LCC": 0.05035447924360885, "eval_SROCC": -0.01781512605042017, "eval_loss": 0.06257949769496918, "eval_runtime": 42.0741, "eval_samples_per_second": 1.188, "eval_steps_per_second": 0.048, "step": 54 }, { "epoch": 16.88888888888889, "eval_LCC": 0.08269606829608296, "eval_SROCC": 0.015894357743097238, "eval_loss": 0.043766554445028305, "eval_runtime": 42.063, "eval_samples_per_second": 1.189, "eval_steps_per_second": 0.048, "step": 57 }, { "epoch": 17.77777777777778, "grad_norm": 1.5869125127792358, "learning_rate": 7.500000000000001e-06, "loss": 0.0113, "step": 60 }, { "epoch": 17.77777777777778, "eval_LCC": 0.10735678232307705, "eval_SROCC": 0.074093637454982, "eval_loss": 0.05027168616652489, "eval_runtime": 42.0969, "eval_samples_per_second": 1.188, "eval_steps_per_second": 0.048, "step": 60 }, { "epoch": 18.962962962962962, "eval_LCC": 0.11292358158916863, "eval_SROCC": 0.08177671068427371, "eval_loss": 0.04287625476717949, "eval_runtime": 42.1066, "eval_samples_per_second": 1.187, "eval_steps_per_second": 0.047, "step": 64 }, { "epoch": 19.85185185185185, "eval_LCC": 0.11877825314298533, "eval_SROCC": 0.08744297719087635, "eval_loss": 0.04554678872227669, "eval_runtime": 42.3014, "eval_samples_per_second": 1.182, "eval_steps_per_second": 0.047, "step": 67 }, { "epoch": 20.74074074074074, "grad_norm": 2.4397213459014893, "learning_rate": 6.434016163555452e-06, "loss": 0.0097, "step": 70 }, { "epoch": 20.74074074074074, "eval_LCC": 0.1315746635210365, "eval_SROCC": 0.09262905162064825, "eval_loss": 0.05967041850090027, "eval_runtime": 42.314, "eval_samples_per_second": 1.182, "eval_steps_per_second": 0.047, "step": 70 }, { "epoch": 21.925925925925927, "eval_LCC": 0.14461751047124533, "eval_SROCC": 0.06141656662665066, "eval_loss": 0.03967233747243881, "eval_runtime": 41.6175, "eval_samples_per_second": 1.201, "eval_steps_per_second": 0.048, "step": 74 }, { "epoch": 22.814814814814813, "eval_LCC": 0.1636524681044405, "eval_SROCC": 0.07783913565426169, "eval_loss": 0.052949074655771255, "eval_runtime": 42.4694, "eval_samples_per_second": 1.177, "eval_steps_per_second": 0.047, "step": 77 }, { "epoch": 23.703703703703702, "grad_norm": 1.3437652587890625, "learning_rate": 5.290724144552379e-06, "loss": 0.0084, "step": 80 }, { "epoch": 24.0, "eval_LCC": 0.17607777965634505, "eval_SROCC": 0.07159663865546219, "eval_loss": 0.03660748153924942, "eval_runtime": 41.9971, "eval_samples_per_second": 1.191, "eval_steps_per_second": 0.048, "step": 81 }, { "epoch": 24.88888888888889, "eval_LCC": 0.18198076982766778, "eval_SROCC": 0.0683313325330132, "eval_loss": 0.03524893894791603, "eval_runtime": 42.1358, "eval_samples_per_second": 1.187, "eval_steps_per_second": 0.047, "step": 84 }, { "epoch": 25.77777777777778, "eval_LCC": 0.1847607546132782, "eval_SROCC": 0.09695078031212485, "eval_loss": 0.04908595234155655, "eval_runtime": 42.3591, "eval_samples_per_second": 1.18, "eval_steps_per_second": 0.047, "step": 87 }, { "epoch": 26.666666666666668, "grad_norm": 1.1412720680236816, "learning_rate": 4.131759111665349e-06, "loss": 0.0078, "step": 90 }, { "epoch": 26.962962962962962, "eval_LCC": 0.18306996734895817, "eval_SROCC": 0.09839135654261706, "eval_loss": 0.039556559175252914, "eval_runtime": 42.0202, "eval_samples_per_second": 1.19, "eval_steps_per_second": 0.048, "step": 91 }, { "epoch": 27.85185185185185, "eval_LCC": 0.18556425091617126, "eval_SROCC": 0.1011764705882353, "eval_loss": 0.039471760392189026, "eval_runtime": 42.1635, "eval_samples_per_second": 1.186, "eval_steps_per_second": 0.047, "step": 94 }, { "epoch": 28.74074074074074, "eval_LCC": 0.19561082710345667, "eval_SROCC": 0.10972388955582232, "eval_loss": 0.04257187247276306, "eval_runtime": 42.1398, "eval_samples_per_second": 1.187, "eval_steps_per_second": 0.047, "step": 97 }, { "epoch": 29.62962962962963, "grad_norm": 0.5356392860412598, "learning_rate": 3.019601169804216e-06, "loss": 0.0063, "step": 100 }, { "epoch": 29.925925925925927, "eval_LCC": 0.19841940652904885, "eval_SROCC": 0.10021608643457382, "eval_loss": 0.03704619035124779, "eval_runtime": 42.1404, "eval_samples_per_second": 1.187, "eval_steps_per_second": 0.047, "step": 101 }, { "epoch": 29.925925925925927, "step": 101, "total_flos": 5.9133993779567e+18, "train_loss": 0.050966506113879165, "train_runtime": 7148.5533, "train_samples_per_second": 1.497, "train_steps_per_second": 0.021 } ], "logging_steps": 10, "max_steps": 150, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.9133993779567e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }