| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.1, |
| "eval_steps": 500, |
| "global_step": 1000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "grad_norm": 0.9758233428001404, |
| "learning_rate": 1.8e-06, |
| "loss": 1.1603, |
| "step": 10 |
| }, |
| { |
| "grad_norm": 0.42593392729759216, |
| "learning_rate": 3.8e-06, |
| "loss": 1.1479, |
| "step": 20 |
| }, |
| { |
| "grad_norm": 0.21856296062469482, |
| "learning_rate": 5.8e-06, |
| "loss": 1.1204, |
| "step": 30 |
| }, |
| { |
| "grad_norm": 0.1426735520362854, |
| "learning_rate": 7.8e-06, |
| "loss": 1.1026, |
| "step": 40 |
| }, |
| { |
| "grad_norm": 0.13335095345973969, |
| "learning_rate": 9.800000000000001e-06, |
| "loss": 1.0891, |
| "step": 50 |
| }, |
| { |
| "grad_norm": 0.11337289214134216, |
| "learning_rate": 1.18e-05, |
| "loss": 1.0658, |
| "step": 60 |
| }, |
| { |
| "grad_norm": 0.09717453271150589, |
| "learning_rate": 1.3800000000000002e-05, |
| "loss": 1.0543, |
| "step": 70 |
| }, |
| { |
| "grad_norm": 0.10322871059179306, |
| "learning_rate": 1.58e-05, |
| "loss": 1.0375, |
| "step": 80 |
| }, |
| { |
| "grad_norm": 0.17508411407470703, |
| "learning_rate": 1.78e-05, |
| "loss": 1.0261, |
| "step": 90 |
| }, |
| { |
| "grad_norm": 0.29097145795822144, |
| "learning_rate": 1.9800000000000004e-05, |
| "loss": 1.0143, |
| "step": 100 |
| }, |
| { |
| "grad_norm": 0.28556156158447266, |
| "learning_rate": 2.18e-05, |
| "loss": 0.9888, |
| "step": 110 |
| }, |
| { |
| "grad_norm": 0.7807521224021912, |
| "learning_rate": 2.38e-05, |
| "loss": 0.943, |
| "step": 120 |
| }, |
| { |
| "grad_norm": 0.6720882654190063, |
| "learning_rate": 2.58e-05, |
| "loss": 0.8726, |
| "step": 130 |
| }, |
| { |
| "grad_norm": 0.867290198802948, |
| "learning_rate": 2.7800000000000005e-05, |
| "loss": 0.7941, |
| "step": 140 |
| }, |
| { |
| "grad_norm": 0.7180277705192566, |
| "learning_rate": 2.98e-05, |
| "loss": 0.731, |
| "step": 150 |
| }, |
| { |
| "grad_norm": 0.6411461234092712, |
| "learning_rate": 3.18e-05, |
| "loss": 0.6729, |
| "step": 160 |
| }, |
| { |
| "grad_norm": 0.8856205344200134, |
| "learning_rate": 3.38e-05, |
| "loss": 0.6213, |
| "step": 170 |
| }, |
| { |
| "grad_norm": 1.1326690912246704, |
| "learning_rate": 3.58e-05, |
| "loss": 0.5673, |
| "step": 180 |
| }, |
| { |
| "grad_norm": 0.8645981550216675, |
| "learning_rate": 3.7800000000000004e-05, |
| "loss": 0.5007, |
| "step": 190 |
| }, |
| { |
| "grad_norm": 1.4590177536010742, |
| "learning_rate": 3.9800000000000005e-05, |
| "loss": 0.4514, |
| "step": 200 |
| }, |
| { |
| "grad_norm": 1.0651799440383911, |
| "learning_rate": 4.18e-05, |
| "loss": 0.4098, |
| "step": 210 |
| }, |
| { |
| "grad_norm": 0.9970216155052185, |
| "learning_rate": 4.38e-05, |
| "loss": 0.3782, |
| "step": 220 |
| }, |
| { |
| "grad_norm": 1.4383465051651, |
| "learning_rate": 4.58e-05, |
| "loss": 0.3505, |
| "step": 230 |
| }, |
| { |
| "grad_norm": 1.0782465934753418, |
| "learning_rate": 4.78e-05, |
| "loss": 0.3158, |
| "step": 240 |
| }, |
| { |
| "grad_norm": 1.101946234703064, |
| "learning_rate": 4.9800000000000004e-05, |
| "loss": 0.2916, |
| "step": 250 |
| }, |
| { |
| "grad_norm": 1.587511420249939, |
| "learning_rate": 5.1800000000000005e-05, |
| "loss": 0.261, |
| "step": 260 |
| }, |
| { |
| "grad_norm": 1.435607671737671, |
| "learning_rate": 5.380000000000001e-05, |
| "loss": 0.2437, |
| "step": 270 |
| }, |
| { |
| "grad_norm": 0.9712788462638855, |
| "learning_rate": 5.580000000000001e-05, |
| "loss": 0.2171, |
| "step": 280 |
| }, |
| { |
| "grad_norm": 1.7632523775100708, |
| "learning_rate": 5.7799999999999995e-05, |
| "loss": 0.2055, |
| "step": 290 |
| }, |
| { |
| "grad_norm": 1.7171282768249512, |
| "learning_rate": 5.9800000000000003e-05, |
| "loss": 0.1883, |
| "step": 300 |
| }, |
| { |
| "grad_norm": 1.374088168144226, |
| "learning_rate": 6.18e-05, |
| "loss": 0.1745, |
| "step": 310 |
| }, |
| { |
| "grad_norm": 1.4401962757110596, |
| "learning_rate": 6.38e-05, |
| "loss": 0.1576, |
| "step": 320 |
| }, |
| { |
| "grad_norm": 1.3337668180465698, |
| "learning_rate": 6.58e-05, |
| "loss": 0.1363, |
| "step": 330 |
| }, |
| { |
| "grad_norm": 0.9742352366447449, |
| "learning_rate": 6.780000000000001e-05, |
| "loss": 0.1193, |
| "step": 340 |
| }, |
| { |
| "grad_norm": 1.369019865989685, |
| "learning_rate": 6.98e-05, |
| "loss": 0.1097, |
| "step": 350 |
| }, |
| { |
| "grad_norm": 1.2735106945037842, |
| "learning_rate": 7.18e-05, |
| "loss": 0.0989, |
| "step": 360 |
| }, |
| { |
| "grad_norm": 1.1764349937438965, |
| "learning_rate": 7.38e-05, |
| "loss": 0.0924, |
| "step": 370 |
| }, |
| { |
| "grad_norm": 1.46502685546875, |
| "learning_rate": 7.58e-05, |
| "loss": 0.0892, |
| "step": 380 |
| }, |
| { |
| "grad_norm": 1.2585327625274658, |
| "learning_rate": 7.780000000000001e-05, |
| "loss": 0.0867, |
| "step": 390 |
| }, |
| { |
| "grad_norm": 1.3663945198059082, |
| "learning_rate": 7.98e-05, |
| "loss": 0.0823, |
| "step": 400 |
| }, |
| { |
| "grad_norm": 1.3091000318527222, |
| "learning_rate": 8.18e-05, |
| "loss": 0.0749, |
| "step": 410 |
| }, |
| { |
| "grad_norm": 1.0578632354736328, |
| "learning_rate": 8.38e-05, |
| "loss": 0.0772, |
| "step": 420 |
| }, |
| { |
| "grad_norm": 1.354399561882019, |
| "learning_rate": 8.58e-05, |
| "loss": 0.0747, |
| "step": 430 |
| }, |
| { |
| "grad_norm": 0.9889944195747375, |
| "learning_rate": 8.78e-05, |
| "loss": 0.0741, |
| "step": 440 |
| }, |
| { |
| "grad_norm": 1.1401726007461548, |
| "learning_rate": 8.98e-05, |
| "loss": 0.0722, |
| "step": 450 |
| }, |
| { |
| "grad_norm": 0.9166666269302368, |
| "learning_rate": 9.180000000000001e-05, |
| "loss": 0.0656, |
| "step": 460 |
| }, |
| { |
| "grad_norm": 1.3585307598114014, |
| "learning_rate": 9.38e-05, |
| "loss": 0.0714, |
| "step": 470 |
| }, |
| { |
| "grad_norm": 1.211781620979309, |
| "learning_rate": 9.58e-05, |
| "loss": 0.0692, |
| "step": 480 |
| }, |
| { |
| "grad_norm": 1.2247947454452515, |
| "learning_rate": 9.78e-05, |
| "loss": 0.0648, |
| "step": 490 |
| }, |
| { |
| "grad_norm": 0.8823159337043762, |
| "learning_rate": 9.98e-05, |
| "loss": 0.0669, |
| "step": 500 |
| }, |
| { |
| "grad_norm": 1.1154427528381348, |
| "learning_rate": 9.9999778549206e-05, |
| "loss": 0.0639, |
| "step": 510 |
| }, |
| { |
| "grad_norm": 0.8653926253318787, |
| "learning_rate": 9.999901304280685e-05, |
| "loss": 0.0567, |
| "step": 520 |
| }, |
| { |
| "grad_norm": 0.831631064414978, |
| "learning_rate": 9.999770075521164e-05, |
| "loss": 0.0538, |
| "step": 530 |
| }, |
| { |
| "grad_norm": 0.8719817399978638, |
| "learning_rate": 9.99958417007713e-05, |
| "loss": 0.0591, |
| "step": 540 |
| }, |
| { |
| "grad_norm": 0.9873416423797607, |
| "learning_rate": 9.999343589981615e-05, |
| "loss": 0.0542, |
| "step": 550 |
| }, |
| { |
| "grad_norm": 0.9876612424850464, |
| "learning_rate": 9.999048337865568e-05, |
| "loss": 0.0522, |
| "step": 560 |
| }, |
| { |
| "grad_norm": 1.1858694553375244, |
| "learning_rate": 9.998698416957815e-05, |
| "loss": 0.0558, |
| "step": 570 |
| }, |
| { |
| "grad_norm": 0.646891176700592, |
| "learning_rate": 9.998293831085037e-05, |
| "loss": 0.0517, |
| "step": 580 |
| }, |
| { |
| "grad_norm": 0.6390032768249512, |
| "learning_rate": 9.997834584671719e-05, |
| "loss": 0.0537, |
| "step": 590 |
| }, |
| { |
| "grad_norm": 0.8343250751495361, |
| "learning_rate": 9.997320682740107e-05, |
| "loss": 0.0517, |
| "step": 600 |
| }, |
| { |
| "grad_norm": 0.7489078640937805, |
| "learning_rate": 9.996752130910149e-05, |
| "loss": 0.0489, |
| "step": 610 |
| }, |
| { |
| "grad_norm": 0.8712443113327026, |
| "learning_rate": 9.99612893539944e-05, |
| "loss": 0.0501, |
| "step": 620 |
| }, |
| { |
| "grad_norm": 0.9857075810432434, |
| "learning_rate": 9.995451103023144e-05, |
| "loss": 0.0477, |
| "step": 630 |
| }, |
| { |
| "grad_norm": 0.7690502405166626, |
| "learning_rate": 9.994718641193928e-05, |
| "loss": 0.0493, |
| "step": 640 |
| }, |
| { |
| "grad_norm": 1.0277982950210571, |
| "learning_rate": 9.993931557921874e-05, |
| "loss": 0.0481, |
| "step": 650 |
| }, |
| { |
| "grad_norm": 0.7521364688873291, |
| "learning_rate": 9.993089861814402e-05, |
| "loss": 0.0445, |
| "step": 660 |
| }, |
| { |
| "grad_norm": 0.7378376722335815, |
| "learning_rate": 9.992193562076166e-05, |
| "loss": 0.0519, |
| "step": 670 |
| }, |
| { |
| "grad_norm": 0.848283052444458, |
| "learning_rate": 9.991242668508954e-05, |
| "loss": 0.0468, |
| "step": 680 |
| }, |
| { |
| "grad_norm": 0.8540059328079224, |
| "learning_rate": 9.990237191511587e-05, |
| "loss": 0.0438, |
| "step": 690 |
| }, |
| { |
| "grad_norm": 0.8540256023406982, |
| "learning_rate": 9.989177142079802e-05, |
| "loss": 0.0436, |
| "step": 700 |
| }, |
| { |
| "grad_norm": 0.8738433718681335, |
| "learning_rate": 9.988062531806126e-05, |
| "loss": 0.0423, |
| "step": 710 |
| }, |
| { |
| "grad_norm": 0.7137842178344727, |
| "learning_rate": 9.986893372879762e-05, |
| "loss": 0.0423, |
| "step": 720 |
| }, |
| { |
| "grad_norm": 0.8221761584281921, |
| "learning_rate": 9.985669678086443e-05, |
| "loss": 0.0454, |
| "step": 730 |
| }, |
| { |
| "grad_norm": 0.7644378542900085, |
| "learning_rate": 9.984391460808298e-05, |
| "loss": 0.043, |
| "step": 740 |
| }, |
| { |
| "grad_norm": 0.9529057145118713, |
| "learning_rate": 9.983058735023709e-05, |
| "loss": 0.0412, |
| "step": 750 |
| }, |
| { |
| "grad_norm": 0.5925287008285522, |
| "learning_rate": 9.98167151530715e-05, |
| "loss": 0.0412, |
| "step": 760 |
| }, |
| { |
| "grad_norm": 0.9713786840438843, |
| "learning_rate": 9.980229816829034e-05, |
| "loss": 0.0438, |
| "step": 770 |
| }, |
| { |
| "grad_norm": 0.8202999830245972, |
| "learning_rate": 9.978733655355544e-05, |
| "loss": 0.0426, |
| "step": 780 |
| }, |
| { |
| "grad_norm": 0.8735260367393494, |
| "learning_rate": 9.977183047248464e-05, |
| "loss": 0.0417, |
| "step": 790 |
| }, |
| { |
| "grad_norm": 0.780419647693634, |
| "learning_rate": 9.975578009464992e-05, |
| "loss": 0.0442, |
| "step": 800 |
| }, |
| { |
| "grad_norm": 0.6911860108375549, |
| "learning_rate": 9.97391855955757e-05, |
| "loss": 0.0387, |
| "step": 810 |
| }, |
| { |
| "grad_norm": 0.7028207182884216, |
| "learning_rate": 9.972204715673669e-05, |
| "loss": 0.0406, |
| "step": 820 |
| }, |
| { |
| "grad_norm": 0.6863601207733154, |
| "learning_rate": 9.970436496555617e-05, |
| "loss": 0.0365, |
| "step": 830 |
| }, |
| { |
| "grad_norm": 0.8116464614868164, |
| "learning_rate": 9.968613921540373e-05, |
| "loss": 0.0403, |
| "step": 840 |
| }, |
| { |
| "grad_norm": 0.5927073359489441, |
| "learning_rate": 9.966737010559326e-05, |
| "loss": 0.0457, |
| "step": 850 |
| }, |
| { |
| "grad_norm": 0.7573205828666687, |
| "learning_rate": 9.964805784138072e-05, |
| "loss": 0.0386, |
| "step": 860 |
| }, |
| { |
| "grad_norm": 0.777636706829071, |
| "learning_rate": 9.962820263396195e-05, |
| "loss": 0.0377, |
| "step": 870 |
| }, |
| { |
| "grad_norm": 0.6754245758056641, |
| "learning_rate": 9.960780470047033e-05, |
| "loss": 0.0402, |
| "step": 880 |
| }, |
| { |
| "grad_norm": 0.7781431674957275, |
| "learning_rate": 9.958686426397437e-05, |
| "loss": 0.0392, |
| "step": 890 |
| }, |
| { |
| "grad_norm": 0.7408170104026794, |
| "learning_rate": 9.956538155347534e-05, |
| "loss": 0.0397, |
| "step": 900 |
| }, |
| { |
| "grad_norm": 0.7765600085258484, |
| "learning_rate": 9.95433568039047e-05, |
| "loss": 0.0368, |
| "step": 910 |
| }, |
| { |
| "grad_norm": 0.7626947164535522, |
| "learning_rate": 9.952079025612162e-05, |
| "loss": 0.0375, |
| "step": 920 |
| }, |
| { |
| "grad_norm": 0.7383814454078674, |
| "learning_rate": 9.949768215691022e-05, |
| "loss": 0.0396, |
| "step": 930 |
| }, |
| { |
| "grad_norm": 0.5346885919570923, |
| "learning_rate": 9.9474032758977e-05, |
| "loss": 0.038, |
| "step": 940 |
| }, |
| { |
| "grad_norm": 0.649907648563385, |
| "learning_rate": 9.944984232094794e-05, |
| "loss": 0.0377, |
| "step": 950 |
| }, |
| { |
| "grad_norm": 0.7877050042152405, |
| "learning_rate": 9.942511110736584e-05, |
| "loss": 0.0359, |
| "step": 960 |
| }, |
| { |
| "grad_norm": 0.7351949214935303, |
| "learning_rate": 9.939983938868726e-05, |
| "loss": 0.0372, |
| "step": 970 |
| }, |
| { |
| "grad_norm": 1.1521971225738525, |
| "learning_rate": 9.93740274412797e-05, |
| "loss": 0.0361, |
| "step": 980 |
| }, |
| { |
| "grad_norm": 0.817077100276947, |
| "learning_rate": 9.934767554741846e-05, |
| "loss": 0.0377, |
| "step": 990 |
| }, |
| { |
| "grad_norm": 0.5828076601028442, |
| "learning_rate": 9.932078399528361e-05, |
| "loss": 0.0365, |
| "step": 1000 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 10000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 9223372036854775807, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 128, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|