{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.00680275, "global_step": 1700000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2e-05, "loss": 2.0179, "step": 100 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.9412, "step": 200 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.918, "step": 300 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.906, "step": 400 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8979, "step": 500 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8955, "step": 600 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.887, "step": 700 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8873, "step": 800 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8844, "step": 900 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8842, "step": 1000 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.872, "step": 1100 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8773, "step": 1200 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8775, "step": 1300 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8627, "step": 1400 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.875, "step": 1500 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8666, "step": 1600 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.869, "step": 1700 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8593, "step": 1800 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8715, "step": 1900 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8704, "step": 2000 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8658, "step": 2100 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8662, "step": 2200 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8616, "step": 2300 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8675, "step": 2400 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8675, "step": 2500 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8636, "step": 2600 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8626, "step": 2700 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8652, "step": 2800 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8607, "step": 2900 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8548, "step": 3000 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8571, "step": 3100 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8524, "step": 3200 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8551, "step": 3300 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.866, "step": 3400 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8453, "step": 3500 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8572, "step": 3600 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8596, "step": 3700 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8607, "step": 3800 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.859, "step": 3900 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8548, "step": 4000 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8657, "step": 4100 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8452, "step": 4200 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8466, "step": 4300 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8484, "step": 4400 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8459, "step": 4500 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8452, "step": 4600 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8515, "step": 4700 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8554, "step": 4800 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8528, "step": 4900 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8565, "step": 5000 }, { "epoch": 0.0, "eval_loss": 0.7948816418647766, "eval_runtime": 204.858, "eval_samples_per_second": 244.072, "eval_steps_per_second": 1.909, "step": 5000 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8474, "step": 5100 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8506, "step": 5200 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8418, "step": 5300 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8567, "step": 5400 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8483, "step": 5500 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8473, "step": 5600 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8454, "step": 5700 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8499, "step": 5800 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.841, "step": 5900 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8545, "step": 6000 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8393, "step": 6100 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8434, "step": 6200 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8405, "step": 6300 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8464, "step": 6400 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8456, "step": 6500 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8466, "step": 6600 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8451, "step": 6700 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8452, "step": 6800 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.844, "step": 6900 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8416, "step": 7000 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8453, "step": 7100 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8544, "step": 7200 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8442, "step": 7300 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.838, "step": 7400 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8398, "step": 7500 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8431, "step": 7600 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8431, "step": 7700 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8531, "step": 7800 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8326, "step": 7900 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8417, "step": 8000 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8506, "step": 8100 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.848, "step": 8200 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.84, "step": 8300 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8362, "step": 8400 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8445, "step": 8500 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8392, "step": 8600 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8361, "step": 8700 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8354, "step": 8800 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8373, "step": 8900 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8268, "step": 9000 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8424, "step": 9100 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8322, "step": 9200 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8407, "step": 9300 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8364, "step": 9400 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8346, "step": 9500 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8368, "step": 9600 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8377, "step": 9700 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8372, "step": 9800 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8392, "step": 9900 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8439, "step": 10000 }, { "epoch": 0.0, "eval_loss": 0.7838985323905945, "eval_runtime": 204.5164, "eval_samples_per_second": 244.479, "eval_steps_per_second": 1.912, "step": 10000 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.834, "step": 10100 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8392, "step": 10200 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8345, "step": 10300 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8403, "step": 10400 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8346, "step": 10500 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8348, "step": 10600 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8424, "step": 10700 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8338, "step": 10800 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8361, "step": 10900 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8398, "step": 11000 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8307, "step": 11100 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8302, "step": 11200 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8402, "step": 11300 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8289, "step": 11400 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8347, "step": 11500 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8312, "step": 11600 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.834, "step": 11700 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8324, "step": 11800 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8428, "step": 11900 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8356, "step": 12000 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8369, "step": 12100 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.83, "step": 12200 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8272, "step": 12300 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8345, "step": 12400 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8301, "step": 12500 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8286, "step": 12600 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8309, "step": 12700 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8253, "step": 12800 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8319, "step": 12900 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8258, "step": 13000 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.835, "step": 13100 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8318, "step": 13200 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8337, "step": 13300 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8283, "step": 13400 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.832, "step": 13500 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8352, "step": 13600 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8319, "step": 13700 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8359, "step": 13800 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8321, "step": 13900 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8316, "step": 14000 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8304, "step": 14100 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.828, "step": 14200 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8294, "step": 14300 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8345, "step": 14400 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8276, "step": 14500 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8254, "step": 14600 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8362, "step": 14700 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8353, "step": 14800 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8346, "step": 14900 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8313, "step": 15000 }, { "epoch": 0.0, "eval_loss": 0.7768447399139404, "eval_runtime": 202.8477, "eval_samples_per_second": 246.49, "eval_steps_per_second": 1.928, "step": 15000 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8296, "step": 15100 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8347, "step": 15200 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.829, "step": 15300 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8262, "step": 15400 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8319, "step": 15500 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8256, "step": 15600 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8267, "step": 15700 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.824, "step": 15800 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8318, "step": 15900 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8257, "step": 16000 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.825, "step": 16100 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8244, "step": 16200 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8158, "step": 16300 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8251, "step": 16400 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8259, "step": 16500 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8262, "step": 16600 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8228, "step": 16700 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.823, "step": 16800 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8242, "step": 16900 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8274, "step": 17000 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8256, "step": 17100 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8273, "step": 17200 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8195, "step": 17300 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8223, "step": 17400 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8245, "step": 17500 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8207, "step": 17600 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.82, "step": 17700 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8275, "step": 17800 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8329, "step": 17900 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8157, "step": 18000 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8308, "step": 18100 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8239, "step": 18200 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8254, "step": 18300 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.821, "step": 18400 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8293, "step": 18500 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.821, "step": 18600 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8142, "step": 18700 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.829, "step": 18800 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8251, "step": 18900 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8339, "step": 19000 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8252, "step": 19100 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8246, "step": 19200 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8289, "step": 19300 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8295, "step": 19400 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8255, "step": 19500 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8204, "step": 19600 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.815, "step": 19700 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8156, "step": 19800 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8201, "step": 19900 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8209, "step": 20000 }, { "epoch": 0.0, "eval_loss": 0.7698022723197937, "eval_runtime": 205.3954, "eval_samples_per_second": 243.433, "eval_steps_per_second": 1.904, "step": 20000 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8159, "step": 20100 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8257, "step": 20200 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8302, "step": 20300 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8291, "step": 20400 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8157, "step": 20500 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8214, "step": 20600 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8203, "step": 20700 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8253, "step": 20800 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8116, "step": 20900 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8218, "step": 21000 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8173, "step": 21100 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8247, "step": 21200 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8152, "step": 21300 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8124, "step": 21400 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8185, "step": 21500 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8243, "step": 21600 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8289, "step": 21700 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8208, "step": 21800 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8195, "step": 21900 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8239, "step": 22000 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8157, "step": 22100 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8215, "step": 22200 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8205, "step": 22300 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8174, "step": 22400 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8265, "step": 22500 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8162, "step": 22600 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8143, "step": 22700 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8233, "step": 22800 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8104, "step": 22900 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8176, "step": 23000 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8196, "step": 23100 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8301, "step": 23200 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8213, "step": 23300 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8205, "step": 23400 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8099, "step": 23500 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8203, "step": 23600 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8176, "step": 23700 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.822, "step": 23800 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8208, "step": 23900 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8172, "step": 24000 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8251, "step": 24100 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8111, "step": 24200 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8215, "step": 24300 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8133, "step": 24400 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.818, "step": 24500 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8173, "step": 24600 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8145, "step": 24700 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8226, "step": 24800 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8236, "step": 24900 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8213, "step": 25000 }, { "epoch": 0.0, "eval_loss": 0.7671163082122803, "eval_runtime": 204.3887, "eval_samples_per_second": 244.632, "eval_steps_per_second": 1.913, "step": 25000 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8148, "step": 25100 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8203, "step": 25200 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8242, "step": 25300 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8179, "step": 25400 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8132, "step": 25500 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8109, "step": 25600 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8088, "step": 25700 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8106, "step": 25800 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8136, "step": 25900 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.817, "step": 26000 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.812, "step": 26100 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.813, "step": 26200 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8125, "step": 26300 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8123, "step": 26400 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8154, "step": 26500 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8067, "step": 26600 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8138, "step": 26700 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8248, "step": 26800 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.812, "step": 26900 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8133, "step": 27000 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.822, "step": 27100 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8189, "step": 27200 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8089, "step": 27300 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8136, "step": 27400 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8129, "step": 27500 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8155, "step": 27600 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8175, "step": 27700 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8119, "step": 27800 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8169, "step": 27900 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8102, "step": 28000 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8208, "step": 28100 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8125, "step": 28200 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8127, "step": 28300 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8126, "step": 28400 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8104, "step": 28500 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8122, "step": 28600 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8179, "step": 28700 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8117, "step": 28800 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8073, "step": 28900 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8094, "step": 29000 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8165, "step": 29100 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8109, "step": 29200 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8134, "step": 29300 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8132, "step": 29400 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8163, "step": 29500 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8153, "step": 29600 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8097, "step": 29700 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8138, "step": 29800 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8085, "step": 29900 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8121, "step": 30000 }, { "epoch": 0.0, "eval_loss": 0.7610729932785034, "eval_runtime": 206.7211, "eval_samples_per_second": 241.872, "eval_steps_per_second": 1.891, "step": 30000 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8162, "step": 30100 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8095, "step": 30200 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8105, "step": 30300 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8175, "step": 30400 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8164, "step": 30500 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8066, "step": 30600 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8135, "step": 30700 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8173, "step": 30800 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8058, "step": 30900 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8207, "step": 31000 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.812, "step": 31100 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8066, "step": 31200 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8089, "step": 31300 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8145, "step": 31400 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8066, "step": 31500 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8147, "step": 31600 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8141, "step": 31700 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8064, "step": 31800 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.814, "step": 31900 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.818, "step": 32000 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8059, "step": 32100 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.815, "step": 32200 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8129, "step": 32300 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8103, "step": 32400 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8152, "step": 32500 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.808, "step": 32600 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8147, "step": 32700 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8097, "step": 32800 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8152, "step": 32900 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8054, "step": 33000 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8183, "step": 33100 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8089, "step": 33200 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8135, "step": 33300 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8109, "step": 33400 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8119, "step": 33500 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.814, "step": 33600 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8031, "step": 33700 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8024, "step": 33800 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8113, "step": 33900 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8127, "step": 34000 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8107, "step": 34100 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8119, "step": 34200 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8112, "step": 34300 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8042, "step": 34400 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8091, "step": 34500 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8047, "step": 34600 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8091, "step": 34700 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8105, "step": 34800 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8144, "step": 34900 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8092, "step": 35000 }, { "epoch": 0.0, "eval_loss": 0.7591666579246521, "eval_runtime": 205.1234, "eval_samples_per_second": 243.756, "eval_steps_per_second": 1.906, "step": 35000 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8158, "step": 35100 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8112, "step": 35200 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.811, "step": 35300 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8102, "step": 35400 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8084, "step": 35500 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8089, "step": 35600 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8135, "step": 35700 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8075, "step": 35800 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8118, "step": 35900 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8034, "step": 36000 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8166, "step": 36100 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.818, "step": 36200 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.807, "step": 36300 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8169, "step": 36400 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8164, "step": 36500 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8101, "step": 36600 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8079, "step": 36700 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8126, "step": 36800 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8072, "step": 36900 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8053, "step": 37000 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8063, "step": 37100 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8075, "step": 37200 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8049, "step": 37300 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8071, "step": 37400 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8033, "step": 37500 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8092, "step": 37600 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.81, "step": 37700 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8122, "step": 37800 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8084, "step": 37900 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.813, "step": 38000 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8036, "step": 38100 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8048, "step": 38200 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8038, "step": 38300 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8009, "step": 38400 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8074, "step": 38500 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8083, "step": 38600 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.7932, "step": 38700 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8065, "step": 38800 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.7998, "step": 38900 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8116, "step": 39000 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8023, "step": 39100 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8137, "step": 39200 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8076, "step": 39300 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8098, "step": 39400 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8028, "step": 39500 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8041, "step": 39600 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.7989, "step": 39700 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8083, "step": 39800 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.8003, "step": 39900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.808, "step": 40000 }, { "epoch": 0.01, "eval_loss": 0.757146418094635, "eval_runtime": 205.0093, "eval_samples_per_second": 243.891, "eval_steps_per_second": 1.907, "step": 40000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8112, "step": 40100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7983, "step": 40200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8066, "step": 40300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8092, "step": 40400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8075, "step": 40500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8061, "step": 40600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8054, "step": 40700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8071, "step": 40800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8105, "step": 40900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8053, "step": 41000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8064, "step": 41100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8088, "step": 41200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.809, "step": 41300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8029, "step": 41400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8052, "step": 41500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7991, "step": 41600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8055, "step": 41700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8038, "step": 41800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8041, "step": 41900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8033, "step": 42000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8125, "step": 42100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8087, "step": 42200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8015, "step": 42300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8064, "step": 42400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8043, "step": 42500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8082, "step": 42600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8053, "step": 42700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8012, "step": 42800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8082, "step": 42900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8114, "step": 43000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8012, "step": 43100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8011, "step": 43200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8035, "step": 43300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8099, "step": 43400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8049, "step": 43500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8112, "step": 43600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8115, "step": 43700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.804, "step": 43800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8126, "step": 43900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8053, "step": 44000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8046, "step": 44100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7977, "step": 44200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8066, "step": 44300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8042, "step": 44400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7989, "step": 44500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8102, "step": 44600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8105, "step": 44700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8098, "step": 44800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8042, "step": 44900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7997, "step": 45000 }, { "epoch": 0.01, "eval_loss": 0.7535812258720398, "eval_runtime": 203.0127, "eval_samples_per_second": 246.29, "eval_steps_per_second": 1.926, "step": 45000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7972, "step": 45100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8005, "step": 45200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8084, "step": 45300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8072, "step": 45400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8104, "step": 45500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7954, "step": 45600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8027, "step": 45700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8109, "step": 45800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8098, "step": 45900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7953, "step": 46000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8015, "step": 46100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8039, "step": 46200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8016, "step": 46300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8059, "step": 46400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8043, "step": 46500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8015, "step": 46600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8012, "step": 46700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7971, "step": 46800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8022, "step": 46900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7949, "step": 47000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8082, "step": 47100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8008, "step": 47200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8084, "step": 47300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7934, "step": 47400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8008, "step": 47500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8089, "step": 47600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8104, "step": 47700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8055, "step": 47800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8078, "step": 47900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8056, "step": 48000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7976, "step": 48100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8091, "step": 48200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7998, "step": 48300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8086, "step": 48400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8002, "step": 48500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8044, "step": 48600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8104, "step": 48700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7998, "step": 48800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.802, "step": 48900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8096, "step": 49000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8043, "step": 49100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8009, "step": 49200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7989, "step": 49300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7979, "step": 49400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7973, "step": 49500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8013, "step": 49600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7988, "step": 49700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8051, "step": 49800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7975, "step": 49900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8119, "step": 50000 }, { "epoch": 0.01, "eval_loss": 0.7516446113586426, "eval_runtime": 205.3381, "eval_samples_per_second": 243.501, "eval_steps_per_second": 1.904, "step": 50000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8062, "step": 50100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8114, "step": 50200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7993, "step": 50300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8058, "step": 50400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8062, "step": 50500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7987, "step": 50600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8012, "step": 50700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.81, "step": 50800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8095, "step": 50900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7983, "step": 51000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8015, "step": 51100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8014, "step": 51200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8092, "step": 51300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8071, "step": 51400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8085, "step": 51500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8012, "step": 51600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8012, "step": 51700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7944, "step": 51800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8111, "step": 51900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8136, "step": 52000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.791, "step": 52100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8037, "step": 52200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.798, "step": 52300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8099, "step": 52400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8014, "step": 52500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7988, "step": 52600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8044, "step": 52700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8009, "step": 52800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8058, "step": 52900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7934, "step": 53000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8045, "step": 53100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8063, "step": 53200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8086, "step": 53300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7966, "step": 53400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7961, "step": 53500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.803, "step": 53600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8001, "step": 53700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8063, "step": 53800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8047, "step": 53900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.807, "step": 54000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8009, "step": 54100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8008, "step": 54200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7991, "step": 54300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7956, "step": 54400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8073, "step": 54500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7919, "step": 54600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7955, "step": 54700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8035, "step": 54800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7994, "step": 54900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8085, "step": 55000 }, { "epoch": 0.01, "eval_loss": 0.7525084614753723, "eval_runtime": 204.3549, "eval_samples_per_second": 244.672, "eval_steps_per_second": 1.913, "step": 55000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7948, "step": 55100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8059, "step": 55200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7999, "step": 55300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7986, "step": 55400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7997, "step": 55500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.797, "step": 55600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7938, "step": 55700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7983, "step": 55800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7982, "step": 55900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8123, "step": 56000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8031, "step": 56100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8058, "step": 56200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8066, "step": 56300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8023, "step": 56400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8018, "step": 56500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7935, "step": 56600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8068, "step": 56700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7936, "step": 56800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8007, "step": 56900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7978, "step": 57000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.801, "step": 57100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8017, "step": 57200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7992, "step": 57300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7999, "step": 57400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7954, "step": 57500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8033, "step": 57600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7991, "step": 57700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8016, "step": 57800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7995, "step": 57900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8027, "step": 58000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7939, "step": 58100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7999, "step": 58200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7989, "step": 58300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7873, "step": 58400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8025, "step": 58500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8011, "step": 58600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8035, "step": 58700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7941, "step": 58800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8069, "step": 58900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7931, "step": 59000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8056, "step": 59100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8015, "step": 59200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.794, "step": 59300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8043, "step": 59400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7985, "step": 59500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7937, "step": 59600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8014, "step": 59700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7991, "step": 59800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8088, "step": 59900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7926, "step": 60000 }, { "epoch": 0.01, "eval_loss": 0.7483974099159241, "eval_runtime": 204.1773, "eval_samples_per_second": 244.885, "eval_steps_per_second": 1.915, "step": 60000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7991, "step": 60100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7959, "step": 60200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8101, "step": 60300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8033, "step": 60400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7919, "step": 60500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7908, "step": 60600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7964, "step": 60700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.798, "step": 60800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.801, "step": 60900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7974, "step": 61000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.801, "step": 61100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7997, "step": 61200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.791, "step": 61300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7945, "step": 61400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7883, "step": 61500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7956, "step": 61600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7994, "step": 61700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7966, "step": 61800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7973, "step": 61900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8029, "step": 62000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8007, "step": 62100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7996, "step": 62200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8, "step": 62300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7922, "step": 62400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7997, "step": 62500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7978, "step": 62600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.797, "step": 62700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8031, "step": 62800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8005, "step": 62900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7945, "step": 63000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.797, "step": 63100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7999, "step": 63200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8007, "step": 63300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7905, "step": 63400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8, "step": 63500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8013, "step": 63600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7962, "step": 63700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7984, "step": 63800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7936, "step": 63900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7919, "step": 64000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7932, "step": 64100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7889, "step": 64200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8014, "step": 64300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8003, "step": 64400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8007, "step": 64500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7923, "step": 64600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7996, "step": 64700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7984, "step": 64800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.795, "step": 64900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8047, "step": 65000 }, { "epoch": 0.01, "eval_loss": 0.7496934533119202, "eval_runtime": 203.6389, "eval_samples_per_second": 245.533, "eval_steps_per_second": 1.92, "step": 65000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7968, "step": 65100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7978, "step": 65200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8041, "step": 65300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7922, "step": 65400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7993, "step": 65500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7982, "step": 65600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7943, "step": 65700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8029, "step": 65800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7929, "step": 65900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7956, "step": 66000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8007, "step": 66100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7981, "step": 66200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7972, "step": 66300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8035, "step": 66400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7933, "step": 66500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7889, "step": 66600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8019, "step": 66700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7898, "step": 66800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7928, "step": 66900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8028, "step": 67000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7983, "step": 67100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7945, "step": 67200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8012, "step": 67300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7961, "step": 67400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7982, "step": 67500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7905, "step": 67600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7996, "step": 67700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7909, "step": 67800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8064, "step": 67900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7966, "step": 68000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7893, "step": 68100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8021, "step": 68200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7942, "step": 68300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8015, "step": 68400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7982, "step": 68500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7991, "step": 68600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7998, "step": 68700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7948, "step": 68800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7916, "step": 68900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8064, "step": 69000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7984, "step": 69100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7907, "step": 69200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7888, "step": 69300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.794, "step": 69400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7953, "step": 69500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7908, "step": 69600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7963, "step": 69700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8021, "step": 69800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.812, "step": 69900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7998, "step": 70000 }, { "epoch": 0.01, "eval_loss": 0.7458716630935669, "eval_runtime": 204.0562, "eval_samples_per_second": 245.031, "eval_steps_per_second": 1.916, "step": 70000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7952, "step": 70100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7979, "step": 70200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7973, "step": 70300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7874, "step": 70400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.792, "step": 70500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7923, "step": 70600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7941, "step": 70700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7941, "step": 70800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7937, "step": 70900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7899, "step": 71000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7943, "step": 71100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7962, "step": 71200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7981, "step": 71300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7916, "step": 71400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7987, "step": 71500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7944, "step": 71600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7968, "step": 71700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7915, "step": 71800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7956, "step": 71900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7902, "step": 72000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7905, "step": 72100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8053, "step": 72200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7997, "step": 72300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7984, "step": 72400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8014, "step": 72500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7885, "step": 72600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7943, "step": 72700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7996, "step": 72800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7937, "step": 72900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7917, "step": 73000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7936, "step": 73100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7936, "step": 73200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7964, "step": 73300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8022, "step": 73400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8045, "step": 73500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7955, "step": 73600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7955, "step": 73700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7982, "step": 73800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8026, "step": 73900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7852, "step": 74000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7829, "step": 74100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.796, "step": 74200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7986, "step": 74300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7963, "step": 74400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7941, "step": 74500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7968, "step": 74600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.806, "step": 74700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7971, "step": 74800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7869, "step": 74900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7974, "step": 75000 }, { "epoch": 0.01, "eval_loss": 0.7442731857299805, "eval_runtime": 204.653, "eval_samples_per_second": 244.316, "eval_steps_per_second": 1.911, "step": 75000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8016, "step": 75100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7977, "step": 75200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7925, "step": 75300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7942, "step": 75400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7965, "step": 75500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7906, "step": 75600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7885, "step": 75700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7933, "step": 75800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7876, "step": 75900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7888, "step": 76000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7935, "step": 76100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7996, "step": 76200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7923, "step": 76300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7881, "step": 76400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7948, "step": 76500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7923, "step": 76600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7879, "step": 76700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7823, "step": 76800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7982, "step": 76900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7953, "step": 77000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7986, "step": 77100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7944, "step": 77200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7888, "step": 77300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7973, "step": 77400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7789, "step": 77500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7931, "step": 77600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7964, "step": 77700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7833, "step": 77800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7942, "step": 77900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7909, "step": 78000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7972, "step": 78100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7961, "step": 78200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7862, "step": 78300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7856, "step": 78400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.785, "step": 78500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7887, "step": 78600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.79, "step": 78700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7959, "step": 78800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7872, "step": 78900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7946, "step": 79000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7914, "step": 79100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.789, "step": 79200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7928, "step": 79300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7967, "step": 79400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7967, "step": 79500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7863, "step": 79600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7924, "step": 79700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7918, "step": 79800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8018, "step": 79900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7919, "step": 80000 }, { "epoch": 0.01, "eval_loss": 0.7424513101577759, "eval_runtime": 204.0239, "eval_samples_per_second": 245.069, "eval_steps_per_second": 1.916, "step": 80000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7984, "step": 80100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7846, "step": 80200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7914, "step": 80300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7921, "step": 80400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7906, "step": 80500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7852, "step": 80600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7894, "step": 80700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7833, "step": 80800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7948, "step": 80900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7951, "step": 81000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8013, "step": 81100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7946, "step": 81200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.795, "step": 81300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7908, "step": 81400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7907, "step": 81500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7882, "step": 81600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7883, "step": 81700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7845, "step": 81800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7848, "step": 81900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7887, "step": 82000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.795, "step": 82100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7858, "step": 82200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7923, "step": 82300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.791, "step": 82400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7897, "step": 82500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7909, "step": 82600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7911, "step": 82700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7941, "step": 82800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7937, "step": 82900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7911, "step": 83000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7926, "step": 83100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7931, "step": 83200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7892, "step": 83300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7917, "step": 83400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7979, "step": 83500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7878, "step": 83600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7998, "step": 83700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7898, "step": 83800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7911, "step": 83900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7857, "step": 84000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7887, "step": 84100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7859, "step": 84200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7924, "step": 84300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.789, "step": 84400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7867, "step": 84500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7917, "step": 84600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7816, "step": 84700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7862, "step": 84800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7913, "step": 84900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7876, "step": 85000 }, { "epoch": 0.01, "eval_loss": 0.7407427430152893, "eval_runtime": 206.2207, "eval_samples_per_second": 242.459, "eval_steps_per_second": 1.896, "step": 85000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7913, "step": 85100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7956, "step": 85200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7898, "step": 85300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7863, "step": 85400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7855, "step": 85500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7911, "step": 85600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7868, "step": 85700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7966, "step": 85800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7986, "step": 85900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7903, "step": 86000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7953, "step": 86100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7933, "step": 86200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.793, "step": 86300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7869, "step": 86400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7888, "step": 86500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7908, "step": 86600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7871, "step": 86700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.786, "step": 86800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7923, "step": 86900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7861, "step": 87000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7923, "step": 87100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7873, "step": 87200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7866, "step": 87300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7884, "step": 87400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7862, "step": 87500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7909, "step": 87600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7956, "step": 87700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7911, "step": 87800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7837, "step": 87900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.79, "step": 88000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7893, "step": 88100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.793, "step": 88200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7845, "step": 88300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7824, "step": 88400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8, "step": 88500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7927, "step": 88600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7804, "step": 88700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7907, "step": 88800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7846, "step": 88900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7935, "step": 89000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7906, "step": 89100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7893, "step": 89200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7869, "step": 89300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7985, "step": 89400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7823, "step": 89500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7899, "step": 89600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7887, "step": 89700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7882, "step": 89800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7897, "step": 89900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.788, "step": 90000 }, { "epoch": 0.01, "eval_loss": 0.7397116422653198, "eval_runtime": 204.1911, "eval_samples_per_second": 244.869, "eval_steps_per_second": 1.915, "step": 90000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7879, "step": 90100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8019, "step": 90200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8016, "step": 90300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7867, "step": 90400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7935, "step": 90500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7913, "step": 90600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7854, "step": 90700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7868, "step": 90800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7922, "step": 90900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7846, "step": 91000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7914, "step": 91100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7935, "step": 91200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7901, "step": 91300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7808, "step": 91400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7911, "step": 91500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7822, "step": 91600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7895, "step": 91700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7835, "step": 91800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7893, "step": 91900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7907, "step": 92000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7936, "step": 92100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7904, "step": 92200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7886, "step": 92300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.788, "step": 92400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7873, "step": 92500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7993, "step": 92600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7828, "step": 92700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7862, "step": 92800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7897, "step": 92900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7884, "step": 93000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7897, "step": 93100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.789, "step": 93200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7905, "step": 93300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7856, "step": 93400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7925, "step": 93500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7918, "step": 93600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7912, "step": 93700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7858, "step": 93800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7897, "step": 93900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7838, "step": 94000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.8, "step": 94100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7821, "step": 94200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.786, "step": 94300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7849, "step": 94400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7863, "step": 94500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7892, "step": 94600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7881, "step": 94700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7844, "step": 94800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7853, "step": 94900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7907, "step": 95000 }, { "epoch": 0.01, "eval_loss": 0.7405564188957214, "eval_runtime": 203.9554, "eval_samples_per_second": 245.152, "eval_steps_per_second": 1.917, "step": 95000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7864, "step": 95100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7899, "step": 95200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7815, "step": 95300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7872, "step": 95400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.789, "step": 95500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7887, "step": 95600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7887, "step": 95700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7874, "step": 95800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7854, "step": 95900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7832, "step": 96000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7887, "step": 96100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7955, "step": 96200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7921, "step": 96300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7901, "step": 96400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7852, "step": 96500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7971, "step": 96600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7873, "step": 96700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.79, "step": 96800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7875, "step": 96900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7945, "step": 97000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7863, "step": 97100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7876, "step": 97200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7901, "step": 97300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7942, "step": 97400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7868, "step": 97500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7866, "step": 97600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7901, "step": 97700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7951, "step": 97800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7856, "step": 97900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7875, "step": 98000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7924, "step": 98100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7894, "step": 98200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7927, "step": 98300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7877, "step": 98400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7875, "step": 98500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7915, "step": 98600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7875, "step": 98700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7845, "step": 98800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7878, "step": 98900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7829, "step": 99000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7895, "step": 99100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7924, "step": 99200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.786, "step": 99300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7873, "step": 99400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7921, "step": 99500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7947, "step": 99600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7904, "step": 99700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7848, "step": 99800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7896, "step": 99900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7924, "step": 100000 }, { "epoch": 0.01, "eval_loss": 0.7385464310646057, "eval_runtime": 205.6058, "eval_samples_per_second": 243.184, "eval_steps_per_second": 1.902, "step": 100000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.782, "step": 100100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.794, "step": 100200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7825, "step": 100300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7868, "step": 100400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7797, "step": 100500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7949, "step": 100600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7815, "step": 100700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7788, "step": 100800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.784, "step": 100900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7843, "step": 101000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.787, "step": 101100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7921, "step": 101200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7916, "step": 101300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7845, "step": 101400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7878, "step": 101500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7896, "step": 101600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.791, "step": 101700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7858, "step": 101800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7953, "step": 101900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7764, "step": 102000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7973, "step": 102100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7809, "step": 102200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7707, "step": 102300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7865, "step": 102400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7948, "step": 102500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7767, "step": 102600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7866, "step": 102700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.792, "step": 102800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7866, "step": 102900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.79, "step": 103000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7924, "step": 103100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7889, "step": 103200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7825, "step": 103300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7825, "step": 103400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.79, "step": 103500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7844, "step": 103600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7794, "step": 103700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7828, "step": 103800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7903, "step": 103900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7762, "step": 104000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7849, "step": 104100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7918, "step": 104200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7906, "step": 104300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7818, "step": 104400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7858, "step": 104500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7919, "step": 104600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7869, "step": 104700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7941, "step": 104800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7888, "step": 104900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7874, "step": 105000 }, { "epoch": 0.01, "eval_loss": 0.7402731776237488, "eval_runtime": 199.5477, "eval_samples_per_second": 250.567, "eval_steps_per_second": 1.959, "step": 105000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7854, "step": 105100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7849, "step": 105200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7921, "step": 105300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7866, "step": 105400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7895, "step": 105500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7858, "step": 105600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.792, "step": 105700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7804, "step": 105800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7822, "step": 105900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.794, "step": 106000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7754, "step": 106100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7853, "step": 106200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7805, "step": 106300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7792, "step": 106400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7894, "step": 106500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7873, "step": 106600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7809, "step": 106700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.787, "step": 106800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7787, "step": 106900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7799, "step": 107000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7846, "step": 107100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7807, "step": 107200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7847, "step": 107300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7839, "step": 107400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7834, "step": 107500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7882, "step": 107600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7877, "step": 107700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7872, "step": 107800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7859, "step": 107900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.779, "step": 108000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7904, "step": 108100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.789, "step": 108200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7874, "step": 108300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7892, "step": 108400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7875, "step": 108500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7897, "step": 108600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7872, "step": 108700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7899, "step": 108800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7835, "step": 108900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7811, "step": 109000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7823, "step": 109100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7827, "step": 109200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7844, "step": 109300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.782, "step": 109400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7925, "step": 109500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7805, "step": 109600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7775, "step": 109700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7846, "step": 109800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7934, "step": 109900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7832, "step": 110000 }, { "epoch": 0.01, "eval_loss": 0.7355017066001892, "eval_runtime": 203.8186, "eval_samples_per_second": 245.316, "eval_steps_per_second": 1.918, "step": 110000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7852, "step": 110100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7848, "step": 110200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7789, "step": 110300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7853, "step": 110400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7876, "step": 110500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7855, "step": 110600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.776, "step": 110700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7922, "step": 110800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7809, "step": 110900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7858, "step": 111000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7866, "step": 111100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.784, "step": 111200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7831, "step": 111300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7861, "step": 111400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7901, "step": 111500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7832, "step": 111600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7812, "step": 111700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7866, "step": 111800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7853, "step": 111900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7947, "step": 112000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7887, "step": 112100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7843, "step": 112200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7845, "step": 112300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.784, "step": 112400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.782, "step": 112500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7806, "step": 112600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7868, "step": 112700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7806, "step": 112800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7756, "step": 112900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7834, "step": 113000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7843, "step": 113100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7761, "step": 113200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7799, "step": 113300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7866, "step": 113400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7843, "step": 113500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7831, "step": 113600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7854, "step": 113700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7823, "step": 113800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.788, "step": 113900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7886, "step": 114000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7779, "step": 114100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7843, "step": 114200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7921, "step": 114300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7831, "step": 114400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7921, "step": 114500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7797, "step": 114600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7825, "step": 114700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7901, "step": 114800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7936, "step": 114900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7924, "step": 115000 }, { "epoch": 0.01, "eval_loss": 0.7361956834793091, "eval_runtime": 203.9139, "eval_samples_per_second": 245.202, "eval_steps_per_second": 1.917, "step": 115000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7854, "step": 115100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7807, "step": 115200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7869, "step": 115300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7808, "step": 115400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7912, "step": 115500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7843, "step": 115600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7845, "step": 115700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7833, "step": 115800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7893, "step": 115900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.792, "step": 116000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7802, "step": 116100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7776, "step": 116200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7863, "step": 116300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7789, "step": 116400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.784, "step": 116500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7781, "step": 116600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7815, "step": 116700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7801, "step": 116800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7832, "step": 116900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7817, "step": 117000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.776, "step": 117100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7916, "step": 117200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7885, "step": 117300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7846, "step": 117400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7803, "step": 117500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7829, "step": 117600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7851, "step": 117700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7845, "step": 117800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7797, "step": 117900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7822, "step": 118000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7826, "step": 118100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7752, "step": 118200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7904, "step": 118300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7801, "step": 118400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7879, "step": 118500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7846, "step": 118600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7902, "step": 118700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.789, "step": 118800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7944, "step": 118900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7765, "step": 119000 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7845, "step": 119100 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7771, "step": 119200 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7854, "step": 119300 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7853, "step": 119400 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7841, "step": 119500 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.789, "step": 119600 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7791, "step": 119700 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7914, "step": 119800 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7797, "step": 119900 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.7792, "step": 120000 }, { "epoch": 0.01, "eval_loss": 0.7344588041305542, "eval_runtime": 205.7304, "eval_samples_per_second": 243.037, "eval_steps_per_second": 1.901, "step": 120000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7824, "step": 120100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7891, "step": 120200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7826, "step": 120300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7811, "step": 120400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7853, "step": 120500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7837, "step": 120600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7876, "step": 120700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7852, "step": 120800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7843, "step": 120900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7878, "step": 121000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7806, "step": 121100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7826, "step": 121200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7768, "step": 121300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7888, "step": 121400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7844, "step": 121500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7836, "step": 121600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7884, "step": 121700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.779, "step": 121800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7769, "step": 121900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7764, "step": 122000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7929, "step": 122100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.779, "step": 122200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7852, "step": 122300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.779, "step": 122400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.783, "step": 122500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7809, "step": 122600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7829, "step": 122700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7836, "step": 122800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7773, "step": 122900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7855, "step": 123000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7828, "step": 123100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7793, "step": 123200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7846, "step": 123300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.784, "step": 123400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7897, "step": 123500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7885, "step": 123600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7726, "step": 123700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.784, "step": 123800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.779, "step": 123900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7801, "step": 124000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7884, "step": 124100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7893, "step": 124200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7893, "step": 124300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7827, "step": 124400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7864, "step": 124500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7777, "step": 124600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7781, "step": 124700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7746, "step": 124800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7812, "step": 124900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7843, "step": 125000 }, { "epoch": 0.02, "eval_loss": 0.729735255241394, "eval_runtime": 204.6756, "eval_samples_per_second": 244.289, "eval_steps_per_second": 1.91, "step": 125000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7817, "step": 125100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7793, "step": 125200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7843, "step": 125300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.78, "step": 125400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7805, "step": 125500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7812, "step": 125600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7816, "step": 125700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7756, "step": 125800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7788, "step": 125900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7816, "step": 126000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7877, "step": 126100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7786, "step": 126200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7926, "step": 126300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7828, "step": 126400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7772, "step": 126500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7789, "step": 126600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7841, "step": 126700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7837, "step": 126800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.779, "step": 126900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7838, "step": 127000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7859, "step": 127100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7771, "step": 127200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7807, "step": 127300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7901, "step": 127400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7821, "step": 127500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7808, "step": 127600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7806, "step": 127700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7885, "step": 127800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7818, "step": 127900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7894, "step": 128000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7835, "step": 128100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7812, "step": 128200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7808, "step": 128300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7798, "step": 128400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7794, "step": 128500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7766, "step": 128600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.782, "step": 128700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7799, "step": 128800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7839, "step": 128900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7852, "step": 129000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.782, "step": 129100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7798, "step": 129200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7759, "step": 129300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7882, "step": 129400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7819, "step": 129500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7791, "step": 129600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7847, "step": 129700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7814, "step": 129800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7831, "step": 129900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7846, "step": 130000 }, { "epoch": 0.02, "eval_loss": 0.7291039824485779, "eval_runtime": 204.4531, "eval_samples_per_second": 244.555, "eval_steps_per_second": 1.912, "step": 130000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7752, "step": 130100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7824, "step": 130200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7754, "step": 130300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7784, "step": 130400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7819, "step": 130500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.782, "step": 130600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.784, "step": 130700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.779, "step": 130800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.789, "step": 130900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7808, "step": 131000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7837, "step": 131100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7828, "step": 131200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7815, "step": 131300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7792, "step": 131400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7742, "step": 131500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7827, "step": 131600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7775, "step": 131700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7828, "step": 131800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7739, "step": 131900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7829, "step": 132000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7808, "step": 132100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7876, "step": 132200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7873, "step": 132300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.774, "step": 132400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7753, "step": 132500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7778, "step": 132600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7808, "step": 132700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7802, "step": 132800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7763, "step": 132900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7826, "step": 133000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7775, "step": 133100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7767, "step": 133200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7807, "step": 133300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.771, "step": 133400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7756, "step": 133500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7899, "step": 133600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7806, "step": 133700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7824, "step": 133800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7861, "step": 133900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7855, "step": 134000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7809, "step": 134100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7871, "step": 134200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7776, "step": 134300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7819, "step": 134400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7874, "step": 134500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7743, "step": 134600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7834, "step": 134700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.781, "step": 134800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7756, "step": 134900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7804, "step": 135000 }, { "epoch": 0.02, "eval_loss": 0.7318330407142639, "eval_runtime": 204.4753, "eval_samples_per_second": 244.528, "eval_steps_per_second": 1.912, "step": 135000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.784, "step": 135100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7832, "step": 135200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.775, "step": 135300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7821, "step": 135400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7853, "step": 135500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7847, "step": 135600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7761, "step": 135700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7747, "step": 135800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7638, "step": 135900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7825, "step": 136000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7784, "step": 136100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7781, "step": 136200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7769, "step": 136300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7814, "step": 136400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.782, "step": 136500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.778, "step": 136600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7807, "step": 136700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7801, "step": 136800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7793, "step": 136900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7761, "step": 137000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7764, "step": 137100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7783, "step": 137200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7797, "step": 137300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7744, "step": 137400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7718, "step": 137500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7749, "step": 137600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7803, "step": 137700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7791, "step": 137800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7836, "step": 137900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7714, "step": 138000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.779, "step": 138100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7815, "step": 138200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7783, "step": 138300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7911, "step": 138400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7792, "step": 138500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7858, "step": 138600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7869, "step": 138700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7776, "step": 138800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.779, "step": 138900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7876, "step": 139000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7749, "step": 139100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7831, "step": 139200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7871, "step": 139300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7789, "step": 139400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7813, "step": 139500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7747, "step": 139600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7754, "step": 139700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7843, "step": 139800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7734, "step": 139900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7753, "step": 140000 }, { "epoch": 0.02, "eval_loss": 0.7310429811477661, "eval_runtime": 204.6514, "eval_samples_per_second": 244.318, "eval_steps_per_second": 1.911, "step": 140000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7814, "step": 140100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7825, "step": 140200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7843, "step": 140300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7859, "step": 140400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.785, "step": 140500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7739, "step": 140600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7833, "step": 140700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7838, "step": 140800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7798, "step": 140900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7794, "step": 141000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7736, "step": 141100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7801, "step": 141200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7792, "step": 141300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7881, "step": 141400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7768, "step": 141500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7847, "step": 141600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7801, "step": 141700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7727, "step": 141800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7758, "step": 141900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7818, "step": 142000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7754, "step": 142100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7821, "step": 142200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7774, "step": 142300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7866, "step": 142400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.784, "step": 142500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7779, "step": 142600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7788, "step": 142700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7854, "step": 142800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7786, "step": 142900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7735, "step": 143000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.769, "step": 143100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7758, "step": 143200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7769, "step": 143300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7734, "step": 143400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7763, "step": 143500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7779, "step": 143600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7771, "step": 143700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7806, "step": 143800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7746, "step": 143900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7776, "step": 144000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7777, "step": 144100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7735, "step": 144200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7892, "step": 144300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7797, "step": 144400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7764, "step": 144500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7708, "step": 144600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.781, "step": 144700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7758, "step": 144800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7751, "step": 144900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7842, "step": 145000 }, { "epoch": 0.02, "eval_loss": 0.7320641279220581, "eval_runtime": 204.3932, "eval_samples_per_second": 244.627, "eval_steps_per_second": 1.913, "step": 145000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7726, "step": 145100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7756, "step": 145200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7778, "step": 145300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7782, "step": 145400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7763, "step": 145500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7809, "step": 145600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7791, "step": 145700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7724, "step": 145800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7744, "step": 145900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7692, "step": 146000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7798, "step": 146100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7823, "step": 146200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7803, "step": 146300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.784, "step": 146400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7753, "step": 146500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.776, "step": 146600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7749, "step": 146700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7818, "step": 146800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.776, "step": 146900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7768, "step": 147000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7675, "step": 147100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7845, "step": 147200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7679, "step": 147300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7808, "step": 147400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7819, "step": 147500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7825, "step": 147600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7828, "step": 147700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7737, "step": 147800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7757, "step": 147900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7838, "step": 148000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7777, "step": 148100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7728, "step": 148200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.782, "step": 148300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7785, "step": 148400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7799, "step": 148500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7765, "step": 148600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7819, "step": 148700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7754, "step": 148800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7797, "step": 148900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7727, "step": 149000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.781, "step": 149100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7781, "step": 149200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7844, "step": 149300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7784, "step": 149400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7849, "step": 149500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7728, "step": 149600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7822, "step": 149700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7713, "step": 149800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7805, "step": 149900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7746, "step": 150000 }, { "epoch": 0.02, "eval_loss": 0.7317348718643188, "eval_runtime": 204.4686, "eval_samples_per_second": 244.536, "eval_steps_per_second": 1.912, "step": 150000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7819, "step": 150100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7762, "step": 150200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7858, "step": 150300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7745, "step": 150400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7839, "step": 150500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.777, "step": 150600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7763, "step": 150700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7808, "step": 150800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7755, "step": 150900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7785, "step": 151000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7781, "step": 151100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7764, "step": 151200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7834, "step": 151300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7782, "step": 151400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7797, "step": 151500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7827, "step": 151600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7778, "step": 151700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7718, "step": 151800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7789, "step": 151900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7757, "step": 152000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7819, "step": 152100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.776, "step": 152200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7709, "step": 152300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7663, "step": 152400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7754, "step": 152500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7736, "step": 152600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.78, "step": 152700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7863, "step": 152800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7872, "step": 152900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7718, "step": 153000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7849, "step": 153100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7748, "step": 153200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7802, "step": 153300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7824, "step": 153400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.78, "step": 153500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7617, "step": 153600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7846, "step": 153700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7751, "step": 153800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7776, "step": 153900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7833, "step": 154000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7772, "step": 154100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7713, "step": 154200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7791, "step": 154300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7766, "step": 154400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7729, "step": 154500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7828, "step": 154600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.778, "step": 154700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7709, "step": 154800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7767, "step": 154900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7727, "step": 155000 }, { "epoch": 0.02, "eval_loss": 0.7296523451805115, "eval_runtime": 204.9064, "eval_samples_per_second": 244.014, "eval_steps_per_second": 1.908, "step": 155000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7811, "step": 155100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.774, "step": 155200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7704, "step": 155300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7697, "step": 155400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7852, "step": 155500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.776, "step": 155600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7862, "step": 155700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7757, "step": 155800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7742, "step": 155900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.784, "step": 156000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7776, "step": 156100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7763, "step": 156200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.773, "step": 156300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7745, "step": 156400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7875, "step": 156500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7707, "step": 156600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7828, "step": 156700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7771, "step": 156800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.782, "step": 156900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7789, "step": 157000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.779, "step": 157100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7774, "step": 157200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7739, "step": 157300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7695, "step": 157400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7789, "step": 157500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7804, "step": 157600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7786, "step": 157700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7815, "step": 157800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7801, "step": 157900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.771, "step": 158000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.778, "step": 158100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.778, "step": 158200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7761, "step": 158300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7791, "step": 158400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7788, "step": 158500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7831, "step": 158600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7753, "step": 158700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7765, "step": 158800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7727, "step": 158900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7822, "step": 159000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7835, "step": 159100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7845, "step": 159200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7758, "step": 159300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.778, "step": 159400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7781, "step": 159500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.779, "step": 159600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7804, "step": 159700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7858, "step": 159800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7816, "step": 159900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7758, "step": 160000 }, { "epoch": 0.02, "eval_loss": 0.7311965823173523, "eval_runtime": 204.2454, "eval_samples_per_second": 244.804, "eval_steps_per_second": 1.914, "step": 160000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7818, "step": 160100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7772, "step": 160200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7755, "step": 160300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7755, "step": 160400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7766, "step": 160500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7814, "step": 160600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7766, "step": 160700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7804, "step": 160800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7762, "step": 160900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.782, "step": 161000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7811, "step": 161100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7711, "step": 161200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7732, "step": 161300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7794, "step": 161400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7771, "step": 161500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7719, "step": 161600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7816, "step": 161700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7737, "step": 161800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7777, "step": 161900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7753, "step": 162000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7708, "step": 162100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7839, "step": 162200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.779, "step": 162300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7734, "step": 162400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7788, "step": 162500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7733, "step": 162600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.779, "step": 162700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7731, "step": 162800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7755, "step": 162900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7699, "step": 163000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7761, "step": 163100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7758, "step": 163200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7778, "step": 163300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7707, "step": 163400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7804, "step": 163500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7808, "step": 163600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7851, "step": 163700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7682, "step": 163800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7722, "step": 163900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7686, "step": 164000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7809, "step": 164100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7749, "step": 164200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.774, "step": 164300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7774, "step": 164400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7768, "step": 164500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7769, "step": 164600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7791, "step": 164700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7739, "step": 164800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7772, "step": 164900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7766, "step": 165000 }, { "epoch": 0.02, "eval_loss": 0.7293242812156677, "eval_runtime": 203.969, "eval_samples_per_second": 245.135, "eval_steps_per_second": 1.917, "step": 165000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7751, "step": 165100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7839, "step": 165200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7753, "step": 165300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7717, "step": 165400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.773, "step": 165500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.78, "step": 165600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7807, "step": 165700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7743, "step": 165800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7656, "step": 165900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7711, "step": 166000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7709, "step": 166100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7791, "step": 166200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.776, "step": 166300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.774, "step": 166400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7742, "step": 166500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7772, "step": 166600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7763, "step": 166700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7741, "step": 166800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7762, "step": 166900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7754, "step": 167000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7808, "step": 167100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7877, "step": 167200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7804, "step": 167300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7815, "step": 167400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.776, "step": 167500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7763, "step": 167600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7767, "step": 167700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7704, "step": 167800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7688, "step": 167900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7718, "step": 168000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7731, "step": 168100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7793, "step": 168200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7717, "step": 168300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7747, "step": 168400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7779, "step": 168500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7778, "step": 168600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.776, "step": 168700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7797, "step": 168800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7748, "step": 168900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7706, "step": 169000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7753, "step": 169100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7775, "step": 169200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7726, "step": 169300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7697, "step": 169400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7797, "step": 169500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7716, "step": 169600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7729, "step": 169700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7834, "step": 169800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7735, "step": 169900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7655, "step": 170000 }, { "epoch": 0.02, "eval_loss": 0.7290298938751221, "eval_runtime": 204.777, "eval_samples_per_second": 244.168, "eval_steps_per_second": 1.909, "step": 170000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7776, "step": 170100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7747, "step": 170200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7698, "step": 170300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7737, "step": 170400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7761, "step": 170500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7688, "step": 170600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7751, "step": 170700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7763, "step": 170800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.778, "step": 170900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.774, "step": 171000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7704, "step": 171100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7787, "step": 171200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.778, "step": 171300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7804, "step": 171400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7755, "step": 171500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7802, "step": 171600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7734, "step": 171700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7719, "step": 171800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.774, "step": 171900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7683, "step": 172000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.778, "step": 172100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7776, "step": 172200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.776, "step": 172300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7837, "step": 172400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7762, "step": 172500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7659, "step": 172600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7768, "step": 172700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7764, "step": 172800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7699, "step": 172900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7756, "step": 173000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7754, "step": 173100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7735, "step": 173200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7759, "step": 173300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7688, "step": 173400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.776, "step": 173500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7693, "step": 173600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7706, "step": 173700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.771, "step": 173800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7605, "step": 173900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.779, "step": 174000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7851, "step": 174100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7722, "step": 174200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7775, "step": 174300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7729, "step": 174400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.771, "step": 174500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7793, "step": 174600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7774, "step": 174700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7744, "step": 174800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7795, "step": 174900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7697, "step": 175000 }, { "epoch": 0.02, "eval_loss": 0.7283743023872375, "eval_runtime": 201.412, "eval_samples_per_second": 248.247, "eval_steps_per_second": 1.941, "step": 175000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7731, "step": 175100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.767, "step": 175200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7729, "step": 175300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7739, "step": 175400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7719, "step": 175500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7656, "step": 175600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7762, "step": 175700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7776, "step": 175800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7751, "step": 175900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.772, "step": 176000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7727, "step": 176100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7722, "step": 176200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7791, "step": 176300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7657, "step": 176400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7787, "step": 176500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7797, "step": 176600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7771, "step": 176700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7659, "step": 176800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7677, "step": 176900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7741, "step": 177000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7746, "step": 177100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7794, "step": 177200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7776, "step": 177300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7826, "step": 177400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7723, "step": 177500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7657, "step": 177600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.783, "step": 177700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7805, "step": 177800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7726, "step": 177900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7766, "step": 178000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7798, "step": 178100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7791, "step": 178200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7787, "step": 178300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.78, "step": 178400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7747, "step": 178500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7673, "step": 178600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7761, "step": 178700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7661, "step": 178800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7753, "step": 178900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7813, "step": 179000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7724, "step": 179100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.773, "step": 179200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7788, "step": 179300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7777, "step": 179400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.78, "step": 179500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7769, "step": 179600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7661, "step": 179700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7675, "step": 179800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7745, "step": 179900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7744, "step": 180000 }, { "epoch": 0.02, "eval_loss": 0.7283092737197876, "eval_runtime": 201.9007, "eval_samples_per_second": 247.647, "eval_steps_per_second": 1.937, "step": 180000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.775, "step": 180100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.771, "step": 180200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7761, "step": 180300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7677, "step": 180400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7713, "step": 180500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.779, "step": 180600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7756, "step": 180700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7767, "step": 180800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7768, "step": 180900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7753, "step": 181000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7744, "step": 181100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7753, "step": 181200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7739, "step": 181300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7736, "step": 181400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.779, "step": 181500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7756, "step": 181600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7716, "step": 181700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7725, "step": 181800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7694, "step": 181900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7676, "step": 182000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7706, "step": 182100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7766, "step": 182200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7682, "step": 182300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7742, "step": 182400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7735, "step": 182500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7764, "step": 182600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7734, "step": 182700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.778, "step": 182800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7703, "step": 182900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7759, "step": 183000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7741, "step": 183100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7712, "step": 183200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7658, "step": 183300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7675, "step": 183400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7771, "step": 183500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7716, "step": 183600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7734, "step": 183700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7736, "step": 183800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7726, "step": 183900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7702, "step": 184000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7704, "step": 184100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7705, "step": 184200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7704, "step": 184300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7731, "step": 184400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7772, "step": 184500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7779, "step": 184600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7741, "step": 184700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7684, "step": 184800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7733, "step": 184900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7659, "step": 185000 }, { "epoch": 0.02, "eval_loss": 0.7254592180252075, "eval_runtime": 204.0508, "eval_samples_per_second": 245.037, "eval_steps_per_second": 1.916, "step": 185000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7773, "step": 185100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7836, "step": 185200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7776, "step": 185300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7731, "step": 185400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7704, "step": 185500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.777, "step": 185600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.777, "step": 185700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7719, "step": 185800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7826, "step": 185900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7735, "step": 186000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7698, "step": 186100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7742, "step": 186200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7775, "step": 186300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7694, "step": 186400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7673, "step": 186500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7715, "step": 186600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7707, "step": 186700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7723, "step": 186800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7677, "step": 186900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7808, "step": 187000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.771, "step": 187100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7761, "step": 187200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7752, "step": 187300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7744, "step": 187400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7642, "step": 187500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.773, "step": 187600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7727, "step": 187700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7677, "step": 187800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7706, "step": 187900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7859, "step": 188000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7713, "step": 188100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7719, "step": 188200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7689, "step": 188300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7736, "step": 188400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7755, "step": 188500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7708, "step": 188600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7658, "step": 188700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7745, "step": 188800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7769, "step": 188900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7686, "step": 189000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7723, "step": 189100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7676, "step": 189200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7679, "step": 189300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7771, "step": 189400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7754, "step": 189500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7695, "step": 189600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7723, "step": 189700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7807, "step": 189800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7714, "step": 189900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7783, "step": 190000 }, { "epoch": 0.02, "eval_loss": 0.7265344262123108, "eval_runtime": 203.2088, "eval_samples_per_second": 246.052, "eval_steps_per_second": 1.924, "step": 190000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7664, "step": 190100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7637, "step": 190200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.767, "step": 190300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7711, "step": 190400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7761, "step": 190500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7688, "step": 190600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7733, "step": 190700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7696, "step": 190800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7779, "step": 190900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7769, "step": 191000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7761, "step": 191100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7754, "step": 191200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7752, "step": 191300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7731, "step": 191400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7654, "step": 191500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7633, "step": 191600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7733, "step": 191700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7715, "step": 191800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7707, "step": 191900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7761, "step": 192000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7701, "step": 192100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7773, "step": 192200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7731, "step": 192300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.774, "step": 192400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7716, "step": 192500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7769, "step": 192600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7738, "step": 192700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7686, "step": 192800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7808, "step": 192900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7781, "step": 193000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7712, "step": 193100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7694, "step": 193200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7708, "step": 193300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7671, "step": 193400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7824, "step": 193500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7714, "step": 193600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7724, "step": 193700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7684, "step": 193800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7728, "step": 193900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7701, "step": 194000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7722, "step": 194100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7659, "step": 194200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7704, "step": 194300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7781, "step": 194400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7642, "step": 194500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7774, "step": 194600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7797, "step": 194700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7734, "step": 194800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7704, "step": 194900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7805, "step": 195000 }, { "epoch": 0.02, "eval_loss": 0.7270874381065369, "eval_runtime": 195.6141, "eval_samples_per_second": 255.605, "eval_steps_per_second": 1.999, "step": 195000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7684, "step": 195100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7705, "step": 195200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7669, "step": 195300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7723, "step": 195400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.764, "step": 195500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7657, "step": 195600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7747, "step": 195700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7726, "step": 195800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7584, "step": 195900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7742, "step": 196000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7728, "step": 196100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7787, "step": 196200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7683, "step": 196300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7768, "step": 196400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7764, "step": 196500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7723, "step": 196600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7724, "step": 196700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7741, "step": 196800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7713, "step": 196900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7739, "step": 197000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7731, "step": 197100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7689, "step": 197200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7816, "step": 197300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.772, "step": 197400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7701, "step": 197500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7733, "step": 197600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7703, "step": 197700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7746, "step": 197800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7717, "step": 197900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7679, "step": 198000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7644, "step": 198100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7709, "step": 198200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7701, "step": 198300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7714, "step": 198400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7712, "step": 198500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7717, "step": 198600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7628, "step": 198700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7641, "step": 198800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7729, "step": 198900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7681, "step": 199000 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7651, "step": 199100 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7654, "step": 199200 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7764, "step": 199300 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7763, "step": 199400 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7777, "step": 199500 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7793, "step": 199600 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7765, "step": 199700 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7692, "step": 199800 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.7731, "step": 199900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7679, "step": 200000 }, { "epoch": 0.03, "eval_loss": 0.7222184538841248, "eval_runtime": 194.621, "eval_samples_per_second": 256.91, "eval_steps_per_second": 2.009, "step": 200000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7685, "step": 200100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.775, "step": 200200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7666, "step": 200300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7751, "step": 200400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.771, "step": 200500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7684, "step": 200600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7713, "step": 200700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.775, "step": 200800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7721, "step": 200900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7721, "step": 201000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7759, "step": 201100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.769, "step": 201200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7758, "step": 201300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7762, "step": 201400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7727, "step": 201500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7703, "step": 201600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.772, "step": 201700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7697, "step": 201800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7714, "step": 201900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7691, "step": 202000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7765, "step": 202100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7716, "step": 202200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7705, "step": 202300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7772, "step": 202400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7725, "step": 202500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7809, "step": 202600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7706, "step": 202700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7754, "step": 202800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7685, "step": 202900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7771, "step": 203000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7747, "step": 203100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7733, "step": 203200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7722, "step": 203300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7715, "step": 203400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7695, "step": 203500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7694, "step": 203600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7768, "step": 203700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7713, "step": 203800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.772, "step": 203900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.769, "step": 204000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7704, "step": 204100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7713, "step": 204200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7686, "step": 204300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7701, "step": 204400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7698, "step": 204500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7715, "step": 204600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7621, "step": 204700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7715, "step": 204800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7742, "step": 204900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7686, "step": 205000 }, { "epoch": 0.03, "eval_loss": 0.7252817749977112, "eval_runtime": 194.5281, "eval_samples_per_second": 257.032, "eval_steps_per_second": 2.01, "step": 205000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7679, "step": 205100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7638, "step": 205200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7763, "step": 205300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7712, "step": 205400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7722, "step": 205500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7673, "step": 205600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7771, "step": 205700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7688, "step": 205800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7787, "step": 205900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7748, "step": 206000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7699, "step": 206100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7678, "step": 206200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7699, "step": 206300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7675, "step": 206400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7649, "step": 206500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7654, "step": 206600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7806, "step": 206700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7628, "step": 206800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7712, "step": 206900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7703, "step": 207000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7704, "step": 207100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7758, "step": 207200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7727, "step": 207300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7773, "step": 207400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7681, "step": 207500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7711, "step": 207600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7724, "step": 207700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7742, "step": 207800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7766, "step": 207900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7689, "step": 208000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7734, "step": 208100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7746, "step": 208200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7731, "step": 208300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.77, "step": 208400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7674, "step": 208500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7717, "step": 208600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7678, "step": 208700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7661, "step": 208800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7667, "step": 208900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7701, "step": 209000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7636, "step": 209100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7762, "step": 209200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.769, "step": 209300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7697, "step": 209400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7633, "step": 209500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7682, "step": 209600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7691, "step": 209700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7675, "step": 209800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.767, "step": 209900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7672, "step": 210000 }, { "epoch": 0.03, "eval_loss": 0.723176896572113, "eval_runtime": 194.7259, "eval_samples_per_second": 256.771, "eval_steps_per_second": 2.008, "step": 210000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7762, "step": 210100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7682, "step": 210200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7687, "step": 210300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7628, "step": 210400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7658, "step": 210500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.772, "step": 210600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7693, "step": 210700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7646, "step": 210800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7673, "step": 210900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7735, "step": 211000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7628, "step": 211100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7728, "step": 211200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7698, "step": 211300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7673, "step": 211400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7571, "step": 211500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7669, "step": 211600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7693, "step": 211700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7711, "step": 211800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7658, "step": 211900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7746, "step": 212000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7745, "step": 212100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7707, "step": 212200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7673, "step": 212300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7664, "step": 212400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7668, "step": 212500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7708, "step": 212600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7696, "step": 212700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7759, "step": 212800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7738, "step": 212900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7622, "step": 213000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7727, "step": 213100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7742, "step": 213200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7724, "step": 213300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7688, "step": 213400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.773, "step": 213500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7769, "step": 213600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.773, "step": 213700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7727, "step": 213800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7615, "step": 213900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7746, "step": 214000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7746, "step": 214100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7728, "step": 214200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7667, "step": 214300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7695, "step": 214400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7791, "step": 214500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7625, "step": 214600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7775, "step": 214700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7677, "step": 214800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7735, "step": 214900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.77, "step": 215000 }, { "epoch": 0.03, "eval_loss": 0.7234128713607788, "eval_runtime": 194.8336, "eval_samples_per_second": 256.629, "eval_steps_per_second": 2.007, "step": 215000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7726, "step": 215100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7702, "step": 215200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7733, "step": 215300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7634, "step": 215400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.772, "step": 215500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7756, "step": 215600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.767, "step": 215700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7629, "step": 215800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7731, "step": 215900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7702, "step": 216000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7721, "step": 216100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7644, "step": 216200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7731, "step": 216300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7685, "step": 216400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7659, "step": 216500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7724, "step": 216600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7723, "step": 216700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7666, "step": 216800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.767, "step": 216900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7699, "step": 217000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7642, "step": 217100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7689, "step": 217200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7648, "step": 217300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7664, "step": 217400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7716, "step": 217500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7675, "step": 217600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7768, "step": 217700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7738, "step": 217800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7696, "step": 217900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7716, "step": 218000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7729, "step": 218100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.777, "step": 218200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7695, "step": 218300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7709, "step": 218400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7604, "step": 218500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7721, "step": 218600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7689, "step": 218700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.776, "step": 218800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7658, "step": 218900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7721, "step": 219000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.767, "step": 219100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7748, "step": 219200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7639, "step": 219300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7745, "step": 219400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7619, "step": 219500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7702, "step": 219600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7677, "step": 219700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7704, "step": 219800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7682, "step": 219900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7662, "step": 220000 }, { "epoch": 0.03, "eval_loss": 0.7229312658309937, "eval_runtime": 194.6418, "eval_samples_per_second": 256.882, "eval_steps_per_second": 2.009, "step": 220000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7765, "step": 220100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7671, "step": 220200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7716, "step": 220300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7595, "step": 220400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7609, "step": 220500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7671, "step": 220600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7689, "step": 220700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7722, "step": 220800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.768, "step": 220900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7755, "step": 221000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7697, "step": 221100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7743, "step": 221200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7718, "step": 221300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7776, "step": 221400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7727, "step": 221500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7737, "step": 221600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7694, "step": 221700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7708, "step": 221800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7651, "step": 221900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7672, "step": 222000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.771, "step": 222100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7729, "step": 222200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7762, "step": 222300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.767, "step": 222400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7756, "step": 222500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7766, "step": 222600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7636, "step": 222700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7611, "step": 222800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7673, "step": 222900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.767, "step": 223000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7675, "step": 223100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7634, "step": 223200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7605, "step": 223300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7685, "step": 223400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7643, "step": 223500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7624, "step": 223600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7672, "step": 223700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7614, "step": 223800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7687, "step": 223900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7612, "step": 224000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7718, "step": 224100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7636, "step": 224200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7678, "step": 224300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.771, "step": 224400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7708, "step": 224500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7645, "step": 224600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7787, "step": 224700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7756, "step": 224800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7593, "step": 224900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7667, "step": 225000 }, { "epoch": 0.03, "eval_loss": 0.722387969493866, "eval_runtime": 194.7501, "eval_samples_per_second": 256.739, "eval_steps_per_second": 2.008, "step": 225000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7727, "step": 225100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7738, "step": 225200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7694, "step": 225300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.762, "step": 225400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7733, "step": 225500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7623, "step": 225600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7627, "step": 225700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7721, "step": 225800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7753, "step": 225900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7641, "step": 226000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7667, "step": 226100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7634, "step": 226200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7683, "step": 226300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7609, "step": 226400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7631, "step": 226500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7622, "step": 226600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7769, "step": 226700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7685, "step": 226800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7639, "step": 226900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7651, "step": 227000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7691, "step": 227100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7577, "step": 227200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7658, "step": 227300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7684, "step": 227400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7713, "step": 227500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7713, "step": 227600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7716, "step": 227700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7747, "step": 227800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7695, "step": 227900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7634, "step": 228000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7723, "step": 228100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7723, "step": 228200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7741, "step": 228300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7587, "step": 228400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7589, "step": 228500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7758, "step": 228600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7644, "step": 228700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7701, "step": 228800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7741, "step": 228900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7636, "step": 229000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7707, "step": 229100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7666, "step": 229200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7663, "step": 229300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7692, "step": 229400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7622, "step": 229500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.771, "step": 229600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7651, "step": 229700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7697, "step": 229800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7717, "step": 229900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7693, "step": 230000 }, { "epoch": 0.03, "eval_loss": 0.7221835851669312, "eval_runtime": 194.8361, "eval_samples_per_second": 256.626, "eval_steps_per_second": 2.007, "step": 230000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7716, "step": 230100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7736, "step": 230200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7649, "step": 230300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7708, "step": 230400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7716, "step": 230500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7673, "step": 230600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7688, "step": 230700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.775, "step": 230800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7708, "step": 230900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7664, "step": 231000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7676, "step": 231100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7647, "step": 231200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7725, "step": 231300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7684, "step": 231400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7678, "step": 231500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.775, "step": 231600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7693, "step": 231700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7675, "step": 231800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7665, "step": 231900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.779, "step": 232000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7696, "step": 232100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7744, "step": 232200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7696, "step": 232300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.779, "step": 232400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7684, "step": 232500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7705, "step": 232600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7714, "step": 232700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7662, "step": 232800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7618, "step": 232900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7698, "step": 233000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7733, "step": 233100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7628, "step": 233200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7725, "step": 233300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7726, "step": 233400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7668, "step": 233500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7633, "step": 233600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7647, "step": 233700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7679, "step": 233800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7599, "step": 233900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7755, "step": 234000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7617, "step": 234100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7732, "step": 234200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7689, "step": 234300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7758, "step": 234400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7668, "step": 234500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7633, "step": 234600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7643, "step": 234700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7615, "step": 234800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7681, "step": 234900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7715, "step": 235000 }, { "epoch": 0.03, "eval_loss": 0.7221626043319702, "eval_runtime": 194.6545, "eval_samples_per_second": 256.865, "eval_steps_per_second": 2.009, "step": 235000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7665, "step": 235100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7701, "step": 235200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7635, "step": 235300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7728, "step": 235400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7678, "step": 235500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7723, "step": 235600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.769, "step": 235700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.77, "step": 235800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7647, "step": 235900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7661, "step": 236000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7648, "step": 236100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7666, "step": 236200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7711, "step": 236300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7681, "step": 236400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7712, "step": 236500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7678, "step": 236600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7704, "step": 236700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.77, "step": 236800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7716, "step": 236900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7674, "step": 237000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7599, "step": 237100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7611, "step": 237200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7544, "step": 237300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7705, "step": 237400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.766, "step": 237500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7664, "step": 237600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7671, "step": 237700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7702, "step": 237800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7659, "step": 237900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7709, "step": 238000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7624, "step": 238100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.773, "step": 238200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7731, "step": 238300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7723, "step": 238400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7689, "step": 238500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7727, "step": 238600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7702, "step": 238700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7655, "step": 238800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7708, "step": 238900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7691, "step": 239000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7649, "step": 239100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.768, "step": 239200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7611, "step": 239300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7756, "step": 239400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7661, "step": 239500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7752, "step": 239600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7688, "step": 239700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7646, "step": 239800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7664, "step": 239900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7674, "step": 240000 }, { "epoch": 0.03, "eval_loss": 0.7220859527587891, "eval_runtime": 194.5722, "eval_samples_per_second": 256.974, "eval_steps_per_second": 2.01, "step": 240000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7673, "step": 240100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7708, "step": 240200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7686, "step": 240300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7711, "step": 240400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7651, "step": 240500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7653, "step": 240600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7707, "step": 240700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7674, "step": 240800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7666, "step": 240900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7708, "step": 241000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7705, "step": 241100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7732, "step": 241200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7715, "step": 241300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7647, "step": 241400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7633, "step": 241500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7582, "step": 241600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7667, "step": 241700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7666, "step": 241800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7657, "step": 241900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7742, "step": 242000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7711, "step": 242100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7655, "step": 242200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7674, "step": 242300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7691, "step": 242400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.77, "step": 242500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7629, "step": 242600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7735, "step": 242700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7706, "step": 242800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7676, "step": 242900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7646, "step": 243000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7639, "step": 243100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.761, "step": 243200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7624, "step": 243300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7781, "step": 243400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.769, "step": 243500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7654, "step": 243600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7622, "step": 243700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7616, "step": 243800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7614, "step": 243900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.772, "step": 244000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7723, "step": 244100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7661, "step": 244200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7763, "step": 244300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7662, "step": 244400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7718, "step": 244500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7706, "step": 244600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7611, "step": 244700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7652, "step": 244800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7664, "step": 244900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7705, "step": 245000 }, { "epoch": 0.03, "eval_loss": 0.7194216251373291, "eval_runtime": 194.6455, "eval_samples_per_second": 256.877, "eval_steps_per_second": 2.009, "step": 245000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7741, "step": 245100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7693, "step": 245200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7695, "step": 245300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.761, "step": 245400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.763, "step": 245500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7574, "step": 245600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7784, "step": 245700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7699, "step": 245800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.768, "step": 245900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7618, "step": 246000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.777, "step": 246100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7622, "step": 246200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7711, "step": 246300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7686, "step": 246400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7659, "step": 246500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7689, "step": 246600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7702, "step": 246700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7719, "step": 246800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7643, "step": 246900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7611, "step": 247000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7756, "step": 247100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7662, "step": 247200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7739, "step": 247300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7735, "step": 247400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7732, "step": 247500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7614, "step": 247600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7733, "step": 247700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7689, "step": 247800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7608, "step": 247900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7705, "step": 248000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7709, "step": 248100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7676, "step": 248200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7651, "step": 248300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7707, "step": 248400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.767, "step": 248500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7683, "step": 248600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7724, "step": 248700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7632, "step": 248800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7646, "step": 248900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7676, "step": 249000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7749, "step": 249100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7693, "step": 249200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7649, "step": 249300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7665, "step": 249400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7675, "step": 249500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7641, "step": 249600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.762, "step": 249700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7612, "step": 249800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7734, "step": 249900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7696, "step": 250000 }, { "epoch": 0.03, "eval_loss": 0.7245768904685974, "eval_runtime": 194.4227, "eval_samples_per_second": 257.172, "eval_steps_per_second": 2.011, "step": 250000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7699, "step": 250100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7704, "step": 250200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7681, "step": 250300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.771, "step": 250400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7663, "step": 250500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7752, "step": 250600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.767, "step": 250700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7667, "step": 250800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7763, "step": 250900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7698, "step": 251000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7647, "step": 251100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7696, "step": 251200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7659, "step": 251300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7712, "step": 251400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7685, "step": 251500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7737, "step": 251600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7662, "step": 251700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7757, "step": 251800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7649, "step": 251900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7747, "step": 252000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7694, "step": 252100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7667, "step": 252200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7701, "step": 252300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7662, "step": 252400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7636, "step": 252500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7746, "step": 252600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7643, "step": 252700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7711, "step": 252800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7673, "step": 252900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7618, "step": 253000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7657, "step": 253100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7666, "step": 253200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7694, "step": 253300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.767, "step": 253400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7732, "step": 253500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7688, "step": 253600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7715, "step": 253700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7637, "step": 253800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7661, "step": 253900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7687, "step": 254000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7585, "step": 254100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7596, "step": 254200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7583, "step": 254300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7663, "step": 254400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7661, "step": 254500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7706, "step": 254600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7647, "step": 254700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7739, "step": 254800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7636, "step": 254900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7651, "step": 255000 }, { "epoch": 0.03, "eval_loss": 0.7205132246017456, "eval_runtime": 195.1462, "eval_samples_per_second": 256.218, "eval_steps_per_second": 2.004, "step": 255000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7618, "step": 255100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7697, "step": 255200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.768, "step": 255300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7564, "step": 255400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7702, "step": 255500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7692, "step": 255600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7735, "step": 255700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7674, "step": 255800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7711, "step": 255900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7763, "step": 256000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7633, "step": 256100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7645, "step": 256200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7681, "step": 256300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7713, "step": 256400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7654, "step": 256500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7648, "step": 256600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7655, "step": 256700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7657, "step": 256800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.759, "step": 256900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7622, "step": 257000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7741, "step": 257100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.766, "step": 257200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7658, "step": 257300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7659, "step": 257400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7733, "step": 257500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7751, "step": 257600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7732, "step": 257700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7662, "step": 257800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7687, "step": 257900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7695, "step": 258000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7701, "step": 258100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7718, "step": 258200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7688, "step": 258300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7745, "step": 258400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.77, "step": 258500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7601, "step": 258600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7581, "step": 258700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.772, "step": 258800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7648, "step": 258900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7657, "step": 259000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7638, "step": 259100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7702, "step": 259200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7613, "step": 259300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7595, "step": 259400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7726, "step": 259500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7664, "step": 259600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7625, "step": 259700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7677, "step": 259800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7665, "step": 259900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7763, "step": 260000 }, { "epoch": 0.03, "eval_loss": 0.7205927968025208, "eval_runtime": 194.8057, "eval_samples_per_second": 256.666, "eval_steps_per_second": 2.007, "step": 260000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7714, "step": 260100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7733, "step": 260200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7685, "step": 260300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7677, "step": 260400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7695, "step": 260500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7685, "step": 260600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7639, "step": 260700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7708, "step": 260800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.767, "step": 260900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7733, "step": 261000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7683, "step": 261100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7666, "step": 261200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.775, "step": 261300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7657, "step": 261400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7689, "step": 261500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7669, "step": 261600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7612, "step": 261700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7652, "step": 261800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7657, "step": 261900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.762, "step": 262000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7686, "step": 262100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7683, "step": 262200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.768, "step": 262300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7636, "step": 262400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7684, "step": 262500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7619, "step": 262600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7674, "step": 262700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7727, "step": 262800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.768, "step": 262900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7603, "step": 263000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7673, "step": 263100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7757, "step": 263200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7697, "step": 263300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7554, "step": 263400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7668, "step": 263500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7634, "step": 263600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7643, "step": 263700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7645, "step": 263800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7667, "step": 263900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7687, "step": 264000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7624, "step": 264100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7649, "step": 264200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7665, "step": 264300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7667, "step": 264400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7646, "step": 264500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7677, "step": 264600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7706, "step": 264700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7597, "step": 264800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7627, "step": 264900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.763, "step": 265000 }, { "epoch": 0.03, "eval_loss": 0.7199035286903381, "eval_runtime": 194.6875, "eval_samples_per_second": 256.822, "eval_steps_per_second": 2.008, "step": 265000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7667, "step": 265100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7601, "step": 265200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7622, "step": 265300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7654, "step": 265400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7645, "step": 265500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7636, "step": 265600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7594, "step": 265700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7717, "step": 265800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7639, "step": 265900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7625, "step": 266000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7727, "step": 266100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.777, "step": 266200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7717, "step": 266300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7709, "step": 266400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7669, "step": 266500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.767, "step": 266600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7598, "step": 266700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7717, "step": 266800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7753, "step": 266900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7681, "step": 267000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7642, "step": 267100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7588, "step": 267200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7688, "step": 267300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7739, "step": 267400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.773, "step": 267500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7697, "step": 267600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7707, "step": 267700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7637, "step": 267800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.769, "step": 267900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7695, "step": 268000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7656, "step": 268100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7754, "step": 268200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7697, "step": 268300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7636, "step": 268400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7707, "step": 268500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7678, "step": 268600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7692, "step": 268700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.77, "step": 268800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7649, "step": 268900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7601, "step": 269000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7706, "step": 269100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7637, "step": 269200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.763, "step": 269300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7679, "step": 269400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7712, "step": 269500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7686, "step": 269600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.769, "step": 269700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7639, "step": 269800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7774, "step": 269900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7745, "step": 270000 }, { "epoch": 0.03, "eval_loss": 0.7194676399230957, "eval_runtime": 194.2339, "eval_samples_per_second": 257.422, "eval_steps_per_second": 2.013, "step": 270000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7683, "step": 270100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7613, "step": 270200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.766, "step": 270300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.755, "step": 270400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7642, "step": 270500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7656, "step": 270600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7717, "step": 270700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7655, "step": 270800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7734, "step": 270900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.761, "step": 271000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7736, "step": 271100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7589, "step": 271200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7676, "step": 271300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7663, "step": 271400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7703, "step": 271500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7663, "step": 271600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7691, "step": 271700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7654, "step": 271800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7685, "step": 271900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7647, "step": 272000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7674, "step": 272100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7602, "step": 272200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7654, "step": 272300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.763, "step": 272400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7632, "step": 272500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7648, "step": 272600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7654, "step": 272700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7682, "step": 272800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7674, "step": 272900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7649, "step": 273000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.762, "step": 273100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7641, "step": 273200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7698, "step": 273300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7598, "step": 273400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7611, "step": 273500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7716, "step": 273600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7633, "step": 273700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7649, "step": 273800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.774, "step": 273900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7679, "step": 274000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7637, "step": 274100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7687, "step": 274200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7629, "step": 274300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7623, "step": 274400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7695, "step": 274500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7637, "step": 274600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7716, "step": 274700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7706, "step": 274800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7746, "step": 274900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7706, "step": 275000 }, { "epoch": 0.03, "eval_loss": 0.720512330532074, "eval_runtime": 194.8789, "eval_samples_per_second": 256.57, "eval_steps_per_second": 2.006, "step": 275000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7691, "step": 275100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7644, "step": 275200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7571, "step": 275300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.763, "step": 275400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7668, "step": 275500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7686, "step": 275600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7689, "step": 275700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.766, "step": 275800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7637, "step": 275900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7685, "step": 276000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7615, "step": 276100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7604, "step": 276200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7588, "step": 276300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.767, "step": 276400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.757, "step": 276500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7731, "step": 276600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7732, "step": 276700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7553, "step": 276800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7654, "step": 276900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7559, "step": 277000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7685, "step": 277100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7659, "step": 277200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7641, "step": 277300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7655, "step": 277400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7691, "step": 277500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7625, "step": 277600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7777, "step": 277700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7594, "step": 277800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7605, "step": 277900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7733, "step": 278000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7716, "step": 278100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7658, "step": 278200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7749, "step": 278300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7688, "step": 278400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7756, "step": 278500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7651, "step": 278600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7616, "step": 278700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.765, "step": 278800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7677, "step": 278900 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7689, "step": 279000 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7681, "step": 279100 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7668, "step": 279200 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7708, "step": 279300 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7621, "step": 279400 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7628, "step": 279500 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.764, "step": 279600 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7679, "step": 279700 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.772, "step": 279800 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7634, "step": 279900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7669, "step": 280000 }, { "epoch": 0.04, "eval_loss": 0.7187725901603699, "eval_runtime": 195.0674, "eval_samples_per_second": 256.322, "eval_steps_per_second": 2.004, "step": 280000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7662, "step": 280100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7692, "step": 280200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7661, "step": 280300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7696, "step": 280400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7608, "step": 280500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7759, "step": 280600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.763, "step": 280700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7692, "step": 280800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7752, "step": 280900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7719, "step": 281000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7666, "step": 281100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7637, "step": 281200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7673, "step": 281300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7625, "step": 281400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7627, "step": 281500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7618, "step": 281600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7737, "step": 281700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7686, "step": 281800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7649, "step": 281900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.763, "step": 282000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7597, "step": 282100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7695, "step": 282200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7722, "step": 282300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7562, "step": 282400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7663, "step": 282500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7716, "step": 282600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.757, "step": 282700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7687, "step": 282800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7683, "step": 282900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7657, "step": 283000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7634, "step": 283100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7642, "step": 283200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7623, "step": 283300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7629, "step": 283400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7536, "step": 283500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7668, "step": 283600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7695, "step": 283700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7621, "step": 283800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7706, "step": 283900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7701, "step": 284000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7614, "step": 284100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7762, "step": 284200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7644, "step": 284300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7671, "step": 284400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7648, "step": 284500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7651, "step": 284600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7658, "step": 284700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7624, "step": 284800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7615, "step": 284900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7687, "step": 285000 }, { "epoch": 0.04, "eval_loss": 0.7185755968093872, "eval_runtime": 194.5575, "eval_samples_per_second": 256.993, "eval_steps_per_second": 2.01, "step": 285000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.765, "step": 285100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7702, "step": 285200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.769, "step": 285300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7607, "step": 285400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7635, "step": 285500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7618, "step": 285600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7641, "step": 285700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7583, "step": 285800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7606, "step": 285900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7645, "step": 286000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7572, "step": 286100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7647, "step": 286200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7565, "step": 286300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7703, "step": 286400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7683, "step": 286500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.764, "step": 286600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7571, "step": 286700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7616, "step": 286800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7671, "step": 286900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7631, "step": 287000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.759, "step": 287100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7575, "step": 287200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7595, "step": 287300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7659, "step": 287400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7663, "step": 287500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7584, "step": 287600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.76, "step": 287700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7737, "step": 287800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7708, "step": 287900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7648, "step": 288000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7555, "step": 288100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.754, "step": 288200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7583, "step": 288300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7656, "step": 288400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7637, "step": 288500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7603, "step": 288600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7722, "step": 288700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7653, "step": 288800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7629, "step": 288900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7703, "step": 289000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7726, "step": 289100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7632, "step": 289200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.76, "step": 289300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7579, "step": 289400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7627, "step": 289500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7727, "step": 289600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7626, "step": 289700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7633, "step": 289800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7699, "step": 289900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7649, "step": 290000 }, { "epoch": 0.04, "eval_loss": 0.7190911173820496, "eval_runtime": 194.7742, "eval_samples_per_second": 256.708, "eval_steps_per_second": 2.007, "step": 290000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7701, "step": 290100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7621, "step": 290200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7679, "step": 290300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7695, "step": 290400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7612, "step": 290500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7664, "step": 290600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7602, "step": 290700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7686, "step": 290800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7712, "step": 290900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7611, "step": 291000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7641, "step": 291100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7624, "step": 291200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7709, "step": 291300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7649, "step": 291400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7571, "step": 291500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7649, "step": 291600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7714, "step": 291700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7625, "step": 291800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7683, "step": 291900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7596, "step": 292000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7637, "step": 292100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7565, "step": 292200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7635, "step": 292300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.767, "step": 292400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7617, "step": 292500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7641, "step": 292600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7619, "step": 292700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7647, "step": 292800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.768, "step": 292900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.76, "step": 293000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7595, "step": 293100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.766, "step": 293200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7617, "step": 293300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7627, "step": 293400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7624, "step": 293500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7631, "step": 293600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7717, "step": 293700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7743, "step": 293800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7698, "step": 293900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7649, "step": 294000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7674, "step": 294100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7634, "step": 294200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7584, "step": 294300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7709, "step": 294400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7614, "step": 294500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7594, "step": 294600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.761, "step": 294700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7636, "step": 294800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7706, "step": 294900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7635, "step": 295000 }, { "epoch": 0.04, "eval_loss": 0.7187118530273438, "eval_runtime": 194.5781, "eval_samples_per_second": 256.966, "eval_steps_per_second": 2.009, "step": 295000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7665, "step": 295100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7644, "step": 295200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7766, "step": 295300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7564, "step": 295400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7706, "step": 295500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.764, "step": 295600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7541, "step": 295700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.764, "step": 295800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7681, "step": 295900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7442, "step": 296000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7584, "step": 296100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7531, "step": 296200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.767, "step": 296300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7694, "step": 296400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7614, "step": 296500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7723, "step": 296600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7603, "step": 296700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7658, "step": 296800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7664, "step": 296900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7624, "step": 297000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7595, "step": 297100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7667, "step": 297200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7563, "step": 297300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7568, "step": 297400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7659, "step": 297500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7657, "step": 297600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7657, "step": 297700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7674, "step": 297800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7573, "step": 297900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7621, "step": 298000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7621, "step": 298100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7679, "step": 298200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7656, "step": 298300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7648, "step": 298400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7578, "step": 298500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.763, "step": 298600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7636, "step": 298700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7608, "step": 298800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7622, "step": 298900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7595, "step": 299000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7663, "step": 299100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7634, "step": 299200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.767, "step": 299300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7657, "step": 299400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7635, "step": 299500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7689, "step": 299600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7621, "step": 299700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7648, "step": 299800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7626, "step": 299900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7623, "step": 300000 }, { "epoch": 0.04, "eval_loss": 0.7169745564460754, "eval_runtime": 194.6865, "eval_samples_per_second": 256.823, "eval_steps_per_second": 2.008, "step": 300000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7578, "step": 300100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7598, "step": 300200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.763, "step": 300300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7659, "step": 300400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7554, "step": 300500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7673, "step": 300600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.765, "step": 300700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7616, "step": 300800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7534, "step": 300900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7625, "step": 301000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7586, "step": 301100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7607, "step": 301200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7681, "step": 301300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7584, "step": 301400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7617, "step": 301500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7652, "step": 301600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7618, "step": 301700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7632, "step": 301800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7649, "step": 301900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7693, "step": 302000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7573, "step": 302100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7616, "step": 302200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7705, "step": 302300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7658, "step": 302400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7596, "step": 302500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7635, "step": 302600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7606, "step": 302700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7655, "step": 302800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7692, "step": 302900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7612, "step": 303000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7537, "step": 303100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7664, "step": 303200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7614, "step": 303300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.769, "step": 303400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7566, "step": 303500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7619, "step": 303600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7694, "step": 303700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7688, "step": 303800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7584, "step": 303900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7699, "step": 304000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7634, "step": 304100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7659, "step": 304200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7603, "step": 304300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7615, "step": 304400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7721, "step": 304500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7722, "step": 304600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7673, "step": 304700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7612, "step": 304800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.767, "step": 304900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7658, "step": 305000 }, { "epoch": 0.04, "eval_loss": 0.7181566953659058, "eval_runtime": 194.5398, "eval_samples_per_second": 257.017, "eval_steps_per_second": 2.01, "step": 305000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7649, "step": 305100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7618, "step": 305200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.764, "step": 305300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7604, "step": 305400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7665, "step": 305500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7599, "step": 305600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7588, "step": 305700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7568, "step": 305800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.762, "step": 305900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7637, "step": 306000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7652, "step": 306100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7624, "step": 306200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7629, "step": 306300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7632, "step": 306400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7616, "step": 306500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7631, "step": 306600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7547, "step": 306700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.767, "step": 306800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7639, "step": 306900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7628, "step": 307000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7635, "step": 307100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7718, "step": 307200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7666, "step": 307300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7569, "step": 307400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7552, "step": 307500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7646, "step": 307600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7667, "step": 307700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.766, "step": 307800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7583, "step": 307900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7624, "step": 308000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7678, "step": 308100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7628, "step": 308200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7686, "step": 308300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7674, "step": 308400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7512, "step": 308500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7602, "step": 308600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7594, "step": 308700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7637, "step": 308800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7644, "step": 308900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7664, "step": 309000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7576, "step": 309100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7609, "step": 309200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.754, "step": 309300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.764, "step": 309400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7686, "step": 309500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7548, "step": 309600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7564, "step": 309700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7593, "step": 309800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7698, "step": 309900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7591, "step": 310000 }, { "epoch": 0.04, "eval_loss": 0.7167325615882874, "eval_runtime": 194.6222, "eval_samples_per_second": 256.908, "eval_steps_per_second": 2.009, "step": 310000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7619, "step": 310100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7645, "step": 310200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7608, "step": 310300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7596, "step": 310400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7605, "step": 310500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.759, "step": 310600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7544, "step": 310700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7696, "step": 310800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7603, "step": 310900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7594, "step": 311000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7671, "step": 311100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7672, "step": 311200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7578, "step": 311300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7668, "step": 311400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7626, "step": 311500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7553, "step": 311600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7644, "step": 311700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7639, "step": 311800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7609, "step": 311900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7647, "step": 312000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7606, "step": 312100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7675, "step": 312200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7604, "step": 312300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7604, "step": 312400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7663, "step": 312500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7615, "step": 312600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7645, "step": 312700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7582, "step": 312800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7744, "step": 312900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7676, "step": 313000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7684, "step": 313100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.766, "step": 313200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7707, "step": 313300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7637, "step": 313400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7626, "step": 313500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7572, "step": 313600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7601, "step": 313700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.762, "step": 313800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7657, "step": 313900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7573, "step": 314000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7554, "step": 314100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7594, "step": 314200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.773, "step": 314300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7558, "step": 314400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7684, "step": 314500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.763, "step": 314600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7649, "step": 314700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7595, "step": 314800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7652, "step": 314900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7602, "step": 315000 }, { "epoch": 0.04, "eval_loss": 0.7169172167778015, "eval_runtime": 194.9797, "eval_samples_per_second": 256.437, "eval_steps_per_second": 2.005, "step": 315000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7552, "step": 315100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7659, "step": 315200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7598, "step": 315300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7657, "step": 315400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7602, "step": 315500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7659, "step": 315600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7673, "step": 315700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7641, "step": 315800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7592, "step": 315900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7594, "step": 316000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7594, "step": 316100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7586, "step": 316200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7617, "step": 316300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7637, "step": 316400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7599, "step": 316500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7607, "step": 316600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7609, "step": 316700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7585, "step": 316800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7537, "step": 316900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7574, "step": 317000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7672, "step": 317100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7591, "step": 317200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7626, "step": 317300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7625, "step": 317400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7627, "step": 317500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.763, "step": 317600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7603, "step": 317700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7576, "step": 317800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7614, "step": 317900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7636, "step": 318000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7652, "step": 318100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7668, "step": 318200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7616, "step": 318300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7643, "step": 318400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7607, "step": 318500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7597, "step": 318600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7618, "step": 318700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7618, "step": 318800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.756, "step": 318900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7641, "step": 319000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7638, "step": 319100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7649, "step": 319200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7633, "step": 319300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7624, "step": 319400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7619, "step": 319500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7627, "step": 319600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7629, "step": 319700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7721, "step": 319800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7593, "step": 319900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7621, "step": 320000 }, { "epoch": 0.04, "eval_loss": 0.7167797088623047, "eval_runtime": 194.6632, "eval_samples_per_second": 256.854, "eval_steps_per_second": 2.009, "step": 320000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7698, "step": 320100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7712, "step": 320200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7679, "step": 320300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7611, "step": 320400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7627, "step": 320500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7675, "step": 320600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7547, "step": 320700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7603, "step": 320800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7667, "step": 320900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7648, "step": 321000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7604, "step": 321100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7604, "step": 321200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7487, "step": 321300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7557, "step": 321400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7551, "step": 321500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7626, "step": 321600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7579, "step": 321700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7596, "step": 321800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7535, "step": 321900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.766, "step": 322000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7559, "step": 322100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7588, "step": 322200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7576, "step": 322300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.767, "step": 322400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7524, "step": 322500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7676, "step": 322600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7619, "step": 322700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7677, "step": 322800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7593, "step": 322900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7576, "step": 323000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7605, "step": 323100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7684, "step": 323200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7595, "step": 323300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7604, "step": 323400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.755, "step": 323500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7591, "step": 323600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.76, "step": 323700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7521, "step": 323800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7565, "step": 323900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7588, "step": 324000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7579, "step": 324100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7576, "step": 324200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7648, "step": 324300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7627, "step": 324400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7607, "step": 324500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.759, "step": 324600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7591, "step": 324700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7563, "step": 324800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7611, "step": 324900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.759, "step": 325000 }, { "epoch": 0.04, "eval_loss": 0.7153588533401489, "eval_runtime": 202.3735, "eval_samples_per_second": 247.068, "eval_steps_per_second": 1.932, "step": 325000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7655, "step": 325100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7587, "step": 325200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7612, "step": 325300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7648, "step": 325400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7624, "step": 325500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7596, "step": 325600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7614, "step": 325700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7604, "step": 325800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7635, "step": 325900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7671, "step": 326000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7598, "step": 326100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7637, "step": 326200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7608, "step": 326300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7585, "step": 326400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7516, "step": 326500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7611, "step": 326600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7545, "step": 326700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7649, "step": 326800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7611, "step": 326900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7637, "step": 327000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7556, "step": 327100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7685, "step": 327200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7601, "step": 327300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7552, "step": 327400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7624, "step": 327500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7576, "step": 327600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7607, "step": 327700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7648, "step": 327800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7605, "step": 327900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7615, "step": 328000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7657, "step": 328100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7593, "step": 328200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7565, "step": 328300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7625, "step": 328400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7571, "step": 328500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7611, "step": 328600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7653, "step": 328700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7635, "step": 328800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7648, "step": 328900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7602, "step": 329000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7594, "step": 329100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.763, "step": 329200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7681, "step": 329300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.762, "step": 329400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7692, "step": 329500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7567, "step": 329600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7554, "step": 329700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7606, "step": 329800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7618, "step": 329900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7636, "step": 330000 }, { "epoch": 0.04, "eval_loss": 0.7145840525627136, "eval_runtime": 203.3706, "eval_samples_per_second": 245.857, "eval_steps_per_second": 1.923, "step": 330000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7608, "step": 330100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7602, "step": 330200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.76, "step": 330300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7675, "step": 330400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7547, "step": 330500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7599, "step": 330600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7619, "step": 330700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7502, "step": 330800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7608, "step": 330900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7655, "step": 331000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7603, "step": 331100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7573, "step": 331200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.761, "step": 331300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7576, "step": 331400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7545, "step": 331500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7615, "step": 331600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.768, "step": 331700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7525, "step": 331800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7636, "step": 331900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7643, "step": 332000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.756, "step": 332100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7577, "step": 332200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7611, "step": 332300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7652, "step": 332400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7614, "step": 332500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7619, "step": 332600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7507, "step": 332700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7593, "step": 332800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7633, "step": 332900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7558, "step": 333000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7619, "step": 333100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7576, "step": 333200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7565, "step": 333300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7702, "step": 333400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.76, "step": 333500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7598, "step": 333600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.758, "step": 333700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7508, "step": 333800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7655, "step": 333900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.758, "step": 334000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7566, "step": 334100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7614, "step": 334200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7601, "step": 334300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7649, "step": 334400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7584, "step": 334500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.768, "step": 334600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7585, "step": 334700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.757, "step": 334800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7617, "step": 334900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7578, "step": 335000 }, { "epoch": 0.04, "eval_loss": 0.713698148727417, "eval_runtime": 201.6244, "eval_samples_per_second": 247.986, "eval_steps_per_second": 1.939, "step": 335000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7626, "step": 335100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.763, "step": 335200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7658, "step": 335300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7571, "step": 335400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7636, "step": 335500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7575, "step": 335600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.756, "step": 335700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7639, "step": 335800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7658, "step": 335900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7602, "step": 336000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7592, "step": 336100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.758, "step": 336200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.754, "step": 336300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7679, "step": 336400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7584, "step": 336500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7645, "step": 336600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7653, "step": 336700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7612, "step": 336800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7622, "step": 336900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7592, "step": 337000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7605, "step": 337100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7691, "step": 337200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7572, "step": 337300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7624, "step": 337400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7585, "step": 337500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7546, "step": 337600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7583, "step": 337700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7586, "step": 337800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7648, "step": 337900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7659, "step": 338000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7623, "step": 338100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7624, "step": 338200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7587, "step": 338300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.763, "step": 338400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7533, "step": 338500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7598, "step": 338600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.756, "step": 338700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7593, "step": 338800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7577, "step": 338900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7701, "step": 339000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7585, "step": 339100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7651, "step": 339200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7557, "step": 339300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7631, "step": 339400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7617, "step": 339500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7566, "step": 339600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7546, "step": 339700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7607, "step": 339800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7619, "step": 339900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7577, "step": 340000 }, { "epoch": 0.04, "eval_loss": 0.7148870229721069, "eval_runtime": 206.1839, "eval_samples_per_second": 242.502, "eval_steps_per_second": 1.896, "step": 340000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.76, "step": 340100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7591, "step": 340200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7645, "step": 340300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7567, "step": 340400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7626, "step": 340500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7601, "step": 340600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7562, "step": 340700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.761, "step": 340800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7628, "step": 340900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7623, "step": 341000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.762, "step": 341100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7591, "step": 341200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7552, "step": 341300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7608, "step": 341400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.763, "step": 341500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7619, "step": 341600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7573, "step": 341700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7618, "step": 341800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.765, "step": 341900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7579, "step": 342000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7607, "step": 342100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7635, "step": 342200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.763, "step": 342300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7624, "step": 342400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7551, "step": 342500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7581, "step": 342600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7595, "step": 342700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7647, "step": 342800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.762, "step": 342900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7617, "step": 343000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7654, "step": 343100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7572, "step": 343200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7606, "step": 343300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7637, "step": 343400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7588, "step": 343500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7583, "step": 343600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7579, "step": 343700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7603, "step": 343800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7535, "step": 343900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7681, "step": 344000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7619, "step": 344100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7603, "step": 344200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7511, "step": 344300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7604, "step": 344400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7575, "step": 344500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7594, "step": 344600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7643, "step": 344700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7545, "step": 344800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7607, "step": 344900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7604, "step": 345000 }, { "epoch": 0.04, "eval_loss": 0.7139725685119629, "eval_runtime": 207.3764, "eval_samples_per_second": 241.107, "eval_steps_per_second": 1.885, "step": 345000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7557, "step": 345100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7618, "step": 345200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7655, "step": 345300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7649, "step": 345400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7591, "step": 345500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7606, "step": 345600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.77, "step": 345700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7628, "step": 345800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7567, "step": 345900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.759, "step": 346000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7583, "step": 346100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7667, "step": 346200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7658, "step": 346300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7653, "step": 346400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7598, "step": 346500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7542, "step": 346600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7564, "step": 346700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7612, "step": 346800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7553, "step": 346900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7586, "step": 347000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.759, "step": 347100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7647, "step": 347200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7591, "step": 347300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7642, "step": 347400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7622, "step": 347500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7582, "step": 347600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7653, "step": 347700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7575, "step": 347800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.761, "step": 347900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7567, "step": 348000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7533, "step": 348100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7571, "step": 348200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7628, "step": 348300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7684, "step": 348400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.766, "step": 348500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7635, "step": 348600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7632, "step": 348700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7594, "step": 348800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7609, "step": 348900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.767, "step": 349000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7592, "step": 349100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.76, "step": 349200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7619, "step": 349300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.764, "step": 349400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7587, "step": 349500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7615, "step": 349600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7656, "step": 349700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7575, "step": 349800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7704, "step": 349900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.757, "step": 350000 }, { "epoch": 0.04, "eval_loss": 0.7151290774345398, "eval_runtime": 202.07, "eval_samples_per_second": 247.439, "eval_steps_per_second": 1.935, "step": 350000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7566, "step": 350100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7636, "step": 350200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7581, "step": 350300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7555, "step": 350400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7659, "step": 350500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7588, "step": 350600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7592, "step": 350700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.774, "step": 350800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7666, "step": 350900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7521, "step": 351000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7638, "step": 351100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7525, "step": 351200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7696, "step": 351300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7608, "step": 351400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7708, "step": 351500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7661, "step": 351600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7556, "step": 351700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7646, "step": 351800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7542, "step": 351900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7617, "step": 352000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7604, "step": 352100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7675, "step": 352200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7608, "step": 352300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7594, "step": 352400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7616, "step": 352500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7606, "step": 352600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7627, "step": 352700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7551, "step": 352800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7635, "step": 352900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.765, "step": 353000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7532, "step": 353100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7623, "step": 353200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7597, "step": 353300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.761, "step": 353400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7601, "step": 353500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7614, "step": 353600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7654, "step": 353700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7593, "step": 353800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7599, "step": 353900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7657, "step": 354000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.761, "step": 354100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7581, "step": 354200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7598, "step": 354300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7589, "step": 354400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.761, "step": 354500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7533, "step": 354600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7598, "step": 354700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7547, "step": 354800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7592, "step": 354900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.756, "step": 355000 }, { "epoch": 0.04, "eval_loss": 0.7139496207237244, "eval_runtime": 207.6971, "eval_samples_per_second": 240.735, "eval_steps_per_second": 1.883, "step": 355000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7638, "step": 355100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.766, "step": 355200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7596, "step": 355300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7515, "step": 355400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.759, "step": 355500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7606, "step": 355600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7551, "step": 355700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7617, "step": 355800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.755, "step": 355900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7601, "step": 356000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7663, "step": 356100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7656, "step": 356200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7536, "step": 356300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7603, "step": 356400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7488, "step": 356500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7574, "step": 356600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7542, "step": 356700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7525, "step": 356800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7629, "step": 356900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7647, "step": 357000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7601, "step": 357100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7627, "step": 357200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7598, "step": 357300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.755, "step": 357400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7601, "step": 357500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7622, "step": 357600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.765, "step": 357700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7677, "step": 357800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7581, "step": 357900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7593, "step": 358000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7536, "step": 358100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7655, "step": 358200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7633, "step": 358300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7684, "step": 358400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.76, "step": 358500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.754, "step": 358600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7695, "step": 358700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7635, "step": 358800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7534, "step": 358900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7649, "step": 359000 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7533, "step": 359100 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7572, "step": 359200 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7605, "step": 359300 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7697, "step": 359400 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7636, "step": 359500 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7652, "step": 359600 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7534, "step": 359700 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7609, "step": 359800 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7646, "step": 359900 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.7629, "step": 360000 }, { "epoch": 0.04, "eval_loss": 0.7152824401855469, "eval_runtime": 208.4686, "eval_samples_per_second": 239.844, "eval_steps_per_second": 1.876, "step": 360000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7635, "step": 360100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7533, "step": 360200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7581, "step": 360300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.761, "step": 360400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7543, "step": 360500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7669, "step": 360600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7636, "step": 360700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7532, "step": 360800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7569, "step": 360900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7602, "step": 361000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7584, "step": 361100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7626, "step": 361200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7617, "step": 361300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7614, "step": 361400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7516, "step": 361500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7563, "step": 361600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7585, "step": 361700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7622, "step": 361800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7536, "step": 361900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7595, "step": 362000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7656, "step": 362100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7618, "step": 362200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7617, "step": 362300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7599, "step": 362400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7596, "step": 362500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7565, "step": 362600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7568, "step": 362700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7638, "step": 362800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7601, "step": 362900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7627, "step": 363000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7593, "step": 363100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7632, "step": 363200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7648, "step": 363300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7637, "step": 363400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7548, "step": 363500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7531, "step": 363600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7647, "step": 363700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7528, "step": 363800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7554, "step": 363900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7648, "step": 364000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7609, "step": 364100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7669, "step": 364200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7589, "step": 364300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7597, "step": 364400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7591, "step": 364500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7639, "step": 364600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7589, "step": 364700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7586, "step": 364800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7588, "step": 364900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7514, "step": 365000 }, { "epoch": 0.05, "eval_loss": 0.7134825587272644, "eval_runtime": 207.3356, "eval_samples_per_second": 241.155, "eval_steps_per_second": 1.886, "step": 365000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.758, "step": 365100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7596, "step": 365200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.758, "step": 365300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7564, "step": 365400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.758, "step": 365500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7561, "step": 365600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7591, "step": 365700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7604, "step": 365800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7612, "step": 365900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7524, "step": 366000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7612, "step": 366100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7632, "step": 366200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7587, "step": 366300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7567, "step": 366400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7673, "step": 366500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.761, "step": 366600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7617, "step": 366700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7555, "step": 366800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7616, "step": 366900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7633, "step": 367000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7562, "step": 367100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.762, "step": 367200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7595, "step": 367300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7565, "step": 367400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7603, "step": 367500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7602, "step": 367600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7541, "step": 367700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7625, "step": 367800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7619, "step": 367900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7648, "step": 368000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7551, "step": 368100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7599, "step": 368200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7592, "step": 368300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7578, "step": 368400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.762, "step": 368500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7662, "step": 368600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.765, "step": 368700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7617, "step": 368800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7563, "step": 368900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7629, "step": 369000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.759, "step": 369100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7568, "step": 369200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7516, "step": 369300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7581, "step": 369400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7618, "step": 369500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7618, "step": 369600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7615, "step": 369700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7546, "step": 369800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7539, "step": 369900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7619, "step": 370000 }, { "epoch": 0.05, "eval_loss": 0.7130681276321411, "eval_runtime": 206.0455, "eval_samples_per_second": 242.665, "eval_steps_per_second": 1.898, "step": 370000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7508, "step": 370100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7708, "step": 370200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7684, "step": 370300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7577, "step": 370400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7615, "step": 370500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7602, "step": 370600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7611, "step": 370700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7628, "step": 370800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.761, "step": 370900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7633, "step": 371000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7625, "step": 371100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7689, "step": 371200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7555, "step": 371300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7628, "step": 371400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7671, "step": 371500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7599, "step": 371600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7597, "step": 371700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7485, "step": 371800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7635, "step": 371900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7571, "step": 372000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7617, "step": 372100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7592, "step": 372200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7554, "step": 372300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7533, "step": 372400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7585, "step": 372500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7625, "step": 372600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7617, "step": 372700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7545, "step": 372800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.761, "step": 372900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7525, "step": 373000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7601, "step": 373100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7511, "step": 373200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7583, "step": 373300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7504, "step": 373400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7557, "step": 373500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7576, "step": 373600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7548, "step": 373700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7587, "step": 373800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7612, "step": 373900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7483, "step": 374000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7605, "step": 374100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7587, "step": 374200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7514, "step": 374300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7559, "step": 374400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7568, "step": 374500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7589, "step": 374600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.768, "step": 374700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7546, "step": 374800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7539, "step": 374900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7567, "step": 375000 }, { "epoch": 0.05, "eval_loss": 0.7135566473007202, "eval_runtime": 204.9801, "eval_samples_per_second": 243.926, "eval_steps_per_second": 1.908, "step": 375000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7598, "step": 375100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.757, "step": 375200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.762, "step": 375300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7555, "step": 375400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7506, "step": 375500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7609, "step": 375600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7537, "step": 375700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7514, "step": 375800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7562, "step": 375900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7613, "step": 376000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7653, "step": 376100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7598, "step": 376200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7548, "step": 376300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7692, "step": 376400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7672, "step": 376500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7587, "step": 376600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7601, "step": 376700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7573, "step": 376800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7578, "step": 376900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7612, "step": 377000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7519, "step": 377100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7536, "step": 377200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7654, "step": 377300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7595, "step": 377400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7553, "step": 377500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7519, "step": 377600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7528, "step": 377700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7573, "step": 377800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7646, "step": 377900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.76, "step": 378000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7532, "step": 378100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.758, "step": 378200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7623, "step": 378300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7527, "step": 378400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7528, "step": 378500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7599, "step": 378600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7558, "step": 378700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7612, "step": 378800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7676, "step": 378900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7634, "step": 379000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7587, "step": 379100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7557, "step": 379200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7586, "step": 379300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.762, "step": 379400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7585, "step": 379500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7613, "step": 379600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7558, "step": 379700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7618, "step": 379800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7591, "step": 379900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7669, "step": 380000 }, { "epoch": 0.05, "eval_loss": 0.7119324207305908, "eval_runtime": 208.8545, "eval_samples_per_second": 239.401, "eval_steps_per_second": 1.872, "step": 380000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.759, "step": 380100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7641, "step": 380200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7555, "step": 380300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7571, "step": 380400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7625, "step": 380500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7574, "step": 380600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7611, "step": 380700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7534, "step": 380800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7488, "step": 380900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7629, "step": 381000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7611, "step": 381100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7576, "step": 381200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7564, "step": 381300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7591, "step": 381400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7538, "step": 381500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7587, "step": 381600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7453, "step": 381700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7649, "step": 381800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7644, "step": 381900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7588, "step": 382000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7584, "step": 382100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7628, "step": 382200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7626, "step": 382300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.75, "step": 382400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.762, "step": 382500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7619, "step": 382600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7596, "step": 382700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7584, "step": 382800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7545, "step": 382900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7534, "step": 383000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7576, "step": 383100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7555, "step": 383200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7624, "step": 383300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7609, "step": 383400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7597, "step": 383500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7539, "step": 383600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7612, "step": 383700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7605, "step": 383800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7586, "step": 383900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7553, "step": 384000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7551, "step": 384100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7603, "step": 384200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7568, "step": 384300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.759, "step": 384400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7541, "step": 384500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7675, "step": 384600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7577, "step": 384700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.757, "step": 384800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.767, "step": 384900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7601, "step": 385000 }, { "epoch": 0.05, "eval_loss": 0.7137901782989502, "eval_runtime": 207.8203, "eval_samples_per_second": 240.593, "eval_steps_per_second": 1.881, "step": 385000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7588, "step": 385100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7611, "step": 385200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7597, "step": 385300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7605, "step": 385400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7568, "step": 385500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7614, "step": 385600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7491, "step": 385700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7593, "step": 385800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7566, "step": 385900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.763, "step": 386000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7629, "step": 386100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7573, "step": 386200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7601, "step": 386300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7619, "step": 386400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7638, "step": 386500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.762, "step": 386600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7586, "step": 386700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7663, "step": 386800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7563, "step": 386900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7658, "step": 387000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7513, "step": 387100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7542, "step": 387200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7627, "step": 387300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7561, "step": 387400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7597, "step": 387500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7591, "step": 387600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7543, "step": 387700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7592, "step": 387800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7578, "step": 387900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7532, "step": 388000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7542, "step": 388100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7666, "step": 388200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7598, "step": 388300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.757, "step": 388400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7518, "step": 388500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7537, "step": 388600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7594, "step": 388700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7647, "step": 388800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7613, "step": 388900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7603, "step": 389000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7557, "step": 389100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7515, "step": 389200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7598, "step": 389300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7605, "step": 389400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7553, "step": 389500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7568, "step": 389600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7567, "step": 389700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7535, "step": 389800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.755, "step": 389900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7586, "step": 390000 }, { "epoch": 0.05, "eval_loss": 0.7126809358596802, "eval_runtime": 205.3357, "eval_samples_per_second": 243.504, "eval_steps_per_second": 1.904, "step": 390000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7565, "step": 390100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7695, "step": 390200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7582, "step": 390300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7606, "step": 390400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.76, "step": 390500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7528, "step": 390600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7538, "step": 390700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7572, "step": 390800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7548, "step": 390900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7691, "step": 391000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7545, "step": 391100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7637, "step": 391200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7593, "step": 391300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7596, "step": 391400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7605, "step": 391500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.76, "step": 391600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7581, "step": 391700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7536, "step": 391800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7611, "step": 391900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7585, "step": 392000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7574, "step": 392100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.762, "step": 392200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7565, "step": 392300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7575, "step": 392400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.757, "step": 392500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7557, "step": 392600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7609, "step": 392700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7581, "step": 392800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7528, "step": 392900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7603, "step": 393000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7482, "step": 393100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7556, "step": 393200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7626, "step": 393300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.756, "step": 393400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7604, "step": 393500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7542, "step": 393600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.755, "step": 393700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7601, "step": 393800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7666, "step": 393900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7522, "step": 394000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7494, "step": 394100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7545, "step": 394200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7515, "step": 394300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7498, "step": 394400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7581, "step": 394500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7617, "step": 394600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7559, "step": 394700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7532, "step": 394800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7563, "step": 394900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7632, "step": 395000 }, { "epoch": 0.05, "eval_loss": 0.7117016911506653, "eval_runtime": 209.4826, "eval_samples_per_second": 238.683, "eval_steps_per_second": 1.867, "step": 395000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7551, "step": 395100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7584, "step": 395200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7595, "step": 395300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7461, "step": 395400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7648, "step": 395500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7611, "step": 395600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7656, "step": 395700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.766, "step": 395800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7506, "step": 395900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.756, "step": 396000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7549, "step": 396100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7534, "step": 396200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7614, "step": 396300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7542, "step": 396400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7591, "step": 396500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7639, "step": 396600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7554, "step": 396700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.759, "step": 396800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7559, "step": 396900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7587, "step": 397000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.755, "step": 397100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7644, "step": 397200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7617, "step": 397300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7533, "step": 397400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7544, "step": 397500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7552, "step": 397600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7508, "step": 397700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7564, "step": 397800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7571, "step": 397900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7568, "step": 398000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7581, "step": 398100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7604, "step": 398200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.761, "step": 398300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7634, "step": 398400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7492, "step": 398500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7561, "step": 398600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7583, "step": 398700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.754, "step": 398800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7575, "step": 398900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.756, "step": 399000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7522, "step": 399100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7599, "step": 399200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7631, "step": 399300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7542, "step": 399400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7636, "step": 399500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7557, "step": 399600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7626, "step": 399700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7476, "step": 399800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.751, "step": 399900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7598, "step": 400000 }, { "epoch": 0.05, "eval_loss": 0.7114148139953613, "eval_runtime": 211.9423, "eval_samples_per_second": 235.913, "eval_steps_per_second": 1.845, "step": 400000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7558, "step": 400100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7566, "step": 400200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7607, "step": 400300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7639, "step": 400400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7553, "step": 400500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7585, "step": 400600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.761, "step": 400700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.76, "step": 400800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7603, "step": 400900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7573, "step": 401000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7606, "step": 401100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7615, "step": 401200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7589, "step": 401300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7542, "step": 401400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7552, "step": 401500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7508, "step": 401600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7629, "step": 401700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7546, "step": 401800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7477, "step": 401900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7523, "step": 402000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7546, "step": 402100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7533, "step": 402200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.762, "step": 402300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7628, "step": 402400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7518, "step": 402500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7574, "step": 402600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.758, "step": 402700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7622, "step": 402800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.761, "step": 402900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7495, "step": 403000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.757, "step": 403100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7587, "step": 403200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.759, "step": 403300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7605, "step": 403400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7495, "step": 403500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7573, "step": 403600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.761, "step": 403700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7522, "step": 403800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7556, "step": 403900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7516, "step": 404000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7603, "step": 404100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7555, "step": 404200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7614, "step": 404300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.762, "step": 404400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7632, "step": 404500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7519, "step": 404600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7572, "step": 404700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.756, "step": 404800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7687, "step": 404900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7576, "step": 405000 }, { "epoch": 0.05, "eval_loss": 0.7116851806640625, "eval_runtime": 206.952, "eval_samples_per_second": 241.602, "eval_steps_per_second": 1.889, "step": 405000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7596, "step": 405100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7585, "step": 405200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.756, "step": 405300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7582, "step": 405400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7686, "step": 405500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7559, "step": 405600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7558, "step": 405700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7608, "step": 405800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7543, "step": 405900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7568, "step": 406000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7605, "step": 406100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7548, "step": 406200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7482, "step": 406300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7514, "step": 406400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7536, "step": 406500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7569, "step": 406600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7531, "step": 406700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7491, "step": 406800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7567, "step": 406900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7518, "step": 407000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7591, "step": 407100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7515, "step": 407200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.763, "step": 407300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7642, "step": 407400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7523, "step": 407500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7603, "step": 407600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7598, "step": 407700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7514, "step": 407800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7586, "step": 407900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7585, "step": 408000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.756, "step": 408100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7557, "step": 408200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7591, "step": 408300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7616, "step": 408400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7572, "step": 408500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7604, "step": 408600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7561, "step": 408700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7607, "step": 408800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7492, "step": 408900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7557, "step": 409000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.757, "step": 409100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7613, "step": 409200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7496, "step": 409300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7532, "step": 409400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7543, "step": 409500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.759, "step": 409600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7557, "step": 409700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7575, "step": 409800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7571, "step": 409900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7543, "step": 410000 }, { "epoch": 0.05, "eval_loss": 0.7122207283973694, "eval_runtime": 208.3668, "eval_samples_per_second": 239.961, "eval_steps_per_second": 1.876, "step": 410000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7475, "step": 410100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7558, "step": 410200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7517, "step": 410300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7542, "step": 410400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.752, "step": 410500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7615, "step": 410600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7582, "step": 410700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7578, "step": 410800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7604, "step": 410900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7602, "step": 411000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7533, "step": 411100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7529, "step": 411200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7546, "step": 411300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7529, "step": 411400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7555, "step": 411500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.753, "step": 411600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7579, "step": 411700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7568, "step": 411800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7496, "step": 411900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7548, "step": 412000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7552, "step": 412100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7528, "step": 412200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7529, "step": 412300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.749, "step": 412400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7562, "step": 412500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7562, "step": 412600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7598, "step": 412700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7523, "step": 412800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7657, "step": 412900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7616, "step": 413000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7545, "step": 413100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7544, "step": 413200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7585, "step": 413300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7511, "step": 413400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7584, "step": 413500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7631, "step": 413600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7652, "step": 413700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7579, "step": 413800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.752, "step": 413900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7614, "step": 414000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7729, "step": 414100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7505, "step": 414200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7638, "step": 414300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.755, "step": 414400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7571, "step": 414500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7588, "step": 414600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7541, "step": 414700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7596, "step": 414800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7531, "step": 414900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7521, "step": 415000 }, { "epoch": 0.05, "eval_loss": 0.7115651369094849, "eval_runtime": 208.0252, "eval_samples_per_second": 240.355, "eval_steps_per_second": 1.88, "step": 415000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7533, "step": 415100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7516, "step": 415200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7587, "step": 415300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7641, "step": 415400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7587, "step": 415500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7519, "step": 415600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.754, "step": 415700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7553, "step": 415800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7562, "step": 415900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7511, "step": 416000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7607, "step": 416100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7528, "step": 416200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7584, "step": 416300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7604, "step": 416400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.764, "step": 416500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7486, "step": 416600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7513, "step": 416700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7504, "step": 416800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7605, "step": 416900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7661, "step": 417000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7549, "step": 417100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7587, "step": 417200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.76, "step": 417300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7601, "step": 417400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7532, "step": 417500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7502, "step": 417600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7588, "step": 417700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7515, "step": 417800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7554, "step": 417900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.759, "step": 418000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7466, "step": 418100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7559, "step": 418200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7562, "step": 418300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7569, "step": 418400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7602, "step": 418500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7507, "step": 418600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7506, "step": 418700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7545, "step": 418800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7688, "step": 418900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.751, "step": 419000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7604, "step": 419100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7586, "step": 419200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7612, "step": 419300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7577, "step": 419400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.754, "step": 419500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7617, "step": 419600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.768, "step": 419700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.762, "step": 419800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7588, "step": 419900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.752, "step": 420000 }, { "epoch": 0.05, "eval_loss": 0.7127427458763123, "eval_runtime": 205.6068, "eval_samples_per_second": 243.183, "eval_steps_per_second": 1.902, "step": 420000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7531, "step": 420100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7571, "step": 420200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7541, "step": 420300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7521, "step": 420400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7596, "step": 420500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7583, "step": 420600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7708, "step": 420700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7481, "step": 420800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7585, "step": 420900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7558, "step": 421000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7523, "step": 421100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.757, "step": 421200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7612, "step": 421300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7683, "step": 421400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7524, "step": 421500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7535, "step": 421600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7694, "step": 421700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7548, "step": 421800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7541, "step": 421900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7636, "step": 422000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7581, "step": 422100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.759, "step": 422200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7531, "step": 422300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.753, "step": 422400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7558, "step": 422500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7579, "step": 422600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7554, "step": 422700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.758, "step": 422800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.757, "step": 422900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7523, "step": 423000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7506, "step": 423100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7602, "step": 423200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7548, "step": 423300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7492, "step": 423400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7597, "step": 423500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.761, "step": 423600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7567, "step": 423700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7617, "step": 423800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7531, "step": 423900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7561, "step": 424000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.758, "step": 424100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7486, "step": 424200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7587, "step": 424300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7535, "step": 424400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.763, "step": 424500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7553, "step": 424600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7579, "step": 424700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7556, "step": 424800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7582, "step": 424900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7533, "step": 425000 }, { "epoch": 0.05, "eval_loss": 0.7107843160629272, "eval_runtime": 208.9656, "eval_samples_per_second": 239.274, "eval_steps_per_second": 1.871, "step": 425000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7617, "step": 425100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7495, "step": 425200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7586, "step": 425300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7503, "step": 425400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7506, "step": 425500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7436, "step": 425600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.756, "step": 425700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7583, "step": 425800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7504, "step": 425900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7544, "step": 426000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.755, "step": 426100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7623, "step": 426200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.758, "step": 426300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7511, "step": 426400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7602, "step": 426500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7623, "step": 426600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7565, "step": 426700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7542, "step": 426800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7634, "step": 426900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7603, "step": 427000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7569, "step": 427100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7512, "step": 427200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7494, "step": 427300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.761, "step": 427400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7501, "step": 427500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7534, "step": 427600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7535, "step": 427700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.77, "step": 427800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.76, "step": 427900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.751, "step": 428000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7552, "step": 428100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7524, "step": 428200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.757, "step": 428300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.757, "step": 428400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7508, "step": 428500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.754, "step": 428600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7589, "step": 428700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7541, "step": 428800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.759, "step": 428900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7562, "step": 429000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7529, "step": 429100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7567, "step": 429200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7606, "step": 429300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7547, "step": 429400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7545, "step": 429500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7622, "step": 429600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7544, "step": 429700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7416, "step": 429800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7599, "step": 429900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7542, "step": 430000 }, { "epoch": 0.05, "eval_loss": 0.7116101384162903, "eval_runtime": 204.4491, "eval_samples_per_second": 244.56, "eval_steps_per_second": 1.912, "step": 430000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7571, "step": 430100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7484, "step": 430200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7517, "step": 430300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7526, "step": 430400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7528, "step": 430500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7527, "step": 430600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7511, "step": 430700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7611, "step": 430800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7642, "step": 430900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.759, "step": 431000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7528, "step": 431100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7613, "step": 431200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7555, "step": 431300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7586, "step": 431400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7515, "step": 431500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7588, "step": 431600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7613, "step": 431700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7556, "step": 431800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7584, "step": 431900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.756, "step": 432000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.75, "step": 432100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7567, "step": 432200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7451, "step": 432300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7557, "step": 432400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7531, "step": 432500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.758, "step": 432600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7551, "step": 432700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7524, "step": 432800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7656, "step": 432900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7564, "step": 433000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7581, "step": 433100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7548, "step": 433200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7535, "step": 433300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7534, "step": 433400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7555, "step": 433500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7558, "step": 433600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7566, "step": 433700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7615, "step": 433800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.76, "step": 433900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7565, "step": 434000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.763, "step": 434100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7635, "step": 434200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.748, "step": 434300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7651, "step": 434400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7524, "step": 434500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7663, "step": 434600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7551, "step": 434700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7602, "step": 434800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7555, "step": 434900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7575, "step": 435000 }, { "epoch": 0.05, "eval_loss": 0.7120763063430786, "eval_runtime": 207.7863, "eval_samples_per_second": 240.632, "eval_steps_per_second": 1.882, "step": 435000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7568, "step": 435100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7604, "step": 435200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7561, "step": 435300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7585, "step": 435400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7553, "step": 435500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7611, "step": 435600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7513, "step": 435700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7577, "step": 435800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7595, "step": 435900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7539, "step": 436000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7514, "step": 436100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.746, "step": 436200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7545, "step": 436300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7569, "step": 436400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7573, "step": 436500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7617, "step": 436600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7526, "step": 436700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7572, "step": 436800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7555, "step": 436900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7608, "step": 437000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7576, "step": 437100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7625, "step": 437200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7555, "step": 437300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7535, "step": 437400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7569, "step": 437500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7547, "step": 437600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7568, "step": 437700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.754, "step": 437800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7567, "step": 437900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7509, "step": 438000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7636, "step": 438100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7553, "step": 438200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7653, "step": 438300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7584, "step": 438400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7594, "step": 438500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7596, "step": 438600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7579, "step": 438700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7619, "step": 438800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7542, "step": 438900 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.756, "step": 439000 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7569, "step": 439100 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7496, "step": 439200 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7636, "step": 439300 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7539, "step": 439400 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7537, "step": 439500 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7653, "step": 439600 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7601, "step": 439700 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7553, "step": 439800 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.7581, "step": 439900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7565, "step": 440000 }, { "epoch": 0.06, "eval_loss": 0.7116917967796326, "eval_runtime": 201.9048, "eval_samples_per_second": 247.642, "eval_steps_per_second": 1.937, "step": 440000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.754, "step": 440100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7506, "step": 440200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7537, "step": 440300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7628, "step": 440400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.75, "step": 440500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7644, "step": 440600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7544, "step": 440700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7534, "step": 440800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7571, "step": 440900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7498, "step": 441000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7643, "step": 441100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7567, "step": 441200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7594, "step": 441300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.752, "step": 441400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7573, "step": 441500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7532, "step": 441600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7491, "step": 441700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7497, "step": 441800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7569, "step": 441900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7574, "step": 442000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7567, "step": 442100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7544, "step": 442200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7598, "step": 442300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7552, "step": 442400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7524, "step": 442500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7544, "step": 442600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7556, "step": 442700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7586, "step": 442800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7489, "step": 442900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7543, "step": 443000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7498, "step": 443100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7621, "step": 443200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.758, "step": 443300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7576, "step": 443400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7599, "step": 443500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7564, "step": 443600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7554, "step": 443700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7538, "step": 443800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7576, "step": 443900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7586, "step": 444000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7559, "step": 444100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7517, "step": 444200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.758, "step": 444300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7529, "step": 444400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7558, "step": 444500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.751, "step": 444600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.762, "step": 444700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7562, "step": 444800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7557, "step": 444900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7413, "step": 445000 }, { "epoch": 0.06, "eval_loss": 0.709259033203125, "eval_runtime": 200.7785, "eval_samples_per_second": 249.031, "eval_steps_per_second": 1.947, "step": 445000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7507, "step": 445100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7482, "step": 445200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7603, "step": 445300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7497, "step": 445400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.75, "step": 445500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7572, "step": 445600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7569, "step": 445700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7583, "step": 445800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7571, "step": 445900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7461, "step": 446000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7622, "step": 446100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7543, "step": 446200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7508, "step": 446300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.759, "step": 446400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7586, "step": 446500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7518, "step": 446600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7617, "step": 446700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7566, "step": 446800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7656, "step": 446900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7577, "step": 447000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7543, "step": 447100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7453, "step": 447200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.763, "step": 447300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7653, "step": 447400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7597, "step": 447500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7574, "step": 447600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7496, "step": 447700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7532, "step": 447800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7607, "step": 447900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7581, "step": 448000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7507, "step": 448100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7512, "step": 448200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.75, "step": 448300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7553, "step": 448400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7618, "step": 448500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7523, "step": 448600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7553, "step": 448700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7527, "step": 448800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7618, "step": 448900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.754, "step": 449000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7555, "step": 449100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.758, "step": 449200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7528, "step": 449300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7573, "step": 449400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7574, "step": 449500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.75, "step": 449600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7521, "step": 449700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7524, "step": 449800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7521, "step": 449900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7546, "step": 450000 }, { "epoch": 0.06, "eval_loss": 0.7122431993484497, "eval_runtime": 203.9361, "eval_samples_per_second": 245.175, "eval_steps_per_second": 1.917, "step": 450000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7601, "step": 450100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7584, "step": 450200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7548, "step": 450300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7633, "step": 450400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7519, "step": 450500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7528, "step": 450600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7573, "step": 450700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7509, "step": 450800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7639, "step": 450900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7537, "step": 451000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.749, "step": 451100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.771, "step": 451200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7631, "step": 451300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7563, "step": 451400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7605, "step": 451500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.758, "step": 451600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7613, "step": 451700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7508, "step": 451800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7527, "step": 451900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7603, "step": 452000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7561, "step": 452100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7544, "step": 452200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7545, "step": 452300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7637, "step": 452400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7542, "step": 452500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7577, "step": 452600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7612, "step": 452700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7545, "step": 452800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7505, "step": 452900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7494, "step": 453000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7501, "step": 453100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7546, "step": 453200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7632, "step": 453300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7535, "step": 453400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7535, "step": 453500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7521, "step": 453600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7564, "step": 453700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7503, "step": 453800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7522, "step": 453900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7595, "step": 454000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7481, "step": 454100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7554, "step": 454200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7486, "step": 454300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7514, "step": 454400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7492, "step": 454500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7569, "step": 454600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.756, "step": 454700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7603, "step": 454800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7499, "step": 454900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7464, "step": 455000 }, { "epoch": 0.06, "eval_loss": 0.7123976349830627, "eval_runtime": 206.7053, "eval_samples_per_second": 241.89, "eval_steps_per_second": 1.892, "step": 455000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7556, "step": 455100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7565, "step": 455200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7551, "step": 455300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7507, "step": 455400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7529, "step": 455500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7633, "step": 455600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7603, "step": 455700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7516, "step": 455800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.763, "step": 455900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7536, "step": 456000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7594, "step": 456100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7548, "step": 456200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.754, "step": 456300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7591, "step": 456400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7522, "step": 456500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7576, "step": 456600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7591, "step": 456700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7559, "step": 456800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7557, "step": 456900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7505, "step": 457000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7553, "step": 457100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7508, "step": 457200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7514, "step": 457300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7543, "step": 457400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7553, "step": 457500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7526, "step": 457600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7501, "step": 457700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7532, "step": 457800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7501, "step": 457900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.753, "step": 458000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7537, "step": 458100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7588, "step": 458200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7542, "step": 458300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.755, "step": 458400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7562, "step": 458500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7439, "step": 458600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7521, "step": 458700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7575, "step": 458800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7525, "step": 458900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7598, "step": 459000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7542, "step": 459100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7514, "step": 459200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7596, "step": 459300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7502, "step": 459400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7538, "step": 459500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7464, "step": 459600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7581, "step": 459700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7577, "step": 459800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7486, "step": 459900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7493, "step": 460000 }, { "epoch": 0.06, "eval_loss": 0.7095310688018799, "eval_runtime": 207.1686, "eval_samples_per_second": 241.349, "eval_steps_per_second": 1.887, "step": 460000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7572, "step": 460100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7611, "step": 460200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7505, "step": 460300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7565, "step": 460400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7591, "step": 460500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7493, "step": 460600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7562, "step": 460700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7679, "step": 460800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7555, "step": 460900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7552, "step": 461000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7614, "step": 461100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7554, "step": 461200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7523, "step": 461300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.754, "step": 461400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7564, "step": 461500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7558, "step": 461600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7565, "step": 461700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7502, "step": 461800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7504, "step": 461900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7574, "step": 462000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7613, "step": 462100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.758, "step": 462200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7599, "step": 462300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7551, "step": 462400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7567, "step": 462500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7559, "step": 462600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7523, "step": 462700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7473, "step": 462800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7597, "step": 462900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7582, "step": 463000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.753, "step": 463100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7486, "step": 463200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7609, "step": 463300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7681, "step": 463400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.754, "step": 463500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7519, "step": 463600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.761, "step": 463700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7592, "step": 463800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7665, "step": 463900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7545, "step": 464000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7569, "step": 464100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.754, "step": 464200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7521, "step": 464300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7483, "step": 464400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7527, "step": 464500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7509, "step": 464600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7532, "step": 464700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7572, "step": 464800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7568, "step": 464900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7604, "step": 465000 }, { "epoch": 0.06, "eval_loss": 0.7111442685127258, "eval_runtime": 204.3031, "eval_samples_per_second": 244.734, "eval_steps_per_second": 1.914, "step": 465000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7524, "step": 465100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7548, "step": 465200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7633, "step": 465300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7486, "step": 465400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7549, "step": 465500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7585, "step": 465600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7517, "step": 465700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7608, "step": 465800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.762, "step": 465900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7539, "step": 466000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7556, "step": 466100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7541, "step": 466200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7579, "step": 466300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7524, "step": 466400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7547, "step": 466500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7541, "step": 466600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7515, "step": 466700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7546, "step": 466800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7509, "step": 466900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7571, "step": 467000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7553, "step": 467100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7563, "step": 467200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7601, "step": 467300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7526, "step": 467400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7565, "step": 467500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7569, "step": 467600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7542, "step": 467700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7524, "step": 467800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7597, "step": 467900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7566, "step": 468000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7504, "step": 468100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7567, "step": 468200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7554, "step": 468300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7563, "step": 468400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7516, "step": 468500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7551, "step": 468600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7518, "step": 468700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.75, "step": 468800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7579, "step": 468900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7501, "step": 469000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7519, "step": 469100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7582, "step": 469200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7532, "step": 469300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7534, "step": 469400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7532, "step": 469500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.756, "step": 469600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7563, "step": 469700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7569, "step": 469800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7504, "step": 469900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7594, "step": 470000 }, { "epoch": 0.06, "eval_loss": 0.7078810334205627, "eval_runtime": 207.1667, "eval_samples_per_second": 241.352, "eval_steps_per_second": 1.887, "step": 470000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7621, "step": 470100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7545, "step": 470200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7572, "step": 470300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7501, "step": 470400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7478, "step": 470500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7577, "step": 470600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7579, "step": 470700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7577, "step": 470800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7561, "step": 470900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7599, "step": 471000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7542, "step": 471100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7561, "step": 471200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.757, "step": 471300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7572, "step": 471400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7473, "step": 471500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7509, "step": 471600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7522, "step": 471700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7498, "step": 471800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7531, "step": 471900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7522, "step": 472000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7492, "step": 472100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7532, "step": 472200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7582, "step": 472300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7561, "step": 472400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7558, "step": 472500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7532, "step": 472600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7639, "step": 472700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7627, "step": 472800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7615, "step": 472900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7537, "step": 473000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7484, "step": 473100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7521, "step": 473200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7587, "step": 473300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7514, "step": 473400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7536, "step": 473500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7589, "step": 473600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7513, "step": 473700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.749, "step": 473800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.755, "step": 473900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7502, "step": 474000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7524, "step": 474100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7487, "step": 474200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7487, "step": 474300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7482, "step": 474400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7557, "step": 474500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7482, "step": 474600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7633, "step": 474700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7523, "step": 474800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7531, "step": 474900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7497, "step": 475000 }, { "epoch": 0.06, "eval_loss": 0.7096142768859863, "eval_runtime": 204.9206, "eval_samples_per_second": 243.997, "eval_steps_per_second": 1.908, "step": 475000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.755, "step": 475100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7581, "step": 475200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7555, "step": 475300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7523, "step": 475400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7536, "step": 475500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7542, "step": 475600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7584, "step": 475700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7605, "step": 475800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7558, "step": 475900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7535, "step": 476000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7517, "step": 476100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7554, "step": 476200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7601, "step": 476300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7551, "step": 476400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7452, "step": 476500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7535, "step": 476600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7648, "step": 476700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7567, "step": 476800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7524, "step": 476900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7525, "step": 477000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7586, "step": 477100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7579, "step": 477200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.756, "step": 477300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.755, "step": 477400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7458, "step": 477500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7556, "step": 477600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.759, "step": 477700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7605, "step": 477800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7446, "step": 477900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7582, "step": 478000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7573, "step": 478100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7519, "step": 478200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.763, "step": 478300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7517, "step": 478400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7571, "step": 478500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7518, "step": 478600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7499, "step": 478700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7508, "step": 478800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7551, "step": 478900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7527, "step": 479000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7603, "step": 479100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7546, "step": 479200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7511, "step": 479300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7577, "step": 479400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7544, "step": 479500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7577, "step": 479600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7601, "step": 479700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7576, "step": 479800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7564, "step": 479900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7574, "step": 480000 }, { "epoch": 0.06, "eval_loss": 0.7097885012626648, "eval_runtime": 206.2654, "eval_samples_per_second": 242.406, "eval_steps_per_second": 1.896, "step": 480000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7484, "step": 480100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7632, "step": 480200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7471, "step": 480300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7524, "step": 480400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.759, "step": 480500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.757, "step": 480600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7551, "step": 480700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.757, "step": 480800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7576, "step": 480900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7572, "step": 481000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7605, "step": 481100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7563, "step": 481200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7518, "step": 481300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7527, "step": 481400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7486, "step": 481500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7611, "step": 481600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.755, "step": 481700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7498, "step": 481800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7588, "step": 481900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.75, "step": 482000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7537, "step": 482100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7531, "step": 482200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7565, "step": 482300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7632, "step": 482400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7575, "step": 482500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7501, "step": 482600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7613, "step": 482700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7469, "step": 482800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7478, "step": 482900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7483, "step": 483000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7671, "step": 483100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7607, "step": 483200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.757, "step": 483300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7576, "step": 483400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.749, "step": 483500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7655, "step": 483600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7532, "step": 483700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7516, "step": 483800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7553, "step": 483900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7561, "step": 484000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7547, "step": 484100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7488, "step": 484200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7495, "step": 484300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7575, "step": 484400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7566, "step": 484500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7506, "step": 484600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7532, "step": 484700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7634, "step": 484800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7581, "step": 484900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7614, "step": 485000 }, { "epoch": 0.06, "eval_loss": 0.7099955677986145, "eval_runtime": 205.5411, "eval_samples_per_second": 243.26, "eval_steps_per_second": 1.902, "step": 485000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7562, "step": 485100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7537, "step": 485200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7601, "step": 485300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7528, "step": 485400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7568, "step": 485500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7549, "step": 485600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7552, "step": 485700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7549, "step": 485800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7541, "step": 485900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7506, "step": 486000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7495, "step": 486100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7509, "step": 486200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7629, "step": 486300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.754, "step": 486400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7473, "step": 486500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7577, "step": 486600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7444, "step": 486700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7576, "step": 486800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.751, "step": 486900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7501, "step": 487000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7528, "step": 487100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7544, "step": 487200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7576, "step": 487300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7503, "step": 487400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7503, "step": 487500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7562, "step": 487600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7558, "step": 487700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7514, "step": 487800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.753, "step": 487900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.763, "step": 488000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7503, "step": 488100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7504, "step": 488200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7616, "step": 488300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7531, "step": 488400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7542, "step": 488500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7491, "step": 488600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7598, "step": 488700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7572, "step": 488800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7559, "step": 488900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.752, "step": 489000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7522, "step": 489100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7615, "step": 489200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7554, "step": 489300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7546, "step": 489400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7541, "step": 489500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.751, "step": 489600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7542, "step": 489700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7501, "step": 489800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7628, "step": 489900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7532, "step": 490000 }, { "epoch": 0.06, "eval_loss": 0.7070909142494202, "eval_runtime": 202.7407, "eval_samples_per_second": 246.62, "eval_steps_per_second": 1.929, "step": 490000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7553, "step": 490100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7537, "step": 490200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7506, "step": 490300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7577, "step": 490400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.759, "step": 490500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.757, "step": 490600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7526, "step": 490700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7504, "step": 490800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7539, "step": 490900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7493, "step": 491000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7499, "step": 491100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7543, "step": 491200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7537, "step": 491300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7605, "step": 491400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7539, "step": 491500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7569, "step": 491600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7565, "step": 491700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7545, "step": 491800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7557, "step": 491900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7506, "step": 492000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7455, "step": 492100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7558, "step": 492200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7515, "step": 492300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7542, "step": 492400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7566, "step": 492500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.764, "step": 492600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7514, "step": 492700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7526, "step": 492800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7607, "step": 492900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7582, "step": 493000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7499, "step": 493100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7476, "step": 493200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7532, "step": 493300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7495, "step": 493400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7516, "step": 493500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7519, "step": 493600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7504, "step": 493700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7604, "step": 493800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7659, "step": 493900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7634, "step": 494000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7544, "step": 494100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.759, "step": 494200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7548, "step": 494300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7552, "step": 494400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7603, "step": 494500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7491, "step": 494600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7604, "step": 494700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7641, "step": 494800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7522, "step": 494900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.757, "step": 495000 }, { "epoch": 0.06, "eval_loss": 0.7100406885147095, "eval_runtime": 203.2704, "eval_samples_per_second": 245.978, "eval_steps_per_second": 1.924, "step": 495000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7614, "step": 495100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7603, "step": 495200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7505, "step": 495300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7522, "step": 495400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7668, "step": 495500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.764, "step": 495600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7613, "step": 495700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7431, "step": 495800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.758, "step": 495900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7526, "step": 496000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7512, "step": 496100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7551, "step": 496200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7526, "step": 496300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7574, "step": 496400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7586, "step": 496500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7588, "step": 496600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7622, "step": 496700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7525, "step": 496800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7578, "step": 496900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7463, "step": 497000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7556, "step": 497100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7579, "step": 497200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7543, "step": 497300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7643, "step": 497400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7584, "step": 497500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7516, "step": 497600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7525, "step": 497700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7608, "step": 497800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7506, "step": 497900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7628, "step": 498000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7529, "step": 498100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7502, "step": 498200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7571, "step": 498300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.754, "step": 498400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7561, "step": 498500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7532, "step": 498600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7561, "step": 498700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7528, "step": 498800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7491, "step": 498900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7533, "step": 499000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7538, "step": 499100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7533, "step": 499200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7542, "step": 499300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7581, "step": 499400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7484, "step": 499500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.754, "step": 499600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.751, "step": 499700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7556, "step": 499800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.758, "step": 499900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7668, "step": 500000 }, { "epoch": 0.06, "eval_loss": 0.7097185850143433, "eval_runtime": 205.5908, "eval_samples_per_second": 243.202, "eval_steps_per_second": 1.902, "step": 500000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7578, "step": 500100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7565, "step": 500200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7536, "step": 500300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.758, "step": 500400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7436, "step": 500500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7531, "step": 500600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7543, "step": 500700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7558, "step": 500800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7553, "step": 500900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7533, "step": 501000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7529, "step": 501100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.756, "step": 501200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7516, "step": 501300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7567, "step": 501400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7543, "step": 501500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7526, "step": 501600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7532, "step": 501700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7474, "step": 501800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7578, "step": 501900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.748, "step": 502000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7532, "step": 502100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7455, "step": 502200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7496, "step": 502300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.758, "step": 502400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.754, "step": 502500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7494, "step": 502600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.749, "step": 502700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7578, "step": 502800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7544, "step": 502900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7616, "step": 503000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7589, "step": 503100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7474, "step": 503200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.754, "step": 503300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7578, "step": 503400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7536, "step": 503500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7603, "step": 503600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7593, "step": 503700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7582, "step": 503800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7582, "step": 503900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.762, "step": 504000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7479, "step": 504100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7551, "step": 504200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7561, "step": 504300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7524, "step": 504400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7603, "step": 504500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7489, "step": 504600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7513, "step": 504700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7542, "step": 504800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7539, "step": 504900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7528, "step": 505000 }, { "epoch": 0.06, "eval_loss": 0.7079646587371826, "eval_runtime": 203.5169, "eval_samples_per_second": 245.68, "eval_steps_per_second": 1.921, "step": 505000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7546, "step": 505100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7527, "step": 505200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.744, "step": 505300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7543, "step": 505400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7564, "step": 505500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7563, "step": 505600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7491, "step": 505700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7552, "step": 505800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7593, "step": 505900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7542, "step": 506000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7565, "step": 506100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7463, "step": 506200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7438, "step": 506300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7563, "step": 506400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7444, "step": 506500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7559, "step": 506600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7561, "step": 506700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.756, "step": 506800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7622, "step": 506900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7568, "step": 507000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7446, "step": 507100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7589, "step": 507200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7574, "step": 507300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7526, "step": 507400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7545, "step": 507500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7511, "step": 507600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7579, "step": 507700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7557, "step": 507800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7484, "step": 507900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7529, "step": 508000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7567, "step": 508100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7508, "step": 508200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7467, "step": 508300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7573, "step": 508400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.755, "step": 508500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7558, "step": 508600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7548, "step": 508700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7548, "step": 508800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7601, "step": 508900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7537, "step": 509000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7499, "step": 509100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7511, "step": 509200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7498, "step": 509300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7599, "step": 509400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7513, "step": 509500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7504, "step": 509600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7534, "step": 509700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7598, "step": 509800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7569, "step": 509900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7575, "step": 510000 }, { "epoch": 0.06, "eval_loss": 0.707108199596405, "eval_runtime": 206.3893, "eval_samples_per_second": 242.261, "eval_steps_per_second": 1.894, "step": 510000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7552, "step": 510100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7506, "step": 510200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7445, "step": 510300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.757, "step": 510400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7564, "step": 510500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7539, "step": 510600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7538, "step": 510700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7511, "step": 510800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7482, "step": 510900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7531, "step": 511000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7516, "step": 511100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7536, "step": 511200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7541, "step": 511300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7569, "step": 511400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7516, "step": 511500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.765, "step": 511600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7605, "step": 511700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7531, "step": 511800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.758, "step": 511900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7588, "step": 512000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7615, "step": 512100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7611, "step": 512200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7571, "step": 512300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7484, "step": 512400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7595, "step": 512500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7527, "step": 512600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.758, "step": 512700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7584, "step": 512800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7552, "step": 512900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7585, "step": 513000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7538, "step": 513100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7579, "step": 513200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7549, "step": 513300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7574, "step": 513400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7662, "step": 513500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7461, "step": 513600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7531, "step": 513700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7455, "step": 513800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.758, "step": 513900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7445, "step": 514000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7555, "step": 514100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7506, "step": 514200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7572, "step": 514300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7564, "step": 514400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7575, "step": 514500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7506, "step": 514600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7551, "step": 514700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.757, "step": 514800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7523, "step": 514900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7527, "step": 515000 }, { "epoch": 0.06, "eval_loss": 0.7088050842285156, "eval_runtime": 207.7635, "eval_samples_per_second": 240.658, "eval_steps_per_second": 1.882, "step": 515000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7485, "step": 515100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7511, "step": 515200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7461, "step": 515300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7516, "step": 515400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7602, "step": 515500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7473, "step": 515600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7525, "step": 515700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7526, "step": 515800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7538, "step": 515900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7562, "step": 516000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7503, "step": 516100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7542, "step": 516200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7523, "step": 516300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7597, "step": 516400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7583, "step": 516500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7575, "step": 516600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7468, "step": 516700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.754, "step": 516800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7534, "step": 516900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7596, "step": 517000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7555, "step": 517100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.749, "step": 517200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7502, "step": 517300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.76, "step": 517400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7523, "step": 517500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7569, "step": 517600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7592, "step": 517700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7494, "step": 517800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7497, "step": 517900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7531, "step": 518000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7459, "step": 518100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.75, "step": 518200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7597, "step": 518300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7606, "step": 518400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7465, "step": 518500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7562, "step": 518600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7499, "step": 518700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.757, "step": 518800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7521, "step": 518900 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7518, "step": 519000 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7455, "step": 519100 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.756, "step": 519200 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7495, "step": 519300 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.743, "step": 519400 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7508, "step": 519500 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7598, "step": 519600 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7581, "step": 519700 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7478, "step": 519800 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.7573, "step": 519900 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7521, "step": 520000 }, { "epoch": 0.07, "eval_loss": 0.7072643637657166, "eval_runtime": 206.4566, "eval_samples_per_second": 242.182, "eval_steps_per_second": 1.894, "step": 520000 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7476, "step": 520100 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7629, "step": 520200 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7535, "step": 520300 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7528, "step": 520400 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7505, "step": 520500 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7567, "step": 520600 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7473, "step": 520700 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7501, "step": 520800 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7511, "step": 520900 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7573, "step": 521000 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.746, "step": 521100 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7538, "step": 521200 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7522, "step": 521300 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7507, "step": 521400 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7541, "step": 521500 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7555, "step": 521600 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7416, "step": 521700 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7478, "step": 521800 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7479, "step": 521900 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7464, "step": 522000 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7477, "step": 522100 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7538, "step": 522200 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7551, "step": 522300 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7478, "step": 522400 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7569, "step": 522500 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7491, "step": 522600 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7581, "step": 522700 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7524, "step": 522800 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7487, "step": 522900 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7486, "step": 523000 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7496, "step": 523100 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7561, "step": 523200 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7522, "step": 523300 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7623, "step": 523400 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7517, "step": 523500 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7615, "step": 523600 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7507, "step": 523700 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7533, "step": 523800 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7465, "step": 523900 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7585, "step": 524000 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7574, "step": 524100 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7582, "step": 524200 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7619, "step": 524300 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7561, "step": 524400 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7469, "step": 524500 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7489, "step": 524600 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7526, "step": 524700 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.75, "step": 524800 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7557, "step": 524900 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7494, "step": 525000 }, { "epoch": 0.07, "eval_loss": 0.706498384475708, "eval_runtime": 208.8156, "eval_samples_per_second": 239.446, "eval_steps_per_second": 1.872, "step": 525000 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7474, "step": 525100 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7493, "step": 525200 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7535, "step": 525300 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.751, "step": 525400 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7543, "step": 525500 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7433, "step": 525600 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7511, "step": 525700 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7485, "step": 525800 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7573, "step": 525900 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7564, "step": 526000 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7534, "step": 526100 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7539, "step": 526200 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7527, "step": 526300 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7571, "step": 526400 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7552, "step": 526500 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7569, "step": 526600 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7477, "step": 526700 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7505, "step": 526800 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7599, "step": 526900 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7496, "step": 527000 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7578, "step": 527100 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7508, "step": 527200 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7483, "step": 527300 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7543, "step": 527400 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7522, "step": 527500 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7523, "step": 527600 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.742, "step": 527700 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7558, "step": 527800 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7504, "step": 527900 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7576, "step": 528000 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7549, "step": 528100 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7581, "step": 528200 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7525, "step": 528300 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7549, "step": 528400 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7521, "step": 528500 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7517, "step": 528600 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7549, "step": 528700 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7494, "step": 528800 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7505, "step": 528900 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7503, "step": 529000 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7567, "step": 529100 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7482, "step": 529200 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7505, "step": 529300 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7531, "step": 529400 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7607, "step": 529500 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7541, "step": 529600 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7555, "step": 529700 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7515, "step": 529800 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7565, "step": 529900 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7522, "step": 530000 }, { "epoch": 0.07, "eval_loss": 0.7084597945213318, "eval_runtime": 205.2857, "eval_samples_per_second": 243.563, "eval_steps_per_second": 1.905, "step": 530000 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7481, "step": 530100 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7557, "step": 530200 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7515, "step": 530300 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.751, "step": 530400 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7498, "step": 530500 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7555, "step": 530600 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7553, "step": 530700 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7505, "step": 530800 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7495, "step": 530900 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7585, "step": 531000 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7493, "step": 531100 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7552, "step": 531200 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7413, "step": 531300 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7493, "step": 531400 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7562, "step": 531500 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.744, "step": 531600 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7537, "step": 531700 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7535, "step": 531800 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7564, "step": 531900 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7504, "step": 532000 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.761, "step": 532100 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7515, "step": 532200 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7482, "step": 532300 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7476, "step": 532400 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7517, "step": 532500 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7565, "step": 532600 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7602, "step": 532700 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7495, "step": 532800 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7615, "step": 532900 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7532, "step": 533000 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.761, "step": 533100 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7517, "step": 533200 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7591, "step": 533300 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7556, "step": 533400 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7514, "step": 533500 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7512, "step": 533600 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7504, "step": 533700 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7531, "step": 533800 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.754, "step": 533900 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7506, "step": 534000 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7507, "step": 534100 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7531, "step": 534200 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7531, "step": 534300 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7519, "step": 534400 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7578, "step": 534500 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7498, "step": 534600 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7485, "step": 534700 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7483, "step": 534800 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7535, "step": 534900 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7452, "step": 535000 }, { "epoch": 0.07, "eval_loss": 0.7075052857398987, "eval_runtime": 208.0268, "eval_samples_per_second": 240.354, "eval_steps_per_second": 1.88, "step": 535000 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7579, "step": 535100 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7543, "step": 535200 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7571, "step": 535300 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7514, "step": 535400 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7505, "step": 535500 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7508, "step": 535600 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7476, "step": 535700 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7521, "step": 535800 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7483, "step": 535900 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7529, "step": 536000 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7501, "step": 536100 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7511, "step": 536200 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7558, "step": 536300 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7546, "step": 536400 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7537, "step": 536500 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7491, "step": 536600 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7581, "step": 536700 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.749, "step": 536800 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7564, "step": 536900 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7527, "step": 537000 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7506, "step": 537100 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7543, "step": 537200 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7403, "step": 537300 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7558, "step": 537400 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7562, "step": 537500 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.75, "step": 537600 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7542, "step": 537700 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7548, "step": 537800 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.749, "step": 537900 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7538, "step": 538000 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7544, "step": 538100 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.757, "step": 538200 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7432, "step": 538300 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7521, "step": 538400 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.749, "step": 538500 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7547, "step": 538600 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7545, "step": 538700 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7504, "step": 538800 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7459, "step": 538900 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7503, "step": 539000 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7475, "step": 539100 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7574, "step": 539200 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7455, "step": 539300 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7572, "step": 539400 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7526, "step": 539500 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7501, "step": 539600 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7531, "step": 539700 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7574, "step": 539800 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7568, "step": 539900 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7577, "step": 540000 }, { "epoch": 0.07, "eval_loss": 0.7067614197731018, "eval_runtime": 204.8198, "eval_samples_per_second": 244.117, "eval_steps_per_second": 1.909, "step": 540000 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7481, "step": 540100 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7508, "step": 540200 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7509, "step": 540300 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7584, "step": 540400 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.751, "step": 540500 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7531, "step": 540600 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7496, "step": 540700 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7463, "step": 540800 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7565, "step": 540900 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7506, "step": 541000 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7485, "step": 541100 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.748, "step": 541200 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7518, "step": 541300 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7444, "step": 541400 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.757, "step": 541500 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7464, "step": 541600 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7559, "step": 541700 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7562, "step": 541800 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7577, "step": 541900 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7519, "step": 542000 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7498, "step": 542100 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7501, "step": 542200 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.751, "step": 542300 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.75, "step": 542400 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7574, "step": 542500 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.748, "step": 542600 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7465, "step": 542700 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7543, "step": 542800 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7465, "step": 542900 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7578, "step": 543000 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7506, "step": 543100 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7548, "step": 543200 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7593, "step": 543300 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7523, "step": 543400 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7452, "step": 543500 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7548, "step": 543600 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7448, "step": 543700 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7555, "step": 543800 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.752, "step": 543900 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7504, "step": 544000 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7531, "step": 544100 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.751, "step": 544200 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7529, "step": 544300 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7512, "step": 544400 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7545, "step": 544500 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7425, "step": 544600 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7541, "step": 544700 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7477, "step": 544800 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7497, "step": 544900 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7554, "step": 545000 }, { "epoch": 0.07, "eval_loss": 0.7069408297538757, "eval_runtime": 210.1496, "eval_samples_per_second": 237.926, "eval_steps_per_second": 1.861, "step": 545000 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7463, "step": 545100 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7504, "step": 545200 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7525, "step": 545300 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7485, "step": 545400 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7466, "step": 545500 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.752, "step": 545600 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7528, "step": 545700 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7473, "step": 545800 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7557, "step": 545900 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7589, "step": 546000 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7464, "step": 546100 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7515, "step": 546200 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7553, "step": 546300 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7522, "step": 546400 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7566, "step": 546500 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7474, "step": 546600 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7534, "step": 546700 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7539, "step": 546800 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.754, "step": 546900 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7551, "step": 547000 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7536, "step": 547100 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7494, "step": 547200 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7497, "step": 547300 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7462, "step": 547400 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7535, "step": 547500 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7512, "step": 547600 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.747, "step": 547700 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7616, "step": 547800 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7534, "step": 547900 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7509, "step": 548000 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7483, "step": 548100 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7558, "step": 548200 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7515, "step": 548300 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.749, "step": 548400 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.7517, "step": 548500 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7527, "step": 548600 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7513, "step": 548700 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7481, "step": 548800 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7568, "step": 548900 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.757, "step": 549000 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7548, "step": 549100 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7437, "step": 549200 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7462, "step": 549300 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7477, "step": 549400 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7556, "step": 549500 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7475, "step": 549600 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7486, "step": 549700 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7539, "step": 549800 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.736, "step": 549900 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7538, "step": 550000 }, { "epoch": 1.0, "eval_loss": 0.7065879106521606, "eval_runtime": 206.6566, "eval_samples_per_second": 241.947, "eval_steps_per_second": 1.892, "step": 550000 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7471, "step": 550100 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.752, "step": 550200 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7536, "step": 550300 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7484, "step": 550400 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.754, "step": 550500 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7556, "step": 550600 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7546, "step": 550700 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7466, "step": 550800 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7483, "step": 550900 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7544, "step": 551000 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.755, "step": 551100 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7501, "step": 551200 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7534, "step": 551300 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7565, "step": 551400 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7518, "step": 551500 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7415, "step": 551600 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7463, "step": 551700 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7486, "step": 551800 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7536, "step": 551900 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.748, "step": 552000 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7527, "step": 552100 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7514, "step": 552200 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7508, "step": 552300 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.753, "step": 552400 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7492, "step": 552500 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7538, "step": 552600 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7479, "step": 552700 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7424, "step": 552800 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7471, "step": 552900 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7484, "step": 553000 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7497, "step": 553100 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7502, "step": 553200 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7512, "step": 553300 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7479, "step": 553400 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7545, "step": 553500 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7538, "step": 553600 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7457, "step": 553700 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.748, "step": 553800 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7542, "step": 553900 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7522, "step": 554000 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7511, "step": 554100 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7513, "step": 554200 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7507, "step": 554300 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.749, "step": 554400 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7525, "step": 554500 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.745, "step": 554600 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7477, "step": 554700 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7431, "step": 554800 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7484, "step": 554900 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7504, "step": 555000 }, { "epoch": 1.0, "eval_loss": 0.7056695222854614, "eval_runtime": 207.0507, "eval_samples_per_second": 241.487, "eval_steps_per_second": 1.888, "step": 555000 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7544, "step": 555100 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7493, "step": 555200 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7499, "step": 555300 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7516, "step": 555400 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7491, "step": 555500 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7498, "step": 555600 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.754, "step": 555700 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7491, "step": 555800 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7598, "step": 555900 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7441, "step": 556000 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7517, "step": 556100 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7554, "step": 556200 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7586, "step": 556300 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7444, "step": 556400 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7507, "step": 556500 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7623, "step": 556600 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7556, "step": 556700 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7477, "step": 556800 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7524, "step": 556900 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7514, "step": 557000 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7515, "step": 557100 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7526, "step": 557200 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.75, "step": 557300 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7456, "step": 557400 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.744, "step": 557500 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.753, "step": 557600 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7499, "step": 557700 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7545, "step": 557800 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7474, "step": 557900 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7564, "step": 558000 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7557, "step": 558100 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7493, "step": 558200 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7555, "step": 558300 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.756, "step": 558400 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7533, "step": 558500 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7498, "step": 558600 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7491, "step": 558700 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.749, "step": 558800 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7558, "step": 558900 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7496, "step": 559000 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7573, "step": 559100 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7554, "step": 559200 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.747, "step": 559300 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7582, "step": 559400 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7483, "step": 559500 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7481, "step": 559600 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7517, "step": 559700 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7512, "step": 559800 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7464, "step": 559900 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7515, "step": 560000 }, { "epoch": 1.0, "eval_loss": 0.7081322073936462, "eval_runtime": 206.8909, "eval_samples_per_second": 241.673, "eval_steps_per_second": 1.89, "step": 560000 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7498, "step": 560100 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7554, "step": 560200 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7475, "step": 560300 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7534, "step": 560400 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7576, "step": 560500 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7497, "step": 560600 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7492, "step": 560700 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.748, "step": 560800 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7494, "step": 560900 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7481, "step": 561000 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7451, "step": 561100 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7497, "step": 561200 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7522, "step": 561300 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7512, "step": 561400 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7482, "step": 561500 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7471, "step": 561600 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7563, "step": 561700 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7522, "step": 561800 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7495, "step": 561900 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7542, "step": 562000 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7539, "step": 562100 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7527, "step": 562200 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7526, "step": 562300 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7533, "step": 562400 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7538, "step": 562500 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7517, "step": 562600 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7531, "step": 562700 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7569, "step": 562800 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.752, "step": 562900 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7542, "step": 563000 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.751, "step": 563100 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7579, "step": 563200 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7539, "step": 563300 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7605, "step": 563400 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7516, "step": 563500 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7507, "step": 563600 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7557, "step": 563700 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7529, "step": 563800 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7468, "step": 563900 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7533, "step": 564000 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7473, "step": 564100 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7448, "step": 564200 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.744, "step": 564300 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7534, "step": 564400 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7539, "step": 564500 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7501, "step": 564600 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.748, "step": 564700 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7456, "step": 564800 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7492, "step": 564900 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.751, "step": 565000 }, { "epoch": 1.0, "eval_loss": 0.7076017260551453, "eval_runtime": 205.7703, "eval_samples_per_second": 242.989, "eval_steps_per_second": 1.9, "step": 565000 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7521, "step": 565100 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7523, "step": 565200 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7549, "step": 565300 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7541, "step": 565400 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7489, "step": 565500 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7518, "step": 565600 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7514, "step": 565700 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7469, "step": 565800 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7511, "step": 565900 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7509, "step": 566000 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7507, "step": 566100 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7457, "step": 566200 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7483, "step": 566300 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.757, "step": 566400 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7373, "step": 566500 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7527, "step": 566600 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7454, "step": 566700 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7581, "step": 566800 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7448, "step": 566900 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.746, "step": 567000 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7542, "step": 567100 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7456, "step": 567200 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7546, "step": 567300 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7481, "step": 567400 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7573, "step": 567500 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7544, "step": 567600 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7568, "step": 567700 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7568, "step": 567800 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7545, "step": 567900 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7516, "step": 568000 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7508, "step": 568100 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7515, "step": 568200 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7465, "step": 568300 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7445, "step": 568400 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7479, "step": 568500 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7424, "step": 568600 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.758, "step": 568700 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7569, "step": 568800 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7576, "step": 568900 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7513, "step": 569000 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7489, "step": 569100 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7515, "step": 569200 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7624, "step": 569300 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7411, "step": 569400 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7461, "step": 569500 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7484, "step": 569600 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.75, "step": 569700 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7538, "step": 569800 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7449, "step": 569900 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7449, "step": 570000 }, { "epoch": 1.0, "eval_loss": 0.7060913443565369, "eval_runtime": 203.6261, "eval_samples_per_second": 245.548, "eval_steps_per_second": 1.92, "step": 570000 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7523, "step": 570100 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7506, "step": 570200 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7501, "step": 570300 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7496, "step": 570400 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7529, "step": 570500 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.753, "step": 570600 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7474, "step": 570700 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7472, "step": 570800 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7534, "step": 570900 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.753, "step": 571000 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7518, "step": 571100 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7477, "step": 571200 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7521, "step": 571300 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7466, "step": 571400 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7451, "step": 571500 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7553, "step": 571600 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7623, "step": 571700 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7601, "step": 571800 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7516, "step": 571900 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7411, "step": 572000 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7535, "step": 572100 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.757, "step": 572200 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7578, "step": 572300 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7557, "step": 572400 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7516, "step": 572500 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7552, "step": 572600 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7534, "step": 572700 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7525, "step": 572800 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.749, "step": 572900 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7517, "step": 573000 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7547, "step": 573100 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.753, "step": 573200 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7495, "step": 573300 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7558, "step": 573400 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7572, "step": 573500 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.751, "step": 573600 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7548, "step": 573700 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7624, "step": 573800 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7528, "step": 573900 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7514, "step": 574000 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7481, "step": 574100 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7404, "step": 574200 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7498, "step": 574300 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7517, "step": 574400 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7499, "step": 574500 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7486, "step": 574600 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7454, "step": 574700 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7504, "step": 574800 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7479, "step": 574900 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7527, "step": 575000 }, { "epoch": 1.0, "eval_loss": 0.706642746925354, "eval_runtime": 208.8939, "eval_samples_per_second": 239.356, "eval_steps_per_second": 1.872, "step": 575000 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7403, "step": 575100 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7485, "step": 575200 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7529, "step": 575300 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7499, "step": 575400 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7438, "step": 575500 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7522, "step": 575600 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7504, "step": 575700 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7542, "step": 575800 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7492, "step": 575900 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7586, "step": 576000 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7452, "step": 576100 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7586, "step": 576200 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7534, "step": 576300 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7487, "step": 576400 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7466, "step": 576500 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7493, "step": 576600 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7523, "step": 576700 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7576, "step": 576800 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7491, "step": 576900 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.75, "step": 577000 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7532, "step": 577100 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.752, "step": 577200 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7489, "step": 577300 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7448, "step": 577400 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7424, "step": 577500 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7511, "step": 577600 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7458, "step": 577700 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7559, "step": 577800 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7532, "step": 577900 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7484, "step": 578000 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7601, "step": 578100 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.75, "step": 578200 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.751, "step": 578300 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7512, "step": 578400 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7536, "step": 578500 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7517, "step": 578600 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.75, "step": 578700 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7525, "step": 578800 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7451, "step": 578900 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7539, "step": 579000 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.751, "step": 579100 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7529, "step": 579200 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7585, "step": 579300 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.747, "step": 579400 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7585, "step": 579500 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7515, "step": 579600 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.747, "step": 579700 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7517, "step": 579800 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7525, "step": 579900 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7472, "step": 580000 }, { "epoch": 1.0, "eval_loss": 0.7070211172103882, "eval_runtime": 209.8407, "eval_samples_per_second": 238.276, "eval_steps_per_second": 1.863, "step": 580000 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7508, "step": 580100 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7495, "step": 580200 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7487, "step": 580300 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.749, "step": 580400 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7535, "step": 580500 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7514, "step": 580600 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7508, "step": 580700 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.743, "step": 580800 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7519, "step": 580900 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7541, "step": 581000 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.751, "step": 581100 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7439, "step": 581200 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7474, "step": 581300 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7516, "step": 581400 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7519, "step": 581500 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.754, "step": 581600 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7522, "step": 581700 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7534, "step": 581800 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7541, "step": 581900 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.751, "step": 582000 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7499, "step": 582100 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7428, "step": 582200 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7446, "step": 582300 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7489, "step": 582400 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.757, "step": 582500 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7524, "step": 582600 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7551, "step": 582700 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7472, "step": 582800 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7513, "step": 582900 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7523, "step": 583000 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7539, "step": 583100 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7519, "step": 583200 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.748, "step": 583300 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7541, "step": 583400 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7525, "step": 583500 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7595, "step": 583600 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7494, "step": 583700 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7577, "step": 583800 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7517, "step": 583900 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.751, "step": 584000 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7548, "step": 584100 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7503, "step": 584200 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.754, "step": 584300 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7446, "step": 584400 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7435, "step": 584500 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7576, "step": 584600 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7559, "step": 584700 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7496, "step": 584800 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7582, "step": 584900 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7552, "step": 585000 }, { "epoch": 1.0, "eval_loss": 0.7080877423286438, "eval_runtime": 206.6514, "eval_samples_per_second": 241.953, "eval_steps_per_second": 1.892, "step": 585000 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.756, "step": 585100 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7493, "step": 585200 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.754, "step": 585300 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7527, "step": 585400 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7462, "step": 585500 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.741, "step": 585600 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7516, "step": 585700 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7477, "step": 585800 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7558, "step": 585900 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7535, "step": 586000 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7488, "step": 586100 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7466, "step": 586200 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.753, "step": 586300 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7472, "step": 586400 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7603, "step": 586500 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7537, "step": 586600 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7422, "step": 586700 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7482, "step": 586800 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.748, "step": 586900 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7561, "step": 587000 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7481, "step": 587100 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7512, "step": 587200 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7469, "step": 587300 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7455, "step": 587400 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7559, "step": 587500 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7425, "step": 587600 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7608, "step": 587700 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7604, "step": 587800 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7451, "step": 587900 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7523, "step": 588000 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7458, "step": 588100 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7497, "step": 588200 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7504, "step": 588300 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7471, "step": 588400 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.7524, "step": 588500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7502, "step": 588600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7462, "step": 588700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7432, "step": 588800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7536, "step": 588900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.747, "step": 589000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7535, "step": 589100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7546, "step": 589200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7439, "step": 589300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7576, "step": 589400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7497, "step": 589500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7531, "step": 589600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7515, "step": 589700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7488, "step": 589800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7498, "step": 589900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7457, "step": 590000 }, { "epoch": 1.01, "eval_loss": 0.7071018218994141, "eval_runtime": 206.2108, "eval_samples_per_second": 242.47, "eval_steps_per_second": 1.896, "step": 590000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.746, "step": 590100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7486, "step": 590200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7495, "step": 590300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7531, "step": 590400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7535, "step": 590500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7473, "step": 590600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7541, "step": 590700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7565, "step": 590800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7512, "step": 590900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7491, "step": 591000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7529, "step": 591100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7504, "step": 591200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7488, "step": 591300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.75, "step": 591400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7543, "step": 591500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7453, "step": 591600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7474, "step": 591700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7467, "step": 591800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.75, "step": 591900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7498, "step": 592000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7503, "step": 592100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7557, "step": 592200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7489, "step": 592300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7572, "step": 592400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7504, "step": 592500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7502, "step": 592600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7505, "step": 592700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7495, "step": 592800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7521, "step": 592900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7454, "step": 593000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7552, "step": 593100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7561, "step": 593200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7609, "step": 593300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7519, "step": 593400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7521, "step": 593500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.748, "step": 593600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7438, "step": 593700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7561, "step": 593800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7512, "step": 593900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7506, "step": 594000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7422, "step": 594100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7483, "step": 594200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7583, "step": 594300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7539, "step": 594400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7409, "step": 594500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7452, "step": 594600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7476, "step": 594700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7447, "step": 594800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7539, "step": 594900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7546, "step": 595000 }, { "epoch": 1.01, "eval_loss": 0.7045419812202454, "eval_runtime": 208.0159, "eval_samples_per_second": 240.366, "eval_steps_per_second": 1.88, "step": 595000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7529, "step": 595100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7526, "step": 595200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7423, "step": 595300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7444, "step": 595400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7467, "step": 595500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7478, "step": 595600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7504, "step": 595700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.747, "step": 595800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7472, "step": 595900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7481, "step": 596000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7542, "step": 596100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7514, "step": 596200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.751, "step": 596300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.752, "step": 596400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7501, "step": 596500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7445, "step": 596600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7599, "step": 596700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.752, "step": 596800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7529, "step": 596900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7502, "step": 597000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7549, "step": 597100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7526, "step": 597200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7497, "step": 597300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.748, "step": 597400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.757, "step": 597500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7474, "step": 597600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7532, "step": 597700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7518, "step": 597800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7406, "step": 597900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7524, "step": 598000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7507, "step": 598100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7475, "step": 598200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7513, "step": 598300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7465, "step": 598400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7577, "step": 598500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7545, "step": 598600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7608, "step": 598700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.749, "step": 598800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.754, "step": 598900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7443, "step": 599000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7497, "step": 599100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7492, "step": 599200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7479, "step": 599300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7572, "step": 599400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7451, "step": 599500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7487, "step": 599600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7466, "step": 599700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7531, "step": 599800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7563, "step": 599900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7548, "step": 600000 }, { "epoch": 1.01, "eval_loss": 0.7058716416358948, "eval_runtime": 208.0063, "eval_samples_per_second": 240.377, "eval_steps_per_second": 1.88, "step": 600000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7497, "step": 600100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7461, "step": 600200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7455, "step": 600300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7523, "step": 600400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7519, "step": 600500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7519, "step": 600600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7513, "step": 600700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7481, "step": 600800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7524, "step": 600900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7484, "step": 601000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7561, "step": 601100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7521, "step": 601200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7447, "step": 601300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7536, "step": 601400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7453, "step": 601500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7542, "step": 601600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7552, "step": 601700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7562, "step": 601800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7398, "step": 601900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7484, "step": 602000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7499, "step": 602100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7484, "step": 602200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7508, "step": 602300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.746, "step": 602400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7612, "step": 602500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7452, "step": 602600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7479, "step": 602700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7522, "step": 602800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7444, "step": 602900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7527, "step": 603000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7461, "step": 603100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7485, "step": 603200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7471, "step": 603300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7507, "step": 603400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7501, "step": 603500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7454, "step": 603600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7544, "step": 603700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7515, "step": 603800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.747, "step": 603900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7485, "step": 604000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7502, "step": 604100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.742, "step": 604200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7452, "step": 604300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7482, "step": 604400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7547, "step": 604500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7557, "step": 604600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7567, "step": 604700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7561, "step": 604800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7571, "step": 604900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7534, "step": 605000 }, { "epoch": 1.01, "eval_loss": 0.7062010765075684, "eval_runtime": 208.4287, "eval_samples_per_second": 239.89, "eval_steps_per_second": 1.876, "step": 605000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7426, "step": 605100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7526, "step": 605200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7513, "step": 605300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7491, "step": 605400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7551, "step": 605500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7504, "step": 605600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7562, "step": 605700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7502, "step": 605800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7519, "step": 605900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7428, "step": 606000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7534, "step": 606100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7437, "step": 606200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7579, "step": 606300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7474, "step": 606400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7469, "step": 606500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7528, "step": 606600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7485, "step": 606700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7497, "step": 606800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7397, "step": 606900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7453, "step": 607000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7536, "step": 607100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7493, "step": 607200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7498, "step": 607300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7527, "step": 607400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7474, "step": 607500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7529, "step": 607600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7516, "step": 607700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7463, "step": 607800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7534, "step": 607900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7485, "step": 608000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7505, "step": 608100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7525, "step": 608200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7544, "step": 608300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7557, "step": 608400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7502, "step": 608500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7487, "step": 608600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7479, "step": 608700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7574, "step": 608800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7516, "step": 608900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7451, "step": 609000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7401, "step": 609100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7547, "step": 609200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7431, "step": 609300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7484, "step": 609400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7502, "step": 609500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7556, "step": 609600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7443, "step": 609700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7575, "step": 609800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7422, "step": 609900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7439, "step": 610000 }, { "epoch": 1.01, "eval_loss": 0.7046141624450684, "eval_runtime": 373.2611, "eval_samples_per_second": 133.954, "eval_steps_per_second": 1.048, "step": 610000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7474, "step": 610100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.745, "step": 610200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7486, "step": 610300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7512, "step": 610400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.753, "step": 610500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7563, "step": 610600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7489, "step": 610700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7528, "step": 610800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7554, "step": 610900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7509, "step": 611000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7524, "step": 611100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7452, "step": 611200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.752, "step": 611300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7526, "step": 611400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.746, "step": 611500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7519, "step": 611600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7543, "step": 611700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7486, "step": 611800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7459, "step": 611900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7546, "step": 612000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7559, "step": 612100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7576, "step": 612200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7448, "step": 612300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7552, "step": 612400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7458, "step": 612500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7468, "step": 612600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7431, "step": 612700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7474, "step": 612800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7586, "step": 612900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7521, "step": 613000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7496, "step": 613100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.756, "step": 613200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7486, "step": 613300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7491, "step": 613400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7561, "step": 613500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7432, "step": 613600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7555, "step": 613700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7532, "step": 613800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.749, "step": 613900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7481, "step": 614000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7515, "step": 614100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7529, "step": 614200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7548, "step": 614300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7517, "step": 614400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7496, "step": 614500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7481, "step": 614600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7553, "step": 614700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.75, "step": 614800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7523, "step": 614900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7561, "step": 615000 }, { "epoch": 1.01, "eval_loss": 0.7065125107765198, "eval_runtime": 214.9751, "eval_samples_per_second": 232.585, "eval_steps_per_second": 1.819, "step": 615000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7444, "step": 615100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7515, "step": 615200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7464, "step": 615300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7506, "step": 615400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7536, "step": 615500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7514, "step": 615600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.751, "step": 615700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.749, "step": 615800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7528, "step": 615900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7479, "step": 616000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7514, "step": 616100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7539, "step": 616200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7429, "step": 616300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.757, "step": 616400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7461, "step": 616500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7435, "step": 616600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7539, "step": 616700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7556, "step": 616800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7481, "step": 616900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7552, "step": 617000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7508, "step": 617100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7542, "step": 617200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7543, "step": 617300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7407, "step": 617400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7558, "step": 617500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7516, "step": 617600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7444, "step": 617700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7498, "step": 617800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7451, "step": 617900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7506, "step": 618000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7445, "step": 618100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7476, "step": 618200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7547, "step": 618300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7558, "step": 618400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7562, "step": 618500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7423, "step": 618600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.755, "step": 618700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7487, "step": 618800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7481, "step": 618900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7373, "step": 619000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.755, "step": 619100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7499, "step": 619200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7516, "step": 619300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7492, "step": 619400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7471, "step": 619500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7487, "step": 619600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7457, "step": 619700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7486, "step": 619800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7451, "step": 619900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7499, "step": 620000 }, { "epoch": 1.01, "eval_loss": 0.7037102580070496, "eval_runtime": 209.2906, "eval_samples_per_second": 238.902, "eval_steps_per_second": 1.868, "step": 620000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.751, "step": 620100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7465, "step": 620200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7468, "step": 620300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7526, "step": 620400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7438, "step": 620500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7465, "step": 620600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7511, "step": 620700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7506, "step": 620800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7546, "step": 620900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7497, "step": 621000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7466, "step": 621100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7447, "step": 621200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7476, "step": 621300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.751, "step": 621400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7475, "step": 621500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7477, "step": 621600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7462, "step": 621700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.754, "step": 621800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7541, "step": 621900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7568, "step": 622000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7493, "step": 622100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7536, "step": 622200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7504, "step": 622300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7541, "step": 622400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7378, "step": 622500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7438, "step": 622600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7463, "step": 622700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7499, "step": 622800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7531, "step": 622900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7487, "step": 623000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7577, "step": 623100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.756, "step": 623200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7581, "step": 623300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7468, "step": 623400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7511, "step": 623500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7493, "step": 623600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7485, "step": 623700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7548, "step": 623800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7426, "step": 623900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7519, "step": 624000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7447, "step": 624100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7472, "step": 624200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7415, "step": 624300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7477, "step": 624400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7485, "step": 624500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7448, "step": 624600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7535, "step": 624700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7469, "step": 624800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7452, "step": 624900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.756, "step": 625000 }, { "epoch": 1.01, "eval_loss": 0.7073270082473755, "eval_runtime": 206.4574, "eval_samples_per_second": 242.181, "eval_steps_per_second": 1.894, "step": 625000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7446, "step": 625100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7478, "step": 625200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7352, "step": 625300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7512, "step": 625400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7483, "step": 625500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7567, "step": 625600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7525, "step": 625700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7415, "step": 625800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7483, "step": 625900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7455, "step": 626000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7434, "step": 626100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7495, "step": 626200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7376, "step": 626300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7482, "step": 626400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7449, "step": 626500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7536, "step": 626600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7548, "step": 626700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7457, "step": 626800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7392, "step": 626900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.747, "step": 627000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7485, "step": 627100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7497, "step": 627200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7541, "step": 627300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7509, "step": 627400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7557, "step": 627500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7531, "step": 627600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7479, "step": 627700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7498, "step": 627800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7522, "step": 627900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.755, "step": 628000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.75, "step": 628100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7473, "step": 628200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7572, "step": 628300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7456, "step": 628400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7525, "step": 628500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7546, "step": 628600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7505, "step": 628700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7424, "step": 628800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7524, "step": 628900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7441, "step": 629000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7533, "step": 629100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.742, "step": 629200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.74, "step": 629300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7562, "step": 629400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.754, "step": 629500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7489, "step": 629600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7516, "step": 629700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.747, "step": 629800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7411, "step": 629900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7489, "step": 630000 }, { "epoch": 1.01, "eval_loss": 0.7038618922233582, "eval_runtime": 209.1827, "eval_samples_per_second": 239.025, "eval_steps_per_second": 1.869, "step": 630000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7476, "step": 630100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7389, "step": 630200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7459, "step": 630300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7471, "step": 630400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7438, "step": 630500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7518, "step": 630600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7394, "step": 630700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7537, "step": 630800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7576, "step": 630900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7484, "step": 631000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7543, "step": 631100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7439, "step": 631200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7559, "step": 631300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7468, "step": 631400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7566, "step": 631500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7428, "step": 631600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7503, "step": 631700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7559, "step": 631800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7455, "step": 631900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7574, "step": 632000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7469, "step": 632100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7569, "step": 632200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7503, "step": 632300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7474, "step": 632400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7502, "step": 632500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7453, "step": 632600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7489, "step": 632700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7448, "step": 632800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7471, "step": 632900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7487, "step": 633000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7498, "step": 633100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7478, "step": 633200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7484, "step": 633300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7477, "step": 633400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7425, "step": 633500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7538, "step": 633600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7581, "step": 633700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7483, "step": 633800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7436, "step": 633900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7487, "step": 634000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7506, "step": 634100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7543, "step": 634200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7464, "step": 634300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7526, "step": 634400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7488, "step": 634500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7503, "step": 634600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7444, "step": 634700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7578, "step": 634800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7458, "step": 634900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7517, "step": 635000 }, { "epoch": 1.01, "eval_loss": 0.7031733393669128, "eval_runtime": 207.7004, "eval_samples_per_second": 240.731, "eval_steps_per_second": 1.883, "step": 635000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7551, "step": 635100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7427, "step": 635200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7443, "step": 635300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7556, "step": 635400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7451, "step": 635500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7473, "step": 635600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7458, "step": 635700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.754, "step": 635800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7521, "step": 635900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7475, "step": 636000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7475, "step": 636100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7527, "step": 636200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7491, "step": 636300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7445, "step": 636400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7518, "step": 636500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7409, "step": 636600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7558, "step": 636700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7408, "step": 636800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7441, "step": 636900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7565, "step": 637000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7538, "step": 637100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7352, "step": 637200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7508, "step": 637300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7447, "step": 637400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7502, "step": 637500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7556, "step": 637600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7517, "step": 637700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7449, "step": 637800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7603, "step": 637900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7445, "step": 638000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7587, "step": 638100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7447, "step": 638200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7491, "step": 638300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.747, "step": 638400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.747, "step": 638500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7464, "step": 638600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7532, "step": 638700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7627, "step": 638800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7489, "step": 638900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.753, "step": 639000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7538, "step": 639100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7502, "step": 639200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7453, "step": 639300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7436, "step": 639400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7556, "step": 639500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7518, "step": 639600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7477, "step": 639700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7545, "step": 639800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7469, "step": 639900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.749, "step": 640000 }, { "epoch": 1.01, "eval_loss": 0.7051756381988525, "eval_runtime": 209.0555, "eval_samples_per_second": 239.171, "eval_steps_per_second": 1.87, "step": 640000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.754, "step": 640100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7523, "step": 640200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7495, "step": 640300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7523, "step": 640400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7455, "step": 640500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7535, "step": 640600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7511, "step": 640700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7472, "step": 640800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7459, "step": 640900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7463, "step": 641000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7528, "step": 641100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7502, "step": 641200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7456, "step": 641300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7476, "step": 641400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7473, "step": 641500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7514, "step": 641600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7516, "step": 641700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7513, "step": 641800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7416, "step": 641900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7535, "step": 642000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7513, "step": 642100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.752, "step": 642200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7508, "step": 642300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7454, "step": 642400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7505, "step": 642500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7536, "step": 642600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.752, "step": 642700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7424, "step": 642800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7475, "step": 642900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7464, "step": 643000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7522, "step": 643100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7511, "step": 643200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7463, "step": 643300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7479, "step": 643400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7524, "step": 643500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7472, "step": 643600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7448, "step": 643700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7452, "step": 643800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7454, "step": 643900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7439, "step": 644000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7496, "step": 644100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7458, "step": 644200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7594, "step": 644300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7432, "step": 644400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7442, "step": 644500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7546, "step": 644600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.756, "step": 644700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.751, "step": 644800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7492, "step": 644900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7537, "step": 645000 }, { "epoch": 1.01, "eval_loss": 0.7038088440895081, "eval_runtime": 209.5065, "eval_samples_per_second": 238.656, "eval_steps_per_second": 1.866, "step": 645000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7515, "step": 645100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.748, "step": 645200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7504, "step": 645300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7488, "step": 645400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7502, "step": 645500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7492, "step": 645600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.748, "step": 645700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7514, "step": 645800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7509, "step": 645900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7489, "step": 646000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7472, "step": 646100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7512, "step": 646200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7537, "step": 646300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.752, "step": 646400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7441, "step": 646500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7554, "step": 646600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.751, "step": 646700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7527, "step": 646800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7494, "step": 646900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7506, "step": 647000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7517, "step": 647100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.749, "step": 647200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7455, "step": 647300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.748, "step": 647400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7541, "step": 647500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7481, "step": 647600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7588, "step": 647700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7446, "step": 647800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7502, "step": 647900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7505, "step": 648000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7539, "step": 648100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7526, "step": 648200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7424, "step": 648300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7507, "step": 648400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7483, "step": 648500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.748, "step": 648600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7492, "step": 648700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7449, "step": 648800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.744, "step": 648900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7469, "step": 649000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.747, "step": 649100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7501, "step": 649200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7419, "step": 649300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7493, "step": 649400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7437, "step": 649500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7507, "step": 649600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7504, "step": 649700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7519, "step": 649800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7496, "step": 649900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7524, "step": 650000 }, { "epoch": 1.01, "eval_loss": 0.706175684928894, "eval_runtime": 205.9298, "eval_samples_per_second": 242.801, "eval_steps_per_second": 1.899, "step": 650000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7542, "step": 650100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7541, "step": 650200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7518, "step": 650300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7565, "step": 650400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7414, "step": 650500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7536, "step": 650600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7578, "step": 650700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7358, "step": 650800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7448, "step": 650900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7596, "step": 651000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7406, "step": 651100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7474, "step": 651200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7558, "step": 651300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7543, "step": 651400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7438, "step": 651500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7542, "step": 651600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7489, "step": 651700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7492, "step": 651800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7439, "step": 651900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.753, "step": 652000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7508, "step": 652100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7501, "step": 652200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7455, "step": 652300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7522, "step": 652400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7437, "step": 652500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7527, "step": 652600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7537, "step": 652700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7535, "step": 652800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7428, "step": 652900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7456, "step": 653000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7468, "step": 653100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7512, "step": 653200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7502, "step": 653300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.751, "step": 653400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7483, "step": 653500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7513, "step": 653600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7521, "step": 653700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7498, "step": 653800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7538, "step": 653900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7546, "step": 654000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7499, "step": 654100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7519, "step": 654200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7428, "step": 654300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7434, "step": 654400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7576, "step": 654500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.743, "step": 654600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.747, "step": 654700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.75, "step": 654800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7444, "step": 654900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7487, "step": 655000 }, { "epoch": 1.01, "eval_loss": 0.7067192196846008, "eval_runtime": 206.9501, "eval_samples_per_second": 241.604, "eval_steps_per_second": 1.889, "step": 655000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7569, "step": 655100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.745, "step": 655200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7474, "step": 655300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7498, "step": 655400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7486, "step": 655500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7511, "step": 655600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7473, "step": 655700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7508, "step": 655800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.745, "step": 655900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7461, "step": 656000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7486, "step": 656100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7542, "step": 656200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7493, "step": 656300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7524, "step": 656400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7453, "step": 656500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7494, "step": 656600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7444, "step": 656700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7563, "step": 656800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7551, "step": 656900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7479, "step": 657000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.75, "step": 657100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7537, "step": 657200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7601, "step": 657300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7528, "step": 657400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7425, "step": 657500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7451, "step": 657600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.742, "step": 657700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7575, "step": 657800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7495, "step": 657900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7555, "step": 658000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7468, "step": 658100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.744, "step": 658200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7447, "step": 658300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7512, "step": 658400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.752, "step": 658500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7426, "step": 658600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7567, "step": 658700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7432, "step": 658800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7489, "step": 658900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7528, "step": 659000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7513, "step": 659100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7413, "step": 659200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.753, "step": 659300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7487, "step": 659400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7438, "step": 659500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7567, "step": 659600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7454, "step": 659700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7444, "step": 659800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7519, "step": 659900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.756, "step": 660000 }, { "epoch": 1.01, "eval_loss": 0.7032054662704468, "eval_runtime": 206.5049, "eval_samples_per_second": 242.125, "eval_steps_per_second": 1.893, "step": 660000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7469, "step": 660100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7478, "step": 660200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7482, "step": 660300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7467, "step": 660400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7582, "step": 660500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7556, "step": 660600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.751, "step": 660700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7468, "step": 660800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7435, "step": 660900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7473, "step": 661000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7439, "step": 661100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7562, "step": 661200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7494, "step": 661300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7404, "step": 661400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.741, "step": 661500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7589, "step": 661600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7394, "step": 661700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7477, "step": 661800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7504, "step": 661900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.748, "step": 662000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.755, "step": 662100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7466, "step": 662200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7501, "step": 662300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7546, "step": 662400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7476, "step": 662500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7403, "step": 662600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7485, "step": 662700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7528, "step": 662800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7464, "step": 662900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7509, "step": 663000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7471, "step": 663100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7465, "step": 663200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7543, "step": 663300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7591, "step": 663400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7531, "step": 663500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7589, "step": 663600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7463, "step": 663700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7469, "step": 663800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7408, "step": 663900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7579, "step": 664000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7525, "step": 664100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7423, "step": 664200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7494, "step": 664300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.754, "step": 664400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7544, "step": 664500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7473, "step": 664600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7419, "step": 664700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7438, "step": 664800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7463, "step": 664900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7472, "step": 665000 }, { "epoch": 1.01, "eval_loss": 0.7017516493797302, "eval_runtime": 208.408, "eval_samples_per_second": 239.914, "eval_steps_per_second": 1.876, "step": 665000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7439, "step": 665100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7444, "step": 665200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7434, "step": 665300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7423, "step": 665400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7492, "step": 665500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7514, "step": 665600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7532, "step": 665700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7522, "step": 665800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7515, "step": 665900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7457, "step": 666000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7469, "step": 666100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7483, "step": 666200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7481, "step": 666300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7502, "step": 666400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7466, "step": 666500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7479, "step": 666600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7435, "step": 666700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7516, "step": 666800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7483, "step": 666900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7503, "step": 667000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7494, "step": 667100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7507, "step": 667200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7589, "step": 667300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.755, "step": 667400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7495, "step": 667500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7462, "step": 667600 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7453, "step": 667700 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7458, "step": 667800 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.753, "step": 667900 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7561, "step": 668000 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7529, "step": 668100 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7513, "step": 668200 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7467, "step": 668300 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7483, "step": 668400 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.7495, "step": 668500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7444, "step": 668600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7536, "step": 668700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7472, "step": 668800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7457, "step": 668900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7536, "step": 669000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7492, "step": 669100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.741, "step": 669200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7473, "step": 669300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7473, "step": 669400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7525, "step": 669500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7495, "step": 669600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7539, "step": 669700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7443, "step": 669800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7522, "step": 669900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.747, "step": 670000 }, { "epoch": 1.02, "eval_loss": 0.7028328776359558, "eval_runtime": 203.5887, "eval_samples_per_second": 245.593, "eval_steps_per_second": 1.921, "step": 670000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7525, "step": 670100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7531, "step": 670200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7528, "step": 670300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7491, "step": 670400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7434, "step": 670500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7553, "step": 670600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7475, "step": 670700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.747, "step": 670800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7435, "step": 670900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7536, "step": 671000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.749, "step": 671100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7469, "step": 671200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7522, "step": 671300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7515, "step": 671400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7546, "step": 671500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7443, "step": 671600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7467, "step": 671700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.75, "step": 671800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7491, "step": 671900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7461, "step": 672000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7526, "step": 672100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7475, "step": 672200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7475, "step": 672300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.746, "step": 672400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7421, "step": 672500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7531, "step": 672600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7515, "step": 672700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7513, "step": 672800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.749, "step": 672900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7473, "step": 673000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7428, "step": 673100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7473, "step": 673200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7416, "step": 673300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7436, "step": 673400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7491, "step": 673500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7445, "step": 673600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.752, "step": 673700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7447, "step": 673800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7538, "step": 673900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7475, "step": 674000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7504, "step": 674100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.752, "step": 674200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7487, "step": 674300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7394, "step": 674400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7513, "step": 674500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7515, "step": 674600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7457, "step": 674700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7563, "step": 674800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7452, "step": 674900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7403, "step": 675000 }, { "epoch": 1.02, "eval_loss": 0.7030300498008728, "eval_runtime": 207.0756, "eval_samples_per_second": 241.458, "eval_steps_per_second": 1.888, "step": 675000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7489, "step": 675100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7441, "step": 675200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7485, "step": 675300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7493, "step": 675400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7522, "step": 675500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7471, "step": 675600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7491, "step": 675700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7436, "step": 675800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7567, "step": 675900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7499, "step": 676000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7484, "step": 676100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7442, "step": 676200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7513, "step": 676300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7449, "step": 676400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7469, "step": 676500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7474, "step": 676600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7428, "step": 676700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7503, "step": 676800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7463, "step": 676900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7477, "step": 677000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7553, "step": 677100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7452, "step": 677200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7481, "step": 677300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7461, "step": 677400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7526, "step": 677500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7522, "step": 677600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7482, "step": 677700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7484, "step": 677800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.746, "step": 677900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7482, "step": 678000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7476, "step": 678100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7483, "step": 678200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7541, "step": 678300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7461, "step": 678400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7492, "step": 678500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7526, "step": 678600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7509, "step": 678700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.735, "step": 678800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7525, "step": 678900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7501, "step": 679000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7533, "step": 679100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7497, "step": 679200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.748, "step": 679300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7553, "step": 679400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7468, "step": 679500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7457, "step": 679600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7565, "step": 679700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7455, "step": 679800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7433, "step": 679900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7414, "step": 680000 }, { "epoch": 1.02, "eval_loss": 0.7032363414764404, "eval_runtime": 207.9683, "eval_samples_per_second": 240.421, "eval_steps_per_second": 1.88, "step": 680000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7463, "step": 680100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.749, "step": 680200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.75, "step": 680300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7486, "step": 680400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7544, "step": 680500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7487, "step": 680600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7526, "step": 680700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7563, "step": 680800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7468, "step": 680900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7456, "step": 681000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7485, "step": 681100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7463, "step": 681200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7532, "step": 681300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7493, "step": 681400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7577, "step": 681500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7496, "step": 681600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7445, "step": 681700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7498, "step": 681800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7395, "step": 681900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7416, "step": 682000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7559, "step": 682100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7456, "step": 682200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7466, "step": 682300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7582, "step": 682400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7541, "step": 682500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7457, "step": 682600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7577, "step": 682700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.744, "step": 682800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7434, "step": 682900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7565, "step": 683000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.747, "step": 683100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7511, "step": 683200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7511, "step": 683300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7472, "step": 683400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.739, "step": 683500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7476, "step": 683600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7509, "step": 683700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7439, "step": 683800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7526, "step": 683900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7525, "step": 684000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7534, "step": 684100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7541, "step": 684200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7392, "step": 684300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7427, "step": 684400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7448, "step": 684500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.751, "step": 684600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7434, "step": 684700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7398, "step": 684800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7528, "step": 684900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7464, "step": 685000 }, { "epoch": 1.02, "eval_loss": 0.7048628330230713, "eval_runtime": 203.5531, "eval_samples_per_second": 245.636, "eval_steps_per_second": 1.921, "step": 685000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7476, "step": 685100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7459, "step": 685200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7454, "step": 685300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7488, "step": 685400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7422, "step": 685500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7476, "step": 685600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7443, "step": 685700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7533, "step": 685800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7475, "step": 685900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7421, "step": 686000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7323, "step": 686100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7483, "step": 686200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7467, "step": 686300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7499, "step": 686400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7446, "step": 686500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7483, "step": 686600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7464, "step": 686700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7433, "step": 686800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7502, "step": 686900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7574, "step": 687000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.751, "step": 687100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7529, "step": 687200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7479, "step": 687300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7476, "step": 687400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7485, "step": 687500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7449, "step": 687600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7507, "step": 687700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7451, "step": 687800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7504, "step": 687900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7459, "step": 688000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7452, "step": 688100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7477, "step": 688200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7522, "step": 688300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7433, "step": 688400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7528, "step": 688500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7508, "step": 688600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7519, "step": 688700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7501, "step": 688800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7498, "step": 688900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7578, "step": 689000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7454, "step": 689100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7494, "step": 689200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7518, "step": 689300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7478, "step": 689400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7443, "step": 689500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7497, "step": 689600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7457, "step": 689700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7499, "step": 689800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7504, "step": 689900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.743, "step": 690000 }, { "epoch": 1.02, "eval_loss": 0.702966570854187, "eval_runtime": 202.3143, "eval_samples_per_second": 247.14, "eval_steps_per_second": 1.933, "step": 690000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7421, "step": 690100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7542, "step": 690200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7451, "step": 690300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7497, "step": 690400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7509, "step": 690500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7468, "step": 690600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7558, "step": 690700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7488, "step": 690800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7475, "step": 690900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7531, "step": 691000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7447, "step": 691100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7496, "step": 691200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7516, "step": 691300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7511, "step": 691400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7474, "step": 691500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7348, "step": 691600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7459, "step": 691700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7378, "step": 691800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7466, "step": 691900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7451, "step": 692000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7473, "step": 692100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.749, "step": 692200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7494, "step": 692300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7469, "step": 692400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.743, "step": 692500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7541, "step": 692600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7484, "step": 692700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7521, "step": 692800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7485, "step": 692900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7449, "step": 693000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7428, "step": 693100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7443, "step": 693200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7499, "step": 693300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.745, "step": 693400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7504, "step": 693500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7401, "step": 693600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7456, "step": 693700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7488, "step": 693800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7489, "step": 693900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7414, "step": 694000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7557, "step": 694100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7463, "step": 694200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7393, "step": 694300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7434, "step": 694400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7401, "step": 694500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7444, "step": 694600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7514, "step": 694700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7503, "step": 694800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7513, "step": 694900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7447, "step": 695000 }, { "epoch": 1.02, "eval_loss": 0.7024686932563782, "eval_runtime": 202.3612, "eval_samples_per_second": 247.083, "eval_steps_per_second": 1.932, "step": 695000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7501, "step": 695100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7465, "step": 695200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7533, "step": 695300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7491, "step": 695400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7456, "step": 695500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7448, "step": 695600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7527, "step": 695700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7359, "step": 695800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.749, "step": 695900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7486, "step": 696000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7469, "step": 696100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7519, "step": 696200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7419, "step": 696300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7487, "step": 696400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.753, "step": 696500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7471, "step": 696600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7476, "step": 696700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7451, "step": 696800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.751, "step": 696900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7491, "step": 697000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7492, "step": 697100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7512, "step": 697200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.744, "step": 697300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.748, "step": 697400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7457, "step": 697500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7453, "step": 697600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7488, "step": 697700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7464, "step": 697800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7524, "step": 697900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7532, "step": 698000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7496, "step": 698100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7469, "step": 698200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7469, "step": 698300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7518, "step": 698400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7435, "step": 698500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.749, "step": 698600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7496, "step": 698700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7509, "step": 698800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7534, "step": 698900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.75, "step": 699000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7462, "step": 699100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.739, "step": 699200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7495, "step": 699300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7498, "step": 699400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7466, "step": 699500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7468, "step": 699600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7455, "step": 699700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.754, "step": 699800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7462, "step": 699900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7507, "step": 700000 }, { "epoch": 1.02, "eval_loss": 0.7030143737792969, "eval_runtime": 195.8231, "eval_samples_per_second": 255.332, "eval_steps_per_second": 1.997, "step": 700000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7501, "step": 700100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7476, "step": 700200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7483, "step": 700300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7418, "step": 700400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7494, "step": 700500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7489, "step": 700600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7509, "step": 700700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7408, "step": 700800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7379, "step": 700900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7463, "step": 701000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7494, "step": 701100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7467, "step": 701200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7497, "step": 701300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.755, "step": 701400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.749, "step": 701500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7453, "step": 701600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7531, "step": 701700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7472, "step": 701800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7554, "step": 701900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7488, "step": 702000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7406, "step": 702100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7449, "step": 702200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7547, "step": 702300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7498, "step": 702400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7585, "step": 702500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7433, "step": 702600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7446, "step": 702700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7468, "step": 702800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7469, "step": 702900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7448, "step": 703000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7585, "step": 703100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7462, "step": 703200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7437, "step": 703300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7467, "step": 703400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7459, "step": 703500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7478, "step": 703600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7426, "step": 703700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7402, "step": 703800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7469, "step": 703900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.747, "step": 704000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7563, "step": 704100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7534, "step": 704200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7536, "step": 704300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7429, "step": 704400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7537, "step": 704500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7507, "step": 704600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7477, "step": 704700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7445, "step": 704800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7474, "step": 704900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7537, "step": 705000 }, { "epoch": 1.02, "eval_loss": 0.7021443247795105, "eval_runtime": 201.7932, "eval_samples_per_second": 247.778, "eval_steps_per_second": 1.938, "step": 705000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7524, "step": 705100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7489, "step": 705200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7479, "step": 705300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.751, "step": 705400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7491, "step": 705500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7562, "step": 705600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.749, "step": 705700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7417, "step": 705800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7428, "step": 705900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7468, "step": 706000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7523, "step": 706100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7462, "step": 706200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7558, "step": 706300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7539, "step": 706400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7386, "step": 706500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.746, "step": 706600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7516, "step": 706700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7484, "step": 706800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7527, "step": 706900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7506, "step": 707000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7437, "step": 707100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.744, "step": 707200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7453, "step": 707300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7415, "step": 707400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7473, "step": 707500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.753, "step": 707600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7499, "step": 707700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7502, "step": 707800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7434, "step": 707900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7496, "step": 708000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7481, "step": 708100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7548, "step": 708200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7537, "step": 708300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7499, "step": 708400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7484, "step": 708500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7516, "step": 708600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7466, "step": 708700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7416, "step": 708800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7456, "step": 708900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7454, "step": 709000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.751, "step": 709100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7425, "step": 709200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.746, "step": 709300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7448, "step": 709400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7502, "step": 709500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.754, "step": 709600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7435, "step": 709700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7441, "step": 709800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.749, "step": 709900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7432, "step": 710000 }, { "epoch": 1.02, "eval_loss": 0.703992486000061, "eval_runtime": 194.4138, "eval_samples_per_second": 257.183, "eval_steps_per_second": 2.011, "step": 710000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7514, "step": 710100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7514, "step": 710200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7436, "step": 710300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7468, "step": 710400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7516, "step": 710500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7378, "step": 710600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7541, "step": 710700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7542, "step": 710800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7501, "step": 710900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7478, "step": 711000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7479, "step": 711100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.752, "step": 711200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7481, "step": 711300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.749, "step": 711400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7454, "step": 711500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7486, "step": 711600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7428, "step": 711700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7499, "step": 711800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7465, "step": 711900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7534, "step": 712000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7571, "step": 712100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.748, "step": 712200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7443, "step": 712300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7488, "step": 712400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7402, "step": 712500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7483, "step": 712600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7473, "step": 712700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7469, "step": 712800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7525, "step": 712900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7462, "step": 713000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7496, "step": 713100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.746, "step": 713200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7498, "step": 713300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7425, "step": 713400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7588, "step": 713500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7391, "step": 713600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7508, "step": 713700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7441, "step": 713800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7417, "step": 713900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7429, "step": 714000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7538, "step": 714100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.747, "step": 714200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7509, "step": 714300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7383, "step": 714400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7379, "step": 714500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7471, "step": 714600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7453, "step": 714700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7538, "step": 714800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7452, "step": 714900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7467, "step": 715000 }, { "epoch": 1.02, "eval_loss": 0.7032497525215149, "eval_runtime": 194.5615, "eval_samples_per_second": 256.988, "eval_steps_per_second": 2.01, "step": 715000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.751, "step": 715100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7441, "step": 715200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7475, "step": 715300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7546, "step": 715400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7498, "step": 715500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7481, "step": 715600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7536, "step": 715700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7513, "step": 715800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7528, "step": 715900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7478, "step": 716000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7479, "step": 716100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.752, "step": 716200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7525, "step": 716300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7423, "step": 716400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7352, "step": 716500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7479, "step": 716600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7468, "step": 716700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.755, "step": 716800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7451, "step": 716900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7493, "step": 717000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7468, "step": 717100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7469, "step": 717200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7454, "step": 717300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7464, "step": 717400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7426, "step": 717500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7432, "step": 717600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7444, "step": 717700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7477, "step": 717800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7453, "step": 717900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7494, "step": 718000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7486, "step": 718100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7421, "step": 718200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.753, "step": 718300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7503, "step": 718400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7418, "step": 718500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7467, "step": 718600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7425, "step": 718700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.742, "step": 718800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.75, "step": 718900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7408, "step": 719000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.747, "step": 719100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7427, "step": 719200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7486, "step": 719300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7435, "step": 719400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7469, "step": 719500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7467, "step": 719600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7419, "step": 719700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7484, "step": 719800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7522, "step": 719900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7438, "step": 720000 }, { "epoch": 1.02, "eval_loss": 0.7018641233444214, "eval_runtime": 194.6657, "eval_samples_per_second": 256.851, "eval_steps_per_second": 2.009, "step": 720000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7488, "step": 720100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7477, "step": 720200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7452, "step": 720300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7454, "step": 720400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7436, "step": 720500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7482, "step": 720600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7481, "step": 720700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7458, "step": 720800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.754, "step": 720900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7452, "step": 721000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7423, "step": 721100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7443, "step": 721200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7497, "step": 721300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7397, "step": 721400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7508, "step": 721500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7491, "step": 721600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.747, "step": 721700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7498, "step": 721800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7408, "step": 721900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7473, "step": 722000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7566, "step": 722100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7355, "step": 722200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7451, "step": 722300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7381, "step": 722400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7427, "step": 722500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7576, "step": 722600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7517, "step": 722700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7465, "step": 722800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7485, "step": 722900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.748, "step": 723000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7451, "step": 723100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.75, "step": 723200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7543, "step": 723300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7508, "step": 723400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7442, "step": 723500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7414, "step": 723600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7485, "step": 723700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7487, "step": 723800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7464, "step": 723900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7424, "step": 724000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7398, "step": 724100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.752, "step": 724200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7499, "step": 724300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.753, "step": 724400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7429, "step": 724500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7409, "step": 724600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7463, "step": 724700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7505, "step": 724800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7414, "step": 724900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7504, "step": 725000 }, { "epoch": 1.02, "eval_loss": 0.7017471790313721, "eval_runtime": 194.844, "eval_samples_per_second": 256.616, "eval_steps_per_second": 2.007, "step": 725000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7516, "step": 725100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7511, "step": 725200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7399, "step": 725300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7465, "step": 725400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7412, "step": 725500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7451, "step": 725600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7429, "step": 725700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7482, "step": 725800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7459, "step": 725900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7482, "step": 726000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7413, "step": 726100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7498, "step": 726200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7509, "step": 726300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7482, "step": 726400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.746, "step": 726500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7523, "step": 726600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7461, "step": 726700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7511, "step": 726800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7501, "step": 726900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7479, "step": 727000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7396, "step": 727100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7516, "step": 727200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7375, "step": 727300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7489, "step": 727400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7447, "step": 727500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7462, "step": 727600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7501, "step": 727700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7471, "step": 727800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7517, "step": 727900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7451, "step": 728000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7535, "step": 728100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7444, "step": 728200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7475, "step": 728300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7486, "step": 728400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7449, "step": 728500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7494, "step": 728600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.741, "step": 728700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7486, "step": 728800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.738, "step": 728900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7451, "step": 729000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7575, "step": 729100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7515, "step": 729200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.74, "step": 729300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7557, "step": 729400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7446, "step": 729500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7481, "step": 729600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7454, "step": 729700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7491, "step": 729800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7464, "step": 729900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.759, "step": 730000 }, { "epoch": 1.02, "eval_loss": 0.7034239768981934, "eval_runtime": 194.8342, "eval_samples_per_second": 256.628, "eval_steps_per_second": 2.007, "step": 730000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7446, "step": 730100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7516, "step": 730200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7441, "step": 730300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7474, "step": 730400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7424, "step": 730500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7469, "step": 730600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7462, "step": 730700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7489, "step": 730800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7404, "step": 730900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7488, "step": 731000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7459, "step": 731100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7448, "step": 731200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.747, "step": 731300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7486, "step": 731400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7445, "step": 731500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7442, "step": 731600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7482, "step": 731700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7421, "step": 731800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7468, "step": 731900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7515, "step": 732000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7503, "step": 732100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7414, "step": 732200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7519, "step": 732300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7505, "step": 732400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7481, "step": 732500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7446, "step": 732600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.745, "step": 732700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7488, "step": 732800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7493, "step": 732900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7514, "step": 733000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7457, "step": 733100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.749, "step": 733200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7429, "step": 733300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7457, "step": 733400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7414, "step": 733500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7458, "step": 733600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7534, "step": 733700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7494, "step": 733800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7447, "step": 733900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7447, "step": 734000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7481, "step": 734100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.748, "step": 734200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7492, "step": 734300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7554, "step": 734400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7465, "step": 734500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7448, "step": 734600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7529, "step": 734700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7488, "step": 734800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7436, "step": 734900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7413, "step": 735000 }, { "epoch": 1.02, "eval_loss": 0.7015842199325562, "eval_runtime": 194.9341, "eval_samples_per_second": 256.497, "eval_steps_per_second": 2.006, "step": 735000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7468, "step": 735100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7461, "step": 735200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7444, "step": 735300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7441, "step": 735400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7466, "step": 735500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7514, "step": 735600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7466, "step": 735700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7429, "step": 735800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7448, "step": 735900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7422, "step": 736000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7424, "step": 736100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7499, "step": 736200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7478, "step": 736300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7432, "step": 736400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7545, "step": 736500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7482, "step": 736600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7469, "step": 736700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7476, "step": 736800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7477, "step": 736900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.747, "step": 737000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7467, "step": 737100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7441, "step": 737200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7445, "step": 737300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7497, "step": 737400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7504, "step": 737500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7459, "step": 737600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7368, "step": 737700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7442, "step": 737800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7536, "step": 737900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7452, "step": 738000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7466, "step": 738100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7439, "step": 738200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7501, "step": 738300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7489, "step": 738400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7518, "step": 738500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7423, "step": 738600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7422, "step": 738700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7402, "step": 738800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7392, "step": 738900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7497, "step": 739000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7463, "step": 739100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7495, "step": 739200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7437, "step": 739300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7553, "step": 739400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7488, "step": 739500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7493, "step": 739600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7491, "step": 739700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7561, "step": 739800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7437, "step": 739900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7356, "step": 740000 }, { "epoch": 1.02, "eval_loss": 0.7015743255615234, "eval_runtime": 194.8311, "eval_samples_per_second": 256.633, "eval_steps_per_second": 2.007, "step": 740000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7378, "step": 740100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7426, "step": 740200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7509, "step": 740300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7481, "step": 740400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7512, "step": 740500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7417, "step": 740600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7433, "step": 740700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7517, "step": 740800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7469, "step": 740900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7519, "step": 741000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7462, "step": 741100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7505, "step": 741200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7497, "step": 741300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.751, "step": 741400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7529, "step": 741500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7486, "step": 741600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7375, "step": 741700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7466, "step": 741800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7388, "step": 741900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7576, "step": 742000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7557, "step": 742100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7475, "step": 742200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7437, "step": 742300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.749, "step": 742400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7438, "step": 742500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7458, "step": 742600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7406, "step": 742700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.75, "step": 742800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7449, "step": 742900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7405, "step": 743000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7478, "step": 743100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7433, "step": 743200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.751, "step": 743300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7456, "step": 743400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7481, "step": 743500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7457, "step": 743600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7435, "step": 743700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7425, "step": 743800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7442, "step": 743900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7443, "step": 744000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7332, "step": 744100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7501, "step": 744200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7501, "step": 744300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7394, "step": 744400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7451, "step": 744500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7569, "step": 744600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7505, "step": 744700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.74, "step": 744800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7506, "step": 744900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.75, "step": 745000 }, { "epoch": 1.02, "eval_loss": 0.7021865844726562, "eval_runtime": 194.4109, "eval_samples_per_second": 257.187, "eval_steps_per_second": 2.011, "step": 745000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7434, "step": 745100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7442, "step": 745200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7453, "step": 745300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7435, "step": 745400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.756, "step": 745500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7456, "step": 745600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7367, "step": 745700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7459, "step": 745800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.747, "step": 745900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7492, "step": 746000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7408, "step": 746100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7453, "step": 746200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7467, "step": 746300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7516, "step": 746400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7448, "step": 746500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7448, "step": 746600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7488, "step": 746700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7424, "step": 746800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7508, "step": 746900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7463, "step": 747000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7399, "step": 747100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.744, "step": 747200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7423, "step": 747300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7441, "step": 747400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7428, "step": 747500 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7412, "step": 747600 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7382, "step": 747700 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7497, "step": 747800 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7483, "step": 747900 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7494, "step": 748000 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7543, "step": 748100 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7523, "step": 748200 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7428, "step": 748300 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7447, "step": 748400 }, { "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.7406, "step": 748500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7425, "step": 748600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7489, "step": 748700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7455, "step": 748800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7431, "step": 748900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7439, "step": 749000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7475, "step": 749100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7517, "step": 749200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7483, "step": 749300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7465, "step": 749400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.748, "step": 749500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7443, "step": 749600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7483, "step": 749700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7461, "step": 749800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.751, "step": 749900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.748, "step": 750000 }, { "epoch": 1.03, "eval_loss": 0.6999218463897705, "eval_runtime": 203.751, "eval_samples_per_second": 245.398, "eval_steps_per_second": 1.919, "step": 750000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7494, "step": 750100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7473, "step": 750200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7418, "step": 750300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.743, "step": 750400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7485, "step": 750500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7498, "step": 750600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7494, "step": 750700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7445, "step": 750800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.746, "step": 750900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7514, "step": 751000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7452, "step": 751100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7524, "step": 751200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7498, "step": 751300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7443, "step": 751400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7574, "step": 751500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7535, "step": 751600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7432, "step": 751700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7459, "step": 751800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7423, "step": 751900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7476, "step": 752000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7456, "step": 752100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7478, "step": 752200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.745, "step": 752300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7453, "step": 752400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7396, "step": 752500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7487, "step": 752600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7481, "step": 752700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7397, "step": 752800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.745, "step": 752900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7429, "step": 753000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7406, "step": 753100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7427, "step": 753200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.746, "step": 753300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7502, "step": 753400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7456, "step": 753500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7444, "step": 753600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7393, "step": 753700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7543, "step": 753800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7456, "step": 753900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7469, "step": 754000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7449, "step": 754100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.745, "step": 754200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7465, "step": 754300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7493, "step": 754400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.742, "step": 754500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7411, "step": 754600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7521, "step": 754700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7432, "step": 754800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7419, "step": 754900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7469, "step": 755000 }, { "epoch": 1.03, "eval_loss": 0.7005174160003662, "eval_runtime": 195.8763, "eval_samples_per_second": 255.263, "eval_steps_per_second": 1.996, "step": 755000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7447, "step": 755100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7506, "step": 755200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7392, "step": 755300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7429, "step": 755400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7463, "step": 755500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7493, "step": 755600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7572, "step": 755700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7442, "step": 755800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7502, "step": 755900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7445, "step": 756000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7467, "step": 756100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.743, "step": 756200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7499, "step": 756300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7626, "step": 756400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7449, "step": 756500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.744, "step": 756600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7507, "step": 756700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7452, "step": 756800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7517, "step": 756900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7429, "step": 757000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7457, "step": 757100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7441, "step": 757200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7381, "step": 757300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.75, "step": 757400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.748, "step": 757500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7408, "step": 757600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7455, "step": 757700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.745, "step": 757800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7465, "step": 757900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7448, "step": 758000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7402, "step": 758100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.746, "step": 758200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7444, "step": 758300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7413, "step": 758400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7421, "step": 758500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7484, "step": 758600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7421, "step": 758700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7437, "step": 758800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7392, "step": 758900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7421, "step": 759000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7501, "step": 759100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.747, "step": 759200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7464, "step": 759300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7418, "step": 759400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7516, "step": 759500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7458, "step": 759600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7449, "step": 759700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7479, "step": 759800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7424, "step": 759900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7468, "step": 760000 }, { "epoch": 1.03, "eval_loss": 0.7038360834121704, "eval_runtime": 195.7056, "eval_samples_per_second": 255.486, "eval_steps_per_second": 1.998, "step": 760000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7371, "step": 760100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7448, "step": 760200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7443, "step": 760300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7451, "step": 760400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7467, "step": 760500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.753, "step": 760600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7479, "step": 760700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7475, "step": 760800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7427, "step": 760900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7478, "step": 761000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7476, "step": 761100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7468, "step": 761200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7403, "step": 761300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7536, "step": 761400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7437, "step": 761500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7392, "step": 761600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7493, "step": 761700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7457, "step": 761800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7487, "step": 761900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7502, "step": 762000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7546, "step": 762100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.749, "step": 762200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7488, "step": 762300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7458, "step": 762400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7466, "step": 762500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7448, "step": 762600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7567, "step": 762700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7442, "step": 762800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7465, "step": 762900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.75, "step": 763000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.749, "step": 763100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7535, "step": 763200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.749, "step": 763300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7511, "step": 763400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7464, "step": 763500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7505, "step": 763600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7484, "step": 763700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7447, "step": 763800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7436, "step": 763900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7468, "step": 764000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7554, "step": 764100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.743, "step": 764200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7445, "step": 764300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7509, "step": 764400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7446, "step": 764500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7503, "step": 764600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7415, "step": 764700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7479, "step": 764800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7519, "step": 764900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7416, "step": 765000 }, { "epoch": 1.03, "eval_loss": 0.7031464576721191, "eval_runtime": 194.5711, "eval_samples_per_second": 256.975, "eval_steps_per_second": 2.01, "step": 765000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7455, "step": 765100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7459, "step": 765200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7473, "step": 765300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.74, "step": 765400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7394, "step": 765500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.741, "step": 765600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7479, "step": 765700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7422, "step": 765800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7447, "step": 765900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7463, "step": 766000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7473, "step": 766100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7522, "step": 766200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7495, "step": 766300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7513, "step": 766400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7407, "step": 766500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7523, "step": 766600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7524, "step": 766700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7486, "step": 766800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7453, "step": 766900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7349, "step": 767000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7483, "step": 767100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7435, "step": 767200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.754, "step": 767300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7403, "step": 767400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7456, "step": 767500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.748, "step": 767600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7443, "step": 767700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7415, "step": 767800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7546, "step": 767900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7407, "step": 768000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7485, "step": 768100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7426, "step": 768200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7505, "step": 768300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7397, "step": 768400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7489, "step": 768500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7463, "step": 768600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7461, "step": 768700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7489, "step": 768800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7354, "step": 768900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7485, "step": 769000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7366, "step": 769100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7465, "step": 769200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7504, "step": 769300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7459, "step": 769400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7551, "step": 769500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.743, "step": 769600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.747, "step": 769700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7497, "step": 769800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7529, "step": 769900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7507, "step": 770000 }, { "epoch": 1.03, "eval_loss": 0.7009237408638, "eval_runtime": 194.6679, "eval_samples_per_second": 256.848, "eval_steps_per_second": 2.009, "step": 770000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7431, "step": 770100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7414, "step": 770200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7497, "step": 770300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7478, "step": 770400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7472, "step": 770500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7453, "step": 770600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7473, "step": 770700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7512, "step": 770800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7429, "step": 770900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7515, "step": 771000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7583, "step": 771100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.739, "step": 771200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7411, "step": 771300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7431, "step": 771400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7536, "step": 771500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7417, "step": 771600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7456, "step": 771700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.744, "step": 771800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7409, "step": 771900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7451, "step": 772000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.741, "step": 772100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7478, "step": 772200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7395, "step": 772300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7417, "step": 772400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7406, "step": 772500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7409, "step": 772600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7462, "step": 772700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7464, "step": 772800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7517, "step": 772900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7482, "step": 773000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7427, "step": 773100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7508, "step": 773200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7544, "step": 773300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7437, "step": 773400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7387, "step": 773500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7513, "step": 773600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7531, "step": 773700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.741, "step": 773800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7451, "step": 773900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.747, "step": 774000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7413, "step": 774100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7366, "step": 774200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7503, "step": 774300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7486, "step": 774400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7427, "step": 774500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7453, "step": 774600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7429, "step": 774700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7398, "step": 774800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7437, "step": 774900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7374, "step": 775000 }, { "epoch": 1.03, "eval_loss": 0.7024406790733337, "eval_runtime": 205.3494, "eval_samples_per_second": 243.487, "eval_steps_per_second": 1.904, "step": 775000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7377, "step": 775100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7448, "step": 775200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7435, "step": 775300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.744, "step": 775400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7447, "step": 775500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.742, "step": 775600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7407, "step": 775700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7434, "step": 775800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.742, "step": 775900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7477, "step": 776000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7466, "step": 776100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7489, "step": 776200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7569, "step": 776300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7474, "step": 776400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.738, "step": 776500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.745, "step": 776600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7487, "step": 776700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7483, "step": 776800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7351, "step": 776900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7365, "step": 777000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.743, "step": 777100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7438, "step": 777200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7414, "step": 777300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7489, "step": 777400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7456, "step": 777500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7486, "step": 777600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7446, "step": 777700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7393, "step": 777800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7438, "step": 777900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7429, "step": 778000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7476, "step": 778100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7449, "step": 778200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7432, "step": 778300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7508, "step": 778400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.747, "step": 778500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7443, "step": 778600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.747, "step": 778700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7446, "step": 778800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7451, "step": 778900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7433, "step": 779000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7474, "step": 779100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7452, "step": 779200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7435, "step": 779300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7512, "step": 779400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7399, "step": 779500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7431, "step": 779600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7447, "step": 779700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7417, "step": 779800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7479, "step": 779900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7498, "step": 780000 }, { "epoch": 1.03, "eval_loss": 0.7008019685745239, "eval_runtime": 200.2027, "eval_samples_per_second": 249.747, "eval_steps_per_second": 1.953, "step": 780000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7443, "step": 780100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7459, "step": 780200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7499, "step": 780300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7472, "step": 780400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7415, "step": 780500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7422, "step": 780600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.745, "step": 780700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7411, "step": 780800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7473, "step": 780900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.746, "step": 781000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.744, "step": 781100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.744, "step": 781200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7451, "step": 781300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7338, "step": 781400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7445, "step": 781500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.755, "step": 781600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7404, "step": 781700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7472, "step": 781800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.748, "step": 781900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.74, "step": 782000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7421, "step": 782100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7394, "step": 782200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7425, "step": 782300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7385, "step": 782400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7464, "step": 782500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7419, "step": 782600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7465, "step": 782700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7459, "step": 782800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7471, "step": 782900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7429, "step": 783000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7377, "step": 783100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7412, "step": 783200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7393, "step": 783300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7414, "step": 783400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7493, "step": 783500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7376, "step": 783600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7465, "step": 783700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7366, "step": 783800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7514, "step": 783900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7444, "step": 784000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.746, "step": 784100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7511, "step": 784200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7422, "step": 784300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7424, "step": 784400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7403, "step": 784500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7417, "step": 784600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7425, "step": 784700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7423, "step": 784800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.744, "step": 784900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7466, "step": 785000 }, { "epoch": 1.03, "eval_loss": 0.700626015663147, "eval_runtime": 204.4778, "eval_samples_per_second": 244.525, "eval_steps_per_second": 1.912, "step": 785000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7413, "step": 785100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7426, "step": 785200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7476, "step": 785300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7436, "step": 785400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7453, "step": 785500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7337, "step": 785600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7409, "step": 785700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7355, "step": 785800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7384, "step": 785900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7438, "step": 786000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7414, "step": 786100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7392, "step": 786200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7495, "step": 786300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7426, "step": 786400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7398, "step": 786500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7446, "step": 786600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7404, "step": 786700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7476, "step": 786800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7463, "step": 786900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7471, "step": 787000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7502, "step": 787100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7472, "step": 787200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7447, "step": 787300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7418, "step": 787400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7438, "step": 787500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7464, "step": 787600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7424, "step": 787700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7404, "step": 787800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7469, "step": 787900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7434, "step": 788000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7483, "step": 788100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7538, "step": 788200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7408, "step": 788300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7401, "step": 788400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7404, "step": 788500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7433, "step": 788600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7503, "step": 788700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7432, "step": 788800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7466, "step": 788900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7419, "step": 789000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7502, "step": 789100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7434, "step": 789200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7453, "step": 789300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7427, "step": 789400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7469, "step": 789500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7451, "step": 789600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7449, "step": 789700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7469, "step": 789800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7435, "step": 789900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7369, "step": 790000 }, { "epoch": 1.03, "eval_loss": 0.7005957961082458, "eval_runtime": 811.9429, "eval_samples_per_second": 61.581, "eval_steps_per_second": 0.482, "step": 790000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7367, "step": 790100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7442, "step": 790200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7474, "step": 790300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7424, "step": 790400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7444, "step": 790500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7543, "step": 790600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7459, "step": 790700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.745, "step": 790800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7442, "step": 790900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7463, "step": 791000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7449, "step": 791100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7431, "step": 791200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7506, "step": 791300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7435, "step": 791400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7432, "step": 791500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7392, "step": 791600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7366, "step": 791700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7397, "step": 791800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7471, "step": 791900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7434, "step": 792000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7406, "step": 792100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7408, "step": 792200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.742, "step": 792300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.74, "step": 792400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.748, "step": 792500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7442, "step": 792600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7478, "step": 792700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7483, "step": 792800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7502, "step": 792900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7433, "step": 793000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7479, "step": 793100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7345, "step": 793200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7393, "step": 793300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7389, "step": 793400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7481, "step": 793500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.743, "step": 793600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7408, "step": 793700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7485, "step": 793800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.736, "step": 793900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7406, "step": 794000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7383, "step": 794100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7493, "step": 794200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7443, "step": 794300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7448, "step": 794400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7416, "step": 794500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.75, "step": 794600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7363, "step": 794700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.742, "step": 794800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.743, "step": 794900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7459, "step": 795000 }, { "epoch": 1.03, "eval_loss": 0.6998762488365173, "eval_runtime": 199.9792, "eval_samples_per_second": 250.026, "eval_steps_per_second": 1.955, "step": 795000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7467, "step": 795100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7489, "step": 795200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7436, "step": 795300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7432, "step": 795400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7402, "step": 795500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7516, "step": 795600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7417, "step": 795700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7412, "step": 795800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.752, "step": 795900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7429, "step": 796000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7426, "step": 796100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7472, "step": 796200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7474, "step": 796300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7364, "step": 796400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7493, "step": 796500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7498, "step": 796600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7446, "step": 796700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7421, "step": 796800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7439, "step": 796900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7411, "step": 797000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7459, "step": 797100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7471, "step": 797200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7387, "step": 797300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.746, "step": 797400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7409, "step": 797500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7524, "step": 797600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7469, "step": 797700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7453, "step": 797800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7435, "step": 797900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7379, "step": 798000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7403, "step": 798100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7359, "step": 798200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7329, "step": 798300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7503, "step": 798400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7488, "step": 798500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7449, "step": 798600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.743, "step": 798700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7435, "step": 798800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7513, "step": 798900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7456, "step": 799000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7475, "step": 799100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7456, "step": 799200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7405, "step": 799300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7472, "step": 799400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7452, "step": 799500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7385, "step": 799600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7481, "step": 799700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7458, "step": 799800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7482, "step": 799900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7431, "step": 800000 }, { "epoch": 1.03, "eval_loss": 0.7010047435760498, "eval_runtime": 203.5185, "eval_samples_per_second": 245.678, "eval_steps_per_second": 1.921, "step": 800000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7501, "step": 800100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7451, "step": 800200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7495, "step": 800300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7466, "step": 800400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7426, "step": 800500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7472, "step": 800600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7467, "step": 800700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7473, "step": 800800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7416, "step": 800900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7475, "step": 801000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7471, "step": 801100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7432, "step": 801200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7389, "step": 801300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7481, "step": 801400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7404, "step": 801500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7383, "step": 801600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7405, "step": 801700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7441, "step": 801800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7464, "step": 801900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7475, "step": 802000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7495, "step": 802100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7462, "step": 802200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7398, "step": 802300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.74, "step": 802400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7505, "step": 802500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7404, "step": 802600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7422, "step": 802700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7295, "step": 802800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7492, "step": 802900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7384, "step": 803000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7462, "step": 803100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7489, "step": 803200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7465, "step": 803300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7405, "step": 803400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7408, "step": 803500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7383, "step": 803600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7463, "step": 803700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7519, "step": 803800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7358, "step": 803900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7501, "step": 804000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7486, "step": 804100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7511, "step": 804200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7484, "step": 804300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7405, "step": 804400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7485, "step": 804500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.745, "step": 804600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.744, "step": 804700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7479, "step": 804800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7505, "step": 804900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7425, "step": 805000 }, { "epoch": 1.03, "eval_loss": 0.7028763890266418, "eval_runtime": 203.1272, "eval_samples_per_second": 246.151, "eval_steps_per_second": 1.925, "step": 805000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7455, "step": 805100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7414, "step": 805200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.74, "step": 805300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7376, "step": 805400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7376, "step": 805500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7468, "step": 805600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7518, "step": 805700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7433, "step": 805800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7445, "step": 805900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7433, "step": 806000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7458, "step": 806100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7524, "step": 806200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7458, "step": 806300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7416, "step": 806400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7443, "step": 806500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7457, "step": 806600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.746, "step": 806700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7429, "step": 806800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7548, "step": 806900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.744, "step": 807000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7375, "step": 807100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7339, "step": 807200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7479, "step": 807300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7441, "step": 807400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.739, "step": 807500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7417, "step": 807600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7428, "step": 807700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7383, "step": 807800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7469, "step": 807900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7405, "step": 808000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7445, "step": 808100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7467, "step": 808200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7369, "step": 808300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7411, "step": 808400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7493, "step": 808500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7516, "step": 808600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.746, "step": 808700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7493, "step": 808800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7431, "step": 808900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.748, "step": 809000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7505, "step": 809100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7419, "step": 809200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7459, "step": 809300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.748, "step": 809400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7493, "step": 809500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7459, "step": 809600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7459, "step": 809700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7454, "step": 809800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7512, "step": 809900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7459, "step": 810000 }, { "epoch": 1.03, "eval_loss": 0.7009583711624146, "eval_runtime": 204.6869, "eval_samples_per_second": 244.276, "eval_steps_per_second": 1.91, "step": 810000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7445, "step": 810100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7447, "step": 810200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7425, "step": 810300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7425, "step": 810400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7391, "step": 810500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7412, "step": 810600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7472, "step": 810700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7502, "step": 810800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7474, "step": 810900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7421, "step": 811000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7482, "step": 811100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7407, "step": 811200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7457, "step": 811300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7481, "step": 811400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7421, "step": 811500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7443, "step": 811600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7419, "step": 811700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7545, "step": 811800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7409, "step": 811900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7469, "step": 812000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7398, "step": 812100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7414, "step": 812200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7416, "step": 812300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7443, "step": 812400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7417, "step": 812500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7416, "step": 812600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.741, "step": 812700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7388, "step": 812800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7398, "step": 812900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7413, "step": 813000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7467, "step": 813100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7491, "step": 813200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7447, "step": 813300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7407, "step": 813400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7425, "step": 813500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7398, "step": 813600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7409, "step": 813700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7419, "step": 813800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.742, "step": 813900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7353, "step": 814000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7452, "step": 814100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.733, "step": 814200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7485, "step": 814300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7379, "step": 814400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7482, "step": 814500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7437, "step": 814600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.754, "step": 814700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7526, "step": 814800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7474, "step": 814900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7481, "step": 815000 }, { "epoch": 1.03, "eval_loss": 0.7012917399406433, "eval_runtime": 200.7367, "eval_samples_per_second": 249.083, "eval_steps_per_second": 1.948, "step": 815000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7414, "step": 815100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7409, "step": 815200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7491, "step": 815300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7515, "step": 815400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7502, "step": 815500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7423, "step": 815600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7384, "step": 815700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7474, "step": 815800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7531, "step": 815900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7427, "step": 816000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7499, "step": 816100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7479, "step": 816200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7407, "step": 816300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7473, "step": 816400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7416, "step": 816500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7453, "step": 816600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7486, "step": 816700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7483, "step": 816800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7399, "step": 816900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7495, "step": 817000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7402, "step": 817100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7414, "step": 817200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7442, "step": 817300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7453, "step": 817400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7375, "step": 817500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7454, "step": 817600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7448, "step": 817700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7384, "step": 817800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7459, "step": 817900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7401, "step": 818000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7549, "step": 818100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7479, "step": 818200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7447, "step": 818300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7474, "step": 818400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7511, "step": 818500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7446, "step": 818600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7369, "step": 818700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.742, "step": 818800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7329, "step": 818900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.746, "step": 819000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7443, "step": 819100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7433, "step": 819200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7371, "step": 819300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7518, "step": 819400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7397, "step": 819500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7468, "step": 819600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7423, "step": 819700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7424, "step": 819800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7421, "step": 819900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7434, "step": 820000 }, { "epoch": 1.03, "eval_loss": 0.6996245980262756, "eval_runtime": 1267.1182, "eval_samples_per_second": 39.46, "eval_steps_per_second": 0.309, "step": 820000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7475, "step": 820100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7456, "step": 820200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7463, "step": 820300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7408, "step": 820400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7424, "step": 820500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7453, "step": 820600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7388, "step": 820700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7395, "step": 820800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7433, "step": 820900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7404, "step": 821000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7401, "step": 821100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7451, "step": 821200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7398, "step": 821300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.737, "step": 821400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7458, "step": 821500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7355, "step": 821600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7443, "step": 821700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7432, "step": 821800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7395, "step": 821900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7427, "step": 822000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7411, "step": 822100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7488, "step": 822200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7413, "step": 822300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7427, "step": 822400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7496, "step": 822500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7341, "step": 822600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7473, "step": 822700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7352, "step": 822800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7464, "step": 822900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7427, "step": 823000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7381, "step": 823100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7444, "step": 823200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7475, "step": 823300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7499, "step": 823400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7506, "step": 823500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7469, "step": 823600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.748, "step": 823700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7387, "step": 823800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7413, "step": 823900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7444, "step": 824000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7448, "step": 824100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7436, "step": 824200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.744, "step": 824300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7414, "step": 824400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7475, "step": 824500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7445, "step": 824600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7401, "step": 824700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7397, "step": 824800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7426, "step": 824900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7383, "step": 825000 }, { "epoch": 1.03, "eval_loss": 0.6995617747306824, "eval_runtime": 200.2123, "eval_samples_per_second": 249.735, "eval_steps_per_second": 1.953, "step": 825000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7432, "step": 825100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7508, "step": 825200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7401, "step": 825300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7344, "step": 825400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7358, "step": 825500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7451, "step": 825600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7447, "step": 825700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7376, "step": 825800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7451, "step": 825900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7492, "step": 826000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7397, "step": 826100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7555, "step": 826200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7439, "step": 826300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7346, "step": 826400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7452, "step": 826500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7482, "step": 826600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7516, "step": 826700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7458, "step": 826800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7445, "step": 826900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7475, "step": 827000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.749, "step": 827100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7462, "step": 827200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.742, "step": 827300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7456, "step": 827400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7488, "step": 827500 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7472, "step": 827600 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.745, "step": 827700 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7493, "step": 827800 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7443, "step": 827900 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7409, "step": 828000 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7439, "step": 828100 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7498, "step": 828200 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7473, "step": 828300 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.7427, "step": 828400 }, { "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.745, "step": 828500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7464, "step": 828600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7418, "step": 828700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7519, "step": 828800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7434, "step": 828900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7409, "step": 829000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7502, "step": 829100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7418, "step": 829200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7481, "step": 829300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7487, "step": 829400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.749, "step": 829500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7389, "step": 829600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7414, "step": 829700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7424, "step": 829800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7384, "step": 829900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7431, "step": 830000 }, { "epoch": 1.04, "eval_loss": 0.6999587416648865, "eval_runtime": 202.6692, "eval_samples_per_second": 246.707, "eval_steps_per_second": 1.929, "step": 830000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7394, "step": 830100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7423, "step": 830200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7485, "step": 830300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7446, "step": 830400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7487, "step": 830500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7432, "step": 830600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7405, "step": 830700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7506, "step": 830800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7434, "step": 830900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.747, "step": 831000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7542, "step": 831100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7376, "step": 831200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7431, "step": 831300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7429, "step": 831400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7522, "step": 831500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7399, "step": 831600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7361, "step": 831700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7455, "step": 831800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7453, "step": 831900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7379, "step": 832000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7407, "step": 832100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7414, "step": 832200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7429, "step": 832300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7448, "step": 832400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7458, "step": 832500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7514, "step": 832600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7462, "step": 832700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7488, "step": 832800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7474, "step": 832900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7448, "step": 833000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7426, "step": 833100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7395, "step": 833200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7456, "step": 833300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7416, "step": 833400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7432, "step": 833500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7465, "step": 833600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7458, "step": 833700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7466, "step": 833800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7472, "step": 833900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7382, "step": 834000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7412, "step": 834100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7449, "step": 834200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7355, "step": 834300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7388, "step": 834400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.739, "step": 834500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7421, "step": 834600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7427, "step": 834700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7405, "step": 834800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7495, "step": 834900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7506, "step": 835000 }, { "epoch": 1.04, "eval_loss": 0.6982614994049072, "eval_runtime": 206.0848, "eval_samples_per_second": 242.619, "eval_steps_per_second": 1.897, "step": 835000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7449, "step": 835100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7396, "step": 835200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7433, "step": 835300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7414, "step": 835400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7448, "step": 835500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7412, "step": 835600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7403, "step": 835700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7347, "step": 835800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.739, "step": 835900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7449, "step": 836000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7368, "step": 836100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.746, "step": 836200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7514, "step": 836300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7519, "step": 836400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7458, "step": 836500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7347, "step": 836600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7333, "step": 836700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7424, "step": 836800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7452, "step": 836900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7352, "step": 837000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7427, "step": 837100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7457, "step": 837200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7532, "step": 837300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7462, "step": 837400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7473, "step": 837500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7508, "step": 837600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.748, "step": 837700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.74, "step": 837800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7379, "step": 837900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7426, "step": 838000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7464, "step": 838100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7435, "step": 838200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.743, "step": 838300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7478, "step": 838400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7428, "step": 838500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7473, "step": 838600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7439, "step": 838700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.745, "step": 838800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7465, "step": 838900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7448, "step": 839000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7466, "step": 839100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7387, "step": 839200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.752, "step": 839300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7442, "step": 839400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7448, "step": 839500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7386, "step": 839600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7468, "step": 839700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7462, "step": 839800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7488, "step": 839900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7435, "step": 840000 }, { "epoch": 1.04, "eval_loss": 0.6995810866355896, "eval_runtime": 246.5346, "eval_samples_per_second": 202.811, "eval_steps_per_second": 1.586, "step": 840000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7401, "step": 840100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7497, "step": 840200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7433, "step": 840300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7469, "step": 840400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7448, "step": 840500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7446, "step": 840600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7409, "step": 840700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7359, "step": 840800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7439, "step": 840900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7434, "step": 841000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7441, "step": 841100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7404, "step": 841200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7459, "step": 841300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7429, "step": 841400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7458, "step": 841500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.731, "step": 841600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7486, "step": 841700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7497, "step": 841800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.743, "step": 841900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7458, "step": 842000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7434, "step": 842100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7465, "step": 842200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7442, "step": 842300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7534, "step": 842400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7515, "step": 842500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7495, "step": 842600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7411, "step": 842700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7437, "step": 842800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7532, "step": 842900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7403, "step": 843000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7421, "step": 843100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7397, "step": 843200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7445, "step": 843300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7456, "step": 843400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7444, "step": 843500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7404, "step": 843600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7458, "step": 843700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7547, "step": 843800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7428, "step": 843900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7453, "step": 844000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7466, "step": 844100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7403, "step": 844200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7421, "step": 844300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7475, "step": 844400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.732, "step": 844500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7397, "step": 844600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7391, "step": 844700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7444, "step": 844800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7507, "step": 844900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.738, "step": 845000 }, { "epoch": 1.04, "eval_loss": 0.6997884511947632, "eval_runtime": 199.6827, "eval_samples_per_second": 250.397, "eval_steps_per_second": 1.958, "step": 845000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7477, "step": 845100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7383, "step": 845200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7499, "step": 845300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7455, "step": 845400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7454, "step": 845500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.74, "step": 845600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.743, "step": 845700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7453, "step": 845800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.733, "step": 845900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7417, "step": 846000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7406, "step": 846100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7517, "step": 846200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7502, "step": 846300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7443, "step": 846400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7424, "step": 846500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7449, "step": 846600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7461, "step": 846700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.74, "step": 846800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7473, "step": 846900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7371, "step": 847000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7428, "step": 847100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7438, "step": 847200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7441, "step": 847300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7422, "step": 847400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7359, "step": 847500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7457, "step": 847600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7464, "step": 847700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7414, "step": 847800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7486, "step": 847900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7476, "step": 848000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7463, "step": 848100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7448, "step": 848200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7452, "step": 848300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7422, "step": 848400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.741, "step": 848500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7445, "step": 848600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7436, "step": 848700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7467, "step": 848800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7436, "step": 848900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7411, "step": 849000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7381, "step": 849100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7526, "step": 849200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7402, "step": 849300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7391, "step": 849400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7427, "step": 849500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7394, "step": 849600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7443, "step": 849700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7414, "step": 849800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7455, "step": 849900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7429, "step": 850000 }, { "epoch": 1.04, "eval_loss": 0.7002251744270325, "eval_runtime": 203.5229, "eval_samples_per_second": 245.673, "eval_steps_per_second": 1.921, "step": 850000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7459, "step": 850100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7453, "step": 850200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7459, "step": 850300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7375, "step": 850400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7444, "step": 850500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7441, "step": 850600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7419, "step": 850700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7461, "step": 850800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.744, "step": 850900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.746, "step": 851000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7416, "step": 851100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.744, "step": 851200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7492, "step": 851300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7421, "step": 851400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7468, "step": 851500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7398, "step": 851600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7407, "step": 851700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7342, "step": 851800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.748, "step": 851900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7433, "step": 852000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7421, "step": 852100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7478, "step": 852200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7417, "step": 852300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7406, "step": 852400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7424, "step": 852500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7493, "step": 852600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7472, "step": 852700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7385, "step": 852800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7411, "step": 852900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7379, "step": 853000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7523, "step": 853100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7487, "step": 853200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7426, "step": 853300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7445, "step": 853400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7466, "step": 853500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7443, "step": 853600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7361, "step": 853700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7429, "step": 853800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7425, "step": 853900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7453, "step": 854000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7451, "step": 854100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7424, "step": 854200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7395, "step": 854300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7442, "step": 854400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.744, "step": 854500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7428, "step": 854600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7438, "step": 854700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7438, "step": 854800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7417, "step": 854900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7463, "step": 855000 }, { "epoch": 1.04, "eval_loss": 0.6967592835426331, "eval_runtime": 200.9972, "eval_samples_per_second": 248.76, "eval_steps_per_second": 1.945, "step": 855000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7475, "step": 855100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7376, "step": 855200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7478, "step": 855300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7522, "step": 855400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7403, "step": 855500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7422, "step": 855600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7505, "step": 855700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7469, "step": 855800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7367, "step": 855900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7405, "step": 856000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7443, "step": 856100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7457, "step": 856200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7462, "step": 856300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7437, "step": 856400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7434, "step": 856500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.743, "step": 856600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7478, "step": 856700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7445, "step": 856800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7474, "step": 856900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7415, "step": 857000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7357, "step": 857100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7384, "step": 857200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7451, "step": 857300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7388, "step": 857400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.754, "step": 857500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7418, "step": 857600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7378, "step": 857700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7379, "step": 857800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7367, "step": 857900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7488, "step": 858000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7411, "step": 858100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.739, "step": 858200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7405, "step": 858300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7469, "step": 858400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7375, "step": 858500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.744, "step": 858600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7478, "step": 858700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.745, "step": 858800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.742, "step": 858900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7404, "step": 859000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.736, "step": 859100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7374, "step": 859200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7428, "step": 859300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7414, "step": 859400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7418, "step": 859500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7454, "step": 859600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7464, "step": 859700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7372, "step": 859800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7462, "step": 859900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7378, "step": 860000 }, { "epoch": 1.04, "eval_loss": 0.6985325813293457, "eval_runtime": 202.728, "eval_samples_per_second": 246.636, "eval_steps_per_second": 1.929, "step": 860000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7434, "step": 860100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7443, "step": 860200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7426, "step": 860300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7466, "step": 860400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7476, "step": 860500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7473, "step": 860600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7392, "step": 860700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7467, "step": 860800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7418, "step": 860900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7407, "step": 861000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7428, "step": 861100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7414, "step": 861200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7407, "step": 861300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7509, "step": 861400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7453, "step": 861500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7435, "step": 861600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7407, "step": 861700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7465, "step": 861800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7432, "step": 861900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7457, "step": 862000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7377, "step": 862100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7407, "step": 862200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.742, "step": 862300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7506, "step": 862400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7381, "step": 862500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7363, "step": 862600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7362, "step": 862700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.748, "step": 862800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7408, "step": 862900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7513, "step": 863000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.746, "step": 863100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7449, "step": 863200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7438, "step": 863300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7418, "step": 863400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7366, "step": 863500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7361, "step": 863600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7431, "step": 863700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7389, "step": 863800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7408, "step": 863900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7413, "step": 864000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7478, "step": 864100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7457, "step": 864200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7528, "step": 864300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7397, "step": 864400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7343, "step": 864500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.741, "step": 864600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7338, "step": 864700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7395, "step": 864800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7461, "step": 864900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7386, "step": 865000 }, { "epoch": 1.04, "eval_loss": 0.6987593173980713, "eval_runtime": 201.2811, "eval_samples_per_second": 248.409, "eval_steps_per_second": 1.943, "step": 865000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7335, "step": 865100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7464, "step": 865200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7376, "step": 865300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7385, "step": 865400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7392, "step": 865500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7466, "step": 865600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7401, "step": 865700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7389, "step": 865800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7445, "step": 865900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7412, "step": 866000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7432, "step": 866100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7429, "step": 866200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7368, "step": 866300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7454, "step": 866400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7465, "step": 866500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7426, "step": 866600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.747, "step": 866700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7406, "step": 866800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7508, "step": 866900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7404, "step": 867000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7393, "step": 867100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.745, "step": 867200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7386, "step": 867300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7381, "step": 867400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7363, "step": 867500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7504, "step": 867600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7425, "step": 867700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7467, "step": 867800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7455, "step": 867900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7464, "step": 868000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.746, "step": 868100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7512, "step": 868200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7477, "step": 868300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7383, "step": 868400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7483, "step": 868500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.748, "step": 868600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7442, "step": 868700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7552, "step": 868800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7437, "step": 868900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7442, "step": 869000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7425, "step": 869100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7467, "step": 869200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7376, "step": 869300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.747, "step": 869400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7438, "step": 869500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7426, "step": 869600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7335, "step": 869700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7386, "step": 869800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7406, "step": 869900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.741, "step": 870000 }, { "epoch": 1.04, "eval_loss": 0.6997235417366028, "eval_runtime": 202.6711, "eval_samples_per_second": 246.705, "eval_steps_per_second": 1.929, "step": 870000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7454, "step": 870100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7428, "step": 870200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7396, "step": 870300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7408, "step": 870400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7455, "step": 870500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7397, "step": 870600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7371, "step": 870700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7409, "step": 870800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7422, "step": 870900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7407, "step": 871000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7445, "step": 871100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.745, "step": 871200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7477, "step": 871300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7401, "step": 871400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7366, "step": 871500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7446, "step": 871600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.746, "step": 871700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7486, "step": 871800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7431, "step": 871900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7437, "step": 872000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7435, "step": 872100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7334, "step": 872200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7399, "step": 872300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7378, "step": 872400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7423, "step": 872500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.743, "step": 872600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7394, "step": 872700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7442, "step": 872800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7473, "step": 872900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.735, "step": 873000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7452, "step": 873100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7365, "step": 873200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7373, "step": 873300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7407, "step": 873400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7449, "step": 873500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7422, "step": 873600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7461, "step": 873700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7397, "step": 873800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7488, "step": 873900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7365, "step": 874000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7446, "step": 874100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7401, "step": 874200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7484, "step": 874300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7435, "step": 874400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7435, "step": 874500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7426, "step": 874600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.739, "step": 874700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.743, "step": 874800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7459, "step": 874900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7302, "step": 875000 }, { "epoch": 1.04, "eval_loss": 0.6992688775062561, "eval_runtime": 203.061, "eval_samples_per_second": 246.231, "eval_steps_per_second": 1.926, "step": 875000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7419, "step": 875100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7349, "step": 875200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7426, "step": 875300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7419, "step": 875400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7495, "step": 875500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7407, "step": 875600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7458, "step": 875700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7474, "step": 875800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7356, "step": 875900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7437, "step": 876000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7395, "step": 876100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7409, "step": 876200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.746, "step": 876300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7391, "step": 876400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7461, "step": 876500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7464, "step": 876600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7387, "step": 876700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7415, "step": 876800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7404, "step": 876900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7386, "step": 877000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7438, "step": 877100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7452, "step": 877200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.745, "step": 877300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7512, "step": 877400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7395, "step": 877500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7423, "step": 877600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7508, "step": 877700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7499, "step": 877800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7441, "step": 877900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7501, "step": 878000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7422, "step": 878100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7448, "step": 878200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7412, "step": 878300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7386, "step": 878400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7432, "step": 878500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7401, "step": 878600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7392, "step": 878700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7425, "step": 878800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7501, "step": 878900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7465, "step": 879000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7424, "step": 879100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7415, "step": 879200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7358, "step": 879300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7419, "step": 879400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7424, "step": 879500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7401, "step": 879600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7343, "step": 879700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7486, "step": 879800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7386, "step": 879900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7422, "step": 880000 }, { "epoch": 1.04, "eval_loss": 0.6993662714958191, "eval_runtime": 203.8774, "eval_samples_per_second": 245.245, "eval_steps_per_second": 1.918, "step": 880000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.743, "step": 880100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7492, "step": 880200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7415, "step": 880300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7412, "step": 880400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.747, "step": 880500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7434, "step": 880600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7407, "step": 880700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7346, "step": 880800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7473, "step": 880900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7386, "step": 881000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7475, "step": 881100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7339, "step": 881200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7415, "step": 881300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7424, "step": 881400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7377, "step": 881500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7506, "step": 881600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7359, "step": 881700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7374, "step": 881800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7406, "step": 881900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7528, "step": 882000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7488, "step": 882100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.739, "step": 882200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7327, "step": 882300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.738, "step": 882400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7452, "step": 882500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7334, "step": 882600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7366, "step": 882700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7471, "step": 882800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.738, "step": 882900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7486, "step": 883000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7422, "step": 883100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.743, "step": 883200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7399, "step": 883300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.739, "step": 883400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7445, "step": 883500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7399, "step": 883600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7422, "step": 883700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7528, "step": 883800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7425, "step": 883900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.74, "step": 884000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7467, "step": 884100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7399, "step": 884200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7504, "step": 884300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7473, "step": 884400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7383, "step": 884500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7426, "step": 884600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.74, "step": 884700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7341, "step": 884800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7444, "step": 884900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7413, "step": 885000 }, { "epoch": 1.04, "eval_loss": 0.6965381503105164, "eval_runtime": 239.7641, "eval_samples_per_second": 208.538, "eval_steps_per_second": 1.631, "step": 885000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7505, "step": 885100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7406, "step": 885200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7422, "step": 885300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7428, "step": 885400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7437, "step": 885500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7347, "step": 885600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7432, "step": 885700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7437, "step": 885800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7377, "step": 885900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7465, "step": 886000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7388, "step": 886100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7406, "step": 886200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7392, "step": 886300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7464, "step": 886400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7413, "step": 886500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7394, "step": 886600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7446, "step": 886700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7469, "step": 886800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7453, "step": 886900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7373, "step": 887000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7411, "step": 887100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7349, "step": 887200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7397, "step": 887300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7411, "step": 887400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7551, "step": 887500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7427, "step": 887600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7468, "step": 887700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7407, "step": 887800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7476, "step": 887900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7427, "step": 888000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7422, "step": 888100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7397, "step": 888200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7405, "step": 888300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7416, "step": 888400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.738, "step": 888500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.736, "step": 888600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7431, "step": 888700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7439, "step": 888800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7423, "step": 888900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7404, "step": 889000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7423, "step": 889100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.733, "step": 889200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7438, "step": 889300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7494, "step": 889400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7437, "step": 889500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7407, "step": 889600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7398, "step": 889700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.741, "step": 889800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7421, "step": 889900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7437, "step": 890000 }, { "epoch": 1.04, "eval_loss": 0.6979761123657227, "eval_runtime": 233.9852, "eval_samples_per_second": 213.689, "eval_steps_per_second": 1.671, "step": 890000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7452, "step": 890100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7365, "step": 890200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7411, "step": 890300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7524, "step": 890400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7429, "step": 890500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7435, "step": 890600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7476, "step": 890700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7491, "step": 890800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7427, "step": 890900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7372, "step": 891000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7376, "step": 891100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.742, "step": 891200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7417, "step": 891300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7448, "step": 891400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7401, "step": 891500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7509, "step": 891600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7399, "step": 891700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7418, "step": 891800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7443, "step": 891900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7408, "step": 892000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.746, "step": 892100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.744, "step": 892200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7432, "step": 892300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7366, "step": 892400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7423, "step": 892500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7448, "step": 892600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7393, "step": 892700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7364, "step": 892800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7382, "step": 892900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7388, "step": 893000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7421, "step": 893100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7392, "step": 893200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.743, "step": 893300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7349, "step": 893400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7446, "step": 893500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7326, "step": 893600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7419, "step": 893700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7486, "step": 893800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7431, "step": 893900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7451, "step": 894000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7364, "step": 894100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.749, "step": 894200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7403, "step": 894300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7434, "step": 894400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7472, "step": 894500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7385, "step": 894600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7456, "step": 894700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.745, "step": 894800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7405, "step": 894900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7454, "step": 895000 }, { "epoch": 1.04, "eval_loss": 0.700946033000946, "eval_runtime": 205.605, "eval_samples_per_second": 243.185, "eval_steps_per_second": 1.902, "step": 895000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7402, "step": 895100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7376, "step": 895200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7409, "step": 895300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7381, "step": 895400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.741, "step": 895500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7418, "step": 895600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.745, "step": 895700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7399, "step": 895800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7501, "step": 895900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7461, "step": 896000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.738, "step": 896100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7518, "step": 896200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7416, "step": 896300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7401, "step": 896400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7474, "step": 896500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7319, "step": 896600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7381, "step": 896700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7418, "step": 896800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7532, "step": 896900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7484, "step": 897000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7454, "step": 897100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7449, "step": 897200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7488, "step": 897300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.743, "step": 897400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7473, "step": 897500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7418, "step": 897600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7451, "step": 897700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.741, "step": 897800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7475, "step": 897900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7443, "step": 898000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7422, "step": 898100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7464, "step": 898200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7366, "step": 898300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.748, "step": 898400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7421, "step": 898500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7388, "step": 898600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.746, "step": 898700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7385, "step": 898800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7439, "step": 898900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7462, "step": 899000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7401, "step": 899100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7385, "step": 899200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7579, "step": 899300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.749, "step": 899400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7386, "step": 899500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7378, "step": 899600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7416, "step": 899700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7469, "step": 899800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7458, "step": 899900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7479, "step": 900000 }, { "epoch": 1.04, "eval_loss": 0.6999132633209229, "eval_runtime": 205.565, "eval_samples_per_second": 243.232, "eval_steps_per_second": 1.902, "step": 900000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7477, "step": 900100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7395, "step": 900200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7405, "step": 900300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7413, "step": 900400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7369, "step": 900500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7475, "step": 900600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7424, "step": 900700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7469, "step": 900800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7359, "step": 900900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.745, "step": 901000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7431, "step": 901100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7494, "step": 901200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7343, "step": 901300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7458, "step": 901400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7432, "step": 901500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7379, "step": 901600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7422, "step": 901700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7456, "step": 901800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7479, "step": 901900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7376, "step": 902000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7448, "step": 902100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.747, "step": 902200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7448, "step": 902300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7382, "step": 902400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7487, "step": 902500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7408, "step": 902600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7446, "step": 902700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.742, "step": 902800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7433, "step": 902900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7456, "step": 903000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7382, "step": 903100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7391, "step": 903200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7396, "step": 903300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7393, "step": 903400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7437, "step": 903500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7417, "step": 903600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7501, "step": 903700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7416, "step": 903800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7363, "step": 903900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7417, "step": 904000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7413, "step": 904100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7299, "step": 904200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7411, "step": 904300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7389, "step": 904400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7423, "step": 904500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.743, "step": 904600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7484, "step": 904700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7453, "step": 904800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7395, "step": 904900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7397, "step": 905000 }, { "epoch": 1.04, "eval_loss": 0.6989642381668091, "eval_runtime": 206.2345, "eval_samples_per_second": 242.442, "eval_steps_per_second": 1.896, "step": 905000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7324, "step": 905100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7356, "step": 905200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.736, "step": 905300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7378, "step": 905400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7433, "step": 905500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7461, "step": 905600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7468, "step": 905700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7368, "step": 905800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7373, "step": 905900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7482, "step": 906000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7437, "step": 906100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7494, "step": 906200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7444, "step": 906300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.742, "step": 906400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7423, "step": 906500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7341, "step": 906600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7455, "step": 906700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7465, "step": 906800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7532, "step": 906900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.741, "step": 907000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7396, "step": 907100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7542, "step": 907200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7455, "step": 907300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7367, "step": 907400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7426, "step": 907500 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7405, "step": 907600 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7423, "step": 907700 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.743, "step": 907800 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7451, "step": 907900 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7455, "step": 908000 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7467, "step": 908100 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7435, "step": 908200 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7391, "step": 908300 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7424, "step": 908400 }, { "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.7412, "step": 908500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7433, "step": 908600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7383, "step": 908700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7346, "step": 908800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7471, "step": 908900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7415, "step": 909000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7416, "step": 909100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7518, "step": 909200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7387, "step": 909300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7361, "step": 909400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7427, "step": 909500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7407, "step": 909600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7486, "step": 909700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7452, "step": 909800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7424, "step": 909900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7307, "step": 910000 }, { "epoch": 1.05, "eval_loss": 0.6977934241294861, "eval_runtime": 205.0361, "eval_samples_per_second": 243.859, "eval_steps_per_second": 1.907, "step": 910000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7399, "step": 910100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7391, "step": 910200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7433, "step": 910300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.733, "step": 910400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7446, "step": 910500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.743, "step": 910600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7435, "step": 910700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.752, "step": 910800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7387, "step": 910900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7389, "step": 911000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7371, "step": 911100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7389, "step": 911200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7419, "step": 911300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7463, "step": 911400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7491, "step": 911500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7371, "step": 911600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7423, "step": 911700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7523, "step": 911800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7469, "step": 911900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7441, "step": 912000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7302, "step": 912100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7467, "step": 912200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7334, "step": 912300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7415, "step": 912400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7424, "step": 912500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7474, "step": 912600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7519, "step": 912700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.744, "step": 912800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7366, "step": 912900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7366, "step": 913000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7453, "step": 913100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7385, "step": 913200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.744, "step": 913300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7409, "step": 913400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7377, "step": 913500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7446, "step": 913600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.737, "step": 913700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7421, "step": 913800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7379, "step": 913900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7496, "step": 914000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7403, "step": 914100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.741, "step": 914200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.74, "step": 914300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7409, "step": 914400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7371, "step": 914500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.75, "step": 914600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7448, "step": 914700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7422, "step": 914800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7424, "step": 914900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7405, "step": 915000 }, { "epoch": 1.05, "eval_loss": 0.6987533569335938, "eval_runtime": 204.9728, "eval_samples_per_second": 243.935, "eval_steps_per_second": 1.908, "step": 915000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7498, "step": 915100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7415, "step": 915200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7363, "step": 915300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7389, "step": 915400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7439, "step": 915500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7421, "step": 915600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7436, "step": 915700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7378, "step": 915800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7455, "step": 915900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7457, "step": 916000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7426, "step": 916100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7319, "step": 916200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7439, "step": 916300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7434, "step": 916400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7373, "step": 916500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7461, "step": 916600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7389, "step": 916700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7424, "step": 916800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7374, "step": 916900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7408, "step": 917000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7518, "step": 917100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7459, "step": 917200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7479, "step": 917300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7392, "step": 917400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7444, "step": 917500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7362, "step": 917600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7444, "step": 917700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7347, "step": 917800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7395, "step": 917900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7483, "step": 918000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7448, "step": 918100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7425, "step": 918200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7433, "step": 918300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7402, "step": 918400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7418, "step": 918500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7401, "step": 918600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.747, "step": 918700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.75, "step": 918800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7441, "step": 918900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7415, "step": 919000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7446, "step": 919100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7436, "step": 919200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7442, "step": 919300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7446, "step": 919400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7473, "step": 919500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7415, "step": 919600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7484, "step": 919700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.741, "step": 919800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7436, "step": 919900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7487, "step": 920000 }, { "epoch": 1.05, "eval_loss": 0.6991868019104004, "eval_runtime": 204.4782, "eval_samples_per_second": 244.525, "eval_steps_per_second": 1.912, "step": 920000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7414, "step": 920100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7463, "step": 920200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7284, "step": 920300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.747, "step": 920400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7382, "step": 920500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7484, "step": 920600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7448, "step": 920700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7422, "step": 920800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7365, "step": 920900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7411, "step": 921000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7393, "step": 921100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7494, "step": 921200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7398, "step": 921300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7495, "step": 921400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7347, "step": 921500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7428, "step": 921600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.738, "step": 921700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7372, "step": 921800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7335, "step": 921900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7389, "step": 922000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7491, "step": 922100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7418, "step": 922200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7376, "step": 922300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7486, "step": 922400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7363, "step": 922500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7446, "step": 922600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7456, "step": 922700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7312, "step": 922800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7399, "step": 922900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7414, "step": 923000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7469, "step": 923100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7482, "step": 923200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7419, "step": 923300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.738, "step": 923400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7407, "step": 923500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7372, "step": 923600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7361, "step": 923700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7459, "step": 923800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7384, "step": 923900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.737, "step": 924000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7391, "step": 924100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7377, "step": 924200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7366, "step": 924300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7376, "step": 924400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7419, "step": 924500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7434, "step": 924600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7452, "step": 924700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7409, "step": 924800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.749, "step": 924900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7532, "step": 925000 }, { "epoch": 1.05, "eval_loss": 0.6977989673614502, "eval_runtime": 208.233, "eval_samples_per_second": 240.116, "eval_steps_per_second": 1.878, "step": 925000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7394, "step": 925100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7474, "step": 925200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7469, "step": 925300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7437, "step": 925400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7383, "step": 925500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7399, "step": 925600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.74, "step": 925700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7405, "step": 925800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7435, "step": 925900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7428, "step": 926000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7361, "step": 926100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7337, "step": 926200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7349, "step": 926300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7488, "step": 926400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7418, "step": 926500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7401, "step": 926600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7381, "step": 926700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7459, "step": 926800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7344, "step": 926900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7417, "step": 927000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7395, "step": 927100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7415, "step": 927200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7432, "step": 927300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7434, "step": 927400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7455, "step": 927500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7401, "step": 927600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7342, "step": 927700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7329, "step": 927800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7487, "step": 927900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7313, "step": 928000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7485, "step": 928100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7429, "step": 928200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7408, "step": 928300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.734, "step": 928400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7511, "step": 928500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7419, "step": 928600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7438, "step": 928700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7333, "step": 928800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7401, "step": 928900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7412, "step": 929000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7368, "step": 929100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7403, "step": 929200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.735, "step": 929300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7407, "step": 929400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7453, "step": 929500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7386, "step": 929600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7381, "step": 929700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7352, "step": 929800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7445, "step": 929900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7344, "step": 930000 }, { "epoch": 1.05, "eval_loss": 0.6975210905075073, "eval_runtime": 216.9106, "eval_samples_per_second": 230.51, "eval_steps_per_second": 1.803, "step": 930000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7411, "step": 930100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7329, "step": 930200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7425, "step": 930300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7517, "step": 930400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7381, "step": 930500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7392, "step": 930600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7438, "step": 930700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7388, "step": 930800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7357, "step": 930900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7369, "step": 931000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7492, "step": 931100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7398, "step": 931200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7393, "step": 931300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7397, "step": 931400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7357, "step": 931500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7398, "step": 931600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7352, "step": 931700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7459, "step": 931800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7452, "step": 931900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7417, "step": 932000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7356, "step": 932100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7398, "step": 932200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7447, "step": 932300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7445, "step": 932400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7318, "step": 932500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7409, "step": 932600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7418, "step": 932700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.744, "step": 932800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7394, "step": 932900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7401, "step": 933000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7396, "step": 933100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7395, "step": 933200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7405, "step": 933300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7511, "step": 933400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7447, "step": 933500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7408, "step": 933600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7404, "step": 933700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7415, "step": 933800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7425, "step": 933900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7471, "step": 934000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7464, "step": 934100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7306, "step": 934200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7407, "step": 934300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7428, "step": 934400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7461, "step": 934500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7445, "step": 934600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7338, "step": 934700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7441, "step": 934800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7433, "step": 934900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7459, "step": 935000 }, { "epoch": 1.05, "eval_loss": 0.6974779963493347, "eval_runtime": 204.3626, "eval_samples_per_second": 244.663, "eval_steps_per_second": 1.913, "step": 935000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7473, "step": 935100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7458, "step": 935200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7437, "step": 935300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7432, "step": 935400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7454, "step": 935500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7367, "step": 935600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7408, "step": 935700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7423, "step": 935800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7368, "step": 935900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7438, "step": 936000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7447, "step": 936100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7417, "step": 936200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7375, "step": 936300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7439, "step": 936400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7427, "step": 936500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7311, "step": 936600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7441, "step": 936700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7478, "step": 936800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.738, "step": 936900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7391, "step": 937000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7391, "step": 937100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7381, "step": 937200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7425, "step": 937300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7477, "step": 937400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7402, "step": 937500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.744, "step": 937600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7307, "step": 937700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7489, "step": 937800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7399, "step": 937900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7412, "step": 938000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7427, "step": 938100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7411, "step": 938200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7333, "step": 938300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7388, "step": 938400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7431, "step": 938500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.741, "step": 938600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7439, "step": 938700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7477, "step": 938800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7336, "step": 938900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7435, "step": 939000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.738, "step": 939100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7381, "step": 939200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7429, "step": 939300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7397, "step": 939400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7449, "step": 939500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7367, "step": 939600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7424, "step": 939700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7459, "step": 939800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7464, "step": 939900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.739, "step": 940000 }, { "epoch": 1.05, "eval_loss": 0.6990136504173279, "eval_runtime": 204.6451, "eval_samples_per_second": 244.325, "eval_steps_per_second": 1.911, "step": 940000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7424, "step": 940100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7435, "step": 940200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7447, "step": 940300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7432, "step": 940400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.739, "step": 940500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7442, "step": 940600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7493, "step": 940700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7375, "step": 940800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7459, "step": 940900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7422, "step": 941000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7422, "step": 941100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7407, "step": 941200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7464, "step": 941300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7329, "step": 941400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7418, "step": 941500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7309, "step": 941600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7379, "step": 941700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.742, "step": 941800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7499, "step": 941900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7445, "step": 942000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7355, "step": 942100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7361, "step": 942200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7419, "step": 942300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7526, "step": 942400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7393, "step": 942500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7341, "step": 942600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7374, "step": 942700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7332, "step": 942800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7368, "step": 942900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7378, "step": 943000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7484, "step": 943100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7371, "step": 943200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7436, "step": 943300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7334, "step": 943400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7436, "step": 943500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7425, "step": 943600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7411, "step": 943700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7432, "step": 943800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7329, "step": 943900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7417, "step": 944000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7409, "step": 944100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7473, "step": 944200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7461, "step": 944300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7408, "step": 944400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7393, "step": 944500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7419, "step": 944600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7444, "step": 944700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7457, "step": 944800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7437, "step": 944900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.734, "step": 945000 }, { "epoch": 1.05, "eval_loss": 0.6982185244560242, "eval_runtime": 204.5633, "eval_samples_per_second": 244.423, "eval_steps_per_second": 1.911, "step": 945000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7505, "step": 945100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7374, "step": 945200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7432, "step": 945300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7415, "step": 945400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.74, "step": 945500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.736, "step": 945600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7451, "step": 945700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7459, "step": 945800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7437, "step": 945900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.738, "step": 946000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7414, "step": 946100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7422, "step": 946200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7383, "step": 946300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.739, "step": 946400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7383, "step": 946500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7427, "step": 946600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7411, "step": 946700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7361, "step": 946800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7437, "step": 946900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7392, "step": 947000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7504, "step": 947100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7378, "step": 947200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7438, "step": 947300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7416, "step": 947400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7495, "step": 947500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7335, "step": 947600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7454, "step": 947700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7423, "step": 947800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7394, "step": 947900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7416, "step": 948000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7429, "step": 948100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7415, "step": 948200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7357, "step": 948300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7336, "step": 948400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7387, "step": 948500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7383, "step": 948600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7393, "step": 948700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7408, "step": 948800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7386, "step": 948900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7466, "step": 949000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7416, "step": 949100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7424, "step": 949200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7477, "step": 949300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7459, "step": 949400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7447, "step": 949500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7409, "step": 949600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7493, "step": 949700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7402, "step": 949800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7428, "step": 949900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7367, "step": 950000 }, { "epoch": 1.05, "eval_loss": 0.6980754137039185, "eval_runtime": 204.3491, "eval_samples_per_second": 244.679, "eval_steps_per_second": 1.913, "step": 950000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7385, "step": 950100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7424, "step": 950200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7374, "step": 950300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7292, "step": 950400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7337, "step": 950500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7401, "step": 950600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7325, "step": 950700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7405, "step": 950800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7438, "step": 950900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7413, "step": 951000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7464, "step": 951100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7407, "step": 951200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7474, "step": 951300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7426, "step": 951400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7411, "step": 951500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7395, "step": 951600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7454, "step": 951700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7389, "step": 951800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7476, "step": 951900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7353, "step": 952000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.744, "step": 952100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7402, "step": 952200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.742, "step": 952300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7452, "step": 952400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7401, "step": 952500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7409, "step": 952600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7395, "step": 952700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7457, "step": 952800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7469, "step": 952900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7438, "step": 953000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7446, "step": 953100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7399, "step": 953200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7405, "step": 953300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7498, "step": 953400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7509, "step": 953500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7401, "step": 953600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7484, "step": 953700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7441, "step": 953800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7394, "step": 953900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7576, "step": 954000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7409, "step": 954100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7379, "step": 954200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7408, "step": 954300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7409, "step": 954400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7414, "step": 954500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7443, "step": 954600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7446, "step": 954700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7358, "step": 954800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7348, "step": 954900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7348, "step": 955000 }, { "epoch": 1.05, "eval_loss": 0.6982489824295044, "eval_runtime": 203.9889, "eval_samples_per_second": 245.111, "eval_steps_per_second": 1.917, "step": 955000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7462, "step": 955100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7368, "step": 955200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.74, "step": 955300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7392, "step": 955400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.737, "step": 955500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7408, "step": 955600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7364, "step": 955700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7447, "step": 955800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7502, "step": 955900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7391, "step": 956000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7395, "step": 956100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.743, "step": 956200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.734, "step": 956300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7389, "step": 956400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7453, "step": 956500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7388, "step": 956600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7374, "step": 956700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7429, "step": 956800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7445, "step": 956900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7394, "step": 957000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7444, "step": 957100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7395, "step": 957200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7501, "step": 957300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7369, "step": 957400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7422, "step": 957500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7388, "step": 957600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7469, "step": 957700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7388, "step": 957800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7332, "step": 957900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7392, "step": 958000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7454, "step": 958100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.742, "step": 958200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7408, "step": 958300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7401, "step": 958400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.738, "step": 958500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7394, "step": 958600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7331, "step": 958700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7415, "step": 958800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7325, "step": 958900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7415, "step": 959000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7429, "step": 959100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7514, "step": 959200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7362, "step": 959300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7468, "step": 959400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.748, "step": 959500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7403, "step": 959600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7368, "step": 959700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7392, "step": 959800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7388, "step": 959900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7407, "step": 960000 }, { "epoch": 1.05, "eval_loss": 0.6997035145759583, "eval_runtime": 205.1198, "eval_samples_per_second": 243.76, "eval_steps_per_second": 1.906, "step": 960000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7357, "step": 960100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7404, "step": 960200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7439, "step": 960300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7374, "step": 960400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7441, "step": 960500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7375, "step": 960600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7348, "step": 960700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7431, "step": 960800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7341, "step": 960900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7382, "step": 961000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7387, "step": 961100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7506, "step": 961200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7377, "step": 961300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7459, "step": 961400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7457, "step": 961500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7443, "step": 961600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7386, "step": 961700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7458, "step": 961800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7387, "step": 961900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.744, "step": 962000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7475, "step": 962100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7498, "step": 962200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7417, "step": 962300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7446, "step": 962400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.74, "step": 962500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7537, "step": 962600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7386, "step": 962700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7426, "step": 962800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7428, "step": 962900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7428, "step": 963000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7422, "step": 963100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7432, "step": 963200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7419, "step": 963300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7424, "step": 963400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7378, "step": 963500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7391, "step": 963600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7415, "step": 963700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7432, "step": 963800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7445, "step": 963900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7507, "step": 964000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7338, "step": 964100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7369, "step": 964200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7439, "step": 964300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7428, "step": 964400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7374, "step": 964500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7443, "step": 964600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.742, "step": 964700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7384, "step": 964800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7462, "step": 964900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7456, "step": 965000 }, { "epoch": 1.05, "eval_loss": 0.6977582573890686, "eval_runtime": 205.3704, "eval_samples_per_second": 243.463, "eval_steps_per_second": 1.904, "step": 965000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.738, "step": 965100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7361, "step": 965200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7419, "step": 965300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7467, "step": 965400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7454, "step": 965500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7471, "step": 965600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.735, "step": 965700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7388, "step": 965800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7389, "step": 965900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7421, "step": 966000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7296, "step": 966100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7422, "step": 966200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7375, "step": 966300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7419, "step": 966400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7418, "step": 966500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7382, "step": 966600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7396, "step": 966700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7413, "step": 966800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7435, "step": 966900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7451, "step": 967000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7369, "step": 967100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7381, "step": 967200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7359, "step": 967300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7446, "step": 967400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7396, "step": 967500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.747, "step": 967600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7434, "step": 967700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.743, "step": 967800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.744, "step": 967900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7415, "step": 968000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7432, "step": 968100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7484, "step": 968200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7487, "step": 968300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7419, "step": 968400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.744, "step": 968500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7334, "step": 968600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7393, "step": 968700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7391, "step": 968800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7454, "step": 968900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7423, "step": 969000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7426, "step": 969100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7563, "step": 969200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.733, "step": 969300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7431, "step": 969400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7379, "step": 969500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7354, "step": 969600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7404, "step": 969700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7464, "step": 969800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7513, "step": 969900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7427, "step": 970000 }, { "epoch": 1.05, "eval_loss": 0.6963879466056824, "eval_runtime": 209.1727, "eval_samples_per_second": 239.037, "eval_steps_per_second": 1.869, "step": 970000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7392, "step": 970100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.746, "step": 970200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7465, "step": 970300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7325, "step": 970400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7468, "step": 970500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7389, "step": 970600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7417, "step": 970700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7378, "step": 970800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.741, "step": 970900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7387, "step": 971000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7426, "step": 971100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7417, "step": 971200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7401, "step": 971300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.738, "step": 971400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.732, "step": 971500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7478, "step": 971600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7411, "step": 971700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7406, "step": 971800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7429, "step": 971900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.741, "step": 972000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7381, "step": 972100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7446, "step": 972200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.75, "step": 972300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7402, "step": 972400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.737, "step": 972500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7432, "step": 972600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7399, "step": 972700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7389, "step": 972800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7395, "step": 972900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7441, "step": 973000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7394, "step": 973100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7485, "step": 973200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7389, "step": 973300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7374, "step": 973400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7362, "step": 973500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7499, "step": 973600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7348, "step": 973700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7426, "step": 973800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7395, "step": 973900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7355, "step": 974000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7337, "step": 974100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7413, "step": 974200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7429, "step": 974300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7331, "step": 974400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7427, "step": 974500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7371, "step": 974600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7417, "step": 974700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7394, "step": 974800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7378, "step": 974900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7329, "step": 975000 }, { "epoch": 1.05, "eval_loss": 0.695773184299469, "eval_runtime": 1508.1794, "eval_samples_per_second": 33.153, "eval_steps_per_second": 0.259, "step": 975000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7478, "step": 975100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7404, "step": 975200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7385, "step": 975300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7419, "step": 975400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7425, "step": 975500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7439, "step": 975600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7448, "step": 975700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7299, "step": 975800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7468, "step": 975900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7358, "step": 976000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.738, "step": 976100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7392, "step": 976200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7489, "step": 976300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7491, "step": 976400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7427, "step": 976500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.741, "step": 976600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7392, "step": 976700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7368, "step": 976800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7381, "step": 976900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7452, "step": 977000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7359, "step": 977100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7445, "step": 977200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7402, "step": 977300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7412, "step": 977400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7409, "step": 977500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7399, "step": 977600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7264, "step": 977700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7403, "step": 977800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7408, "step": 977900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.739, "step": 978000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7427, "step": 978100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7372, "step": 978200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7307, "step": 978300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7414, "step": 978400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7355, "step": 978500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7371, "step": 978600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7348, "step": 978700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.737, "step": 978800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7355, "step": 978900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7371, "step": 979000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7335, "step": 979100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7379, "step": 979200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7368, "step": 979300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7451, "step": 979400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7462, "step": 979500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7413, "step": 979600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7391, "step": 979700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7453, "step": 979800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7424, "step": 979900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7382, "step": 980000 }, { "epoch": 1.05, "eval_loss": 0.696016252040863, "eval_runtime": 207.2945, "eval_samples_per_second": 241.203, "eval_steps_per_second": 1.886, "step": 980000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7427, "step": 980100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7421, "step": 980200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7441, "step": 980300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7375, "step": 980400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7371, "step": 980500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7353, "step": 980600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7399, "step": 980700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7287, "step": 980800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7376, "step": 980900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7445, "step": 981000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7414, "step": 981100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7401, "step": 981200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.736, "step": 981300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7451, "step": 981400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7448, "step": 981500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7395, "step": 981600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7433, "step": 981700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7372, "step": 981800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7327, "step": 981900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7463, "step": 982000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7365, "step": 982100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7427, "step": 982200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7416, "step": 982300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7444, "step": 982400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7436, "step": 982500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7427, "step": 982600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7452, "step": 982700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7354, "step": 982800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7465, "step": 982900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.737, "step": 983000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7472, "step": 983100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7445, "step": 983200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7419, "step": 983300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7404, "step": 983400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7419, "step": 983500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7432, "step": 983600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.739, "step": 983700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7445, "step": 983800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7407, "step": 983900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7428, "step": 984000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7373, "step": 984100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7428, "step": 984200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7379, "step": 984300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7423, "step": 984400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.737, "step": 984500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7307, "step": 984600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7372, "step": 984700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7325, "step": 984800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.747, "step": 984900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7441, "step": 985000 }, { "epoch": 1.05, "eval_loss": 0.6967244744300842, "eval_runtime": 207.5707, "eval_samples_per_second": 240.882, "eval_steps_per_second": 1.884, "step": 985000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7406, "step": 985100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7421, "step": 985200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.745, "step": 985300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7411, "step": 985400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7472, "step": 985500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7447, "step": 985600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7432, "step": 985700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7429, "step": 985800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7398, "step": 985900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7402, "step": 986000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7336, "step": 986100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7422, "step": 986200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7397, "step": 986300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7433, "step": 986400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7394, "step": 986500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7455, "step": 986600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7393, "step": 986700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7441, "step": 986800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7468, "step": 986900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7387, "step": 987000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7425, "step": 987100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7426, "step": 987200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7461, "step": 987300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7459, "step": 987400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7398, "step": 987500 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7388, "step": 987600 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7343, "step": 987700 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7423, "step": 987800 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7439, "step": 987900 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7336, "step": 988000 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7446, "step": 988100 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.743, "step": 988200 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7493, "step": 988300 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7445, "step": 988400 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.7408, "step": 988500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7324, "step": 988600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7447, "step": 988700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7362, "step": 988800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7437, "step": 988900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7425, "step": 989000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7436, "step": 989100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7443, "step": 989200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7424, "step": 989300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7386, "step": 989400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7324, "step": 989500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7483, "step": 989600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7422, "step": 989700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7489, "step": 989800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.734, "step": 989900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7399, "step": 990000 }, { "epoch": 1.06, "eval_loss": 0.6987284421920776, "eval_runtime": 268.5781, "eval_samples_per_second": 186.166, "eval_steps_per_second": 1.456, "step": 990000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7337, "step": 990100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7326, "step": 990200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7465, "step": 990300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7358, "step": 990400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7437, "step": 990500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7444, "step": 990600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7364, "step": 990700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7443, "step": 990800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7458, "step": 990900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7389, "step": 991000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7373, "step": 991100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7433, "step": 991200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7444, "step": 991300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7342, "step": 991400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7449, "step": 991500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7415, "step": 991600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7419, "step": 991700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7452, "step": 991800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7495, "step": 991900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7355, "step": 992000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7415, "step": 992100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7471, "step": 992200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7399, "step": 992300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7399, "step": 992400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7433, "step": 992500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7355, "step": 992600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7356, "step": 992700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7395, "step": 992800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.739, "step": 992900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7457, "step": 993000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7348, "step": 993100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7453, "step": 993200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.741, "step": 993300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.743, "step": 993400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.731, "step": 993500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7323, "step": 993600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7281, "step": 993700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7364, "step": 993800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7402, "step": 993900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.739, "step": 994000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.736, "step": 994100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7418, "step": 994200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7393, "step": 994300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7418, "step": 994400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7348, "step": 994500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7467, "step": 994600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7384, "step": 994700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7356, "step": 994800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7392, "step": 994900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7389, "step": 995000 }, { "epoch": 1.06, "eval_loss": 0.6964648365974426, "eval_runtime": 207.6129, "eval_samples_per_second": 240.833, "eval_steps_per_second": 1.883, "step": 995000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7428, "step": 995100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7436, "step": 995200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7434, "step": 995300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7438, "step": 995400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7419, "step": 995500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7402, "step": 995600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7361, "step": 995700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7359, "step": 995800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7443, "step": 995900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7478, "step": 996000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7385, "step": 996100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7322, "step": 996200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7345, "step": 996300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7415, "step": 996400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7471, "step": 996500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.738, "step": 996600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7329, "step": 996700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.739, "step": 996800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7363, "step": 996900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7431, "step": 997000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7388, "step": 997100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.744, "step": 997200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7352, "step": 997300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7438, "step": 997400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7409, "step": 997500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7379, "step": 997600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7434, "step": 997700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7482, "step": 997800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7386, "step": 997900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.741, "step": 998000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7382, "step": 998100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7437, "step": 998200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7393, "step": 998300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7425, "step": 998400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7378, "step": 998500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7423, "step": 998600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7422, "step": 998700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7411, "step": 998800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7403, "step": 998900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7408, "step": 999000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.739, "step": 999100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7431, "step": 999200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7384, "step": 999300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7438, "step": 999400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7467, "step": 999500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7317, "step": 999600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7489, "step": 999700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7481, "step": 999800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.741, "step": 999900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7415, "step": 1000000 }, { "epoch": 1.06, "eval_loss": 0.6989625692367554, "eval_runtime": 213.1026, "eval_samples_per_second": 234.629, "eval_steps_per_second": 1.835, "step": 1000000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7464, "step": 1000100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7428, "step": 1000200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7445, "step": 1000300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7344, "step": 1000400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7474, "step": 1000500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7396, "step": 1000600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7353, "step": 1000700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7416, "step": 1000800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7492, "step": 1000900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7446, "step": 1001000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7362, "step": 1001100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7481, "step": 1001200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.736, "step": 1001300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.738, "step": 1001400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.741, "step": 1001500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7339, "step": 1001600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7372, "step": 1001700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7474, "step": 1001800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7408, "step": 1001900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7416, "step": 1002000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7397, "step": 1002100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7425, "step": 1002200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7335, "step": 1002300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7386, "step": 1002400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7431, "step": 1002500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7383, "step": 1002600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7398, "step": 1002700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7369, "step": 1002800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7386, "step": 1002900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7301, "step": 1003000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7477, "step": 1003100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7404, "step": 1003200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7409, "step": 1003300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7427, "step": 1003400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7313, "step": 1003500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7409, "step": 1003600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7424, "step": 1003700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7425, "step": 1003800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7397, "step": 1003900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.735, "step": 1004000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7491, "step": 1004100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7458, "step": 1004200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.737, "step": 1004300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7394, "step": 1004400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7367, "step": 1004500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7437, "step": 1004600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7437, "step": 1004700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7343, "step": 1004800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7489, "step": 1004900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7365, "step": 1005000 }, { "epoch": 1.06, "eval_loss": 0.6978911757469177, "eval_runtime": 207.4706, "eval_samples_per_second": 240.998, "eval_steps_per_second": 1.885, "step": 1005000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7385, "step": 1005100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7458, "step": 1005200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7411, "step": 1005300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7375, "step": 1005400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.736, "step": 1005500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.742, "step": 1005600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7397, "step": 1005700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7323, "step": 1005800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7382, "step": 1005900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.744, "step": 1006000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7378, "step": 1006100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7372, "step": 1006200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7372, "step": 1006300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7397, "step": 1006400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7364, "step": 1006500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7413, "step": 1006600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7429, "step": 1006700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7383, "step": 1006800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7414, "step": 1006900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7437, "step": 1007000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7354, "step": 1007100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7403, "step": 1007200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.739, "step": 1007300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7312, "step": 1007400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7457, "step": 1007500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7411, "step": 1007600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7353, "step": 1007700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7469, "step": 1007800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7415, "step": 1007900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.735, "step": 1008000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7344, "step": 1008100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7464, "step": 1008200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7429, "step": 1008300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7361, "step": 1008400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7347, "step": 1008500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7434, "step": 1008600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7486, "step": 1008700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7333, "step": 1008800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.738, "step": 1008900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7403, "step": 1009000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7432, "step": 1009100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7445, "step": 1009200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7517, "step": 1009300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7463, "step": 1009400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7359, "step": 1009500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7506, "step": 1009600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7392, "step": 1009700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7359, "step": 1009800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7329, "step": 1009900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7488, "step": 1010000 }, { "epoch": 1.06, "eval_loss": 0.6958174705505371, "eval_runtime": 207.0775, "eval_samples_per_second": 241.456, "eval_steps_per_second": 1.888, "step": 1010000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7424, "step": 1010100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7383, "step": 1010200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7449, "step": 1010300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7378, "step": 1010400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7387, "step": 1010500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7514, "step": 1010600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7441, "step": 1010700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7403, "step": 1010800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7446, "step": 1010900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7443, "step": 1011000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7398, "step": 1011100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7395, "step": 1011200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7366, "step": 1011300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7393, "step": 1011400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.744, "step": 1011500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7366, "step": 1011600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7383, "step": 1011700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7403, "step": 1011800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7531, "step": 1011900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7406, "step": 1012000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.743, "step": 1012100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7489, "step": 1012200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7451, "step": 1012300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.751, "step": 1012400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.743, "step": 1012500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.736, "step": 1012600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7382, "step": 1012700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7355, "step": 1012800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7431, "step": 1012900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7361, "step": 1013000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7369, "step": 1013100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7377, "step": 1013200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7405, "step": 1013300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7428, "step": 1013400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7439, "step": 1013500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.733, "step": 1013600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7418, "step": 1013700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7452, "step": 1013800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7407, "step": 1013900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7368, "step": 1014000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7484, "step": 1014100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7393, "step": 1014200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7503, "step": 1014300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.746, "step": 1014400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7436, "step": 1014500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7375, "step": 1014600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7413, "step": 1014700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7408, "step": 1014800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7376, "step": 1014900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7367, "step": 1015000 }, { "epoch": 1.06, "eval_loss": 0.6987661719322205, "eval_runtime": 209.5042, "eval_samples_per_second": 238.659, "eval_steps_per_second": 1.866, "step": 1015000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7435, "step": 1015100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7402, "step": 1015200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7399, "step": 1015300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7362, "step": 1015400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7376, "step": 1015500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7463, "step": 1015600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7389, "step": 1015700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7467, "step": 1015800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7381, "step": 1015900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7472, "step": 1016000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7406, "step": 1016100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7451, "step": 1016200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7378, "step": 1016300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7464, "step": 1016400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7346, "step": 1016500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7462, "step": 1016600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7393, "step": 1016700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7409, "step": 1016800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7437, "step": 1016900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7431, "step": 1017000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7413, "step": 1017100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7326, "step": 1017200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.735, "step": 1017300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7411, "step": 1017400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7339, "step": 1017500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7386, "step": 1017600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.747, "step": 1017700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7395, "step": 1017800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7385, "step": 1017900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7429, "step": 1018000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7412, "step": 1018100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7423, "step": 1018200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7471, "step": 1018300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7417, "step": 1018400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7467, "step": 1018500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7443, "step": 1018600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7399, "step": 1018700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.739, "step": 1018800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7398, "step": 1018900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7394, "step": 1019000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7392, "step": 1019100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7463, "step": 1019200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7431, "step": 1019300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7423, "step": 1019400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7452, "step": 1019500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7393, "step": 1019600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7404, "step": 1019700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7445, "step": 1019800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7392, "step": 1019900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.738, "step": 1020000 }, { "epoch": 1.06, "eval_loss": 0.696831226348877, "eval_runtime": 208.2143, "eval_samples_per_second": 240.137, "eval_steps_per_second": 1.878, "step": 1020000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.737, "step": 1020100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7356, "step": 1020200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7333, "step": 1020300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7478, "step": 1020400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7416, "step": 1020500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.735, "step": 1020600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7411, "step": 1020700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7463, "step": 1020800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7389, "step": 1020900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7425, "step": 1021000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7382, "step": 1021100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.741, "step": 1021200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7511, "step": 1021300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.744, "step": 1021400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7435, "step": 1021500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7349, "step": 1021600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7412, "step": 1021700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7461, "step": 1021800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7411, "step": 1021900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7445, "step": 1022000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7469, "step": 1022100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7348, "step": 1022200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7429, "step": 1022300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7359, "step": 1022400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.742, "step": 1022500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7352, "step": 1022600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7347, "step": 1022700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7397, "step": 1022800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7339, "step": 1022900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7378, "step": 1023000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7342, "step": 1023100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7394, "step": 1023200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7421, "step": 1023300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7365, "step": 1023400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7391, "step": 1023500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7383, "step": 1023600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7434, "step": 1023700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7448, "step": 1023800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7331, "step": 1023900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7363, "step": 1024000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7434, "step": 1024100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7417, "step": 1024200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.747, "step": 1024300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7394, "step": 1024400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7357, "step": 1024500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7395, "step": 1024600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7388, "step": 1024700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7433, "step": 1024800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7425, "step": 1024900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7331, "step": 1025000 }, { "epoch": 1.06, "eval_loss": 0.6981194019317627, "eval_runtime": 207.532, "eval_samples_per_second": 240.927, "eval_steps_per_second": 1.884, "step": 1025000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7348, "step": 1025100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.745, "step": 1025200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7424, "step": 1025300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7396, "step": 1025400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.741, "step": 1025500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7563, "step": 1025600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7464, "step": 1025700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7383, "step": 1025800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7457, "step": 1025900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7314, "step": 1026000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7405, "step": 1026100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7381, "step": 1026200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7424, "step": 1026300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7317, "step": 1026400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7384, "step": 1026500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7392, "step": 1026600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7393, "step": 1026700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.746, "step": 1026800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7416, "step": 1026900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7367, "step": 1027000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7335, "step": 1027100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7472, "step": 1027200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7344, "step": 1027300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7435, "step": 1027400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7395, "step": 1027500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.738, "step": 1027600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.743, "step": 1027700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.736, "step": 1027800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7412, "step": 1027900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7386, "step": 1028000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7422, "step": 1028100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7405, "step": 1028200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7403, "step": 1028300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7381, "step": 1028400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7477, "step": 1028500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7383, "step": 1028600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7502, "step": 1028700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7355, "step": 1028800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7385, "step": 1028900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.741, "step": 1029000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7436, "step": 1029100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7457, "step": 1029200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7327, "step": 1029300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7474, "step": 1029400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7413, "step": 1029500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7494, "step": 1029600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7382, "step": 1029700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7446, "step": 1029800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7399, "step": 1029900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7334, "step": 1030000 }, { "epoch": 1.06, "eval_loss": 0.6968112587928772, "eval_runtime": 207.3926, "eval_samples_per_second": 241.089, "eval_steps_per_second": 1.885, "step": 1030000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7485, "step": 1030100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7401, "step": 1030200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7361, "step": 1030300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7444, "step": 1030400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7374, "step": 1030500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7364, "step": 1030600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7374, "step": 1030700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.742, "step": 1030800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7438, "step": 1030900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7486, "step": 1031000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7362, "step": 1031100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7438, "step": 1031200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7411, "step": 1031300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7328, "step": 1031400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7355, "step": 1031500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7522, "step": 1031600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7428, "step": 1031700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.743, "step": 1031800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7441, "step": 1031900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.74, "step": 1032000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.741, "step": 1032100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7383, "step": 1032200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7406, "step": 1032300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.739, "step": 1032400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7393, "step": 1032500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7414, "step": 1032600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7383, "step": 1032700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7368, "step": 1032800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7473, "step": 1032900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7397, "step": 1033000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7378, "step": 1033100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7422, "step": 1033200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7391, "step": 1033300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7443, "step": 1033400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7473, "step": 1033500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7419, "step": 1033600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7392, "step": 1033700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7476, "step": 1033800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7375, "step": 1033900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7446, "step": 1034000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7407, "step": 1034100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7384, "step": 1034200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7399, "step": 1034300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7381, "step": 1034400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7373, "step": 1034500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7421, "step": 1034600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7391, "step": 1034700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7424, "step": 1034800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7405, "step": 1034900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7376, "step": 1035000 }, { "epoch": 1.06, "eval_loss": 0.696743905544281, "eval_runtime": 206.4144, "eval_samples_per_second": 242.231, "eval_steps_per_second": 1.894, "step": 1035000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7357, "step": 1035100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7365, "step": 1035200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7381, "step": 1035300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7462, "step": 1035400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7352, "step": 1035500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7346, "step": 1035600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7379, "step": 1035700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.747, "step": 1035800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7416, "step": 1035900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7395, "step": 1036000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7386, "step": 1036100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7421, "step": 1036200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7364, "step": 1036300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7389, "step": 1036400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7486, "step": 1036500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7406, "step": 1036600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7365, "step": 1036700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7438, "step": 1036800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7446, "step": 1036900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.738, "step": 1037000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.74, "step": 1037100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.744, "step": 1037200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7406, "step": 1037300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7403, "step": 1037400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7474, "step": 1037500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7316, "step": 1037600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7518, "step": 1037700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7468, "step": 1037800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7448, "step": 1037900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7467, "step": 1038000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7332, "step": 1038100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7472, "step": 1038200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7368, "step": 1038300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7453, "step": 1038400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7441, "step": 1038500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7382, "step": 1038600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7407, "step": 1038700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7374, "step": 1038800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7374, "step": 1038900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7387, "step": 1039000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7452, "step": 1039100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7344, "step": 1039200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7358, "step": 1039300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7421, "step": 1039400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7329, "step": 1039500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7323, "step": 1039600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7436, "step": 1039700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7416, "step": 1039800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7409, "step": 1039900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.745, "step": 1040000 }, { "epoch": 1.06, "eval_loss": 0.6981737613677979, "eval_runtime": 207.5374, "eval_samples_per_second": 240.92, "eval_steps_per_second": 1.884, "step": 1040000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7378, "step": 1040100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7391, "step": 1040200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7423, "step": 1040300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7443, "step": 1040400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7336, "step": 1040500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7365, "step": 1040600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7359, "step": 1040700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7386, "step": 1040800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7298, "step": 1040900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7429, "step": 1041000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7471, "step": 1041100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7439, "step": 1041200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7403, "step": 1041300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7429, "step": 1041400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7377, "step": 1041500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7386, "step": 1041600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7346, "step": 1041700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7329, "step": 1041800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.734, "step": 1041900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7392, "step": 1042000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7294, "step": 1042100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7398, "step": 1042200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7371, "step": 1042300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7434, "step": 1042400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7442, "step": 1042500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7465, "step": 1042600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.737, "step": 1042700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7477, "step": 1042800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.741, "step": 1042900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.74, "step": 1043000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7378, "step": 1043100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7403, "step": 1043200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7498, "step": 1043300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7406, "step": 1043400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7386, "step": 1043500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7477, "step": 1043600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7452, "step": 1043700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7367, "step": 1043800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7399, "step": 1043900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7449, "step": 1044000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7494, "step": 1044100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7423, "step": 1044200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7306, "step": 1044300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7393, "step": 1044400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7403, "step": 1044500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7343, "step": 1044600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7431, "step": 1044700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7337, "step": 1044800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.745, "step": 1044900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7412, "step": 1045000 }, { "epoch": 1.06, "eval_loss": 0.69672030210495, "eval_runtime": 207.4695, "eval_samples_per_second": 240.999, "eval_steps_per_second": 1.885, "step": 1045000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7434, "step": 1045100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7424, "step": 1045200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.737, "step": 1045300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7408, "step": 1045400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7357, "step": 1045500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.738, "step": 1045600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7361, "step": 1045700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.738, "step": 1045800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7445, "step": 1045900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7441, "step": 1046000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7417, "step": 1046100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7396, "step": 1046200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7401, "step": 1046300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7392, "step": 1046400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7413, "step": 1046500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7452, "step": 1046600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.736, "step": 1046700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.742, "step": 1046800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7399, "step": 1046900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7354, "step": 1047000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.737, "step": 1047100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7466, "step": 1047200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7408, "step": 1047300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7304, "step": 1047400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7416, "step": 1047500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7413, "step": 1047600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7399, "step": 1047700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7427, "step": 1047800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7357, "step": 1047900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7402, "step": 1048000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7297, "step": 1048100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7422, "step": 1048200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.743, "step": 1048300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7384, "step": 1048400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7475, "step": 1048500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7456, "step": 1048600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7412, "step": 1048700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7349, "step": 1048800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7421, "step": 1048900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7375, "step": 1049000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7342, "step": 1049100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7407, "step": 1049200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.742, "step": 1049300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7424, "step": 1049400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7326, "step": 1049500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7371, "step": 1049600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7368, "step": 1049700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.738, "step": 1049800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7367, "step": 1049900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7432, "step": 1050000 }, { "epoch": 1.06, "eval_loss": 0.6958229541778564, "eval_runtime": 207.4587, "eval_samples_per_second": 241.012, "eval_steps_per_second": 1.885, "step": 1050000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7355, "step": 1050100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7431, "step": 1050200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7382, "step": 1050300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7368, "step": 1050400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7383, "step": 1050500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.736, "step": 1050600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.73, "step": 1050700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7343, "step": 1050800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7427, "step": 1050900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7387, "step": 1051000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7387, "step": 1051100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7416, "step": 1051200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.736, "step": 1051300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7409, "step": 1051400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7421, "step": 1051500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7498, "step": 1051600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7298, "step": 1051700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7369, "step": 1051800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7407, "step": 1051900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7361, "step": 1052000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7419, "step": 1052100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.745, "step": 1052200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7389, "step": 1052300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7393, "step": 1052400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7492, "step": 1052500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.739, "step": 1052600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7371, "step": 1052700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7493, "step": 1052800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7386, "step": 1052900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7424, "step": 1053000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.738, "step": 1053100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7345, "step": 1053200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7344, "step": 1053300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7392, "step": 1053400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.741, "step": 1053500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.733, "step": 1053600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7417, "step": 1053700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7292, "step": 1053800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7353, "step": 1053900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7429, "step": 1054000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7421, "step": 1054100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7413, "step": 1054200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7379, "step": 1054300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7321, "step": 1054400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7425, "step": 1054500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7421, "step": 1054600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7346, "step": 1054700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7317, "step": 1054800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7394, "step": 1054900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7335, "step": 1055000 }, { "epoch": 1.06, "eval_loss": 0.6936817765235901, "eval_runtime": 206.5774, "eval_samples_per_second": 242.04, "eval_steps_per_second": 1.893, "step": 1055000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7351, "step": 1055100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7501, "step": 1055200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7323, "step": 1055300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7454, "step": 1055400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7403, "step": 1055500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7323, "step": 1055600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7394, "step": 1055700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.741, "step": 1055800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7434, "step": 1055900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7397, "step": 1056000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7366, "step": 1056100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7431, "step": 1056200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7396, "step": 1056300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7344, "step": 1056400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7397, "step": 1056500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7421, "step": 1056600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7369, "step": 1056700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7345, "step": 1056800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7431, "step": 1056900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7423, "step": 1057000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7476, "step": 1057100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7394, "step": 1057200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7417, "step": 1057300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7448, "step": 1057400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7426, "step": 1057500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7373, "step": 1057600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7398, "step": 1057700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7339, "step": 1057800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.745, "step": 1057900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7357, "step": 1058000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7412, "step": 1058100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7319, "step": 1058200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7382, "step": 1058300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7525, "step": 1058400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7433, "step": 1058500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7414, "step": 1058600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7358, "step": 1058700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7293, "step": 1058800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7354, "step": 1058900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7484, "step": 1059000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7421, "step": 1059100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7415, "step": 1059200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7439, "step": 1059300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7327, "step": 1059400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7383, "step": 1059500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7418, "step": 1059600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7386, "step": 1059700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7406, "step": 1059800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7361, "step": 1059900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7367, "step": 1060000 }, { "epoch": 1.06, "eval_loss": 0.6966774463653564, "eval_runtime": 205.9379, "eval_samples_per_second": 242.792, "eval_steps_per_second": 1.899, "step": 1060000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7478, "step": 1060100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.743, "step": 1060200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7433, "step": 1060300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7385, "step": 1060400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7483, "step": 1060500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7462, "step": 1060600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7514, "step": 1060700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.742, "step": 1060800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7302, "step": 1060900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7443, "step": 1061000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7412, "step": 1061100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7417, "step": 1061200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.741, "step": 1061300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7446, "step": 1061400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7432, "step": 1061500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7354, "step": 1061600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7382, "step": 1061700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7374, "step": 1061800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7436, "step": 1061900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7509, "step": 1062000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7411, "step": 1062100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.737, "step": 1062200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7336, "step": 1062300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7422, "step": 1062400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7373, "step": 1062500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7407, "step": 1062600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7442, "step": 1062700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.74, "step": 1062800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7343, "step": 1062900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7418, "step": 1063000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7405, "step": 1063100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7332, "step": 1063200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7426, "step": 1063300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7392, "step": 1063400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.739, "step": 1063500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7392, "step": 1063600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7377, "step": 1063700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7315, "step": 1063800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7341, "step": 1063900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7453, "step": 1064000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7387, "step": 1064100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7393, "step": 1064200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7419, "step": 1064300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7354, "step": 1064400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7439, "step": 1064500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.74, "step": 1064600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7392, "step": 1064700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7407, "step": 1064800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7427, "step": 1064900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7457, "step": 1065000 }, { "epoch": 1.06, "eval_loss": 0.6973162293434143, "eval_runtime": 207.8802, "eval_samples_per_second": 240.523, "eval_steps_per_second": 1.881, "step": 1065000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.744, "step": 1065100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7423, "step": 1065200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7326, "step": 1065300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7448, "step": 1065400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7401, "step": 1065500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7445, "step": 1065600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7334, "step": 1065700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7393, "step": 1065800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7387, "step": 1065900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7442, "step": 1066000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7481, "step": 1066100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7499, "step": 1066200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7386, "step": 1066300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7347, "step": 1066400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7412, "step": 1066500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7334, "step": 1066600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7404, "step": 1066700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7433, "step": 1066800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7493, "step": 1066900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7358, "step": 1067000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7391, "step": 1067100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7397, "step": 1067200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7429, "step": 1067300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7392, "step": 1067400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7385, "step": 1067500 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7355, "step": 1067600 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7409, "step": 1067700 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7415, "step": 1067800 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7315, "step": 1067900 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7412, "step": 1068000 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7439, "step": 1068100 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.741, "step": 1068200 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7335, "step": 1068300 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7401, "step": 1068400 }, { "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.7401, "step": 1068500 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7347, "step": 1068600 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7409, "step": 1068700 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7488, "step": 1068800 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7434, "step": 1068900 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7448, "step": 1069000 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7452, "step": 1069100 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7438, "step": 1069200 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7406, "step": 1069300 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.741, "step": 1069400 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7452, "step": 1069500 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7344, "step": 1069600 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7435, "step": 1069700 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7358, "step": 1069800 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.74, "step": 1069900 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.737, "step": 1070000 }, { "epoch": 1.07, "eval_loss": 0.6998035311698914, "eval_runtime": 205.4459, "eval_samples_per_second": 243.373, "eval_steps_per_second": 1.903, "step": 1070000 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7441, "step": 1070100 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7326, "step": 1070200 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7414, "step": 1070300 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7344, "step": 1070400 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7354, "step": 1070500 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7413, "step": 1070600 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7467, "step": 1070700 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7338, "step": 1070800 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7431, "step": 1070900 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7408, "step": 1071000 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7364, "step": 1071100 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7402, "step": 1071200 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7464, "step": 1071300 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7377, "step": 1071400 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7283, "step": 1071500 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7419, "step": 1071600 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7441, "step": 1071700 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7377, "step": 1071800 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7447, "step": 1071900 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7418, "step": 1072000 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7455, "step": 1072100 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.736, "step": 1072200 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7462, "step": 1072300 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7351, "step": 1072400 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7445, "step": 1072500 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7472, "step": 1072600 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.743, "step": 1072700 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7456, "step": 1072800 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7412, "step": 1072900 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7386, "step": 1073000 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7334, "step": 1073100 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7437, "step": 1073200 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7368, "step": 1073300 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7382, "step": 1073400 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7411, "step": 1073500 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7374, "step": 1073600 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7409, "step": 1073700 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7331, "step": 1073800 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.739, "step": 1073900 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7367, "step": 1074000 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7383, "step": 1074100 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7395, "step": 1074200 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7378, "step": 1074300 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7468, "step": 1074400 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7455, "step": 1074500 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7394, "step": 1074600 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7395, "step": 1074700 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7404, "step": 1074800 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7464, "step": 1074900 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7453, "step": 1075000 }, { "epoch": 1.07, "eval_loss": 0.697563886642456, "eval_runtime": 211.4095, "eval_samples_per_second": 236.508, "eval_steps_per_second": 1.849, "step": 1075000 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7388, "step": 1075100 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7381, "step": 1075200 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7388, "step": 1075300 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7452, "step": 1075400 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7481, "step": 1075500 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7375, "step": 1075600 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7397, "step": 1075700 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7382, "step": 1075800 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7362, "step": 1075900 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7358, "step": 1076000 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7441, "step": 1076100 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7345, "step": 1076200 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7376, "step": 1076300 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7371, "step": 1076400 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7429, "step": 1076500 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.742, "step": 1076600 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7353, "step": 1076700 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7432, "step": 1076800 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7427, "step": 1076900 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7414, "step": 1077000 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7361, "step": 1077100 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7419, "step": 1077200 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7344, "step": 1077300 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7419, "step": 1077400 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7369, "step": 1077500 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7437, "step": 1077600 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7322, "step": 1077700 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7454, "step": 1077800 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.739, "step": 1077900 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7462, "step": 1078000 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7467, "step": 1078100 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7436, "step": 1078200 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7368, "step": 1078300 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7373, "step": 1078400 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7481, "step": 1078500 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.74, "step": 1078600 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7361, "step": 1078700 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7393, "step": 1078800 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7384, "step": 1078900 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7381, "step": 1079000 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7446, "step": 1079100 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7465, "step": 1079200 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7346, "step": 1079300 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7348, "step": 1079400 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7416, "step": 1079500 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7382, "step": 1079600 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7489, "step": 1079700 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7334, "step": 1079800 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7319, "step": 1079900 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7428, "step": 1080000 }, { "epoch": 1.07, "eval_loss": 0.6988189816474915, "eval_runtime": 207.2542, "eval_samples_per_second": 241.25, "eval_steps_per_second": 1.887, "step": 1080000 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.735, "step": 1080100 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7394, "step": 1080200 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7371, "step": 1080300 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7447, "step": 1080400 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7382, "step": 1080500 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7415, "step": 1080600 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7425, "step": 1080700 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7366, "step": 1080800 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.737, "step": 1080900 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7406, "step": 1081000 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.739, "step": 1081100 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7458, "step": 1081200 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7368, "step": 1081300 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7478, "step": 1081400 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7414, "step": 1081500 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7445, "step": 1081600 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7404, "step": 1081700 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7413, "step": 1081800 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7421, "step": 1081900 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.734, "step": 1082000 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7459, "step": 1082100 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7358, "step": 1082200 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7399, "step": 1082300 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7406, "step": 1082400 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7347, "step": 1082500 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7371, "step": 1082600 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7451, "step": 1082700 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7368, "step": 1082800 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7369, "step": 1082900 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7428, "step": 1083000 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7385, "step": 1083100 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7431, "step": 1083200 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7372, "step": 1083300 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7404, "step": 1083400 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7367, "step": 1083500 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7354, "step": 1083600 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7429, "step": 1083700 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7468, "step": 1083800 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7389, "step": 1083900 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7374, "step": 1084000 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.743, "step": 1084100 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7415, "step": 1084200 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7394, "step": 1084300 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.742, "step": 1084400 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7434, "step": 1084500 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7353, "step": 1084600 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7337, "step": 1084700 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7435, "step": 1084800 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7422, "step": 1084900 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7345, "step": 1085000 }, { "epoch": 1.07, "eval_loss": 0.6958624124526978, "eval_runtime": 206.4977, "eval_samples_per_second": 242.133, "eval_steps_per_second": 1.893, "step": 1085000 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7327, "step": 1085100 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7492, "step": 1085200 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7361, "step": 1085300 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7472, "step": 1085400 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7398, "step": 1085500 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7326, "step": 1085600 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7425, "step": 1085700 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7385, "step": 1085800 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7388, "step": 1085900 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7396, "step": 1086000 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7401, "step": 1086100 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.741, "step": 1086200 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7424, "step": 1086300 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7419, "step": 1086400 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7377, "step": 1086500 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7373, "step": 1086600 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7413, "step": 1086700 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7414, "step": 1086800 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7411, "step": 1086900 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7392, "step": 1087000 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7386, "step": 1087100 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7421, "step": 1087200 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7375, "step": 1087300 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7331, "step": 1087400 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7481, "step": 1087500 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7343, "step": 1087600 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7457, "step": 1087700 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7357, "step": 1087800 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7459, "step": 1087900 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7364, "step": 1088000 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7385, "step": 1088100 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7404, "step": 1088200 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7419, "step": 1088300 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7435, "step": 1088400 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7368, "step": 1088500 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7352, "step": 1088600 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7409, "step": 1088700 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7394, "step": 1088800 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7444, "step": 1088900 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7407, "step": 1089000 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7437, "step": 1089100 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7333, "step": 1089200 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7389, "step": 1089300 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7374, "step": 1089400 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7407, "step": 1089500 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7382, "step": 1089600 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7406, "step": 1089700 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7401, "step": 1089800 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7303, "step": 1089900 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7422, "step": 1090000 }, { "epoch": 1.07, "eval_loss": 0.6945422887802124, "eval_runtime": 205.0592, "eval_samples_per_second": 243.832, "eval_steps_per_second": 1.907, "step": 1090000 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7389, "step": 1090100 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7356, "step": 1090200 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7507, "step": 1090300 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7411, "step": 1090400 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7447, "step": 1090500 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7347, "step": 1090600 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7378, "step": 1090700 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7387, "step": 1090800 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7336, "step": 1090900 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7411, "step": 1091000 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7407, "step": 1091100 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7342, "step": 1091200 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7338, "step": 1091300 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7305, "step": 1091400 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7449, "step": 1091500 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7363, "step": 1091600 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7443, "step": 1091700 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7426, "step": 1091800 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7448, "step": 1091900 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7388, "step": 1092000 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7405, "step": 1092100 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7352, "step": 1092200 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7401, "step": 1092300 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7366, "step": 1092400 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7415, "step": 1092500 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7366, "step": 1092600 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7319, "step": 1092700 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7435, "step": 1092800 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7376, "step": 1092900 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7372, "step": 1093000 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7307, "step": 1093100 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7383, "step": 1093200 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7374, "step": 1093300 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7344, "step": 1093400 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7429, "step": 1093500 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7413, "step": 1093600 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7338, "step": 1093700 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7427, "step": 1093800 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7388, "step": 1093900 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7393, "step": 1094000 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7432, "step": 1094100 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7372, "step": 1094200 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7381, "step": 1094300 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7372, "step": 1094400 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7504, "step": 1094500 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7376, "step": 1094600 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7433, "step": 1094700 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7415, "step": 1094800 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7363, "step": 1094900 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7387, "step": 1095000 }, { "epoch": 1.07, "eval_loss": 0.6956625580787659, "eval_runtime": 205.1057, "eval_samples_per_second": 243.777, "eval_steps_per_second": 1.906, "step": 1095000 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7404, "step": 1095100 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7412, "step": 1095200 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7423, "step": 1095300 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.744, "step": 1095400 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7391, "step": 1095500 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7423, "step": 1095600 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.736, "step": 1095700 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7364, "step": 1095800 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7416, "step": 1095900 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.739, "step": 1096000 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7328, "step": 1096100 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7415, "step": 1096200 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7429, "step": 1096300 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7412, "step": 1096400 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7432, "step": 1096500 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7327, "step": 1096600 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7371, "step": 1096700 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7455, "step": 1096800 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7368, "step": 1096900 }, { "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.7427, "step": 1097000 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7354, "step": 1097100 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7441, "step": 1097200 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7364, "step": 1097300 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7413, "step": 1097400 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7428, "step": 1097500 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7402, "step": 1097600 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7392, "step": 1097700 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7326, "step": 1097800 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7385, "step": 1097900 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7356, "step": 1098000 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7374, "step": 1098100 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7322, "step": 1098200 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7402, "step": 1098300 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7379, "step": 1098400 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7316, "step": 1098500 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7348, "step": 1098600 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7379, "step": 1098700 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7417, "step": 1098800 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7305, "step": 1098900 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7362, "step": 1099000 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7443, "step": 1099100 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7333, "step": 1099200 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7443, "step": 1099300 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7335, "step": 1099400 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7405, "step": 1099500 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.74, "step": 1099600 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7389, "step": 1099700 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7343, "step": 1099800 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7472, "step": 1099900 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7377, "step": 1100000 }, { "epoch": 2.0, "eval_loss": 0.6958077549934387, "eval_runtime": 205.2026, "eval_samples_per_second": 243.662, "eval_steps_per_second": 1.905, "step": 1100000 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7351, "step": 1100100 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7286, "step": 1100200 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7371, "step": 1100300 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7451, "step": 1100400 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7381, "step": 1100500 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7334, "step": 1100600 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7349, "step": 1100700 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7399, "step": 1100800 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7506, "step": 1100900 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.742, "step": 1101000 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7438, "step": 1101100 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7433, "step": 1101200 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7311, "step": 1101300 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.733, "step": 1101400 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7338, "step": 1101500 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.736, "step": 1101600 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7386, "step": 1101700 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7413, "step": 1101800 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7429, "step": 1101900 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7322, "step": 1102000 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.741, "step": 1102100 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7346, "step": 1102200 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7366, "step": 1102300 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.738, "step": 1102400 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7407, "step": 1102500 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7412, "step": 1102600 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7354, "step": 1102700 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7375, "step": 1102800 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7412, "step": 1102900 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7333, "step": 1103000 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7423, "step": 1103100 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7371, "step": 1103200 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7314, "step": 1103300 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7319, "step": 1103400 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7473, "step": 1103500 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7389, "step": 1103600 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.733, "step": 1103700 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7346, "step": 1103800 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7434, "step": 1103900 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.739, "step": 1104000 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7373, "step": 1104100 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7414, "step": 1104200 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7422, "step": 1104300 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7366, "step": 1104400 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7435, "step": 1104500 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7334, "step": 1104600 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7436, "step": 1104700 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7435, "step": 1104800 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7357, "step": 1104900 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7351, "step": 1105000 }, { "epoch": 2.0, "eval_loss": 0.6972557902336121, "eval_runtime": 204.8484, "eval_samples_per_second": 244.083, "eval_steps_per_second": 1.909, "step": 1105000 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7471, "step": 1105100 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.747, "step": 1105200 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7434, "step": 1105300 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7365, "step": 1105400 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7405, "step": 1105500 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7391, "step": 1105600 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7423, "step": 1105700 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7334, "step": 1105800 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7358, "step": 1105900 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7317, "step": 1106000 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7353, "step": 1106100 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7381, "step": 1106200 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7387, "step": 1106300 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7404, "step": 1106400 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7365, "step": 1106500 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7404, "step": 1106600 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.738, "step": 1106700 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7408, "step": 1106800 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.746, "step": 1106900 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7362, "step": 1107000 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7466, "step": 1107100 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7348, "step": 1107200 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7458, "step": 1107300 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7418, "step": 1107400 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7377, "step": 1107500 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7404, "step": 1107600 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.743, "step": 1107700 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7417, "step": 1107800 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7373, "step": 1107900 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7362, "step": 1108000 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7324, "step": 1108100 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7376, "step": 1108200 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7358, "step": 1108300 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7406, "step": 1108400 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.735, "step": 1108500 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7334, "step": 1108600 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.742, "step": 1108700 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7398, "step": 1108800 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7419, "step": 1108900 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7389, "step": 1109000 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7401, "step": 1109100 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7366, "step": 1109200 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7314, "step": 1109300 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7363, "step": 1109400 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7401, "step": 1109500 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7337, "step": 1109600 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7345, "step": 1109700 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7365, "step": 1109800 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7398, "step": 1109900 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7326, "step": 1110000 }, { "epoch": 2.0, "eval_loss": 0.6954222917556763, "eval_runtime": 204.2756, "eval_samples_per_second": 244.767, "eval_steps_per_second": 1.914, "step": 1110000 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7297, "step": 1110100 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7431, "step": 1110200 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7428, "step": 1110300 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7386, "step": 1110400 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7356, "step": 1110500 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7482, "step": 1110600 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7387, "step": 1110700 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.739, "step": 1110800 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7383, "step": 1110900 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7404, "step": 1111000 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7382, "step": 1111100 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7422, "step": 1111200 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7411, "step": 1111300 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.739, "step": 1111400 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7418, "step": 1111500 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7341, "step": 1111600 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7403, "step": 1111700 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7453, "step": 1111800 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.746, "step": 1111900 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7343, "step": 1112000 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7417, "step": 1112100 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7424, "step": 1112200 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7453, "step": 1112300 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7359, "step": 1112400 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7384, "step": 1112500 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7428, "step": 1112600 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7317, "step": 1112700 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.736, "step": 1112800 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7372, "step": 1112900 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7455, "step": 1113000 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7385, "step": 1113100 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.729, "step": 1113200 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7395, "step": 1113300 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7396, "step": 1113400 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7406, "step": 1113500 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.743, "step": 1113600 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7332, "step": 1113700 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7442, "step": 1113800 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7375, "step": 1113900 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7315, "step": 1114000 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7451, "step": 1114100 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7379, "step": 1114200 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7386, "step": 1114300 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.733, "step": 1114400 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7376, "step": 1114500 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7425, "step": 1114600 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7442, "step": 1114700 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.734, "step": 1114800 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7432, "step": 1114900 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7308, "step": 1115000 }, { "epoch": 2.0, "eval_loss": 0.6944382786750793, "eval_runtime": 204.8261, "eval_samples_per_second": 244.109, "eval_steps_per_second": 1.909, "step": 1115000 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7412, "step": 1115100 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7352, "step": 1115200 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7448, "step": 1115300 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7333, "step": 1115400 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7306, "step": 1115500 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7424, "step": 1115600 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.735, "step": 1115700 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7356, "step": 1115800 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7414, "step": 1115900 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7445, "step": 1116000 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7421, "step": 1116100 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7437, "step": 1116200 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.743, "step": 1116300 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7421, "step": 1116400 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7407, "step": 1116500 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7388, "step": 1116600 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7436, "step": 1116700 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7343, "step": 1116800 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7326, "step": 1116900 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7366, "step": 1117000 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7356, "step": 1117100 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7465, "step": 1117200 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7431, "step": 1117300 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7452, "step": 1117400 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7465, "step": 1117500 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7377, "step": 1117600 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7382, "step": 1117700 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7457, "step": 1117800 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7344, "step": 1117900 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7308, "step": 1118000 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7402, "step": 1118100 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7404, "step": 1118200 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7446, "step": 1118300 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7293, "step": 1118400 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7347, "step": 1118500 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7383, "step": 1118600 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7395, "step": 1118700 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7397, "step": 1118800 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7381, "step": 1118900 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7393, "step": 1119000 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7405, "step": 1119100 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7393, "step": 1119200 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7373, "step": 1119300 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7396, "step": 1119400 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7385, "step": 1119500 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7439, "step": 1119600 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7369, "step": 1119700 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7351, "step": 1119800 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7398, "step": 1119900 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7343, "step": 1120000 }, { "epoch": 2.0, "eval_loss": 0.6974844336509705, "eval_runtime": 206.0821, "eval_samples_per_second": 242.622, "eval_steps_per_second": 1.897, "step": 1120000 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7413, "step": 1120100 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7464, "step": 1120200 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7459, "step": 1120300 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7381, "step": 1120400 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7375, "step": 1120500 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7367, "step": 1120600 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7426, "step": 1120700 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7417, "step": 1120800 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7453, "step": 1120900 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7341, "step": 1121000 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7432, "step": 1121100 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7427, "step": 1121200 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7353, "step": 1121300 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7366, "step": 1121400 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7339, "step": 1121500 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7403, "step": 1121600 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7405, "step": 1121700 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7352, "step": 1121800 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7437, "step": 1121900 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7475, "step": 1122000 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7391, "step": 1122100 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7457, "step": 1122200 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7461, "step": 1122300 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7399, "step": 1122400 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7436, "step": 1122500 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7438, "step": 1122600 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7275, "step": 1122700 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7349, "step": 1122800 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7438, "step": 1122900 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7385, "step": 1123000 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7395, "step": 1123100 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.74, "step": 1123200 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7358, "step": 1123300 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7299, "step": 1123400 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7409, "step": 1123500 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7364, "step": 1123600 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7404, "step": 1123700 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7425, "step": 1123800 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7456, "step": 1123900 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7364, "step": 1124000 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7377, "step": 1124100 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7413, "step": 1124200 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7406, "step": 1124300 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7296, "step": 1124400 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7404, "step": 1124500 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7389, "step": 1124600 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7411, "step": 1124700 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7474, "step": 1124800 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7399, "step": 1124900 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7416, "step": 1125000 }, { "epoch": 2.0, "eval_loss": 0.6972522139549255, "eval_runtime": 204.9765, "eval_samples_per_second": 243.93, "eval_steps_per_second": 1.908, "step": 1125000 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7383, "step": 1125100 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7481, "step": 1125200 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7454, "step": 1125300 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7371, "step": 1125400 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7409, "step": 1125500 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.736, "step": 1125600 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7386, "step": 1125700 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.741, "step": 1125800 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7334, "step": 1125900 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7289, "step": 1126000 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7452, "step": 1126100 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7336, "step": 1126200 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.741, "step": 1126300 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.737, "step": 1126400 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.733, "step": 1126500 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7458, "step": 1126600 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7397, "step": 1126700 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7366, "step": 1126800 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7426, "step": 1126900 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7363, "step": 1127000 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7408, "step": 1127100 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7413, "step": 1127200 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7403, "step": 1127300 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7371, "step": 1127400 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7428, "step": 1127500 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7409, "step": 1127600 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.738, "step": 1127700 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7491, "step": 1127800 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7404, "step": 1127900 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.738, "step": 1128000 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.741, "step": 1128100 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7405, "step": 1128200 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7384, "step": 1128300 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.743, "step": 1128400 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.739, "step": 1128500 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7381, "step": 1128600 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7446, "step": 1128700 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7399, "step": 1128800 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7376, "step": 1128900 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7468, "step": 1129000 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7408, "step": 1129100 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.742, "step": 1129200 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7295, "step": 1129300 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7399, "step": 1129400 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7382, "step": 1129500 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.735, "step": 1129600 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7357, "step": 1129700 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7349, "step": 1129800 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.737, "step": 1129900 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7346, "step": 1130000 }, { "epoch": 2.0, "eval_loss": 0.6970519423484802, "eval_runtime": 208.5639, "eval_samples_per_second": 239.735, "eval_steps_per_second": 1.875, "step": 1130000 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7418, "step": 1130100 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7404, "step": 1130200 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7337, "step": 1130300 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7478, "step": 1130400 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7362, "step": 1130500 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7486, "step": 1130600 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.734, "step": 1130700 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7291, "step": 1130800 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7302, "step": 1130900 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7449, "step": 1131000 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7466, "step": 1131100 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7435, "step": 1131200 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7359, "step": 1131300 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7421, "step": 1131400 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7368, "step": 1131500 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7419, "step": 1131600 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7416, "step": 1131700 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7342, "step": 1131800 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7439, "step": 1131900 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7435, "step": 1132000 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7386, "step": 1132100 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7391, "step": 1132200 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.742, "step": 1132300 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.738, "step": 1132400 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7403, "step": 1132500 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7366, "step": 1132600 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7401, "step": 1132700 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7424, "step": 1132800 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7405, "step": 1132900 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7361, "step": 1133000 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7434, "step": 1133100 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7436, "step": 1133200 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7468, "step": 1133300 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7438, "step": 1133400 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7433, "step": 1133500 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7412, "step": 1133600 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.743, "step": 1133700 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7426, "step": 1133800 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7416, "step": 1133900 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.735, "step": 1134000 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7357, "step": 1134100 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7359, "step": 1134200 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7372, "step": 1134300 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7488, "step": 1134400 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7379, "step": 1134500 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7384, "step": 1134600 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7342, "step": 1134700 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7472, "step": 1134800 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7424, "step": 1134900 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7441, "step": 1135000 }, { "epoch": 2.0, "eval_loss": 0.697513222694397, "eval_runtime": 203.9175, "eval_samples_per_second": 245.197, "eval_steps_per_second": 1.917, "step": 1135000 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.743, "step": 1135100 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7362, "step": 1135200 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7441, "step": 1135300 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.732, "step": 1135400 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7422, "step": 1135500 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7338, "step": 1135600 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.741, "step": 1135700 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7363, "step": 1135800 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7352, "step": 1135900 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7451, "step": 1136000 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7287, "step": 1136100 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7508, "step": 1136200 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7481, "step": 1136300 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7365, "step": 1136400 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7343, "step": 1136500 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7335, "step": 1136600 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7389, "step": 1136700 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7348, "step": 1136800 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7366, "step": 1136900 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.7419, "step": 1137000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.738, "step": 1137100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7356, "step": 1137200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7344, "step": 1137300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7427, "step": 1137400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7387, "step": 1137500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7392, "step": 1137600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.742, "step": 1137700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7364, "step": 1137800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.74, "step": 1137900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.741, "step": 1138000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7449, "step": 1138100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7408, "step": 1138200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7381, "step": 1138300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7405, "step": 1138400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7394, "step": 1138500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7297, "step": 1138600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7356, "step": 1138700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7431, "step": 1138800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7359, "step": 1138900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7433, "step": 1139000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7341, "step": 1139100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7439, "step": 1139200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7441, "step": 1139300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7352, "step": 1139400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7405, "step": 1139500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7395, "step": 1139600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7377, "step": 1139700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7417, "step": 1139800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7326, "step": 1139900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7472, "step": 1140000 }, { "epoch": 2.01, "eval_loss": 0.6973596215248108, "eval_runtime": 206.0928, "eval_samples_per_second": 242.609, "eval_steps_per_second": 1.897, "step": 1140000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7397, "step": 1140100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7391, "step": 1140200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7375, "step": 1140300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7427, "step": 1140400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7344, "step": 1140500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7413, "step": 1140600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7467, "step": 1140700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7469, "step": 1140800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7444, "step": 1140900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7388, "step": 1141000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7407, "step": 1141100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7346, "step": 1141200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7393, "step": 1141300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.733, "step": 1141400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7337, "step": 1141500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7438, "step": 1141600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7453, "step": 1141700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7455, "step": 1141800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7435, "step": 1141900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.738, "step": 1142000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.741, "step": 1142100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7363, "step": 1142200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7424, "step": 1142300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7393, "step": 1142400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7385, "step": 1142500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7362, "step": 1142600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7372, "step": 1142700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7429, "step": 1142800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.74, "step": 1142900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7399, "step": 1143000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7351, "step": 1143100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7322, "step": 1143200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7444, "step": 1143300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7372, "step": 1143400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.74, "step": 1143500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7415, "step": 1143600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7392, "step": 1143700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7375, "step": 1143800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7307, "step": 1143900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7391, "step": 1144000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7417, "step": 1144100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7405, "step": 1144200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.735, "step": 1144300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7428, "step": 1144400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7309, "step": 1144500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7407, "step": 1144600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7427, "step": 1144700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.738, "step": 1144800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7453, "step": 1144900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7378, "step": 1145000 }, { "epoch": 2.01, "eval_loss": 0.6971275210380554, "eval_runtime": 204.9142, "eval_samples_per_second": 244.005, "eval_steps_per_second": 1.908, "step": 1145000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7419, "step": 1145100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7389, "step": 1145200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7442, "step": 1145300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.74, "step": 1145400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7444, "step": 1145500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7376, "step": 1145600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7451, "step": 1145700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7452, "step": 1145800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7329, "step": 1145900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.745, "step": 1146000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7393, "step": 1146100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7431, "step": 1146200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7396, "step": 1146300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7318, "step": 1146400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7457, "step": 1146500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7363, "step": 1146600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7348, "step": 1146700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7489, "step": 1146800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7324, "step": 1146900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7428, "step": 1147000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7509, "step": 1147100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7473, "step": 1147200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7451, "step": 1147300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7373, "step": 1147400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7328, "step": 1147500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7398, "step": 1147600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7336, "step": 1147700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7436, "step": 1147800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7397, "step": 1147900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7394, "step": 1148000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7409, "step": 1148100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7387, "step": 1148200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7375, "step": 1148300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7493, "step": 1148400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7395, "step": 1148500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.744, "step": 1148600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7351, "step": 1148700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7367, "step": 1148800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7434, "step": 1148900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.744, "step": 1149000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7419, "step": 1149100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7369, "step": 1149200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7379, "step": 1149300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7386, "step": 1149400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7472, "step": 1149500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7434, "step": 1149600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7425, "step": 1149700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7401, "step": 1149800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7407, "step": 1149900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7363, "step": 1150000 }, { "epoch": 2.01, "eval_loss": 0.6946325898170471, "eval_runtime": 203.4327, "eval_samples_per_second": 245.782, "eval_steps_per_second": 1.922, "step": 1150000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7403, "step": 1150100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7445, "step": 1150200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.744, "step": 1150300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7364, "step": 1150400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7393, "step": 1150500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7438, "step": 1150600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7376, "step": 1150700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.741, "step": 1150800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7333, "step": 1150900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7521, "step": 1151000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7355, "step": 1151100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7426, "step": 1151200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7414, "step": 1151300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7346, "step": 1151400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7366, "step": 1151500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7415, "step": 1151600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.736, "step": 1151700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7359, "step": 1151800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7354, "step": 1151900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.739, "step": 1152000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.737, "step": 1152100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7371, "step": 1152200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7439, "step": 1152300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7399, "step": 1152400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7365, "step": 1152500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7383, "step": 1152600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7377, "step": 1152700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7335, "step": 1152800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.735, "step": 1152900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7376, "step": 1153000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7449, "step": 1153100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7525, "step": 1153200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7413, "step": 1153300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7426, "step": 1153400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7381, "step": 1153500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7457, "step": 1153600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7373, "step": 1153700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7403, "step": 1153800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7352, "step": 1153900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7406, "step": 1154000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7442, "step": 1154100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7437, "step": 1154200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7382, "step": 1154300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7376, "step": 1154400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7389, "step": 1154500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7397, "step": 1154600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7323, "step": 1154700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7384, "step": 1154800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7381, "step": 1154900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7347, "step": 1155000 }, { "epoch": 2.01, "eval_loss": 0.6950593590736389, "eval_runtime": 204.9854, "eval_samples_per_second": 243.92, "eval_steps_per_second": 1.907, "step": 1155000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7406, "step": 1155100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7383, "step": 1155200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7453, "step": 1155300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.731, "step": 1155400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7315, "step": 1155500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7413, "step": 1155600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7367, "step": 1155700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7409, "step": 1155800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7364, "step": 1155900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7325, "step": 1156000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7365, "step": 1156100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7432, "step": 1156200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7409, "step": 1156300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7419, "step": 1156400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7359, "step": 1156500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7374, "step": 1156600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7398, "step": 1156700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7407, "step": 1156800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7483, "step": 1156900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7337, "step": 1157000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7431, "step": 1157100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7316, "step": 1157200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7437, "step": 1157300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7426, "step": 1157400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7414, "step": 1157500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7234, "step": 1157600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7386, "step": 1157700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7319, "step": 1157800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7378, "step": 1157900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7378, "step": 1158000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7368, "step": 1158100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7381, "step": 1158200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7398, "step": 1158300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7369, "step": 1158400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.731, "step": 1158500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7303, "step": 1158600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7334, "step": 1158700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7401, "step": 1158800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7398, "step": 1158900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7402, "step": 1159000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7406, "step": 1159100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7405, "step": 1159200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7381, "step": 1159300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7383, "step": 1159400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7374, "step": 1159500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.738, "step": 1159600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7422, "step": 1159700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7416, "step": 1159800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7387, "step": 1159900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7399, "step": 1160000 }, { "epoch": 2.01, "eval_loss": 0.6963341236114502, "eval_runtime": 207.6431, "eval_samples_per_second": 240.798, "eval_steps_per_second": 1.883, "step": 1160000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7384, "step": 1160100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7422, "step": 1160200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7385, "step": 1160300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.733, "step": 1160400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7396, "step": 1160500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7404, "step": 1160600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.741, "step": 1160700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7347, "step": 1160800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7409, "step": 1160900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7337, "step": 1161000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7364, "step": 1161100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7367, "step": 1161200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7358, "step": 1161300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7468, "step": 1161400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7392, "step": 1161500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7405, "step": 1161600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7371, "step": 1161700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7433, "step": 1161800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7398, "step": 1161900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7384, "step": 1162000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7405, "step": 1162100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7383, "step": 1162200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7445, "step": 1162300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7438, "step": 1162400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7378, "step": 1162500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7392, "step": 1162600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7408, "step": 1162700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7476, "step": 1162800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.736, "step": 1162900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7372, "step": 1163000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7404, "step": 1163100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7461, "step": 1163200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7362, "step": 1163300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7416, "step": 1163400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7436, "step": 1163500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7287, "step": 1163600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7411, "step": 1163700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7363, "step": 1163800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.734, "step": 1163900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7458, "step": 1164000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7446, "step": 1164100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7355, "step": 1164200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7422, "step": 1164300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7409, "step": 1164400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7433, "step": 1164500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7383, "step": 1164600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7364, "step": 1164700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7418, "step": 1164800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7317, "step": 1164900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7436, "step": 1165000 }, { "epoch": 2.01, "eval_loss": 0.6949604153633118, "eval_runtime": 204.0116, "eval_samples_per_second": 245.084, "eval_steps_per_second": 1.917, "step": 1165000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7324, "step": 1165100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7423, "step": 1165200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7464, "step": 1165300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7393, "step": 1165400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7481, "step": 1165500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7438, "step": 1165600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7394, "step": 1165700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7402, "step": 1165800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7427, "step": 1165900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7334, "step": 1166000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7399, "step": 1166100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7384, "step": 1166200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7342, "step": 1166300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.735, "step": 1166400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.736, "step": 1166500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7396, "step": 1166600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7365, "step": 1166700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7454, "step": 1166800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7443, "step": 1166900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7501, "step": 1167000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7388, "step": 1167100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7427, "step": 1167200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7385, "step": 1167300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7379, "step": 1167400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7373, "step": 1167500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7429, "step": 1167600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7384, "step": 1167700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7348, "step": 1167800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7336, "step": 1167900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.734, "step": 1168000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7365, "step": 1168100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7366, "step": 1168200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7353, "step": 1168300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7278, "step": 1168400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7379, "step": 1168500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7367, "step": 1168600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7335, "step": 1168700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7387, "step": 1168800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7362, "step": 1168900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7387, "step": 1169000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7367, "step": 1169100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7358, "step": 1169200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7422, "step": 1169300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7399, "step": 1169400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7368, "step": 1169500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.741, "step": 1169600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7292, "step": 1169700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7412, "step": 1169800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7418, "step": 1169900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7322, "step": 1170000 }, { "epoch": 2.01, "eval_loss": 0.694342315196991, "eval_runtime": 205.9534, "eval_samples_per_second": 242.773, "eval_steps_per_second": 1.898, "step": 1170000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7358, "step": 1170100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7388, "step": 1170200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7428, "step": 1170300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7428, "step": 1170400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7467, "step": 1170500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7352, "step": 1170600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7388, "step": 1170700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7354, "step": 1170800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.741, "step": 1170900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7326, "step": 1171000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7351, "step": 1171100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7297, "step": 1171200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7411, "step": 1171300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7438, "step": 1171400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7415, "step": 1171500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7434, "step": 1171600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7393, "step": 1171700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7526, "step": 1171800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7416, "step": 1171900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7341, "step": 1172000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7419, "step": 1172100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7419, "step": 1172200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.744, "step": 1172300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7352, "step": 1172400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7347, "step": 1172500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7384, "step": 1172600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7358, "step": 1172700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7356, "step": 1172800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7394, "step": 1172900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7372, "step": 1173000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7375, "step": 1173100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7405, "step": 1173200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7454, "step": 1173300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7278, "step": 1173400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7468, "step": 1173500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7283, "step": 1173600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7406, "step": 1173700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.728, "step": 1173800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7357, "step": 1173900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7415, "step": 1174000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7437, "step": 1174100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7473, "step": 1174200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7284, "step": 1174300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.741, "step": 1174400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7366, "step": 1174500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7355, "step": 1174600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7381, "step": 1174700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7328, "step": 1174800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7356, "step": 1174900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7448, "step": 1175000 }, { "epoch": 2.01, "eval_loss": 0.6968173980712891, "eval_runtime": 204.8877, "eval_samples_per_second": 244.036, "eval_steps_per_second": 1.908, "step": 1175000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7362, "step": 1175100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7403, "step": 1175200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7428, "step": 1175300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.732, "step": 1175400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7435, "step": 1175500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7351, "step": 1175600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7414, "step": 1175700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7399, "step": 1175800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7479, "step": 1175900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7411, "step": 1176000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7391, "step": 1176100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7323, "step": 1176200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7421, "step": 1176300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7444, "step": 1176400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7456, "step": 1176500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7399, "step": 1176600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7388, "step": 1176700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7434, "step": 1176800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7382, "step": 1176900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7379, "step": 1177000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7458, "step": 1177100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7425, "step": 1177200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.731, "step": 1177300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7426, "step": 1177400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7353, "step": 1177500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7379, "step": 1177600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.731, "step": 1177700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.731, "step": 1177800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.739, "step": 1177900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7453, "step": 1178000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7371, "step": 1178100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.743, "step": 1178200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7448, "step": 1178300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7323, "step": 1178400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7402, "step": 1178500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7389, "step": 1178600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7337, "step": 1178700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7331, "step": 1178800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7375, "step": 1178900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7331, "step": 1179000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7433, "step": 1179100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7341, "step": 1179200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7319, "step": 1179300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7442, "step": 1179400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7394, "step": 1179500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7383, "step": 1179600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7403, "step": 1179700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7374, "step": 1179800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.743, "step": 1179900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7408, "step": 1180000 }, { "epoch": 2.01, "eval_loss": 0.6931548714637756, "eval_runtime": 204.556, "eval_samples_per_second": 244.432, "eval_steps_per_second": 1.911, "step": 1180000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7345, "step": 1180100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7397, "step": 1180200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7415, "step": 1180300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7376, "step": 1180400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7352, "step": 1180500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7417, "step": 1180600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7369, "step": 1180700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7355, "step": 1180800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7391, "step": 1180900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7357, "step": 1181000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7346, "step": 1181100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7364, "step": 1181200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7363, "step": 1181300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7384, "step": 1181400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.74, "step": 1181500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7316, "step": 1181600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7392, "step": 1181700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7278, "step": 1181800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7375, "step": 1181900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7296, "step": 1182000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7391, "step": 1182100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7422, "step": 1182200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7391, "step": 1182300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7304, "step": 1182400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.737, "step": 1182500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7411, "step": 1182600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7379, "step": 1182700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7321, "step": 1182800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7467, "step": 1182900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7402, "step": 1183000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7408, "step": 1183100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7314, "step": 1183200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7464, "step": 1183300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7338, "step": 1183400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7429, "step": 1183500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.741, "step": 1183600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7345, "step": 1183700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.731, "step": 1183800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7383, "step": 1183900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7389, "step": 1184000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7386, "step": 1184100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7401, "step": 1184200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7405, "step": 1184300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7468, "step": 1184400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7359, "step": 1184500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7349, "step": 1184600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7434, "step": 1184700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7451, "step": 1184800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7412, "step": 1184900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7382, "step": 1185000 }, { "epoch": 2.01, "eval_loss": 0.6951209306716919, "eval_runtime": 201.2242, "eval_samples_per_second": 248.479, "eval_steps_per_second": 1.943, "step": 1185000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7323, "step": 1185100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7456, "step": 1185200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7353, "step": 1185300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7289, "step": 1185400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.744, "step": 1185500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7453, "step": 1185600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7365, "step": 1185700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7326, "step": 1185800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7383, "step": 1185900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7391, "step": 1186000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7428, "step": 1186100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7383, "step": 1186200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.738, "step": 1186300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7464, "step": 1186400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7383, "step": 1186500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7414, "step": 1186600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7396, "step": 1186700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7365, "step": 1186800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7361, "step": 1186900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.736, "step": 1187000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7342, "step": 1187100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7407, "step": 1187200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7493, "step": 1187300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7433, "step": 1187400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7381, "step": 1187500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7385, "step": 1187600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7373, "step": 1187700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7349, "step": 1187800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7406, "step": 1187900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7434, "step": 1188000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7343, "step": 1188100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7402, "step": 1188200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7508, "step": 1188300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7334, "step": 1188400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7326, "step": 1188500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7464, "step": 1188600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7403, "step": 1188700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7432, "step": 1188800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7416, "step": 1188900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7339, "step": 1189000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7392, "step": 1189100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7429, "step": 1189200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7374, "step": 1189300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7373, "step": 1189400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7347, "step": 1189500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7419, "step": 1189600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7438, "step": 1189700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7286, "step": 1189800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7412, "step": 1189900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.734, "step": 1190000 }, { "epoch": 2.01, "eval_loss": 0.6967108845710754, "eval_runtime": 205.9224, "eval_samples_per_second": 242.81, "eval_steps_per_second": 1.899, "step": 1190000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.74, "step": 1190100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.743, "step": 1190200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7405, "step": 1190300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7356, "step": 1190400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.737, "step": 1190500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7415, "step": 1190600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7412, "step": 1190700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7454, "step": 1190800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7361, "step": 1190900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7419, "step": 1191000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7401, "step": 1191100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7421, "step": 1191200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7347, "step": 1191300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7323, "step": 1191400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7391, "step": 1191500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7369, "step": 1191600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.734, "step": 1191700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.733, "step": 1191800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7392, "step": 1191900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7435, "step": 1192000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7342, "step": 1192100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.735, "step": 1192200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7311, "step": 1192300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7407, "step": 1192400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.735, "step": 1192500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7392, "step": 1192600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7408, "step": 1192700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7435, "step": 1192800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7365, "step": 1192900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.731, "step": 1193000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7424, "step": 1193100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7471, "step": 1193200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7388, "step": 1193300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7376, "step": 1193400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7507, "step": 1193500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7384, "step": 1193600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7476, "step": 1193700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7341, "step": 1193800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7397, "step": 1193900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7408, "step": 1194000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7459, "step": 1194100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7355, "step": 1194200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.74, "step": 1194300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7405, "step": 1194400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7386, "step": 1194500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.739, "step": 1194600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.741, "step": 1194700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7456, "step": 1194800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7387, "step": 1194900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7382, "step": 1195000 }, { "epoch": 2.01, "eval_loss": 0.695705771446228, "eval_runtime": 203.2356, "eval_samples_per_second": 246.02, "eval_steps_per_second": 1.924, "step": 1195000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7405, "step": 1195100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7458, "step": 1195200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7403, "step": 1195300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7413, "step": 1195400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7441, "step": 1195500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7442, "step": 1195600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7388, "step": 1195700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7407, "step": 1195800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7368, "step": 1195900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7411, "step": 1196000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7366, "step": 1196100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7453, "step": 1196200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7401, "step": 1196300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7362, "step": 1196400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.74, "step": 1196500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7433, "step": 1196600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7466, "step": 1196700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7421, "step": 1196800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7444, "step": 1196900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7425, "step": 1197000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7412, "step": 1197100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7316, "step": 1197200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7403, "step": 1197300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7302, "step": 1197400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7369, "step": 1197500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7335, "step": 1197600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7399, "step": 1197700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7324, "step": 1197800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7413, "step": 1197900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.728, "step": 1198000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.742, "step": 1198100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7369, "step": 1198200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7473, "step": 1198300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7393, "step": 1198400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7348, "step": 1198500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7449, "step": 1198600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7426, "step": 1198700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7434, "step": 1198800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.745, "step": 1198900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7437, "step": 1199000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7381, "step": 1199100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7446, "step": 1199200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7195, "step": 1199300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7357, "step": 1199400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7391, "step": 1199500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7401, "step": 1199600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7251, "step": 1199700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7459, "step": 1199800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7413, "step": 1199900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7372, "step": 1200000 }, { "epoch": 2.01, "eval_loss": 0.6952381134033203, "eval_runtime": 204.566, "eval_samples_per_second": 244.42, "eval_steps_per_second": 1.911, "step": 1200000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7405, "step": 1200100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7403, "step": 1200200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7406, "step": 1200300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.733, "step": 1200400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7425, "step": 1200500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7405, "step": 1200600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7373, "step": 1200700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7385, "step": 1200800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7376, "step": 1200900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7361, "step": 1201000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7409, "step": 1201100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7398, "step": 1201200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7433, "step": 1201300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7375, "step": 1201400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7379, "step": 1201500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7406, "step": 1201600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7445, "step": 1201700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7364, "step": 1201800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7444, "step": 1201900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7442, "step": 1202000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7391, "step": 1202100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7359, "step": 1202200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7431, "step": 1202300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.746, "step": 1202400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7427, "step": 1202500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.741, "step": 1202600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7354, "step": 1202700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7392, "step": 1202800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7357, "step": 1202900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7389, "step": 1203000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7403, "step": 1203100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.734, "step": 1203200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7382, "step": 1203300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7363, "step": 1203400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7414, "step": 1203500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7413, "step": 1203600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7387, "step": 1203700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7324, "step": 1203800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7413, "step": 1203900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7316, "step": 1204000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7388, "step": 1204100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7271, "step": 1204200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7449, "step": 1204300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7382, "step": 1204400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7352, "step": 1204500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7393, "step": 1204600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.739, "step": 1204700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.744, "step": 1204800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7376, "step": 1204900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.738, "step": 1205000 }, { "epoch": 2.01, "eval_loss": 0.6934911012649536, "eval_runtime": 204.7959, "eval_samples_per_second": 244.145, "eval_steps_per_second": 1.909, "step": 1205000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7345, "step": 1205100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7413, "step": 1205200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7446, "step": 1205300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7431, "step": 1205400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7417, "step": 1205500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7429, "step": 1205600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.744, "step": 1205700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7455, "step": 1205800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7446, "step": 1205900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.739, "step": 1206000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7356, "step": 1206100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7328, "step": 1206200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7454, "step": 1206300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7372, "step": 1206400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.748, "step": 1206500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7385, "step": 1206600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7355, "step": 1206700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7376, "step": 1206800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7413, "step": 1206900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7387, "step": 1207000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7396, "step": 1207100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7399, "step": 1207200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.736, "step": 1207300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7402, "step": 1207400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7445, "step": 1207500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7366, "step": 1207600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7326, "step": 1207700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7368, "step": 1207800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7396, "step": 1207900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7361, "step": 1208000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7435, "step": 1208100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7394, "step": 1208200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.734, "step": 1208300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7371, "step": 1208400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7434, "step": 1208500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7419, "step": 1208600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7387, "step": 1208700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7358, "step": 1208800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7417, "step": 1208900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7436, "step": 1209000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7413, "step": 1209100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7473, "step": 1209200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7383, "step": 1209300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7334, "step": 1209400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.735, "step": 1209500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7431, "step": 1209600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7349, "step": 1209700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7421, "step": 1209800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7327, "step": 1209900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7297, "step": 1210000 }, { "epoch": 2.01, "eval_loss": 0.6941564679145813, "eval_runtime": 208.372, "eval_samples_per_second": 239.955, "eval_steps_per_second": 1.876, "step": 1210000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7428, "step": 1210100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7321, "step": 1210200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7345, "step": 1210300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7458, "step": 1210400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7386, "step": 1210500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7421, "step": 1210600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7416, "step": 1210700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7336, "step": 1210800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7438, "step": 1210900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7417, "step": 1211000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7308, "step": 1211100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7362, "step": 1211200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7459, "step": 1211300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7409, "step": 1211400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7407, "step": 1211500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7381, "step": 1211600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7341, "step": 1211700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7466, "step": 1211800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7421, "step": 1211900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7471, "step": 1212000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7485, "step": 1212100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7399, "step": 1212200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7291, "step": 1212300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7321, "step": 1212400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7398, "step": 1212500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7459, "step": 1212600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7331, "step": 1212700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7371, "step": 1212800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7449, "step": 1212900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7459, "step": 1213000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.738, "step": 1213100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7275, "step": 1213200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7372, "step": 1213300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7458, "step": 1213400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.734, "step": 1213500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7399, "step": 1213600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7355, "step": 1213700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7402, "step": 1213800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7348, "step": 1213900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7337, "step": 1214000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7358, "step": 1214100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7432, "step": 1214200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7454, "step": 1214300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7426, "step": 1214400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7389, "step": 1214500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7345, "step": 1214600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.739, "step": 1214700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7423, "step": 1214800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.738, "step": 1214900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7337, "step": 1215000 }, { "epoch": 2.01, "eval_loss": 0.6927644610404968, "eval_runtime": 209.8566, "eval_samples_per_second": 238.258, "eval_steps_per_second": 1.863, "step": 1215000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7454, "step": 1215100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7301, "step": 1215200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7409, "step": 1215300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7455, "step": 1215400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7359, "step": 1215500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.743, "step": 1215600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7427, "step": 1215700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7469, "step": 1215800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7444, "step": 1215900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7419, "step": 1216000 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7342, "step": 1216100 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7415, "step": 1216200 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7325, "step": 1216300 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7405, "step": 1216400 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7427, "step": 1216500 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7455, "step": 1216600 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7354, "step": 1216700 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7402, "step": 1216800 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7381, "step": 1216900 }, { "epoch": 2.01, "learning_rate": 2e-05, "loss": 0.7443, "step": 1217000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7386, "step": 1217100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7423, "step": 1217200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.743, "step": 1217300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7383, "step": 1217400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7412, "step": 1217500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7434, "step": 1217600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7348, "step": 1217700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7437, "step": 1217800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7351, "step": 1217900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7446, "step": 1218000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7377, "step": 1218100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7356, "step": 1218200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7385, "step": 1218300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7429, "step": 1218400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7377, "step": 1218500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7375, "step": 1218600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7438, "step": 1218700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7376, "step": 1218800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.739, "step": 1218900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7357, "step": 1219000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.742, "step": 1219100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7396, "step": 1219200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7414, "step": 1219300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7337, "step": 1219400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7407, "step": 1219500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7447, "step": 1219600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7362, "step": 1219700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7441, "step": 1219800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7358, "step": 1219900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7448, "step": 1220000 }, { "epoch": 2.02, "eval_loss": 0.6960386633872986, "eval_runtime": 206.1862, "eval_samples_per_second": 242.499, "eval_steps_per_second": 1.896, "step": 1220000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7404, "step": 1220100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7331, "step": 1220200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7353, "step": 1220300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7407, "step": 1220400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.738, "step": 1220500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7484, "step": 1220600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7386, "step": 1220700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7367, "step": 1220800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7342, "step": 1220900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.732, "step": 1221000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7474, "step": 1221100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7486, "step": 1221200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7436, "step": 1221300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7416, "step": 1221400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7381, "step": 1221500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7385, "step": 1221600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7388, "step": 1221700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7292, "step": 1221800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7328, "step": 1221900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7417, "step": 1222000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7407, "step": 1222100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7431, "step": 1222200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7371, "step": 1222300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7411, "step": 1222400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7385, "step": 1222500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7427, "step": 1222600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7464, "step": 1222700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7354, "step": 1222800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7331, "step": 1222900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7343, "step": 1223000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7403, "step": 1223100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7378, "step": 1223200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7456, "step": 1223300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7447, "step": 1223400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.735, "step": 1223500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7371, "step": 1223600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7355, "step": 1223700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7377, "step": 1223800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7426, "step": 1223900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7362, "step": 1224000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7349, "step": 1224100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7389, "step": 1224200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.74, "step": 1224300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7377, "step": 1224400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7377, "step": 1224500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.739, "step": 1224600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7345, "step": 1224700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7421, "step": 1224800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7443, "step": 1224900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7322, "step": 1225000 }, { "epoch": 2.02, "eval_loss": 0.6961241960525513, "eval_runtime": 205.2014, "eval_samples_per_second": 243.663, "eval_steps_per_second": 1.905, "step": 1225000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7402, "step": 1225100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7337, "step": 1225200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7434, "step": 1225300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7331, "step": 1225400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7341, "step": 1225500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7433, "step": 1225600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7431, "step": 1225700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7372, "step": 1225800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7352, "step": 1225900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7484, "step": 1226000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7402, "step": 1226100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7422, "step": 1226200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7373, "step": 1226300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7348, "step": 1226400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.743, "step": 1226500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7398, "step": 1226600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7386, "step": 1226700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7447, "step": 1226800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7357, "step": 1226900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7361, "step": 1227000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7427, "step": 1227100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7361, "step": 1227200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7323, "step": 1227300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7379, "step": 1227400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7398, "step": 1227500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7419, "step": 1227600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7427, "step": 1227700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7415, "step": 1227800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7422, "step": 1227900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7402, "step": 1228000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7294, "step": 1228100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7448, "step": 1228200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7397, "step": 1228300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7382, "step": 1228400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7313, "step": 1228500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.738, "step": 1228600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7419, "step": 1228700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.743, "step": 1228800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7319, "step": 1228900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7451, "step": 1229000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7391, "step": 1229100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7438, "step": 1229200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7392, "step": 1229300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7384, "step": 1229400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7385, "step": 1229500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7345, "step": 1229600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7416, "step": 1229700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7404, "step": 1229800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7408, "step": 1229900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7475, "step": 1230000 }, { "epoch": 2.02, "eval_loss": 0.6950798630714417, "eval_runtime": 207.3877, "eval_samples_per_second": 241.094, "eval_steps_per_second": 1.885, "step": 1230000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7386, "step": 1230100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7372, "step": 1230200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7469, "step": 1230300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7359, "step": 1230400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7274, "step": 1230500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7428, "step": 1230600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7393, "step": 1230700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7346, "step": 1230800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7365, "step": 1230900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7472, "step": 1231000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7373, "step": 1231100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7465, "step": 1231200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7367, "step": 1231300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7367, "step": 1231400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7453, "step": 1231500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.738, "step": 1231600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7378, "step": 1231700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7454, "step": 1231800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7357, "step": 1231900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7355, "step": 1232000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7406, "step": 1232100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7423, "step": 1232200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7385, "step": 1232300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7381, "step": 1232400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.74, "step": 1232500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7418, "step": 1232600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7441, "step": 1232700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7326, "step": 1232800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7296, "step": 1232900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7347, "step": 1233000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7404, "step": 1233100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7357, "step": 1233200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7347, "step": 1233300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7433, "step": 1233400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7346, "step": 1233500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7399, "step": 1233600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7371, "step": 1233700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7389, "step": 1233800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7394, "step": 1233900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7342, "step": 1234000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7403, "step": 1234100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7352, "step": 1234200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7424, "step": 1234300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7349, "step": 1234400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7373, "step": 1234500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7274, "step": 1234600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7379, "step": 1234700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7402, "step": 1234800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7418, "step": 1234900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7402, "step": 1235000 }, { "epoch": 2.02, "eval_loss": 0.6963469386100769, "eval_runtime": 206.1765, "eval_samples_per_second": 242.511, "eval_steps_per_second": 1.896, "step": 1235000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7367, "step": 1235100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7442, "step": 1235200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7365, "step": 1235300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7379, "step": 1235400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7492, "step": 1235500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7357, "step": 1235600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7448, "step": 1235700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7431, "step": 1235800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.742, "step": 1235900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7443, "step": 1236000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7421, "step": 1236100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7377, "step": 1236200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7364, "step": 1236300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.739, "step": 1236400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7367, "step": 1236500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7421, "step": 1236600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7357, "step": 1236700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7394, "step": 1236800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7401, "step": 1236900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7352, "step": 1237000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7417, "step": 1237100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7406, "step": 1237200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7391, "step": 1237300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7435, "step": 1237400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7497, "step": 1237500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7369, "step": 1237600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7387, "step": 1237700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.74, "step": 1237800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7432, "step": 1237900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7319, "step": 1238000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7385, "step": 1238100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7357, "step": 1238200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7389, "step": 1238300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7358, "step": 1238400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7434, "step": 1238500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7368, "step": 1238600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.742, "step": 1238700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7334, "step": 1238800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7362, "step": 1238900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7432, "step": 1239000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7374, "step": 1239100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7419, "step": 1239200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.743, "step": 1239300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7368, "step": 1239400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7486, "step": 1239500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.742, "step": 1239600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7404, "step": 1239700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7396, "step": 1239800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7396, "step": 1239900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.74, "step": 1240000 }, { "epoch": 2.02, "eval_loss": 0.6954966187477112, "eval_runtime": 203.811, "eval_samples_per_second": 245.325, "eval_steps_per_second": 1.918, "step": 1240000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7319, "step": 1240100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7337, "step": 1240200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7309, "step": 1240300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7376, "step": 1240400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7356, "step": 1240500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7345, "step": 1240600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7373, "step": 1240700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.734, "step": 1240800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7441, "step": 1240900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7332, "step": 1241000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7419, "step": 1241100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7381, "step": 1241200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7383, "step": 1241300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7495, "step": 1241400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7409, "step": 1241500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7339, "step": 1241600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7357, "step": 1241700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7429, "step": 1241800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7365, "step": 1241900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.742, "step": 1242000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7417, "step": 1242100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7359, "step": 1242200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7376, "step": 1242300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7402, "step": 1242400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7301, "step": 1242500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7433, "step": 1242600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7349, "step": 1242700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.735, "step": 1242800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7369, "step": 1242900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7355, "step": 1243000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7316, "step": 1243100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7386, "step": 1243200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7416, "step": 1243300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7426, "step": 1243400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7411, "step": 1243500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7351, "step": 1243600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7373, "step": 1243700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7423, "step": 1243800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7444, "step": 1243900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7421, "step": 1244000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7369, "step": 1244100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.74, "step": 1244200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7322, "step": 1244300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7351, "step": 1244400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7432, "step": 1244500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7373, "step": 1244600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7438, "step": 1244700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.738, "step": 1244800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7349, "step": 1244900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7396, "step": 1245000 }, { "epoch": 2.02, "eval_loss": 0.6947163939476013, "eval_runtime": 204.6375, "eval_samples_per_second": 244.335, "eval_steps_per_second": 1.911, "step": 1245000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.745, "step": 1245100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7383, "step": 1245200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.734, "step": 1245300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7408, "step": 1245400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.742, "step": 1245500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7388, "step": 1245600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7432, "step": 1245700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7357, "step": 1245800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7359, "step": 1245900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7404, "step": 1246000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7322, "step": 1246100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.748, "step": 1246200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7367, "step": 1246300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7444, "step": 1246400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7446, "step": 1246500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7412, "step": 1246600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.738, "step": 1246700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7363, "step": 1246800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7367, "step": 1246900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7416, "step": 1247000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7329, "step": 1247100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7377, "step": 1247200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7381, "step": 1247300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7494, "step": 1247400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.737, "step": 1247500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7387, "step": 1247600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7344, "step": 1247700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.739, "step": 1247800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7404, "step": 1247900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7366, "step": 1248000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7432, "step": 1248100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7292, "step": 1248200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7405, "step": 1248300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7338, "step": 1248400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.741, "step": 1248500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.744, "step": 1248600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7388, "step": 1248700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7341, "step": 1248800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.737, "step": 1248900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7435, "step": 1249000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7342, "step": 1249100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7403, "step": 1249200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7329, "step": 1249300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7296, "step": 1249400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7346, "step": 1249500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.739, "step": 1249600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7371, "step": 1249700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7414, "step": 1249800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7438, "step": 1249900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7443, "step": 1250000 }, { "epoch": 2.02, "eval_loss": 0.695235550403595, "eval_runtime": 202.6416, "eval_samples_per_second": 246.741, "eval_steps_per_second": 1.93, "step": 1250000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.738, "step": 1250100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7421, "step": 1250200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7433, "step": 1250300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7405, "step": 1250400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7399, "step": 1250500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7376, "step": 1250600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7302, "step": 1250700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7367, "step": 1250800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7358, "step": 1250900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7497, "step": 1251000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7371, "step": 1251100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7365, "step": 1251200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7349, "step": 1251300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7415, "step": 1251400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7318, "step": 1251500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7438, "step": 1251600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7393, "step": 1251700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7366, "step": 1251800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.738, "step": 1251900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7316, "step": 1252000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7358, "step": 1252100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7347, "step": 1252200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.74, "step": 1252300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7336, "step": 1252400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7346, "step": 1252500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7467, "step": 1252600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7472, "step": 1252700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7377, "step": 1252800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7341, "step": 1252900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7383, "step": 1253000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7382, "step": 1253100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7359, "step": 1253200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7371, "step": 1253300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.734, "step": 1253400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7428, "step": 1253500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7412, "step": 1253600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7426, "step": 1253700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7375, "step": 1253800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7375, "step": 1253900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.742, "step": 1254000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7439, "step": 1254100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7353, "step": 1254200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7289, "step": 1254300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7393, "step": 1254400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7269, "step": 1254500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7438, "step": 1254600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.739, "step": 1254700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7387, "step": 1254800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7441, "step": 1254900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7311, "step": 1255000 }, { "epoch": 2.02, "eval_loss": 0.6938926577568054, "eval_runtime": 203.8632, "eval_samples_per_second": 245.263, "eval_steps_per_second": 1.918, "step": 1255000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7364, "step": 1255100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7388, "step": 1255200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7394, "step": 1255300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7383, "step": 1255400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7372, "step": 1255500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7349, "step": 1255600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.74, "step": 1255700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7373, "step": 1255800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7365, "step": 1255900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7335, "step": 1256000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7432, "step": 1256100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.745, "step": 1256200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.739, "step": 1256300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7342, "step": 1256400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7318, "step": 1256500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7383, "step": 1256600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7464, "step": 1256700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7437, "step": 1256800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7434, "step": 1256900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7395, "step": 1257000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7357, "step": 1257100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7449, "step": 1257200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7357, "step": 1257300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7306, "step": 1257400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7416, "step": 1257500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7336, "step": 1257600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.736, "step": 1257700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7383, "step": 1257800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7321, "step": 1257900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7402, "step": 1258000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.747, "step": 1258100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7292, "step": 1258200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7343, "step": 1258300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.736, "step": 1258400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7361, "step": 1258500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7381, "step": 1258600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7479, "step": 1258700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7378, "step": 1258800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7373, "step": 1258900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.735, "step": 1259000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7331, "step": 1259100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7423, "step": 1259200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7444, "step": 1259300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7384, "step": 1259400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7394, "step": 1259500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7437, "step": 1259600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7369, "step": 1259700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7409, "step": 1259800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7455, "step": 1259900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7301, "step": 1260000 }, { "epoch": 2.02, "eval_loss": 0.694320023059845, "eval_runtime": 204.6582, "eval_samples_per_second": 244.31, "eval_steps_per_second": 1.911, "step": 1260000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.734, "step": 1260100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7318, "step": 1260200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7398, "step": 1260300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7354, "step": 1260400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7363, "step": 1260500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7482, "step": 1260600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7386, "step": 1260700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7369, "step": 1260800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7337, "step": 1260900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7305, "step": 1261000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7384, "step": 1261100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7347, "step": 1261200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7382, "step": 1261300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7365, "step": 1261400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7353, "step": 1261500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7436, "step": 1261600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7404, "step": 1261700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7334, "step": 1261800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7334, "step": 1261900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7364, "step": 1262000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7291, "step": 1262100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7446, "step": 1262200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7334, "step": 1262300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7316, "step": 1262400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7361, "step": 1262500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7368, "step": 1262600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7423, "step": 1262700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7345, "step": 1262800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.732, "step": 1262900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7257, "step": 1263000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7346, "step": 1263100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7388, "step": 1263200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7368, "step": 1263300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7396, "step": 1263400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7342, "step": 1263500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7403, "step": 1263600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7402, "step": 1263700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7368, "step": 1263800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7457, "step": 1263900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7363, "step": 1264000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.736, "step": 1264100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7437, "step": 1264200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7417, "step": 1264300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7447, "step": 1264400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7368, "step": 1264500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7422, "step": 1264600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7355, "step": 1264700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7425, "step": 1264800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7334, "step": 1264900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7318, "step": 1265000 }, { "epoch": 2.02, "eval_loss": 0.6940274238586426, "eval_runtime": 201.3695, "eval_samples_per_second": 248.3, "eval_steps_per_second": 1.942, "step": 1265000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.734, "step": 1265100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7417, "step": 1265200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7407, "step": 1265300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.736, "step": 1265400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.736, "step": 1265500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7399, "step": 1265600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7391, "step": 1265700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7348, "step": 1265800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7361, "step": 1265900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7397, "step": 1266000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7344, "step": 1266100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7346, "step": 1266200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7418, "step": 1266300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7349, "step": 1266400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7406, "step": 1266500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7395, "step": 1266600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7352, "step": 1266700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7428, "step": 1266800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7355, "step": 1266900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7404, "step": 1267000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7347, "step": 1267100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7391, "step": 1267200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7363, "step": 1267300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7363, "step": 1267400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7382, "step": 1267500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7349, "step": 1267600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7324, "step": 1267700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7397, "step": 1267800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7365, "step": 1267900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7341, "step": 1268000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7392, "step": 1268100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7416, "step": 1268200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7356, "step": 1268300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7383, "step": 1268400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7416, "step": 1268500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7396, "step": 1268600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7378, "step": 1268700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7343, "step": 1268800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7311, "step": 1268900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7384, "step": 1269000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7324, "step": 1269100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7448, "step": 1269200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.736, "step": 1269300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.747, "step": 1269400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7397, "step": 1269500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7396, "step": 1269600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7312, "step": 1269700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7458, "step": 1269800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7331, "step": 1269900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7363, "step": 1270000 }, { "epoch": 2.02, "eval_loss": 0.6949335932731628, "eval_runtime": 194.6116, "eval_samples_per_second": 256.922, "eval_steps_per_second": 2.009, "step": 1270000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7393, "step": 1270100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7409, "step": 1270200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7412, "step": 1270300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7361, "step": 1270400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.733, "step": 1270500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7385, "step": 1270600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.729, "step": 1270700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7352, "step": 1270800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7366, "step": 1270900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7345, "step": 1271000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7509, "step": 1271100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7401, "step": 1271200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7389, "step": 1271300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7429, "step": 1271400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7388, "step": 1271500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7294, "step": 1271600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7428, "step": 1271700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7442, "step": 1271800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7417, "step": 1271900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7431, "step": 1272000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7336, "step": 1272100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7331, "step": 1272200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7397, "step": 1272300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7362, "step": 1272400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7355, "step": 1272500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7283, "step": 1272600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7351, "step": 1272700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7422, "step": 1272800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7425, "step": 1272900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7368, "step": 1273000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7389, "step": 1273100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7394, "step": 1273200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7409, "step": 1273300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7335, "step": 1273400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7429, "step": 1273500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7403, "step": 1273600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7393, "step": 1273700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7323, "step": 1273800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7364, "step": 1273900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7342, "step": 1274000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7405, "step": 1274100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7369, "step": 1274200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7373, "step": 1274300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7424, "step": 1274400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7386, "step": 1274500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7366, "step": 1274600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7323, "step": 1274700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.747, "step": 1274800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7413, "step": 1274900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.744, "step": 1275000 }, { "epoch": 2.02, "eval_loss": 0.6966413259506226, "eval_runtime": 194.7482, "eval_samples_per_second": 256.742, "eval_steps_per_second": 2.008, "step": 1275000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7382, "step": 1275100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7388, "step": 1275200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7404, "step": 1275300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7386, "step": 1275400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7392, "step": 1275500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7298, "step": 1275600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7425, "step": 1275700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7347, "step": 1275800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7332, "step": 1275900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7412, "step": 1276000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7352, "step": 1276100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7423, "step": 1276200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7376, "step": 1276300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7393, "step": 1276400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7397, "step": 1276500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7473, "step": 1276600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7334, "step": 1276700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.738, "step": 1276800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7336, "step": 1276900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.741, "step": 1277000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7354, "step": 1277100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7396, "step": 1277200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7364, "step": 1277300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7381, "step": 1277400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7282, "step": 1277500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.744, "step": 1277600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.738, "step": 1277700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7387, "step": 1277800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7436, "step": 1277900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7356, "step": 1278000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7337, "step": 1278100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7319, "step": 1278200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7382, "step": 1278300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7394, "step": 1278400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7447, "step": 1278500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7439, "step": 1278600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7358, "step": 1278700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.741, "step": 1278800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7356, "step": 1278900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7342, "step": 1279000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7352, "step": 1279100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7351, "step": 1279200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.74, "step": 1279300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7344, "step": 1279400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7409, "step": 1279500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.733, "step": 1279600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7389, "step": 1279700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7361, "step": 1279800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7404, "step": 1279900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7366, "step": 1280000 }, { "epoch": 2.02, "eval_loss": 0.6938179135322571, "eval_runtime": 194.8718, "eval_samples_per_second": 256.579, "eval_steps_per_second": 2.006, "step": 1280000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7385, "step": 1280100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7348, "step": 1280200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.739, "step": 1280300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7373, "step": 1280400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.737, "step": 1280500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7456, "step": 1280600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7344, "step": 1280700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7414, "step": 1280800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7347, "step": 1280900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7451, "step": 1281000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7289, "step": 1281100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7319, "step": 1281200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7417, "step": 1281300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7356, "step": 1281400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7485, "step": 1281500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7421, "step": 1281600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7379, "step": 1281700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7408, "step": 1281800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7339, "step": 1281900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7378, "step": 1282000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7429, "step": 1282100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7463, "step": 1282200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7446, "step": 1282300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7364, "step": 1282400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7378, "step": 1282500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7375, "step": 1282600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7443, "step": 1282700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7395, "step": 1282800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7466, "step": 1282900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7435, "step": 1283000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.734, "step": 1283100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7443, "step": 1283200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7384, "step": 1283300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7408, "step": 1283400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.734, "step": 1283500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7359, "step": 1283600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7455, "step": 1283700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7294, "step": 1283800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7343, "step": 1283900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.74, "step": 1284000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7428, "step": 1284100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7325, "step": 1284200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7399, "step": 1284300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7366, "step": 1284400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7326, "step": 1284500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.741, "step": 1284600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7408, "step": 1284700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7368, "step": 1284800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7392, "step": 1284900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7429, "step": 1285000 }, { "epoch": 2.02, "eval_loss": 0.6942003965377808, "eval_runtime": 194.811, "eval_samples_per_second": 256.659, "eval_steps_per_second": 2.007, "step": 1285000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.746, "step": 1285100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.731, "step": 1285200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7371, "step": 1285300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7398, "step": 1285400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7427, "step": 1285500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7357, "step": 1285600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.736, "step": 1285700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7411, "step": 1285800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7375, "step": 1285900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7436, "step": 1286000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7371, "step": 1286100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7329, "step": 1286200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7314, "step": 1286300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7436, "step": 1286400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.742, "step": 1286500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7466, "step": 1286600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7333, "step": 1286700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7392, "step": 1286800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7388, "step": 1286900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7376, "step": 1287000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.732, "step": 1287100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7414, "step": 1287200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7281, "step": 1287300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7333, "step": 1287400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7404, "step": 1287500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7427, "step": 1287600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7394, "step": 1287700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7346, "step": 1287800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7426, "step": 1287900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7433, "step": 1288000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7424, "step": 1288100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7407, "step": 1288200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7364, "step": 1288300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.739, "step": 1288400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7347, "step": 1288500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7309, "step": 1288600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7294, "step": 1288700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7395, "step": 1288800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7403, "step": 1288900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7373, "step": 1289000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7425, "step": 1289100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7301, "step": 1289200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7464, "step": 1289300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7382, "step": 1289400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.738, "step": 1289500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7404, "step": 1289600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7461, "step": 1289700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7397, "step": 1289800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7442, "step": 1289900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7432, "step": 1290000 }, { "epoch": 2.02, "eval_loss": 0.6959330439567566, "eval_runtime": 194.4213, "eval_samples_per_second": 257.173, "eval_steps_per_second": 2.011, "step": 1290000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7382, "step": 1290100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7387, "step": 1290200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7367, "step": 1290300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7371, "step": 1290400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7418, "step": 1290500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7471, "step": 1290600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7344, "step": 1290700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7395, "step": 1290800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7371, "step": 1290900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7363, "step": 1291000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7374, "step": 1291100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.731, "step": 1291200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7402, "step": 1291300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7402, "step": 1291400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7339, "step": 1291500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7357, "step": 1291600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7414, "step": 1291700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7392, "step": 1291800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7419, "step": 1291900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7353, "step": 1292000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7395, "step": 1292100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.735, "step": 1292200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7269, "step": 1292300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7413, "step": 1292400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7339, "step": 1292500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7253, "step": 1292600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7454, "step": 1292700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7374, "step": 1292800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7338, "step": 1292900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7291, "step": 1293000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7402, "step": 1293100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7434, "step": 1293200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7401, "step": 1293300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7373, "step": 1293400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7505, "step": 1293500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7379, "step": 1293600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7331, "step": 1293700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7365, "step": 1293800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7374, "step": 1293900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7423, "step": 1294000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7418, "step": 1294100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7295, "step": 1294200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7368, "step": 1294300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7404, "step": 1294400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7365, "step": 1294500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7279, "step": 1294600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7464, "step": 1294700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7356, "step": 1294800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7393, "step": 1294900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7356, "step": 1295000 }, { "epoch": 2.02, "eval_loss": 0.6947426795959473, "eval_runtime": 194.7398, "eval_samples_per_second": 256.753, "eval_steps_per_second": 2.008, "step": 1295000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7377, "step": 1295100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.738, "step": 1295200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7332, "step": 1295300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7383, "step": 1295400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7352, "step": 1295500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7356, "step": 1295600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.733, "step": 1295700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7316, "step": 1295800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7358, "step": 1295900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7393, "step": 1296000 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7286, "step": 1296100 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7319, "step": 1296200 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7399, "step": 1296300 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7413, "step": 1296400 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7379, "step": 1296500 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7481, "step": 1296600 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7442, "step": 1296700 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7344, "step": 1296800 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.7392, "step": 1296900 }, { "epoch": 2.02, "learning_rate": 2e-05, "loss": 0.739, "step": 1297000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7392, "step": 1297100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7371, "step": 1297200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7404, "step": 1297300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7328, "step": 1297400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7357, "step": 1297500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7372, "step": 1297600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7436, "step": 1297700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7372, "step": 1297800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7377, "step": 1297900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7414, "step": 1298000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7343, "step": 1298100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7373, "step": 1298200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7395, "step": 1298300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7428, "step": 1298400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7384, "step": 1298500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.732, "step": 1298600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7382, "step": 1298700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7348, "step": 1298800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7393, "step": 1298900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7349, "step": 1299000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7381, "step": 1299100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7414, "step": 1299200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7352, "step": 1299300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7377, "step": 1299400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.739, "step": 1299500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7394, "step": 1299600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.739, "step": 1299700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7424, "step": 1299800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.738, "step": 1299900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7398, "step": 1300000 }, { "epoch": 2.03, "eval_loss": 0.6939392685890198, "eval_runtime": 194.6648, "eval_samples_per_second": 256.852, "eval_steps_per_second": 2.009, "step": 1300000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7394, "step": 1300100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7337, "step": 1300200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7403, "step": 1300300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7346, "step": 1300400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7308, "step": 1300500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7409, "step": 1300600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7396, "step": 1300700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7376, "step": 1300800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7365, "step": 1300900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7344, "step": 1301000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7363, "step": 1301100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7426, "step": 1301200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7341, "step": 1301300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.739, "step": 1301400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7333, "step": 1301500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7344, "step": 1301600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7328, "step": 1301700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7366, "step": 1301800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7422, "step": 1301900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7359, "step": 1302000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7324, "step": 1302100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7327, "step": 1302200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7397, "step": 1302300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7444, "step": 1302400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.74, "step": 1302500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7313, "step": 1302600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7334, "step": 1302700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7399, "step": 1302800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7437, "step": 1302900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7357, "step": 1303000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7409, "step": 1303100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7356, "step": 1303200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7334, "step": 1303300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7296, "step": 1303400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7387, "step": 1303500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.736, "step": 1303600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7412, "step": 1303700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.731, "step": 1303800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.731, "step": 1303900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7432, "step": 1304000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7408, "step": 1304100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7387, "step": 1304200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7349, "step": 1304300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7407, "step": 1304400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7385, "step": 1304500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7287, "step": 1304600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7357, "step": 1304700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7393, "step": 1304800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7469, "step": 1304900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7362, "step": 1305000 }, { "epoch": 2.03, "eval_loss": 0.6939355731010437, "eval_runtime": 194.437, "eval_samples_per_second": 257.153, "eval_steps_per_second": 2.011, "step": 1305000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.736, "step": 1305100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7421, "step": 1305200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7323, "step": 1305300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7407, "step": 1305400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7369, "step": 1305500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7339, "step": 1305600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.734, "step": 1305700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7356, "step": 1305800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7309, "step": 1305900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7377, "step": 1306000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7321, "step": 1306100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7328, "step": 1306200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7372, "step": 1306300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7372, "step": 1306400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7332, "step": 1306500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7362, "step": 1306600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7367, "step": 1306700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7354, "step": 1306800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7355, "step": 1306900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7289, "step": 1307000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7317, "step": 1307100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7366, "step": 1307200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7352, "step": 1307300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7342, "step": 1307400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7303, "step": 1307500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7389, "step": 1307600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7388, "step": 1307700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7334, "step": 1307800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7373, "step": 1307900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7356, "step": 1308000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7344, "step": 1308100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7397, "step": 1308200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7344, "step": 1308300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7331, "step": 1308400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7323, "step": 1308500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7284, "step": 1308600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7364, "step": 1308700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.734, "step": 1308800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7338, "step": 1308900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7399, "step": 1309000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7434, "step": 1309100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7388, "step": 1309200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7336, "step": 1309300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7309, "step": 1309400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7347, "step": 1309500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7295, "step": 1309600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7352, "step": 1309700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7408, "step": 1309800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7439, "step": 1309900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7384, "step": 1310000 }, { "epoch": 2.03, "eval_loss": 0.6949591636657715, "eval_runtime": 194.8114, "eval_samples_per_second": 256.659, "eval_steps_per_second": 2.007, "step": 1310000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7343, "step": 1310100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.733, "step": 1310200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7374, "step": 1310300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7384, "step": 1310400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7405, "step": 1310500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7411, "step": 1310600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7449, "step": 1310700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7406, "step": 1310800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7362, "step": 1310900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7328, "step": 1311000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.741, "step": 1311100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7426, "step": 1311200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7335, "step": 1311300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7412, "step": 1311400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7434, "step": 1311500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7399, "step": 1311600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7415, "step": 1311700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7296, "step": 1311800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.753, "step": 1311900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7412, "step": 1312000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7409, "step": 1312100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7457, "step": 1312200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7352, "step": 1312300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7346, "step": 1312400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7301, "step": 1312500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7414, "step": 1312600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7345, "step": 1312700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7292, "step": 1312800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7406, "step": 1312900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7396, "step": 1313000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7335, "step": 1313100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7325, "step": 1313200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7372, "step": 1313300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.736, "step": 1313400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7318, "step": 1313500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7352, "step": 1313600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7441, "step": 1313700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7331, "step": 1313800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7367, "step": 1313900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7283, "step": 1314000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7329, "step": 1314100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.738, "step": 1314200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7384, "step": 1314300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7352, "step": 1314400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7335, "step": 1314500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7332, "step": 1314600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7403, "step": 1314700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7405, "step": 1314800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7411, "step": 1314900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.735, "step": 1315000 }, { "epoch": 2.03, "eval_loss": 0.6930851340293884, "eval_runtime": 194.6929, "eval_samples_per_second": 256.815, "eval_steps_per_second": 2.008, "step": 1315000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7406, "step": 1315100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7514, "step": 1315200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7276, "step": 1315300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7426, "step": 1315400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7318, "step": 1315500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7337, "step": 1315600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7381, "step": 1315700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7484, "step": 1315800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7327, "step": 1315900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7302, "step": 1316000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7361, "step": 1316100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7411, "step": 1316200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7397, "step": 1316300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7377, "step": 1316400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7374, "step": 1316500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7326, "step": 1316600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7332, "step": 1316700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7352, "step": 1316800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.736, "step": 1316900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7368, "step": 1317000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7326, "step": 1317100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7415, "step": 1317200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7381, "step": 1317300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7279, "step": 1317400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.731, "step": 1317500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7262, "step": 1317600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7366, "step": 1317700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7373, "step": 1317800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7346, "step": 1317900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7417, "step": 1318000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7413, "step": 1318100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7311, "step": 1318200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7444, "step": 1318300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7402, "step": 1318400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7434, "step": 1318500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.731, "step": 1318600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7398, "step": 1318700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7374, "step": 1318800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7384, "step": 1318900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7383, "step": 1319000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7348, "step": 1319100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7404, "step": 1319200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7403, "step": 1319300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7346, "step": 1319400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7401, "step": 1319500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7451, "step": 1319600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7344, "step": 1319700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7321, "step": 1319800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7306, "step": 1319900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.743, "step": 1320000 }, { "epoch": 2.03, "eval_loss": 0.6944131851196289, "eval_runtime": 194.7661, "eval_samples_per_second": 256.718, "eval_steps_per_second": 2.008, "step": 1320000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7387, "step": 1320100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.735, "step": 1320200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7299, "step": 1320300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7302, "step": 1320400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7383, "step": 1320500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7287, "step": 1320600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7416, "step": 1320700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7258, "step": 1320800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7365, "step": 1320900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7279, "step": 1321000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7345, "step": 1321100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7378, "step": 1321200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7383, "step": 1321300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.736, "step": 1321400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7379, "step": 1321500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7384, "step": 1321600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7377, "step": 1321700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7411, "step": 1321800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7376, "step": 1321900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.732, "step": 1322000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7356, "step": 1322100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7408, "step": 1322200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7343, "step": 1322300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7305, "step": 1322400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7368, "step": 1322500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7366, "step": 1322600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7328, "step": 1322700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7367, "step": 1322800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7403, "step": 1322900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7346, "step": 1323000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7363, "step": 1323100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7341, "step": 1323200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7328, "step": 1323300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7278, "step": 1323400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7314, "step": 1323500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7298, "step": 1323600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7298, "step": 1323700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7375, "step": 1323800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7339, "step": 1323900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7328, "step": 1324000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7344, "step": 1324100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7292, "step": 1324200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7344, "step": 1324300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.734, "step": 1324400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.734, "step": 1324500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.735, "step": 1324600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7375, "step": 1324700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7438, "step": 1324800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7375, "step": 1324900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7297, "step": 1325000 }, { "epoch": 2.03, "eval_loss": 0.6931760311126709, "eval_runtime": 194.305, "eval_samples_per_second": 257.327, "eval_steps_per_second": 2.012, "step": 1325000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7332, "step": 1325100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7366, "step": 1325200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7391, "step": 1325300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7324, "step": 1325400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7257, "step": 1325500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7328, "step": 1325600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.742, "step": 1325700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7326, "step": 1325800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7418, "step": 1325900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7331, "step": 1326000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7369, "step": 1326100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7393, "step": 1326200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7337, "step": 1326300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7347, "step": 1326400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.738, "step": 1326500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7364, "step": 1326600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7327, "step": 1326700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7416, "step": 1326800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7411, "step": 1326900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7388, "step": 1327000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7305, "step": 1327100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7367, "step": 1327200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7399, "step": 1327300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7335, "step": 1327400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.734, "step": 1327500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7403, "step": 1327600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.734, "step": 1327700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7422, "step": 1327800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7401, "step": 1327900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7386, "step": 1328000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7285, "step": 1328100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7387, "step": 1328200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.731, "step": 1328300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7367, "step": 1328400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7375, "step": 1328500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7373, "step": 1328600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7347, "step": 1328700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7437, "step": 1328800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7395, "step": 1328900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7374, "step": 1329000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7375, "step": 1329100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7364, "step": 1329200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7309, "step": 1329300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7406, "step": 1329400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7352, "step": 1329500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7399, "step": 1329600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7401, "step": 1329700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7303, "step": 1329800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7269, "step": 1329900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7324, "step": 1330000 }, { "epoch": 2.03, "eval_loss": 0.6926876306533813, "eval_runtime": 194.46, "eval_samples_per_second": 257.122, "eval_steps_per_second": 2.011, "step": 1330000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7424, "step": 1330100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7411, "step": 1330200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7323, "step": 1330300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7423, "step": 1330400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7371, "step": 1330500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7366, "step": 1330600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7302, "step": 1330700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7353, "step": 1330800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7329, "step": 1330900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7342, "step": 1331000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.739, "step": 1331100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7296, "step": 1331200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7365, "step": 1331300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7402, "step": 1331400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7379, "step": 1331500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.73, "step": 1331600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7304, "step": 1331700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7268, "step": 1331800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7325, "step": 1331900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7404, "step": 1332000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7348, "step": 1332100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7365, "step": 1332200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7336, "step": 1332300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7364, "step": 1332400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7396, "step": 1332500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.736, "step": 1332600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7387, "step": 1332700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7404, "step": 1332800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7336, "step": 1332900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7301, "step": 1333000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7296, "step": 1333100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7341, "step": 1333200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7305, "step": 1333300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7451, "step": 1333400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7314, "step": 1333500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7352, "step": 1333600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7353, "step": 1333700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7432, "step": 1333800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7352, "step": 1333900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7363, "step": 1334000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7325, "step": 1334100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7291, "step": 1334200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7262, "step": 1334300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7314, "step": 1334400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.732, "step": 1334500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7284, "step": 1334600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7331, "step": 1334700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7338, "step": 1334800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7347, "step": 1334900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7301, "step": 1335000 }, { "epoch": 2.03, "eval_loss": 0.6940681338310242, "eval_runtime": 194.3072, "eval_samples_per_second": 257.324, "eval_steps_per_second": 2.012, "step": 1335000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7366, "step": 1335100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7391, "step": 1335200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7393, "step": 1335300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7355, "step": 1335400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7336, "step": 1335500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7403, "step": 1335600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7363, "step": 1335700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7367, "step": 1335800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7347, "step": 1335900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7413, "step": 1336000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.733, "step": 1336100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7389, "step": 1336200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7293, "step": 1336300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7359, "step": 1336400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7384, "step": 1336500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7412, "step": 1336600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7435, "step": 1336700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7323, "step": 1336800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7371, "step": 1336900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7321, "step": 1337000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7358, "step": 1337100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7369, "step": 1337200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7431, "step": 1337300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7413, "step": 1337400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7378, "step": 1337500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7379, "step": 1337600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.733, "step": 1337700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7426, "step": 1337800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7346, "step": 1337900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7425, "step": 1338000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7417, "step": 1338100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7424, "step": 1338200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7379, "step": 1338300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7325, "step": 1338400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7339, "step": 1338500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7301, "step": 1338600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7312, "step": 1338700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.742, "step": 1338800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7365, "step": 1338900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7375, "step": 1339000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7421, "step": 1339100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7393, "step": 1339200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7396, "step": 1339300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7344, "step": 1339400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7367, "step": 1339500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7379, "step": 1339600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.731, "step": 1339700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7433, "step": 1339800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7403, "step": 1339900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7383, "step": 1340000 }, { "epoch": 2.03, "eval_loss": 0.6945679783821106, "eval_runtime": 194.6901, "eval_samples_per_second": 256.818, "eval_steps_per_second": 2.008, "step": 1340000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7312, "step": 1340100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7346, "step": 1340200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7352, "step": 1340300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7319, "step": 1340400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7368, "step": 1340500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7278, "step": 1340600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7342, "step": 1340700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7313, "step": 1340800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7323, "step": 1340900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7377, "step": 1341000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7366, "step": 1341100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7365, "step": 1341200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7415, "step": 1341300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7449, "step": 1341400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7321, "step": 1341500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7399, "step": 1341600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7365, "step": 1341700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7316, "step": 1341800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7347, "step": 1341900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.74, "step": 1342000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7389, "step": 1342100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7376, "step": 1342200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7414, "step": 1342300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7336, "step": 1342400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7387, "step": 1342500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7294, "step": 1342600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7322, "step": 1342700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7459, "step": 1342800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7422, "step": 1342900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7331, "step": 1343000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.736, "step": 1343100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7356, "step": 1343200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.738, "step": 1343300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7379, "step": 1343400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7393, "step": 1343500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.737, "step": 1343600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.742, "step": 1343700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7354, "step": 1343800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7411, "step": 1343900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7338, "step": 1344000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7318, "step": 1344100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7403, "step": 1344200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7356, "step": 1344300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7442, "step": 1344400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7343, "step": 1344500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.739, "step": 1344600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7359, "step": 1344700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7441, "step": 1344800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7316, "step": 1344900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7387, "step": 1345000 }, { "epoch": 2.03, "eval_loss": 0.6937873363494873, "eval_runtime": 194.4871, "eval_samples_per_second": 257.086, "eval_steps_per_second": 2.01, "step": 1345000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7397, "step": 1345100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7375, "step": 1345200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7394, "step": 1345300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7318, "step": 1345400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7365, "step": 1345500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7377, "step": 1345600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7357, "step": 1345700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7328, "step": 1345800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.742, "step": 1345900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7303, "step": 1346000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7469, "step": 1346100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7408, "step": 1346200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7354, "step": 1346300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7346, "step": 1346400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.734, "step": 1346500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7329, "step": 1346600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.735, "step": 1346700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7289, "step": 1346800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.733, "step": 1346900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7456, "step": 1347000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7429, "step": 1347100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7344, "step": 1347200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7353, "step": 1347300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.735, "step": 1347400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7364, "step": 1347500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7394, "step": 1347600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7382, "step": 1347700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7361, "step": 1347800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.739, "step": 1347900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.735, "step": 1348000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7305, "step": 1348100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7378, "step": 1348200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7393, "step": 1348300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7451, "step": 1348400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.731, "step": 1348500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7374, "step": 1348600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7406, "step": 1348700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7409, "step": 1348800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7398, "step": 1348900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7364, "step": 1349000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7407, "step": 1349100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7414, "step": 1349200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7395, "step": 1349300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7329, "step": 1349400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7345, "step": 1349500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7423, "step": 1349600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7352, "step": 1349700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7303, "step": 1349800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.745, "step": 1349900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7355, "step": 1350000 }, { "epoch": 2.03, "eval_loss": 0.6945255398750305, "eval_runtime": 195.1366, "eval_samples_per_second": 256.231, "eval_steps_per_second": 2.004, "step": 1350000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.738, "step": 1350100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.734, "step": 1350200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7365, "step": 1350300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7393, "step": 1350400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7431, "step": 1350500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7428, "step": 1350600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7379, "step": 1350700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7364, "step": 1350800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7315, "step": 1350900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7353, "step": 1351000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.734, "step": 1351100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7356, "step": 1351200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7331, "step": 1351300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7338, "step": 1351400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7326, "step": 1351500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7385, "step": 1351600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7425, "step": 1351700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7337, "step": 1351800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7393, "step": 1351900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7338, "step": 1352000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7307, "step": 1352100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7349, "step": 1352200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7434, "step": 1352300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7356, "step": 1352400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7346, "step": 1352500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7438, "step": 1352600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7449, "step": 1352700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7414, "step": 1352800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7317, "step": 1352900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.742, "step": 1353000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7404, "step": 1353100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7402, "step": 1353200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7325, "step": 1353300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7382, "step": 1353400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7422, "step": 1353500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7405, "step": 1353600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7339, "step": 1353700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7341, "step": 1353800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7307, "step": 1353900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7356, "step": 1354000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7382, "step": 1354100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7424, "step": 1354200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7304, "step": 1354300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7364, "step": 1354400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7377, "step": 1354500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7344, "step": 1354600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7499, "step": 1354700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7384, "step": 1354800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7365, "step": 1354900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.732, "step": 1355000 }, { "epoch": 2.03, "eval_loss": 0.6933331489562988, "eval_runtime": 194.6131, "eval_samples_per_second": 256.92, "eval_steps_per_second": 2.009, "step": 1355000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7415, "step": 1355100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7419, "step": 1355200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.736, "step": 1355300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7418, "step": 1355400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7471, "step": 1355500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.73, "step": 1355600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7235, "step": 1355700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7357, "step": 1355800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7367, "step": 1355900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7276, "step": 1356000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7292, "step": 1356100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7327, "step": 1356200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7343, "step": 1356300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7428, "step": 1356400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7316, "step": 1356500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7391, "step": 1356600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7348, "step": 1356700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7326, "step": 1356800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7395, "step": 1356900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7392, "step": 1357000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7432, "step": 1357100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7435, "step": 1357200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7392, "step": 1357300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7301, "step": 1357400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.737, "step": 1357500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7422, "step": 1357600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7403, "step": 1357700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7337, "step": 1357800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7369, "step": 1357900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7415, "step": 1358000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7362, "step": 1358100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7347, "step": 1358200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7448, "step": 1358300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7404, "step": 1358400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7393, "step": 1358500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7332, "step": 1358600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7349, "step": 1358700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7364, "step": 1358800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7404, "step": 1358900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7311, "step": 1359000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7326, "step": 1359100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7379, "step": 1359200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7345, "step": 1359300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7415, "step": 1359400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7349, "step": 1359500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.738, "step": 1359600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7341, "step": 1359700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7435, "step": 1359800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7403, "step": 1359900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7399, "step": 1360000 }, { "epoch": 2.03, "eval_loss": 0.6937475204467773, "eval_runtime": 194.729, "eval_samples_per_second": 256.767, "eval_steps_per_second": 2.008, "step": 1360000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7369, "step": 1360100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7336, "step": 1360200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7436, "step": 1360300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7341, "step": 1360400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7363, "step": 1360500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7356, "step": 1360600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7389, "step": 1360700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7298, "step": 1360800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7408, "step": 1360900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7321, "step": 1361000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7362, "step": 1361100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7358, "step": 1361200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7403, "step": 1361300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7294, "step": 1361400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7349, "step": 1361500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7438, "step": 1361600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7383, "step": 1361700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7383, "step": 1361800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7264, "step": 1361900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7428, "step": 1362000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7329, "step": 1362100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.734, "step": 1362200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7297, "step": 1362300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7359, "step": 1362400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7319, "step": 1362500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7399, "step": 1362600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7206, "step": 1362700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7388, "step": 1362800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7346, "step": 1362900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7405, "step": 1363000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7411, "step": 1363100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7421, "step": 1363200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7443, "step": 1363300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7448, "step": 1363400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7313, "step": 1363500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7376, "step": 1363600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7303, "step": 1363700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7369, "step": 1363800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7405, "step": 1363900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7359, "step": 1364000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7365, "step": 1364100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7375, "step": 1364200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7303, "step": 1364300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7433, "step": 1364400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7412, "step": 1364500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7418, "step": 1364600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7381, "step": 1364700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7328, "step": 1364800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7397, "step": 1364900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.732, "step": 1365000 }, { "epoch": 2.03, "eval_loss": 0.6959472894668579, "eval_runtime": 194.6466, "eval_samples_per_second": 256.876, "eval_steps_per_second": 2.009, "step": 1365000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7441, "step": 1365100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7398, "step": 1365200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7383, "step": 1365300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7353, "step": 1365400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7392, "step": 1365500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7345, "step": 1365600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7418, "step": 1365700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7329, "step": 1365800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7376, "step": 1365900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7275, "step": 1366000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7372, "step": 1366100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7365, "step": 1366200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.731, "step": 1366300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7337, "step": 1366400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7353, "step": 1366500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7468, "step": 1366600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7364, "step": 1366700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7365, "step": 1366800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7384, "step": 1366900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7393, "step": 1367000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7411, "step": 1367100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7306, "step": 1367200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7347, "step": 1367300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7251, "step": 1367400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7362, "step": 1367500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7351, "step": 1367600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7396, "step": 1367700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7337, "step": 1367800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7388, "step": 1367900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7341, "step": 1368000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7309, "step": 1368100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7409, "step": 1368200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7302, "step": 1368300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7382, "step": 1368400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7378, "step": 1368500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7366, "step": 1368600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7434, "step": 1368700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.738, "step": 1368800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7305, "step": 1368900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7323, "step": 1369000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7364, "step": 1369100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7256, "step": 1369200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7364, "step": 1369300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7359, "step": 1369400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7312, "step": 1369500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7302, "step": 1369600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.738, "step": 1369700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7347, "step": 1369800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.732, "step": 1369900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7357, "step": 1370000 }, { "epoch": 2.03, "eval_loss": 0.6928281188011169, "eval_runtime": 194.8294, "eval_samples_per_second": 256.635, "eval_steps_per_second": 2.007, "step": 1370000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7379, "step": 1370100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7308, "step": 1370200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7356, "step": 1370300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7365, "step": 1370400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7354, "step": 1370500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7334, "step": 1370600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7418, "step": 1370700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7323, "step": 1370800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7349, "step": 1370900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7417, "step": 1371000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7307, "step": 1371100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7369, "step": 1371200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7281, "step": 1371300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7381, "step": 1371400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7347, "step": 1371500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7375, "step": 1371600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7375, "step": 1371700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7394, "step": 1371800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7352, "step": 1371900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.743, "step": 1372000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7426, "step": 1372100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7433, "step": 1372200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7313, "step": 1372300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7322, "step": 1372400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7374, "step": 1372500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7399, "step": 1372600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7378, "step": 1372700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7365, "step": 1372800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7342, "step": 1372900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7369, "step": 1373000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7358, "step": 1373100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7306, "step": 1373200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7348, "step": 1373300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7374, "step": 1373400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7323, "step": 1373500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7395, "step": 1373600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.743, "step": 1373700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7429, "step": 1373800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7249, "step": 1373900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7256, "step": 1374000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7352, "step": 1374100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7381, "step": 1374200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7328, "step": 1374300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7383, "step": 1374400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7395, "step": 1374500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7349, "step": 1374600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7415, "step": 1374700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7382, "step": 1374800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.728, "step": 1374900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7416, "step": 1375000 }, { "epoch": 2.03, "eval_loss": 0.6932492852210999, "eval_runtime": 194.6784, "eval_samples_per_second": 256.834, "eval_steps_per_second": 2.008, "step": 1375000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7388, "step": 1375100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7399, "step": 1375200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7334, "step": 1375300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7388, "step": 1375400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7422, "step": 1375500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7415, "step": 1375600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7362, "step": 1375700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7323, "step": 1375800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7364, "step": 1375900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7441, "step": 1376000 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7388, "step": 1376100 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7321, "step": 1376200 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7406, "step": 1376300 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7406, "step": 1376400 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7299, "step": 1376500 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7336, "step": 1376600 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7377, "step": 1376700 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7421, "step": 1376800 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7321, "step": 1376900 }, { "epoch": 2.03, "learning_rate": 2e-05, "loss": 0.7368, "step": 1377000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7343, "step": 1377100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7418, "step": 1377200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7382, "step": 1377300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7362, "step": 1377400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7384, "step": 1377500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7316, "step": 1377600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.748, "step": 1377700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7329, "step": 1377800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.744, "step": 1377900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7452, "step": 1378000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7351, "step": 1378100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7354, "step": 1378200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7354, "step": 1378300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7363, "step": 1378400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7322, "step": 1378500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7337, "step": 1378600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7332, "step": 1378700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.735, "step": 1378800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7394, "step": 1378900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7381, "step": 1379000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7326, "step": 1379100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7259, "step": 1379200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7439, "step": 1379300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7378, "step": 1379400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7353, "step": 1379500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7436, "step": 1379600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7377, "step": 1379700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7308, "step": 1379800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7347, "step": 1379900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7424, "step": 1380000 }, { "epoch": 2.04, "eval_loss": 0.6935710310935974, "eval_runtime": 194.6566, "eval_samples_per_second": 256.863, "eval_steps_per_second": 2.009, "step": 1380000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7342, "step": 1380100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7259, "step": 1380200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.743, "step": 1380300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7382, "step": 1380400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7328, "step": 1380500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7348, "step": 1380600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7319, "step": 1380700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7386, "step": 1380800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7348, "step": 1380900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7411, "step": 1381000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.736, "step": 1381100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7376, "step": 1381200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7424, "step": 1381300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7315, "step": 1381400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7333, "step": 1381500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7399, "step": 1381600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7283, "step": 1381700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.738, "step": 1381800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7324, "step": 1381900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7379, "step": 1382000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7383, "step": 1382100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7355, "step": 1382200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.736, "step": 1382300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7379, "step": 1382400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7316, "step": 1382500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7341, "step": 1382600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7321, "step": 1382700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.734, "step": 1382800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7346, "step": 1382900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7266, "step": 1383000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7325, "step": 1383100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7342, "step": 1383200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7373, "step": 1383300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7357, "step": 1383400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7475, "step": 1383500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7376, "step": 1383600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7354, "step": 1383700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7269, "step": 1383800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7375, "step": 1383900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7411, "step": 1384000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7366, "step": 1384100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.728, "step": 1384200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7291, "step": 1384300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7316, "step": 1384400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7377, "step": 1384500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7317, "step": 1384600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7371, "step": 1384700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7364, "step": 1384800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7451, "step": 1384900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7344, "step": 1385000 }, { "epoch": 2.04, "eval_loss": 0.6926178336143494, "eval_runtime": 194.8471, "eval_samples_per_second": 256.611, "eval_steps_per_second": 2.007, "step": 1385000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7287, "step": 1385100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7292, "step": 1385200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7314, "step": 1385300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7314, "step": 1385400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7355, "step": 1385500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7294, "step": 1385600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7334, "step": 1385700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7492, "step": 1385800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7379, "step": 1385900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7388, "step": 1386000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7479, "step": 1386100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7358, "step": 1386200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7312, "step": 1386300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7365, "step": 1386400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7309, "step": 1386500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7333, "step": 1386600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7382, "step": 1386700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7351, "step": 1386800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7394, "step": 1386900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7408, "step": 1387000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.742, "step": 1387100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.738, "step": 1387200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7395, "step": 1387300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.74, "step": 1387400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7327, "step": 1387500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7353, "step": 1387600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.735, "step": 1387700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7433, "step": 1387800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7428, "step": 1387900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7343, "step": 1388000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7296, "step": 1388100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.736, "step": 1388200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7414, "step": 1388300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7421, "step": 1388400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7342, "step": 1388500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7366, "step": 1388600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7422, "step": 1388700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7353, "step": 1388800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7356, "step": 1388900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7381, "step": 1389000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7381, "step": 1389100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7381, "step": 1389200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7339, "step": 1389300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7351, "step": 1389400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7325, "step": 1389500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7352, "step": 1389600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7396, "step": 1389700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7385, "step": 1389800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7338, "step": 1389900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7352, "step": 1390000 }, { "epoch": 2.04, "eval_loss": 0.6942101716995239, "eval_runtime": 194.9343, "eval_samples_per_second": 256.497, "eval_steps_per_second": 2.006, "step": 1390000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7328, "step": 1390100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.73, "step": 1390200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7413, "step": 1390300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7305, "step": 1390400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7322, "step": 1390500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7301, "step": 1390600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.734, "step": 1390700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7392, "step": 1390800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7457, "step": 1390900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7402, "step": 1391000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7419, "step": 1391100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7358, "step": 1391200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7331, "step": 1391300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7406, "step": 1391400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7323, "step": 1391500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7345, "step": 1391600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7306, "step": 1391700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.735, "step": 1391800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7441, "step": 1391900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7361, "step": 1392000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7396, "step": 1392100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7347, "step": 1392200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7449, "step": 1392300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7342, "step": 1392400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7328, "step": 1392500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7394, "step": 1392600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.737, "step": 1392700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.733, "step": 1392800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7366, "step": 1392900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7291, "step": 1393000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7255, "step": 1393100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7358, "step": 1393200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7347, "step": 1393300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7393, "step": 1393400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7287, "step": 1393500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7387, "step": 1393600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7448, "step": 1393700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7426, "step": 1393800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7429, "step": 1393900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7309, "step": 1394000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7341, "step": 1394100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7408, "step": 1394200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7381, "step": 1394300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7302, "step": 1394400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7325, "step": 1394500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7392, "step": 1394600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7406, "step": 1394700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7438, "step": 1394800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7349, "step": 1394900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7332, "step": 1395000 }, { "epoch": 2.04, "eval_loss": 0.6926989555358887, "eval_runtime": 194.6281, "eval_samples_per_second": 256.9, "eval_steps_per_second": 2.009, "step": 1395000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7408, "step": 1395100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7366, "step": 1395200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7388, "step": 1395300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7311, "step": 1395400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7335, "step": 1395500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7325, "step": 1395600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7348, "step": 1395700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7356, "step": 1395800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7315, "step": 1395900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7278, "step": 1396000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7393, "step": 1396100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7371, "step": 1396200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7288, "step": 1396300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7386, "step": 1396400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7397, "step": 1396500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.742, "step": 1396600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7376, "step": 1396700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7398, "step": 1396800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7335, "step": 1396900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7329, "step": 1397000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7365, "step": 1397100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7324, "step": 1397200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.734, "step": 1397300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7361, "step": 1397400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.728, "step": 1397500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7279, "step": 1397600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.749, "step": 1397700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7364, "step": 1397800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7323, "step": 1397900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7351, "step": 1398000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7371, "step": 1398100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7316, "step": 1398200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7302, "step": 1398300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7454, "step": 1398400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7342, "step": 1398500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7321, "step": 1398600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7291, "step": 1398700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7365, "step": 1398800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7338, "step": 1398900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7384, "step": 1399000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.739, "step": 1399100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7369, "step": 1399200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.735, "step": 1399300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.742, "step": 1399400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7284, "step": 1399500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7364, "step": 1399600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7354, "step": 1399700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7414, "step": 1399800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.733, "step": 1399900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7379, "step": 1400000 }, { "epoch": 2.04, "eval_loss": 0.6916325688362122, "eval_runtime": 194.7342, "eval_samples_per_second": 256.76, "eval_steps_per_second": 2.008, "step": 1400000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7386, "step": 1400100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7267, "step": 1400200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7348, "step": 1400300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7359, "step": 1400400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7424, "step": 1400500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7335, "step": 1400600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7431, "step": 1400700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7368, "step": 1400800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7367, "step": 1400900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7357, "step": 1401000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.736, "step": 1401100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7441, "step": 1401200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7356, "step": 1401300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7364, "step": 1401400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7397, "step": 1401500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7388, "step": 1401600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7418, "step": 1401700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7342, "step": 1401800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7403, "step": 1401900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7398, "step": 1402000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.742, "step": 1402100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7304, "step": 1402200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7336, "step": 1402300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7378, "step": 1402400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7393, "step": 1402500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.743, "step": 1402600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7301, "step": 1402700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7332, "step": 1402800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7386, "step": 1402900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7372, "step": 1403000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7317, "step": 1403100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7309, "step": 1403200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7402, "step": 1403300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7307, "step": 1403400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7401, "step": 1403500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7434, "step": 1403600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7308, "step": 1403700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7381, "step": 1403800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7402, "step": 1403900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7355, "step": 1404000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7339, "step": 1404100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7473, "step": 1404200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.739, "step": 1404300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7318, "step": 1404400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.73, "step": 1404500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7396, "step": 1404600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7374, "step": 1404700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7381, "step": 1404800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.737, "step": 1404900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7325, "step": 1405000 }, { "epoch": 2.04, "eval_loss": 0.6937705278396606, "eval_runtime": 194.7497, "eval_samples_per_second": 256.74, "eval_steps_per_second": 2.008, "step": 1405000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7348, "step": 1405100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7386, "step": 1405200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7385, "step": 1405300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7414, "step": 1405400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7301, "step": 1405500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7307, "step": 1405600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7352, "step": 1405700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7334, "step": 1405800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7407, "step": 1405900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7449, "step": 1406000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7325, "step": 1406100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.73, "step": 1406200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7372, "step": 1406300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7315, "step": 1406400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7437, "step": 1406500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7392, "step": 1406600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7299, "step": 1406700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7341, "step": 1406800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7397, "step": 1406900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7353, "step": 1407000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7378, "step": 1407100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.732, "step": 1407200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7384, "step": 1407300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7343, "step": 1407400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7372, "step": 1407500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7275, "step": 1407600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.737, "step": 1407700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7304, "step": 1407800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7325, "step": 1407900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7391, "step": 1408000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7406, "step": 1408100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7331, "step": 1408200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.735, "step": 1408300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7361, "step": 1408400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7369, "step": 1408500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7347, "step": 1408600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7326, "step": 1408700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7339, "step": 1408800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.737, "step": 1408900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7339, "step": 1409000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7458, "step": 1409100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7398, "step": 1409200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7412, "step": 1409300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7348, "step": 1409400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7336, "step": 1409500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7391, "step": 1409600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7378, "step": 1409700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.732, "step": 1409800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7389, "step": 1409900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7485, "step": 1410000 }, { "epoch": 2.04, "eval_loss": 0.6946880221366882, "eval_runtime": 203.4477, "eval_samples_per_second": 245.763, "eval_steps_per_second": 1.922, "step": 1410000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7414, "step": 1410100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7373, "step": 1410200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7392, "step": 1410300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7347, "step": 1410400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7425, "step": 1410500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7376, "step": 1410600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7311, "step": 1410700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7336, "step": 1410800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7402, "step": 1410900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7336, "step": 1411000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7271, "step": 1411100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7358, "step": 1411200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7417, "step": 1411300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7376, "step": 1411400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7387, "step": 1411500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7362, "step": 1411600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7327, "step": 1411700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7433, "step": 1411800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7315, "step": 1411900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7307, "step": 1412000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.731, "step": 1412100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.734, "step": 1412200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7356, "step": 1412300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7378, "step": 1412400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7404, "step": 1412500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7362, "step": 1412600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7337, "step": 1412700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.743, "step": 1412800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7286, "step": 1412900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7361, "step": 1413000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7279, "step": 1413100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7316, "step": 1413200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7318, "step": 1413300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7345, "step": 1413400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7365, "step": 1413500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7298, "step": 1413600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7382, "step": 1413700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.733, "step": 1413800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7314, "step": 1413900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7312, "step": 1414000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7327, "step": 1414100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7307, "step": 1414200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7357, "step": 1414300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7389, "step": 1414400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7376, "step": 1414500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7334, "step": 1414600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7317, "step": 1414700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7356, "step": 1414800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7286, "step": 1414900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7447, "step": 1415000 }, { "epoch": 2.04, "eval_loss": 0.6938619017601013, "eval_runtime": 199.9529, "eval_samples_per_second": 250.059, "eval_steps_per_second": 1.955, "step": 1415000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7368, "step": 1415100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7389, "step": 1415200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7373, "step": 1415300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7433, "step": 1415400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7385, "step": 1415500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7313, "step": 1415600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7342, "step": 1415700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7367, "step": 1415800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.735, "step": 1415900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7249, "step": 1416000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.731, "step": 1416100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.738, "step": 1416200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7378, "step": 1416300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7353, "step": 1416400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7356, "step": 1416500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7372, "step": 1416600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7365, "step": 1416700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.735, "step": 1416800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7333, "step": 1416900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7369, "step": 1417000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7381, "step": 1417100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7369, "step": 1417200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7463, "step": 1417300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7437, "step": 1417400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7322, "step": 1417500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7348, "step": 1417600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7403, "step": 1417700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7281, "step": 1417800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7329, "step": 1417900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7409, "step": 1418000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7287, "step": 1418100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7302, "step": 1418200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7347, "step": 1418300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7268, "step": 1418400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7354, "step": 1418500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7316, "step": 1418600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7403, "step": 1418700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7306, "step": 1418800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7364, "step": 1418900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.724, "step": 1419000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7373, "step": 1419100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7296, "step": 1419200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7298, "step": 1419300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7322, "step": 1419400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7399, "step": 1419500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7367, "step": 1419600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7405, "step": 1419700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7361, "step": 1419800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7439, "step": 1419900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7308, "step": 1420000 }, { "epoch": 2.04, "eval_loss": 0.6929402947425842, "eval_runtime": 194.6859, "eval_samples_per_second": 256.824, "eval_steps_per_second": 2.008, "step": 1420000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7315, "step": 1420100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7351, "step": 1420200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7416, "step": 1420300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7324, "step": 1420400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7286, "step": 1420500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7314, "step": 1420600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7286, "step": 1420700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7322, "step": 1420800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7286, "step": 1420900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7311, "step": 1421000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7268, "step": 1421100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7286, "step": 1421200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7333, "step": 1421300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7379, "step": 1421400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7365, "step": 1421500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7329, "step": 1421600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7289, "step": 1421700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7286, "step": 1421800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7241, "step": 1421900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7417, "step": 1422000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7315, "step": 1422100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.738, "step": 1422200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7326, "step": 1422300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7422, "step": 1422400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7357, "step": 1422500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7332, "step": 1422600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.734, "step": 1422700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7332, "step": 1422800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7366, "step": 1422900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7363, "step": 1423000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7319, "step": 1423100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7394, "step": 1423200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7348, "step": 1423300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.735, "step": 1423400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7233, "step": 1423500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7311, "step": 1423600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7309, "step": 1423700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7317, "step": 1423800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7364, "step": 1423900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7354, "step": 1424000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7347, "step": 1424100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7425, "step": 1424200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7397, "step": 1424300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7337, "step": 1424400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7345, "step": 1424500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7339, "step": 1424600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.733, "step": 1424700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7404, "step": 1424800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7306, "step": 1424900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7364, "step": 1425000 }, { "epoch": 2.04, "eval_loss": 0.693824052810669, "eval_runtime": 194.8767, "eval_samples_per_second": 256.572, "eval_steps_per_second": 2.006, "step": 1425000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7381, "step": 1425100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7363, "step": 1425200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.734, "step": 1425300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7317, "step": 1425400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.735, "step": 1425500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7316, "step": 1425600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7353, "step": 1425700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7428, "step": 1425800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7329, "step": 1425900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7341, "step": 1426000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7362, "step": 1426100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7362, "step": 1426200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7416, "step": 1426300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7377, "step": 1426400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7398, "step": 1426500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7353, "step": 1426600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7293, "step": 1426700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7395, "step": 1426800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7325, "step": 1426900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7328, "step": 1427000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7338, "step": 1427100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7337, "step": 1427200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7405, "step": 1427300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7429, "step": 1427400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.732, "step": 1427500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7323, "step": 1427600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7352, "step": 1427700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7322, "step": 1427800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7308, "step": 1427900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7394, "step": 1428000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7357, "step": 1428100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7306, "step": 1428200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7398, "step": 1428300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7322, "step": 1428400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7348, "step": 1428500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7349, "step": 1428600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7417, "step": 1428700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7349, "step": 1428800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7312, "step": 1428900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7359, "step": 1429000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7357, "step": 1429100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7318, "step": 1429200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7261, "step": 1429300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7376, "step": 1429400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7354, "step": 1429500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7364, "step": 1429600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7329, "step": 1429700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7321, "step": 1429800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7304, "step": 1429900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7322, "step": 1430000 }, { "epoch": 2.04, "eval_loss": 0.6921608448028564, "eval_runtime": 194.7627, "eval_samples_per_second": 256.723, "eval_steps_per_second": 2.008, "step": 1430000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7368, "step": 1430100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7323, "step": 1430200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7263, "step": 1430300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7344, "step": 1430400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7388, "step": 1430500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7338, "step": 1430600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7339, "step": 1430700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7267, "step": 1430800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7282, "step": 1430900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7373, "step": 1431000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7291, "step": 1431100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7318, "step": 1431200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7404, "step": 1431300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7323, "step": 1431400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7359, "step": 1431500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7336, "step": 1431600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7404, "step": 1431700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7322, "step": 1431800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7319, "step": 1431900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7334, "step": 1432000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7343, "step": 1432100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7284, "step": 1432200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7457, "step": 1432300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7335, "step": 1432400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7276, "step": 1432500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7412, "step": 1432600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7339, "step": 1432700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7406, "step": 1432800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.739, "step": 1432900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7375, "step": 1433000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7346, "step": 1433100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7313, "step": 1433200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.725, "step": 1433300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7359, "step": 1433400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7384, "step": 1433500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7409, "step": 1433600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7381, "step": 1433700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7353, "step": 1433800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7376, "step": 1433900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7351, "step": 1434000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7338, "step": 1434100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7348, "step": 1434200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.73, "step": 1434300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7323, "step": 1434400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.731, "step": 1434500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7368, "step": 1434600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7318, "step": 1434700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7297, "step": 1434800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7377, "step": 1434900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7403, "step": 1435000 }, { "epoch": 2.04, "eval_loss": 0.692685067653656, "eval_runtime": 194.5633, "eval_samples_per_second": 256.986, "eval_steps_per_second": 2.01, "step": 1435000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7352, "step": 1435100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7373, "step": 1435200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7317, "step": 1435300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7394, "step": 1435400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7367, "step": 1435500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7219, "step": 1435600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7346, "step": 1435700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7333, "step": 1435800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7313, "step": 1435900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7407, "step": 1436000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7397, "step": 1436100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7417, "step": 1436200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7268, "step": 1436300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7398, "step": 1436400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7373, "step": 1436500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7346, "step": 1436600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7264, "step": 1436700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7331, "step": 1436800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7346, "step": 1436900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7385, "step": 1437000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7264, "step": 1437100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7358, "step": 1437200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.738, "step": 1437300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7311, "step": 1437400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7316, "step": 1437500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7373, "step": 1437600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.728, "step": 1437700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7384, "step": 1437800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7353, "step": 1437900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7349, "step": 1438000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7375, "step": 1438100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7362, "step": 1438200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7333, "step": 1438300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7278, "step": 1438400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7375, "step": 1438500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7369, "step": 1438600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7271, "step": 1438700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7371, "step": 1438800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7439, "step": 1438900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7285, "step": 1439000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7368, "step": 1439100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7333, "step": 1439200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7389, "step": 1439300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7402, "step": 1439400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7285, "step": 1439500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7312, "step": 1439600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7296, "step": 1439700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7386, "step": 1439800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7306, "step": 1439900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7287, "step": 1440000 }, { "epoch": 2.04, "eval_loss": 0.6933913826942444, "eval_runtime": 194.9496, "eval_samples_per_second": 256.477, "eval_steps_per_second": 2.006, "step": 1440000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7438, "step": 1440100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7352, "step": 1440200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7399, "step": 1440300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7361, "step": 1440400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.733, "step": 1440500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7313, "step": 1440600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7317, "step": 1440700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7321, "step": 1440800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7311, "step": 1440900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7401, "step": 1441000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.743, "step": 1441100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7271, "step": 1441200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7265, "step": 1441300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7308, "step": 1441400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7329, "step": 1441500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7346, "step": 1441600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.734, "step": 1441700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7359, "step": 1441800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7294, "step": 1441900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.736, "step": 1442000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.729, "step": 1442100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7318, "step": 1442200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.741, "step": 1442300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7405, "step": 1442400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.737, "step": 1442500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.728, "step": 1442600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7378, "step": 1442700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7359, "step": 1442800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7384, "step": 1442900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7346, "step": 1443000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7389, "step": 1443100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7377, "step": 1443200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7358, "step": 1443300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7371, "step": 1443400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7404, "step": 1443500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7299, "step": 1443600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7372, "step": 1443700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.735, "step": 1443800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7361, "step": 1443900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7355, "step": 1444000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.734, "step": 1444100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7376, "step": 1444200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.728, "step": 1444300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7339, "step": 1444400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7336, "step": 1444500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7323, "step": 1444600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7418, "step": 1444700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7339, "step": 1444800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7348, "step": 1444900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7346, "step": 1445000 }, { "epoch": 2.04, "eval_loss": 0.693956196308136, "eval_runtime": 194.9936, "eval_samples_per_second": 256.419, "eval_steps_per_second": 2.005, "step": 1445000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7287, "step": 1445100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7359, "step": 1445200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7306, "step": 1445300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7441, "step": 1445400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7403, "step": 1445500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7371, "step": 1445600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7389, "step": 1445700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7404, "step": 1445800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7341, "step": 1445900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7396, "step": 1446000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7358, "step": 1446100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7298, "step": 1446200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7336, "step": 1446300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7388, "step": 1446400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7316, "step": 1446500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7327, "step": 1446600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7369, "step": 1446700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7281, "step": 1446800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7408, "step": 1446900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7339, "step": 1447000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7279, "step": 1447100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.739, "step": 1447200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.727, "step": 1447300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7384, "step": 1447400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7356, "step": 1447500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7327, "step": 1447600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7288, "step": 1447700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7418, "step": 1447800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7437, "step": 1447900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7301, "step": 1448000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7324, "step": 1448100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7312, "step": 1448200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7344, "step": 1448300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7463, "step": 1448400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7352, "step": 1448500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7472, "step": 1448600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7298, "step": 1448700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.739, "step": 1448800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7318, "step": 1448900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7384, "step": 1449000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7318, "step": 1449100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7372, "step": 1449200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7367, "step": 1449300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7332, "step": 1449400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7351, "step": 1449500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7348, "step": 1449600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7438, "step": 1449700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7285, "step": 1449800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7377, "step": 1449900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7406, "step": 1450000 }, { "epoch": 2.04, "eval_loss": 0.6916955709457397, "eval_runtime": 193.9614, "eval_samples_per_second": 257.783, "eval_steps_per_second": 2.016, "step": 1450000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7356, "step": 1450100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7262, "step": 1450200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7391, "step": 1450300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7352, "step": 1450400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7358, "step": 1450500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7348, "step": 1450600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7326, "step": 1450700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7439, "step": 1450800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7289, "step": 1450900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7453, "step": 1451000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7323, "step": 1451100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7435, "step": 1451200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.735, "step": 1451300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7358, "step": 1451400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7369, "step": 1451500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7327, "step": 1451600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7266, "step": 1451700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7335, "step": 1451800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7339, "step": 1451900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7319, "step": 1452000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7301, "step": 1452100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7449, "step": 1452200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.733, "step": 1452300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7323, "step": 1452400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7267, "step": 1452500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7348, "step": 1452600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7277, "step": 1452700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7278, "step": 1452800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7278, "step": 1452900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.736, "step": 1453000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7386, "step": 1453100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7417, "step": 1453200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7322, "step": 1453300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7332, "step": 1453400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7298, "step": 1453500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7255, "step": 1453600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7316, "step": 1453700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7338, "step": 1453800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7314, "step": 1453900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7329, "step": 1454000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7359, "step": 1454100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7324, "step": 1454200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7396, "step": 1454300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7252, "step": 1454400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7403, "step": 1454500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7393, "step": 1454600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7367, "step": 1454700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7365, "step": 1454800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7325, "step": 1454900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7329, "step": 1455000 }, { "epoch": 2.04, "eval_loss": 0.6905053853988647, "eval_runtime": 194.6428, "eval_samples_per_second": 256.881, "eval_steps_per_second": 2.009, "step": 1455000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.73, "step": 1455100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7347, "step": 1455200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7368, "step": 1455300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7466, "step": 1455400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7319, "step": 1455500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7342, "step": 1455600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7386, "step": 1455700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7382, "step": 1455800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7293, "step": 1455900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.736, "step": 1456000 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7342, "step": 1456100 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7311, "step": 1456200 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7408, "step": 1456300 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7354, "step": 1456400 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7394, "step": 1456500 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7374, "step": 1456600 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7347, "step": 1456700 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7326, "step": 1456800 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7405, "step": 1456900 }, { "epoch": 2.04, "learning_rate": 2e-05, "loss": 0.7352, "step": 1457000 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7417, "step": 1457100 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7379, "step": 1457200 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7236, "step": 1457300 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7366, "step": 1457400 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7348, "step": 1457500 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7364, "step": 1457600 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7416, "step": 1457700 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7304, "step": 1457800 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7353, "step": 1457900 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7347, "step": 1458000 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7302, "step": 1458100 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7385, "step": 1458200 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7362, "step": 1458300 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7347, "step": 1458400 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.729, "step": 1458500 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7263, "step": 1458600 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7321, "step": 1458700 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7322, "step": 1458800 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7338, "step": 1458900 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7293, "step": 1459000 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.735, "step": 1459100 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7409, "step": 1459200 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7371, "step": 1459300 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7384, "step": 1459400 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7322, "step": 1459500 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7323, "step": 1459600 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7306, "step": 1459700 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7371, "step": 1459800 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.74, "step": 1459900 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7391, "step": 1460000 }, { "epoch": 2.05, "eval_loss": 0.6919227838516235, "eval_runtime": 194.6141, "eval_samples_per_second": 256.919, "eval_steps_per_second": 2.009, "step": 1460000 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7343, "step": 1460100 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.733, "step": 1460200 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7423, "step": 1460300 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7391, "step": 1460400 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7344, "step": 1460500 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.725, "step": 1460600 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7388, "step": 1460700 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7346, "step": 1460800 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7307, "step": 1460900 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7315, "step": 1461000 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7319, "step": 1461100 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7423, "step": 1461200 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7367, "step": 1461300 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7299, "step": 1461400 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.736, "step": 1461500 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7316, "step": 1461600 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7352, "step": 1461700 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7417, "step": 1461800 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7317, "step": 1461900 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7344, "step": 1462000 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7279, "step": 1462100 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7302, "step": 1462200 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.735, "step": 1462300 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7299, "step": 1462400 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7343, "step": 1462500 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7328, "step": 1462600 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7283, "step": 1462700 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7331, "step": 1462800 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7353, "step": 1462900 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7314, "step": 1463000 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7376, "step": 1463100 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.734, "step": 1463200 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7333, "step": 1463300 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7394, "step": 1463400 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7299, "step": 1463500 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7467, "step": 1463600 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7348, "step": 1463700 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7374, "step": 1463800 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7282, "step": 1463900 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7333, "step": 1464000 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7383, "step": 1464100 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7317, "step": 1464200 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7341, "step": 1464300 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.733, "step": 1464400 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7396, "step": 1464500 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7395, "step": 1464600 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7301, "step": 1464700 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7345, "step": 1464800 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7357, "step": 1464900 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7314, "step": 1465000 }, { "epoch": 2.05, "eval_loss": 0.6930609941482544, "eval_runtime": 194.3886, "eval_samples_per_second": 257.217, "eval_steps_per_second": 2.011, "step": 1465000 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7371, "step": 1465100 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7318, "step": 1465200 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7399, "step": 1465300 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7289, "step": 1465400 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7372, "step": 1465500 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7424, "step": 1465600 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7389, "step": 1465700 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7436, "step": 1465800 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7309, "step": 1465900 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7352, "step": 1466000 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.735, "step": 1466100 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7303, "step": 1466200 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7317, "step": 1466300 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7309, "step": 1466400 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7364, "step": 1466500 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7381, "step": 1466600 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7358, "step": 1466700 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7392, "step": 1466800 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7284, "step": 1466900 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7318, "step": 1467000 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7366, "step": 1467100 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7278, "step": 1467200 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7426, "step": 1467300 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7378, "step": 1467400 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7283, "step": 1467500 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7368, "step": 1467600 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7403, "step": 1467700 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7373, "step": 1467800 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7409, "step": 1467900 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.737, "step": 1468000 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7394, "step": 1468100 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7403, "step": 1468200 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7343, "step": 1468300 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7282, "step": 1468400 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7397, "step": 1468500 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7344, "step": 1468600 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.733, "step": 1468700 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7258, "step": 1468800 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7422, "step": 1468900 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7337, "step": 1469000 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7347, "step": 1469100 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7368, "step": 1469200 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7361, "step": 1469300 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.73, "step": 1469400 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7273, "step": 1469500 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7288, "step": 1469600 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7412, "step": 1469700 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.732, "step": 1469800 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7391, "step": 1469900 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7291, "step": 1470000 }, { "epoch": 2.05, "eval_loss": 0.6922557950019836, "eval_runtime": 194.485, "eval_samples_per_second": 257.089, "eval_steps_per_second": 2.01, "step": 1470000 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7306, "step": 1470100 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7321, "step": 1470200 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7301, "step": 1470300 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7239, "step": 1470400 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.724, "step": 1470500 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7381, "step": 1470600 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7298, "step": 1470700 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7382, "step": 1470800 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7346, "step": 1470900 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7313, "step": 1471000 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7288, "step": 1471100 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7323, "step": 1471200 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7294, "step": 1471300 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7293, "step": 1471400 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.732, "step": 1471500 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7292, "step": 1471600 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7445, "step": 1471700 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7366, "step": 1471800 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.729, "step": 1471900 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7287, "step": 1472000 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7323, "step": 1472100 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7256, "step": 1472200 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7407, "step": 1472300 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7285, "step": 1472400 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7342, "step": 1472500 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7258, "step": 1472600 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7309, "step": 1472700 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7281, "step": 1472800 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.729, "step": 1472900 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.739, "step": 1473000 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7314, "step": 1473100 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7382, "step": 1473200 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7271, "step": 1473300 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7425, "step": 1473400 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7406, "step": 1473500 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.739, "step": 1473600 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7319, "step": 1473700 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7383, "step": 1473800 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.736, "step": 1473900 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.737, "step": 1474000 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7315, "step": 1474100 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7336, "step": 1474200 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7305, "step": 1474300 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7417, "step": 1474400 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7262, "step": 1474500 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7302, "step": 1474600 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7283, "step": 1474700 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7291, "step": 1474800 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.74, "step": 1474900 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7337, "step": 1475000 }, { "epoch": 2.05, "eval_loss": 0.6930740475654602, "eval_runtime": 194.68, "eval_samples_per_second": 256.832, "eval_steps_per_second": 2.008, "step": 1475000 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7346, "step": 1475100 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7288, "step": 1475200 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.731, "step": 1475300 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.733, "step": 1475400 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7344, "step": 1475500 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.735, "step": 1475600 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7351, "step": 1475700 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7306, "step": 1475800 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7329, "step": 1475900 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7387, "step": 1476000 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7313, "step": 1476100 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7315, "step": 1476200 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7268, "step": 1476300 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7396, "step": 1476400 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.732, "step": 1476500 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7362, "step": 1476600 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7389, "step": 1476700 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7275, "step": 1476800 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7329, "step": 1476900 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7389, "step": 1477000 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7376, "step": 1477100 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7336, "step": 1477200 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7337, "step": 1477300 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7302, "step": 1477400 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7384, "step": 1477500 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7322, "step": 1477600 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7313, "step": 1477700 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7296, "step": 1477800 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7342, "step": 1477900 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7276, "step": 1478000 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7383, "step": 1478100 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7302, "step": 1478200 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7233, "step": 1478300 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7421, "step": 1478400 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7315, "step": 1478500 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7332, "step": 1478600 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7265, "step": 1478700 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7309, "step": 1478800 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7399, "step": 1478900 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7363, "step": 1479000 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7313, "step": 1479100 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7383, "step": 1479200 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7324, "step": 1479300 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7337, "step": 1479400 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7294, "step": 1479500 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7413, "step": 1479600 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7375, "step": 1479700 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7336, "step": 1479800 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7262, "step": 1479900 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7324, "step": 1480000 }, { "epoch": 2.05, "eval_loss": 0.6922203898429871, "eval_runtime": 194.9048, "eval_samples_per_second": 256.535, "eval_steps_per_second": 2.006, "step": 1480000 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7307, "step": 1480100 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7314, "step": 1480200 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7397, "step": 1480300 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7328, "step": 1480400 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7397, "step": 1480500 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7312, "step": 1480600 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7289, "step": 1480700 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7374, "step": 1480800 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7343, "step": 1480900 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7339, "step": 1481000 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7332, "step": 1481100 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7311, "step": 1481200 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7344, "step": 1481300 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7332, "step": 1481400 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7347, "step": 1481500 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7385, "step": 1481600 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.732, "step": 1481700 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7286, "step": 1481800 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7418, "step": 1481900 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7371, "step": 1482000 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7352, "step": 1482100 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7259, "step": 1482200 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7316, "step": 1482300 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7368, "step": 1482400 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7384, "step": 1482500 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7324, "step": 1482600 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7294, "step": 1482700 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7286, "step": 1482800 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7309, "step": 1482900 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7348, "step": 1483000 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7415, "step": 1483100 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7323, "step": 1483200 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7327, "step": 1483300 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7333, "step": 1483400 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7413, "step": 1483500 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7399, "step": 1483600 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7376, "step": 1483700 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7385, "step": 1483800 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7388, "step": 1483900 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7372, "step": 1484000 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7339, "step": 1484100 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7351, "step": 1484200 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7311, "step": 1484300 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7314, "step": 1484400 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7393, "step": 1484500 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7354, "step": 1484600 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7303, "step": 1484700 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7343, "step": 1484800 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7328, "step": 1484900 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7305, "step": 1485000 }, { "epoch": 2.05, "eval_loss": 0.6922410130500793, "eval_runtime": 194.5142, "eval_samples_per_second": 257.051, "eval_steps_per_second": 2.01, "step": 1485000 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7292, "step": 1485100 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.732, "step": 1485200 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7381, "step": 1485300 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7344, "step": 1485400 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7337, "step": 1485500 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7316, "step": 1485600 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7299, "step": 1485700 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7359, "step": 1485800 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.742, "step": 1485900 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7361, "step": 1486000 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7372, "step": 1486100 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7286, "step": 1486200 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7324, "step": 1486300 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.737, "step": 1486400 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7325, "step": 1486500 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7386, "step": 1486600 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7381, "step": 1486700 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7336, "step": 1486800 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7258, "step": 1486900 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7415, "step": 1487000 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7301, "step": 1487100 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7379, "step": 1487200 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7387, "step": 1487300 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.728, "step": 1487400 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7315, "step": 1487500 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7278, "step": 1487600 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7291, "step": 1487700 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7353, "step": 1487800 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7337, "step": 1487900 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7365, "step": 1488000 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7352, "step": 1488100 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7313, "step": 1488200 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7416, "step": 1488300 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7404, "step": 1488400 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7305, "step": 1488500 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7346, "step": 1488600 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7349, "step": 1488700 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7336, "step": 1488800 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7333, "step": 1488900 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7307, "step": 1489000 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7337, "step": 1489100 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7378, "step": 1489200 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7267, "step": 1489300 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7387, "step": 1489400 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7323, "step": 1489500 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7337, "step": 1489600 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7296, "step": 1489700 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7382, "step": 1489800 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7321, "step": 1489900 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7331, "step": 1490000 }, { "epoch": 2.05, "eval_loss": 0.6925473213195801, "eval_runtime": 194.8607, "eval_samples_per_second": 256.594, "eval_steps_per_second": 2.007, "step": 1490000 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7311, "step": 1490100 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7235, "step": 1490200 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7327, "step": 1490300 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.739, "step": 1490400 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7326, "step": 1490500 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7354, "step": 1490600 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7254, "step": 1490700 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7292, "step": 1490800 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7387, "step": 1490900 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7395, "step": 1491000 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7263, "step": 1491100 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7259, "step": 1491200 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7282, "step": 1491300 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7289, "step": 1491400 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7264, "step": 1491500 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7398, "step": 1491600 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7316, "step": 1491700 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7306, "step": 1491800 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7277, "step": 1491900 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7327, "step": 1492000 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.734, "step": 1492100 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7347, "step": 1492200 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7315, "step": 1492300 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.732, "step": 1492400 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7314, "step": 1492500 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7317, "step": 1492600 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7444, "step": 1492700 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7381, "step": 1492800 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7361, "step": 1492900 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7283, "step": 1493000 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7317, "step": 1493100 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7351, "step": 1493200 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7364, "step": 1493300 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7355, "step": 1493400 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7263, "step": 1493500 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7345, "step": 1493600 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7333, "step": 1493700 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7367, "step": 1493800 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7353, "step": 1493900 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7367, "step": 1494000 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7294, "step": 1494100 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7354, "step": 1494200 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7375, "step": 1494300 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7375, "step": 1494400 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7303, "step": 1494500 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.73, "step": 1494600 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.73, "step": 1494700 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7243, "step": 1494800 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7278, "step": 1494900 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7354, "step": 1495000 }, { "epoch": 2.05, "eval_loss": 0.6898379921913147, "eval_runtime": 194.3102, "eval_samples_per_second": 257.32, "eval_steps_per_second": 2.012, "step": 1495000 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7337, "step": 1495100 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7318, "step": 1495200 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7316, "step": 1495300 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7424, "step": 1495400 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7321, "step": 1495500 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7309, "step": 1495600 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7297, "step": 1495700 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7338, "step": 1495800 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7302, "step": 1495900 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7393, "step": 1496000 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7294, "step": 1496100 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7327, "step": 1496200 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7295, "step": 1496300 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7332, "step": 1496400 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7329, "step": 1496500 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7347, "step": 1496600 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7358, "step": 1496700 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7327, "step": 1496800 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7209, "step": 1496900 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7331, "step": 1497000 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7307, "step": 1497100 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.733, "step": 1497200 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7288, "step": 1497300 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7302, "step": 1497400 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7359, "step": 1497500 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7365, "step": 1497600 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7346, "step": 1497700 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7383, "step": 1497800 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7331, "step": 1497900 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7391, "step": 1498000 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7316, "step": 1498100 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7404, "step": 1498200 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.731, "step": 1498300 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7365, "step": 1498400 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7291, "step": 1498500 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7302, "step": 1498600 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7327, "step": 1498700 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7302, "step": 1498800 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7187, "step": 1498900 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.727, "step": 1499000 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.735, "step": 1499100 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7292, "step": 1499200 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7285, "step": 1499300 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.738, "step": 1499400 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7349, "step": 1499500 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7342, "step": 1499600 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7333, "step": 1499700 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7381, "step": 1499800 }, { "epoch": 2.05, "learning_rate": 2e-05, "loss": 0.7354, "step": 1499900 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7359, "step": 1500000 }, { "epoch": 2.05, "eval_loss": 0.6918967366218567, "eval_runtime": 194.9655, "eval_samples_per_second": 256.456, "eval_steps_per_second": 2.005, "step": 1500000 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7272, "step": 1500100 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7268, "step": 1500200 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7313, "step": 1500300 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7333, "step": 1500400 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7255, "step": 1500500 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7278, "step": 1500600 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7271, "step": 1500700 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7308, "step": 1500800 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7289, "step": 1500900 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7241, "step": 1501000 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7249, "step": 1501100 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7293, "step": 1501200 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7308, "step": 1501300 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7314, "step": 1501400 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7278, "step": 1501500 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7297, "step": 1501600 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7234, "step": 1501700 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7254, "step": 1501800 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7289, "step": 1501900 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7346, "step": 1502000 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7304, "step": 1502100 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7259, "step": 1502200 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7283, "step": 1502300 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7215, "step": 1502400 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.728, "step": 1502500 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7302, "step": 1502600 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.727, "step": 1502700 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7227, "step": 1502800 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7242, "step": 1502900 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7206, "step": 1503000 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7254, "step": 1503100 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7315, "step": 1503200 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7187, "step": 1503300 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7234, "step": 1503400 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7144, "step": 1503500 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7196, "step": 1503600 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7236, "step": 1503700 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7209, "step": 1503800 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7206, "step": 1503900 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7183, "step": 1504000 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7168, "step": 1504100 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7197, "step": 1504200 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.721, "step": 1504300 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7281, "step": 1504400 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7265, "step": 1504500 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7203, "step": 1504600 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7206, "step": 1504700 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7174, "step": 1504800 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7167, "step": 1504900 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7209, "step": 1505000 }, { "epoch": 2.05, "eval_loss": 0.6809056997299194, "eval_runtime": 194.8193, "eval_samples_per_second": 256.648, "eval_steps_per_second": 2.007, "step": 1505000 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7196, "step": 1505100 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7243, "step": 1505200 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7185, "step": 1505300 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7202, "step": 1505400 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7297, "step": 1505500 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7218, "step": 1505600 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7225, "step": 1505700 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7212, "step": 1505800 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7239, "step": 1505900 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.724, "step": 1506000 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7203, "step": 1506100 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7301, "step": 1506200 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7199, "step": 1506300 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7164, "step": 1506400 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7187, "step": 1506500 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7284, "step": 1506600 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7187, "step": 1506700 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7226, "step": 1506800 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7237, "step": 1506900 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7169, "step": 1507000 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7187, "step": 1507100 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7168, "step": 1507200 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.726, "step": 1507300 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.711, "step": 1507400 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7162, "step": 1507500 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7185, "step": 1507600 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7306, "step": 1507700 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.72, "step": 1507800 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7236, "step": 1507900 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7196, "step": 1508000 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7257, "step": 1508100 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7205, "step": 1508200 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7147, "step": 1508300 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7227, "step": 1508400 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7126, "step": 1508500 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.719, "step": 1508600 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7176, "step": 1508700 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7249, "step": 1508800 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7215, "step": 1508900 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7186, "step": 1509000 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7171, "step": 1509100 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7173, "step": 1509200 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7211, "step": 1509300 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7211, "step": 1509400 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7144, "step": 1509500 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7198, "step": 1509600 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7269, "step": 1509700 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7162, "step": 1509800 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7303, "step": 1509900 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7231, "step": 1510000 }, { "epoch": 2.05, "eval_loss": 0.679633617401123, "eval_runtime": 194.6144, "eval_samples_per_second": 256.918, "eval_steps_per_second": 2.009, "step": 1510000 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7249, "step": 1510100 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7174, "step": 1510200 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7204, "step": 1510300 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7169, "step": 1510400 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7209, "step": 1510500 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7202, "step": 1510600 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7302, "step": 1510700 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7199, "step": 1510800 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7227, "step": 1510900 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7146, "step": 1511000 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7259, "step": 1511100 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7283, "step": 1511200 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7187, "step": 1511300 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7211, "step": 1511400 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7227, "step": 1511500 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7162, "step": 1511600 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7179, "step": 1511700 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7218, "step": 1511800 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7221, "step": 1511900 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7162, "step": 1512000 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7198, "step": 1512100 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7149, "step": 1512200 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7201, "step": 1512300 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7207, "step": 1512400 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7284, "step": 1512500 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7146, "step": 1512600 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7089, "step": 1512700 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.72, "step": 1512800 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7134, "step": 1512900 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7137, "step": 1513000 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7212, "step": 1513100 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7159, "step": 1513200 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7118, "step": 1513300 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7248, "step": 1513400 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7174, "step": 1513500 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7186, "step": 1513600 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7111, "step": 1513700 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7157, "step": 1513800 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7176, "step": 1513900 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7268, "step": 1514000 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7254, "step": 1514100 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7106, "step": 1514200 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7225, "step": 1514300 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.716, "step": 1514400 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7202, "step": 1514500 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7115, "step": 1514600 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7122, "step": 1514700 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7224, "step": 1514800 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7192, "step": 1514900 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7155, "step": 1515000 }, { "epoch": 2.05, "eval_loss": 0.6776483654975891, "eval_runtime": 194.7764, "eval_samples_per_second": 256.705, "eval_steps_per_second": 2.007, "step": 1515000 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7174, "step": 1515100 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.715, "step": 1515200 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7133, "step": 1515300 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7224, "step": 1515400 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7182, "step": 1515500 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7155, "step": 1515600 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7151, "step": 1515700 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7128, "step": 1515800 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7182, "step": 1515900 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7246, "step": 1516000 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7205, "step": 1516100 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7223, "step": 1516200 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7211, "step": 1516300 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7189, "step": 1516400 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7173, "step": 1516500 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7178, "step": 1516600 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7181, "step": 1516700 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7281, "step": 1516800 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7157, "step": 1516900 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7126, "step": 1517000 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7168, "step": 1517100 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7155, "step": 1517200 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7139, "step": 1517300 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.722, "step": 1517400 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.713, "step": 1517500 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7171, "step": 1517600 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7238, "step": 1517700 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7186, "step": 1517800 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7158, "step": 1517900 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7194, "step": 1518000 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7107, "step": 1518100 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7194, "step": 1518200 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7205, "step": 1518300 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7234, "step": 1518400 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7258, "step": 1518500 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7138, "step": 1518600 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7202, "step": 1518700 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7233, "step": 1518800 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7181, "step": 1518900 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7192, "step": 1519000 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7175, "step": 1519100 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7131, "step": 1519200 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7167, "step": 1519300 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7148, "step": 1519400 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7142, "step": 1519500 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7199, "step": 1519600 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.72, "step": 1519700 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7173, "step": 1519800 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7161, "step": 1519900 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7101, "step": 1520000 }, { "epoch": 2.05, "eval_loss": 0.6772561073303223, "eval_runtime": 195.0811, "eval_samples_per_second": 256.304, "eval_steps_per_second": 2.004, "step": 1520000 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7194, "step": 1520100 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7133, "step": 1520200 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7179, "step": 1520300 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7187, "step": 1520400 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.717, "step": 1520500 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7147, "step": 1520600 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7178, "step": 1520700 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7238, "step": 1520800 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7135, "step": 1520900 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7116, "step": 1521000 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7167, "step": 1521100 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7213, "step": 1521200 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7139, "step": 1521300 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7163, "step": 1521400 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7205, "step": 1521500 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7158, "step": 1521600 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7196, "step": 1521700 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7122, "step": 1521800 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.716, "step": 1521900 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7145, "step": 1522000 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7206, "step": 1522100 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7156, "step": 1522200 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7158, "step": 1522300 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7101, "step": 1522400 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7148, "step": 1522500 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7094, "step": 1522600 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7088, "step": 1522700 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7125, "step": 1522800 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7139, "step": 1522900 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.715, "step": 1523000 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.713, "step": 1523100 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7211, "step": 1523200 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7151, "step": 1523300 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7144, "step": 1523400 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7118, "step": 1523500 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7198, "step": 1523600 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7207, "step": 1523700 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7101, "step": 1523800 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7195, "step": 1523900 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7127, "step": 1524000 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7197, "step": 1524100 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7214, "step": 1524200 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7047, "step": 1524300 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7106, "step": 1524400 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7209, "step": 1524500 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7133, "step": 1524600 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7147, "step": 1524700 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7195, "step": 1524800 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7216, "step": 1524900 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7173, "step": 1525000 }, { "epoch": 2.05, "eval_loss": 0.6770954132080078, "eval_runtime": 194.5241, "eval_samples_per_second": 257.038, "eval_steps_per_second": 2.01, "step": 1525000 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7139, "step": 1525100 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7154, "step": 1525200 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7136, "step": 1525300 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.711, "step": 1525400 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7161, "step": 1525500 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7153, "step": 1525600 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7118, "step": 1525700 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7115, "step": 1525800 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7141, "step": 1525900 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7136, "step": 1526000 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7149, "step": 1526100 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7028, "step": 1526200 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7139, "step": 1526300 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7171, "step": 1526400 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7079, "step": 1526500 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7226, "step": 1526600 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7177, "step": 1526700 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7037, "step": 1526800 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7125, "step": 1526900 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7112, "step": 1527000 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7156, "step": 1527100 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7107, "step": 1527200 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7131, "step": 1527300 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7081, "step": 1527400 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7089, "step": 1527500 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7137, "step": 1527600 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7084, "step": 1527700 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7139, "step": 1527800 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7199, "step": 1527900 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7092, "step": 1528000 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7256, "step": 1528100 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7108, "step": 1528200 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7231, "step": 1528300 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7153, "step": 1528400 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.712, "step": 1528500 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7213, "step": 1528600 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7143, "step": 1528700 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7206, "step": 1528800 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7084, "step": 1528900 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7113, "step": 1529000 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7061, "step": 1529100 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7189, "step": 1529200 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.71, "step": 1529300 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7049, "step": 1529400 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7185, "step": 1529500 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7176, "step": 1529600 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7163, "step": 1529700 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7145, "step": 1529800 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7129, "step": 1529900 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.718, "step": 1530000 }, { "epoch": 2.05, "eval_loss": 0.6751891374588013, "eval_runtime": 194.7816, "eval_samples_per_second": 256.698, "eval_steps_per_second": 2.007, "step": 1530000 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7129, "step": 1530100 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7177, "step": 1530200 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7136, "step": 1530300 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7098, "step": 1530400 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7152, "step": 1530500 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7071, "step": 1530600 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7188, "step": 1530700 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7175, "step": 1530800 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7179, "step": 1530900 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7207, "step": 1531000 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7079, "step": 1531100 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7213, "step": 1531200 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7073, "step": 1531300 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7164, "step": 1531400 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7171, "step": 1531500 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7207, "step": 1531600 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7166, "step": 1531700 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7168, "step": 1531800 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7177, "step": 1531900 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7144, "step": 1532000 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7114, "step": 1532100 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7186, "step": 1532200 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7163, "step": 1532300 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7172, "step": 1532400 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7164, "step": 1532500 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7071, "step": 1532600 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7186, "step": 1532700 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7041, "step": 1532800 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7252, "step": 1532900 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7131, "step": 1533000 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7061, "step": 1533100 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7085, "step": 1533200 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.708, "step": 1533300 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7163, "step": 1533400 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7162, "step": 1533500 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7166, "step": 1533600 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7165, "step": 1533700 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7105, "step": 1533800 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7108, "step": 1533900 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7198, "step": 1534000 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7146, "step": 1534100 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7144, "step": 1534200 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7189, "step": 1534300 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7147, "step": 1534400 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7107, "step": 1534500 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7198, "step": 1534600 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7092, "step": 1534700 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7143, "step": 1534800 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7103, "step": 1534900 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7053, "step": 1535000 }, { "epoch": 2.05, "eval_loss": 0.67606520652771, "eval_runtime": 195.8147, "eval_samples_per_second": 255.343, "eval_steps_per_second": 1.997, "step": 1535000 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.716, "step": 1535100 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7181, "step": 1535200 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7108, "step": 1535300 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7206, "step": 1535400 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.714, "step": 1535500 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7142, "step": 1535600 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7114, "step": 1535700 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.722, "step": 1535800 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7174, "step": 1535900 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7107, "step": 1536000 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7121, "step": 1536100 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7131, "step": 1536200 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7119, "step": 1536300 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7185, "step": 1536400 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7047, "step": 1536500 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7163, "step": 1536600 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7223, "step": 1536700 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.725, "step": 1536800 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.716, "step": 1536900 }, { "epoch": 2.05, "learning_rate": 2.0000000000000003e-06, "loss": 0.7167, "step": 1537000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.707, "step": 1537100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7162, "step": 1537200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7042, "step": 1537300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7139, "step": 1537400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7156, "step": 1537500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7159, "step": 1537600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7167, "step": 1537700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7123, "step": 1537800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.711, "step": 1537900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7103, "step": 1538000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7133, "step": 1538100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7107, "step": 1538200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7249, "step": 1538300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7103, "step": 1538400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.71, "step": 1538500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7105, "step": 1538600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7074, "step": 1538700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7163, "step": 1538800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7092, "step": 1538900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7078, "step": 1539000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7161, "step": 1539100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.709, "step": 1539200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7193, "step": 1539300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7151, "step": 1539400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7116, "step": 1539500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7163, "step": 1539600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7096, "step": 1539700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7103, "step": 1539800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7098, "step": 1539900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7139, "step": 1540000 }, { "epoch": 2.06, "eval_loss": 0.6728585362434387, "eval_runtime": 194.5375, "eval_samples_per_second": 257.02, "eval_steps_per_second": 2.01, "step": 1540000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7144, "step": 1540100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7098, "step": 1540200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7127, "step": 1540300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7181, "step": 1540400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7131, "step": 1540500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7181, "step": 1540600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7152, "step": 1540700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7143, "step": 1540800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7134, "step": 1540900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7148, "step": 1541000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7137, "step": 1541100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7078, "step": 1541200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7162, "step": 1541300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7088, "step": 1541400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7142, "step": 1541500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7104, "step": 1541600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7153, "step": 1541700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7152, "step": 1541800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7066, "step": 1541900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7077, "step": 1542000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7013, "step": 1542100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7022, "step": 1542200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7075, "step": 1542300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7143, "step": 1542400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7156, "step": 1542500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7118, "step": 1542600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7051, "step": 1542700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.714, "step": 1542800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7155, "step": 1542900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7098, "step": 1543000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7137, "step": 1543100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7162, "step": 1543200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.705, "step": 1543300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7121, "step": 1543400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7107, "step": 1543500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7143, "step": 1543600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7133, "step": 1543700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7166, "step": 1543800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7114, "step": 1543900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7212, "step": 1544000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7085, "step": 1544100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7132, "step": 1544200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7057, "step": 1544300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7172, "step": 1544400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7238, "step": 1544500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7136, "step": 1544600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.71, "step": 1544700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7061, "step": 1544800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7108, "step": 1544900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7148, "step": 1545000 }, { "epoch": 2.06, "eval_loss": 0.6715091466903687, "eval_runtime": 194.575, "eval_samples_per_second": 256.97, "eval_steps_per_second": 2.01, "step": 1545000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7114, "step": 1545100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7105, "step": 1545200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7072, "step": 1545300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7108, "step": 1545400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7103, "step": 1545500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7153, "step": 1545600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7077, "step": 1545700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7121, "step": 1545800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7152, "step": 1545900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7128, "step": 1546000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7148, "step": 1546100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7111, "step": 1546200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7173, "step": 1546300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7147, "step": 1546400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7071, "step": 1546500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7149, "step": 1546600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7164, "step": 1546700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7063, "step": 1546800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7104, "step": 1546900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7103, "step": 1547000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7134, "step": 1547100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7135, "step": 1547200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7185, "step": 1547300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7145, "step": 1547400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7106, "step": 1547500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7107, "step": 1547600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7116, "step": 1547700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7138, "step": 1547800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7076, "step": 1547900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7192, "step": 1548000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7058, "step": 1548100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.717, "step": 1548200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7244, "step": 1548300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7118, "step": 1548400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7097, "step": 1548500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7157, "step": 1548600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7177, "step": 1548700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7083, "step": 1548800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7069, "step": 1548900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7191, "step": 1549000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.717, "step": 1549100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7071, "step": 1549200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7079, "step": 1549300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7179, "step": 1549400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7164, "step": 1549500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7072, "step": 1549600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7144, "step": 1549700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7119, "step": 1549800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7092, "step": 1549900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7104, "step": 1550000 }, { "epoch": 2.06, "eval_loss": 0.6696457862854004, "eval_runtime": 194.8005, "eval_samples_per_second": 256.673, "eval_steps_per_second": 2.007, "step": 1550000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7042, "step": 1550100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7048, "step": 1550200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7117, "step": 1550300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7129, "step": 1550400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7139, "step": 1550500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7111, "step": 1550600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7098, "step": 1550700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7093, "step": 1550800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7076, "step": 1550900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7121, "step": 1551000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7155, "step": 1551100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7065, "step": 1551200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7132, "step": 1551300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7103, "step": 1551400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7028, "step": 1551500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7174, "step": 1551600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7101, "step": 1551700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7105, "step": 1551800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7121, "step": 1551900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7018, "step": 1552000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.707, "step": 1552100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7122, "step": 1552200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7161, "step": 1552300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7087, "step": 1552400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7053, "step": 1552500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7137, "step": 1552600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7182, "step": 1552700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7128, "step": 1552800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7093, "step": 1552900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.709, "step": 1553000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7107, "step": 1553100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7172, "step": 1553200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7097, "step": 1553300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.715, "step": 1553400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7105, "step": 1553500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7054, "step": 1553600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7167, "step": 1553700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7141, "step": 1553800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7077, "step": 1553900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7029, "step": 1554000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7105, "step": 1554100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7153, "step": 1554200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7001, "step": 1554300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7038, "step": 1554400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7094, "step": 1554500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7062, "step": 1554600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7054, "step": 1554700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7104, "step": 1554800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7035, "step": 1554900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.704, "step": 1555000 }, { "epoch": 2.06, "eval_loss": 0.6720991134643555, "eval_runtime": 194.5489, "eval_samples_per_second": 257.005, "eval_steps_per_second": 2.01, "step": 1555000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7137, "step": 1555100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7181, "step": 1555200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7095, "step": 1555300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7092, "step": 1555400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7119, "step": 1555500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7062, "step": 1555600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7082, "step": 1555700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7014, "step": 1555800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7135, "step": 1555900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7179, "step": 1556000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7116, "step": 1556100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7045, "step": 1556200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7143, "step": 1556300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7109, "step": 1556400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7058, "step": 1556500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7076, "step": 1556600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7114, "step": 1556700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7106, "step": 1556800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7037, "step": 1556900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7045, "step": 1557000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7045, "step": 1557100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7182, "step": 1557200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7082, "step": 1557300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7035, "step": 1557400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7109, "step": 1557500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7153, "step": 1557600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7072, "step": 1557700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7158, "step": 1557800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7183, "step": 1557900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7108, "step": 1558000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7139, "step": 1558100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7159, "step": 1558200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7079, "step": 1558300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7069, "step": 1558400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7131, "step": 1558500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7082, "step": 1558600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7096, "step": 1558700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7119, "step": 1558800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7075, "step": 1558900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7156, "step": 1559000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7141, "step": 1559100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7128, "step": 1559200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7115, "step": 1559300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7089, "step": 1559400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7153, "step": 1559500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7168, "step": 1559600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7065, "step": 1559700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7085, "step": 1559800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7051, "step": 1559900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7106, "step": 1560000 }, { "epoch": 2.06, "eval_loss": 0.67218416929245, "eval_runtime": 194.3337, "eval_samples_per_second": 257.289, "eval_steps_per_second": 2.012, "step": 1560000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7093, "step": 1560100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7083, "step": 1560200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7076, "step": 1560300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7171, "step": 1560400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7137, "step": 1560500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7173, "step": 1560600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7071, "step": 1560700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7188, "step": 1560800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.718, "step": 1560900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7176, "step": 1561000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7069, "step": 1561100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7079, "step": 1561200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7101, "step": 1561300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7095, "step": 1561400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.706, "step": 1561500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7039, "step": 1561600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7074, "step": 1561700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7122, "step": 1561800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7125, "step": 1561900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7153, "step": 1562000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7022, "step": 1562100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7128, "step": 1562200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7136, "step": 1562300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7108, "step": 1562400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7081, "step": 1562500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7139, "step": 1562600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7116, "step": 1562700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7159, "step": 1562800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7167, "step": 1562900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7144, "step": 1563000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7118, "step": 1563100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7092, "step": 1563200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7157, "step": 1563300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7094, "step": 1563400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7095, "step": 1563500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7127, "step": 1563600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.711, "step": 1563700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7012, "step": 1563800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7131, "step": 1563900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7065, "step": 1564000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7153, "step": 1564100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7115, "step": 1564200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7128, "step": 1564300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7133, "step": 1564400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.715, "step": 1564500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7086, "step": 1564600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7196, "step": 1564700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7044, "step": 1564800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7138, "step": 1564900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7109, "step": 1565000 }, { "epoch": 2.06, "eval_loss": 0.6707226037979126, "eval_runtime": 194.5176, "eval_samples_per_second": 257.046, "eval_steps_per_second": 2.01, "step": 1565000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7101, "step": 1565100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7073, "step": 1565200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7106, "step": 1565300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7113, "step": 1565400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7065, "step": 1565500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7098, "step": 1565600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7057, "step": 1565700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7013, "step": 1565800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7108, "step": 1565900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7089, "step": 1566000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7089, "step": 1566100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7097, "step": 1566200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7148, "step": 1566300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7103, "step": 1566400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7103, "step": 1566500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7118, "step": 1566600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7079, "step": 1566700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7156, "step": 1566800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7127, "step": 1566900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7125, "step": 1567000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7137, "step": 1567100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7126, "step": 1567200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7125, "step": 1567300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7058, "step": 1567400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7076, "step": 1567500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7074, "step": 1567600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.711, "step": 1567700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7094, "step": 1567800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7098, "step": 1567900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7179, "step": 1568000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7155, "step": 1568100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7117, "step": 1568200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.712, "step": 1568300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7103, "step": 1568400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7089, "step": 1568500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.705, "step": 1568600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.711, "step": 1568700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7062, "step": 1568800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7091, "step": 1568900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7073, "step": 1569000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7015, "step": 1569100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7059, "step": 1569200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7165, "step": 1569300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7089, "step": 1569400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7121, "step": 1569500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7109, "step": 1569600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7064, "step": 1569700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.715, "step": 1569800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7107, "step": 1569900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.712, "step": 1570000 }, { "epoch": 2.06, "eval_loss": 0.6680857539176941, "eval_runtime": 198.2262, "eval_samples_per_second": 252.237, "eval_steps_per_second": 1.972, "step": 1570000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7082, "step": 1570100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7033, "step": 1570200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7099, "step": 1570300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7124, "step": 1570400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7119, "step": 1570500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7084, "step": 1570600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7076, "step": 1570700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7026, "step": 1570800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.71, "step": 1570900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7079, "step": 1571000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7095, "step": 1571100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7058, "step": 1571200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7142, "step": 1571300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.6975, "step": 1571400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7079, "step": 1571500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7115, "step": 1571600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7073, "step": 1571700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7135, "step": 1571800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7049, "step": 1571900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7079, "step": 1572000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7077, "step": 1572100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7128, "step": 1572200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7094, "step": 1572300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7094, "step": 1572400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7082, "step": 1572500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7074, "step": 1572600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7107, "step": 1572700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7156, "step": 1572800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7095, "step": 1572900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7099, "step": 1573000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7092, "step": 1573100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7072, "step": 1573200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7118, "step": 1573300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7128, "step": 1573400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.711, "step": 1573500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.6964, "step": 1573600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7113, "step": 1573700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7154, "step": 1573800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7056, "step": 1573900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7131, "step": 1574000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7195, "step": 1574100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7161, "step": 1574200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7086, "step": 1574300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.71, "step": 1574400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7035, "step": 1574500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7064, "step": 1574600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7101, "step": 1574700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7082, "step": 1574800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7084, "step": 1574900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7038, "step": 1575000 }, { "epoch": 2.06, "eval_loss": 0.6703412532806396, "eval_runtime": 194.9337, "eval_samples_per_second": 256.497, "eval_steps_per_second": 2.006, "step": 1575000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7133, "step": 1575100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7042, "step": 1575200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7059, "step": 1575300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7176, "step": 1575400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7062, "step": 1575500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7072, "step": 1575600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7072, "step": 1575700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7068, "step": 1575800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7097, "step": 1575900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7062, "step": 1576000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7088, "step": 1576100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7127, "step": 1576200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7083, "step": 1576300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7073, "step": 1576400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7105, "step": 1576500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7082, "step": 1576600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7069, "step": 1576700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7118, "step": 1576800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7071, "step": 1576900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7114, "step": 1577000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7083, "step": 1577100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7147, "step": 1577200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7103, "step": 1577300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7061, "step": 1577400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7044, "step": 1577500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7149, "step": 1577600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7083, "step": 1577700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7075, "step": 1577800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7091, "step": 1577900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7132, "step": 1578000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7215, "step": 1578100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7092, "step": 1578200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.71, "step": 1578300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7076, "step": 1578400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7015, "step": 1578500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7146, "step": 1578600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7091, "step": 1578700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7038, "step": 1578800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7052, "step": 1578900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7139, "step": 1579000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.6996, "step": 1579100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7094, "step": 1579200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7107, "step": 1579300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7088, "step": 1579400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7133, "step": 1579500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7053, "step": 1579600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.71, "step": 1579700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7089, "step": 1579800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7014, "step": 1579900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7045, "step": 1580000 }, { "epoch": 2.06, "eval_loss": 0.6690846085548401, "eval_runtime": 194.5913, "eval_samples_per_second": 256.949, "eval_steps_per_second": 2.009, "step": 1580000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7124, "step": 1580100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7173, "step": 1580200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7122, "step": 1580300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7078, "step": 1580400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7094, "step": 1580500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7042, "step": 1580600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7146, "step": 1580700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7042, "step": 1580800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7077, "step": 1580900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7024, "step": 1581000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7124, "step": 1581100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7062, "step": 1581200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7018, "step": 1581300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7083, "step": 1581400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7127, "step": 1581500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7056, "step": 1581600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7092, "step": 1581700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7064, "step": 1581800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7154, "step": 1581900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7095, "step": 1582000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7078, "step": 1582100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7139, "step": 1582200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7112, "step": 1582300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7047, "step": 1582400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7149, "step": 1582500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7101, "step": 1582600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7086, "step": 1582700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7089, "step": 1582800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7025, "step": 1582900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7039, "step": 1583000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7114, "step": 1583100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7006, "step": 1583200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7119, "step": 1583300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7104, "step": 1583400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7041, "step": 1583500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7066, "step": 1583600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7065, "step": 1583700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7104, "step": 1583800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7087, "step": 1583900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7055, "step": 1584000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7037, "step": 1584100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7098, "step": 1584200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7092, "step": 1584300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7071, "step": 1584400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7079, "step": 1584500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7039, "step": 1584600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7069, "step": 1584700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7072, "step": 1584800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.704, "step": 1584900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7095, "step": 1585000 }, { "epoch": 2.06, "eval_loss": 0.669644296169281, "eval_runtime": 194.4885, "eval_samples_per_second": 257.085, "eval_steps_per_second": 2.01, "step": 1585000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7163, "step": 1585100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7033, "step": 1585200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7096, "step": 1585300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7171, "step": 1585400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7035, "step": 1585500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7125, "step": 1585600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7059, "step": 1585700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7104, "step": 1585800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7097, "step": 1585900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7061, "step": 1586000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.6987, "step": 1586100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7126, "step": 1586200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7128, "step": 1586300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7139, "step": 1586400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7119, "step": 1586500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7028, "step": 1586600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7094, "step": 1586700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.709, "step": 1586800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7054, "step": 1586900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.717, "step": 1587000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7116, "step": 1587100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.707, "step": 1587200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7072, "step": 1587300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7066, "step": 1587400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7133, "step": 1587500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7115, "step": 1587600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7059, "step": 1587700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7048, "step": 1587800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7089, "step": 1587900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7055, "step": 1588000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7015, "step": 1588100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.714, "step": 1588200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7052, "step": 1588300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7119, "step": 1588400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7128, "step": 1588500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7043, "step": 1588600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.707, "step": 1588700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.708, "step": 1588800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7137, "step": 1588900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7036, "step": 1589000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7033, "step": 1589100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7076, "step": 1589200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7092, "step": 1589300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.6999, "step": 1589400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7112, "step": 1589500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7141, "step": 1589600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7123, "step": 1589700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7045, "step": 1589800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7099, "step": 1589900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7119, "step": 1590000 }, { "epoch": 2.06, "eval_loss": 0.670275092124939, "eval_runtime": 194.6148, "eval_samples_per_second": 256.918, "eval_steps_per_second": 2.009, "step": 1590000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7036, "step": 1590100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7042, "step": 1590200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.6964, "step": 1590300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7016, "step": 1590400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7068, "step": 1590500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7039, "step": 1590600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7091, "step": 1590700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7098, "step": 1590800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.709, "step": 1590900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7108, "step": 1591000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7149, "step": 1591100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7063, "step": 1591200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7114, "step": 1591300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7102, "step": 1591400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7072, "step": 1591500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7103, "step": 1591600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7099, "step": 1591700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.72, "step": 1591800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7074, "step": 1591900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7048, "step": 1592000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.72, "step": 1592100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7115, "step": 1592200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7093, "step": 1592300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.707, "step": 1592400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7148, "step": 1592500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7162, "step": 1592600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7118, "step": 1592700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7034, "step": 1592800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.704, "step": 1592900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7036, "step": 1593000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7056, "step": 1593100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.709, "step": 1593200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7004, "step": 1593300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7105, "step": 1593400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7153, "step": 1593500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7102, "step": 1593600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7132, "step": 1593700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7103, "step": 1593800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7116, "step": 1593900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7102, "step": 1594000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7005, "step": 1594100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7055, "step": 1594200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7068, "step": 1594300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7062, "step": 1594400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7167, "step": 1594500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7095, "step": 1594600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.708, "step": 1594700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.711, "step": 1594800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7116, "step": 1594900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7079, "step": 1595000 }, { "epoch": 2.06, "eval_loss": 0.669308602809906, "eval_runtime": 194.3701, "eval_samples_per_second": 257.241, "eval_steps_per_second": 2.012, "step": 1595000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7145, "step": 1595100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7088, "step": 1595200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7026, "step": 1595300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7073, "step": 1595400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7013, "step": 1595500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7088, "step": 1595600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7137, "step": 1595700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7051, "step": 1595800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.6988, "step": 1595900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7078, "step": 1596000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7048, "step": 1596100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7086, "step": 1596200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7073, "step": 1596300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7087, "step": 1596400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7077, "step": 1596500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.6979, "step": 1596600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7135, "step": 1596700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7129, "step": 1596800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7078, "step": 1596900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7131, "step": 1597000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7131, "step": 1597100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.713, "step": 1597200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7088, "step": 1597300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7076, "step": 1597400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7092, "step": 1597500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7031, "step": 1597600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7069, "step": 1597700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7152, "step": 1597800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7151, "step": 1597900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7039, "step": 1598000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7045, "step": 1598100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7012, "step": 1598200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7053, "step": 1598300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7081, "step": 1598400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7107, "step": 1598500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7075, "step": 1598600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7067, "step": 1598700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.705, "step": 1598800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.705, "step": 1598900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7069, "step": 1599000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7106, "step": 1599100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.6976, "step": 1599200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7023, "step": 1599300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7049, "step": 1599400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.712, "step": 1599500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7063, "step": 1599600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.699, "step": 1599700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.6994, "step": 1599800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7131, "step": 1599900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7123, "step": 1600000 }, { "epoch": 2.06, "eval_loss": 0.6678040623664856, "eval_runtime": 194.6108, "eval_samples_per_second": 256.923, "eval_steps_per_second": 2.009, "step": 1600000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7179, "step": 1600100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7104, "step": 1600200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.6944, "step": 1600300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7065, "step": 1600400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7059, "step": 1600500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7101, "step": 1600600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7155, "step": 1600700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7124, "step": 1600800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7033, "step": 1600900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7185, "step": 1601000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7111, "step": 1601100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7085, "step": 1601200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7085, "step": 1601300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7083, "step": 1601400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7082, "step": 1601500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7041, "step": 1601600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7034, "step": 1601700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7015, "step": 1601800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7089, "step": 1601900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7042, "step": 1602000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7047, "step": 1602100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7064, "step": 1602200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7014, "step": 1602300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.6998, "step": 1602400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7157, "step": 1602500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7127, "step": 1602600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7024, "step": 1602700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7045, "step": 1602800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7067, "step": 1602900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7145, "step": 1603000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7098, "step": 1603100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.6994, "step": 1603200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7032, "step": 1603300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7082, "step": 1603400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7079, "step": 1603500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7036, "step": 1603600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7134, "step": 1603700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7051, "step": 1603800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7149, "step": 1603900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7067, "step": 1604000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7061, "step": 1604100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7052, "step": 1604200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7086, "step": 1604300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7109, "step": 1604400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7087, "step": 1604500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7053, "step": 1604600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7084, "step": 1604700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7135, "step": 1604800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.6999, "step": 1604900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.712, "step": 1605000 }, { "epoch": 2.06, "eval_loss": 0.6686995029449463, "eval_runtime": 194.3388, "eval_samples_per_second": 257.283, "eval_steps_per_second": 2.012, "step": 1605000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7065, "step": 1605100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7042, "step": 1605200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7001, "step": 1605300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7085, "step": 1605400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7101, "step": 1605500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7081, "step": 1605600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.711, "step": 1605700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7086, "step": 1605800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7081, "step": 1605900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7098, "step": 1606000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7062, "step": 1606100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7082, "step": 1606200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.703, "step": 1606300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7111, "step": 1606400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7078, "step": 1606500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7078, "step": 1606600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.6964, "step": 1606700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7083, "step": 1606800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7158, "step": 1606900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7144, "step": 1607000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7074, "step": 1607100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7062, "step": 1607200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.6945, "step": 1607300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7015, "step": 1607400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7177, "step": 1607500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7112, "step": 1607600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7017, "step": 1607700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7081, "step": 1607800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7045, "step": 1607900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7025, "step": 1608000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7126, "step": 1608100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7065, "step": 1608200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7112, "step": 1608300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7072, "step": 1608400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.6993, "step": 1608500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.713, "step": 1608600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7118, "step": 1608700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7106, "step": 1608800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7021, "step": 1608900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7176, "step": 1609000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7128, "step": 1609100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.712, "step": 1609200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7089, "step": 1609300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.705, "step": 1609400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7059, "step": 1609500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7115, "step": 1609600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7075, "step": 1609700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7095, "step": 1609800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7118, "step": 1609900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7093, "step": 1610000 }, { "epoch": 2.06, "eval_loss": 0.6666762232780457, "eval_runtime": 194.4528, "eval_samples_per_second": 257.132, "eval_steps_per_second": 2.011, "step": 1610000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7089, "step": 1610100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7067, "step": 1610200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7051, "step": 1610300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7073, "step": 1610400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7163, "step": 1610500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7101, "step": 1610600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7096, "step": 1610700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7044, "step": 1610800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7027, "step": 1610900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7046, "step": 1611000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7121, "step": 1611100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7123, "step": 1611200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7048, "step": 1611300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7063, "step": 1611400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7109, "step": 1611500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7132, "step": 1611600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7029, "step": 1611700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7083, "step": 1611800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7076, "step": 1611900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7017, "step": 1612000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7043, "step": 1612100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.707, "step": 1612200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7051, "step": 1612300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7024, "step": 1612400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7124, "step": 1612500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7062, "step": 1612600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7054, "step": 1612700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7078, "step": 1612800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7022, "step": 1612900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7155, "step": 1613000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7038, "step": 1613100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.709, "step": 1613200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7083, "step": 1613300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7075, "step": 1613400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7139, "step": 1613500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7085, "step": 1613600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7107, "step": 1613700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7004, "step": 1613800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7069, "step": 1613900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.709, "step": 1614000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7132, "step": 1614100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7064, "step": 1614200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7055, "step": 1614300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7042, "step": 1614400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7165, "step": 1614500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7086, "step": 1614600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7139, "step": 1614700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7003, "step": 1614800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.6966, "step": 1614900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7042, "step": 1615000 }, { "epoch": 2.06, "eval_loss": 0.6679654717445374, "eval_runtime": 194.502, "eval_samples_per_second": 257.067, "eval_steps_per_second": 2.01, "step": 1615000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7028, "step": 1615100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7042, "step": 1615200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7066, "step": 1615300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7129, "step": 1615400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7084, "step": 1615500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7006, "step": 1615600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7099, "step": 1615700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7029, "step": 1615800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7107, "step": 1615900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7056, "step": 1616000 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7094, "step": 1616100 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7047, "step": 1616200 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7043, "step": 1616300 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.6978, "step": 1616400 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7032, "step": 1616500 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7061, "step": 1616600 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7069, "step": 1616700 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7045, "step": 1616800 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7064, "step": 1616900 }, { "epoch": 2.06, "learning_rate": 2.0000000000000003e-06, "loss": 0.7052, "step": 1617000 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7005, "step": 1617100 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7085, "step": 1617200 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.713, "step": 1617300 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7063, "step": 1617400 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7028, "step": 1617500 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7111, "step": 1617600 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7076, "step": 1617700 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7057, "step": 1617800 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7044, "step": 1617900 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7084, "step": 1618000 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7045, "step": 1618100 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7016, "step": 1618200 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7094, "step": 1618300 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7023, "step": 1618400 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7067, "step": 1618500 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7061, "step": 1618600 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7057, "step": 1618700 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7027, "step": 1618800 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.6907, "step": 1618900 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7023, "step": 1619000 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.706, "step": 1619100 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7108, "step": 1619200 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7004, "step": 1619300 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7026, "step": 1619400 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7059, "step": 1619500 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7121, "step": 1619600 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7034, "step": 1619700 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7134, "step": 1619800 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7032, "step": 1619900 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.6993, "step": 1620000 }, { "epoch": 2.07, "eval_loss": 0.66707444190979, "eval_runtime": 205.7066, "eval_samples_per_second": 243.065, "eval_steps_per_second": 1.901, "step": 1620000 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7047, "step": 1620100 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.708, "step": 1620200 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7044, "step": 1620300 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7114, "step": 1620400 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7043, "step": 1620500 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7111, "step": 1620600 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7105, "step": 1620700 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7055, "step": 1620800 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7055, "step": 1620900 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7121, "step": 1621000 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7085, "step": 1621100 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7074, "step": 1621200 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7119, "step": 1621300 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7132, "step": 1621400 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.704, "step": 1621500 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7011, "step": 1621600 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7076, "step": 1621700 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.704, "step": 1621800 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7049, "step": 1621900 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7092, "step": 1622000 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.703, "step": 1622100 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7075, "step": 1622200 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.6997, "step": 1622300 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7005, "step": 1622400 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7072, "step": 1622500 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.6995, "step": 1622600 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7019, "step": 1622700 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7011, "step": 1622800 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7078, "step": 1622900 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7094, "step": 1623000 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7087, "step": 1623100 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7052, "step": 1623200 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7097, "step": 1623300 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7091, "step": 1623400 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7154, "step": 1623500 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7071, "step": 1623600 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7039, "step": 1623700 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7071, "step": 1623800 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7068, "step": 1623900 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7146, "step": 1624000 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7046, "step": 1624100 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7106, "step": 1624200 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7062, "step": 1624300 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7044, "step": 1624400 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7061, "step": 1624500 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7068, "step": 1624600 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7022, "step": 1624700 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7023, "step": 1624800 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7061, "step": 1624900 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.706, "step": 1625000 }, { "epoch": 2.07, "eval_loss": 0.6661863327026367, "eval_runtime": 200.4343, "eval_samples_per_second": 249.458, "eval_steps_per_second": 1.951, "step": 1625000 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7141, "step": 1625100 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.701, "step": 1625200 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7099, "step": 1625300 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7114, "step": 1625400 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7113, "step": 1625500 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.6976, "step": 1625600 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7042, "step": 1625700 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.6983, "step": 1625800 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7021, "step": 1625900 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7017, "step": 1626000 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7052, "step": 1626100 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7066, "step": 1626200 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7057, "step": 1626300 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7075, "step": 1626400 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7071, "step": 1626500 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7065, "step": 1626600 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7065, "step": 1626700 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7048, "step": 1626800 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7017, "step": 1626900 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7095, "step": 1627000 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7087, "step": 1627100 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7015, "step": 1627200 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7075, "step": 1627300 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.705, "step": 1627400 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7018, "step": 1627500 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7046, "step": 1627600 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.706, "step": 1627700 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7078, "step": 1627800 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7072, "step": 1627900 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.703, "step": 1628000 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7073, "step": 1628100 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7051, "step": 1628200 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7062, "step": 1628300 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7029, "step": 1628400 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7088, "step": 1628500 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.6988, "step": 1628600 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.6993, "step": 1628700 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7028, "step": 1628800 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7073, "step": 1628900 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7018, "step": 1629000 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7059, "step": 1629100 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7072, "step": 1629200 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7023, "step": 1629300 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.6993, "step": 1629400 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7017, "step": 1629500 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7076, "step": 1629600 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.714, "step": 1629700 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.708, "step": 1629800 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7114, "step": 1629900 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.709, "step": 1630000 }, { "epoch": 2.07, "eval_loss": 0.6691371202468872, "eval_runtime": 208.3903, "eval_samples_per_second": 239.934, "eval_steps_per_second": 1.876, "step": 1630000 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7077, "step": 1630100 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7071, "step": 1630200 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7045, "step": 1630300 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7138, "step": 1630400 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7018, "step": 1630500 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.705, "step": 1630600 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.705, "step": 1630700 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7014, "step": 1630800 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7119, "step": 1630900 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7049, "step": 1631000 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.6996, "step": 1631100 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7095, "step": 1631200 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7093, "step": 1631300 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7018, "step": 1631400 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7105, "step": 1631500 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7037, "step": 1631600 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7087, "step": 1631700 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.6986, "step": 1631800 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7039, "step": 1631900 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7062, "step": 1632000 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.705, "step": 1632100 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7061, "step": 1632200 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7149, "step": 1632300 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7075, "step": 1632400 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7097, "step": 1632500 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7037, "step": 1632600 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7002, "step": 1632700 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7024, "step": 1632800 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7047, "step": 1632900 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7108, "step": 1633000 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7057, "step": 1633100 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.6985, "step": 1633200 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7082, "step": 1633300 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.715, "step": 1633400 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.6986, "step": 1633500 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7054, "step": 1633600 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.712, "step": 1633700 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7026, "step": 1633800 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7121, "step": 1633900 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.712, "step": 1634000 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7003, "step": 1634100 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7093, "step": 1634200 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7025, "step": 1634300 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7021, "step": 1634400 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7043, "step": 1634500 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7078, "step": 1634600 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7065, "step": 1634700 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7052, "step": 1634800 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7057, "step": 1634900 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7125, "step": 1635000 }, { "epoch": 2.07, "eval_loss": 0.6663634181022644, "eval_runtime": 203.2205, "eval_samples_per_second": 246.038, "eval_steps_per_second": 1.924, "step": 1635000 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7057, "step": 1635100 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7057, "step": 1635200 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7044, "step": 1635300 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.6992, "step": 1635400 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.704, "step": 1635500 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7032, "step": 1635600 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7018, "step": 1635700 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7078, "step": 1635800 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7015, "step": 1635900 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7054, "step": 1636000 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7004, "step": 1636100 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7022, "step": 1636200 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7074, "step": 1636300 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7119, "step": 1636400 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7016, "step": 1636500 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7064, "step": 1636600 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7093, "step": 1636700 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7116, "step": 1636800 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7062, "step": 1636900 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7058, "step": 1637000 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7065, "step": 1637100 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7087, "step": 1637200 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7051, "step": 1637300 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.708, "step": 1637400 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7108, "step": 1637500 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7007, "step": 1637600 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7044, "step": 1637700 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7037, "step": 1637800 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7046, "step": 1637900 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.71, "step": 1638000 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7039, "step": 1638100 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7046, "step": 1638200 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7013, "step": 1638300 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.6945, "step": 1638400 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7083, "step": 1638500 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7009, "step": 1638600 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.6992, "step": 1638700 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7108, "step": 1638800 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7106, "step": 1638900 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7066, "step": 1639000 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.712, "step": 1639100 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7034, "step": 1639200 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7035, "step": 1639300 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7026, "step": 1639400 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7073, "step": 1639500 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7055, "step": 1639600 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7028, "step": 1639700 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.704, "step": 1639800 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7021, "step": 1639900 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7084, "step": 1640000 }, { "epoch": 2.07, "eval_loss": 0.6667556166648865, "eval_runtime": 205.6474, "eval_samples_per_second": 243.135, "eval_steps_per_second": 1.901, "step": 1640000 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7038, "step": 1640100 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7113, "step": 1640200 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7125, "step": 1640300 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7039, "step": 1640400 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7048, "step": 1640500 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7078, "step": 1640600 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7006, "step": 1640700 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7056, "step": 1640800 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7069, "step": 1640900 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7018, "step": 1641000 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7029, "step": 1641100 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7043, "step": 1641200 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7071, "step": 1641300 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7057, "step": 1641400 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7073, "step": 1641500 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7005, "step": 1641600 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7035, "step": 1641700 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7054, "step": 1641800 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7045, "step": 1641900 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7051, "step": 1642000 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7094, "step": 1642100 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.708, "step": 1642200 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7047, "step": 1642300 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7029, "step": 1642400 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7045, "step": 1642500 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7066, "step": 1642600 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7027, "step": 1642700 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7067, "step": 1642800 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.6989, "step": 1642900 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7153, "step": 1643000 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7049, "step": 1643100 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7082, "step": 1643200 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7025, "step": 1643300 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7056, "step": 1643400 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7061, "step": 1643500 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7025, "step": 1643600 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7019, "step": 1643700 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7095, "step": 1643800 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7004, "step": 1643900 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7102, "step": 1644000 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7075, "step": 1644100 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7068, "step": 1644200 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7068, "step": 1644300 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7029, "step": 1644400 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7028, "step": 1644500 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7035, "step": 1644600 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7101, "step": 1644700 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7081, "step": 1644800 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7115, "step": 1644900 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7028, "step": 1645000 }, { "epoch": 2.07, "eval_loss": 0.6672505736351013, "eval_runtime": 206.6883, "eval_samples_per_second": 241.91, "eval_steps_per_second": 1.892, "step": 1645000 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.6968, "step": 1645100 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7011, "step": 1645200 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.711, "step": 1645300 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7043, "step": 1645400 }, { "epoch": 2.07, "learning_rate": 2.0000000000000003e-06, "loss": 0.7026, "step": 1645500 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7029, "step": 1645600 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7107, "step": 1645700 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7023, "step": 1645800 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.71, "step": 1645900 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7074, "step": 1646000 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7047, "step": 1646100 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7005, "step": 1646200 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6984, "step": 1646300 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7045, "step": 1646400 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7084, "step": 1646500 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7059, "step": 1646600 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6956, "step": 1646700 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7063, "step": 1646800 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7016, "step": 1646900 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6973, "step": 1647000 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7083, "step": 1647100 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6996, "step": 1647200 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7048, "step": 1647300 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7001, "step": 1647400 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7051, "step": 1647500 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7129, "step": 1647600 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7022, "step": 1647700 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7073, "step": 1647800 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6992, "step": 1647900 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7022, "step": 1648000 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7106, "step": 1648100 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7073, "step": 1648200 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7013, "step": 1648300 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7144, "step": 1648400 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.703, "step": 1648500 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7031, "step": 1648600 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7018, "step": 1648700 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7034, "step": 1648800 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7022, "step": 1648900 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7073, "step": 1649000 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7045, "step": 1649100 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7024, "step": 1649200 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7078, "step": 1649300 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7079, "step": 1649400 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7086, "step": 1649500 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7069, "step": 1649600 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7049, "step": 1649700 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.698, "step": 1649800 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6996, "step": 1649900 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7027, "step": 1650000 }, { "epoch": 3.0, "eval_loss": 0.6658620238304138, "eval_runtime": 206.7594, "eval_samples_per_second": 241.827, "eval_steps_per_second": 1.891, "step": 1650000 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7038, "step": 1650100 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7039, "step": 1650200 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7096, "step": 1650300 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7123, "step": 1650400 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7071, "step": 1650500 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7072, "step": 1650600 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7053, "step": 1650700 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.709, "step": 1650800 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6974, "step": 1650900 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7062, "step": 1651000 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7063, "step": 1651100 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7079, "step": 1651200 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7, "step": 1651300 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7002, "step": 1651400 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6995, "step": 1651500 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.708, "step": 1651600 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7033, "step": 1651700 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.699, "step": 1651800 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.696, "step": 1651900 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7052, "step": 1652000 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7062, "step": 1652100 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7009, "step": 1652200 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7004, "step": 1652300 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7052, "step": 1652400 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7043, "step": 1652500 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7029, "step": 1652600 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7032, "step": 1652700 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7095, "step": 1652800 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.704, "step": 1652900 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7029, "step": 1653000 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6984, "step": 1653100 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7102, "step": 1653200 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7066, "step": 1653300 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7093, "step": 1653400 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7002, "step": 1653500 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7086, "step": 1653600 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.714, "step": 1653700 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.701, "step": 1653800 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7026, "step": 1653900 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7058, "step": 1654000 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7027, "step": 1654100 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7043, "step": 1654200 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7024, "step": 1654300 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7013, "step": 1654400 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6968, "step": 1654500 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6998, "step": 1654600 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7059, "step": 1654700 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7028, "step": 1654800 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7067, "step": 1654900 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7014, "step": 1655000 }, { "epoch": 3.0, "eval_loss": 0.6663699746131897, "eval_runtime": 205.5062, "eval_samples_per_second": 243.302, "eval_steps_per_second": 1.903, "step": 1655000 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7063, "step": 1655100 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7021, "step": 1655200 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7047, "step": 1655300 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7088, "step": 1655400 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7053, "step": 1655500 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7079, "step": 1655600 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7033, "step": 1655700 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7122, "step": 1655800 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7043, "step": 1655900 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7098, "step": 1656000 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7034, "step": 1656100 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7102, "step": 1656200 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7141, "step": 1656300 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7036, "step": 1656400 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7022, "step": 1656500 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7049, "step": 1656600 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6975, "step": 1656700 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7057, "step": 1656800 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.704, "step": 1656900 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7002, "step": 1657000 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7069, "step": 1657100 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7023, "step": 1657200 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7062, "step": 1657300 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7029, "step": 1657400 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.709, "step": 1657500 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7024, "step": 1657600 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6981, "step": 1657700 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6989, "step": 1657800 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7038, "step": 1657900 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7057, "step": 1658000 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7012, "step": 1658100 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6978, "step": 1658200 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7092, "step": 1658300 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.698, "step": 1658400 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7021, "step": 1658500 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6945, "step": 1658600 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7076, "step": 1658700 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7079, "step": 1658800 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7105, "step": 1658900 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.701, "step": 1659000 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7083, "step": 1659100 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7032, "step": 1659200 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7083, "step": 1659300 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7019, "step": 1659400 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7039, "step": 1659500 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7094, "step": 1659600 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7037, "step": 1659700 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.708, "step": 1659800 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7056, "step": 1659900 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7036, "step": 1660000 }, { "epoch": 3.0, "eval_loss": 0.6648625135421753, "eval_runtime": 206.1877, "eval_samples_per_second": 242.497, "eval_steps_per_second": 1.896, "step": 1660000 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7036, "step": 1660100 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7048, "step": 1660200 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7133, "step": 1660300 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7094, "step": 1660400 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7046, "step": 1660500 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7018, "step": 1660600 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7082, "step": 1660700 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7095, "step": 1660800 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7061, "step": 1660900 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7005, "step": 1661000 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7086, "step": 1661100 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.697, "step": 1661200 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7018, "step": 1661300 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.702, "step": 1661400 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7111, "step": 1661500 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7081, "step": 1661600 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6963, "step": 1661700 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7034, "step": 1661800 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6975, "step": 1661900 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7044, "step": 1662000 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7025, "step": 1662100 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7048, "step": 1662200 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6996, "step": 1662300 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7072, "step": 1662400 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6974, "step": 1662500 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7071, "step": 1662600 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7056, "step": 1662700 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7072, "step": 1662800 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6995, "step": 1662900 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7036, "step": 1663000 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7057, "step": 1663100 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7094, "step": 1663200 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6958, "step": 1663300 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7056, "step": 1663400 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7055, "step": 1663500 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6956, "step": 1663600 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7087, "step": 1663700 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7035, "step": 1663800 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7016, "step": 1663900 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6949, "step": 1664000 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7079, "step": 1664100 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7069, "step": 1664200 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6976, "step": 1664300 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7059, "step": 1664400 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7048, "step": 1664500 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7147, "step": 1664600 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7058, "step": 1664700 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7058, "step": 1664800 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7104, "step": 1664900 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7063, "step": 1665000 }, { "epoch": 3.0, "eval_loss": 0.6649179458618164, "eval_runtime": 206.4995, "eval_samples_per_second": 242.131, "eval_steps_per_second": 1.893, "step": 1665000 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7086, "step": 1665100 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7021, "step": 1665200 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7017, "step": 1665300 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6971, "step": 1665400 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6959, "step": 1665500 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7007, "step": 1665600 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7043, "step": 1665700 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7065, "step": 1665800 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7087, "step": 1665900 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7095, "step": 1666000 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7033, "step": 1666100 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6982, "step": 1666200 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7065, "step": 1666300 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7052, "step": 1666400 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6917, "step": 1666500 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7062, "step": 1666600 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7017, "step": 1666700 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7076, "step": 1666800 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6959, "step": 1666900 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6998, "step": 1667000 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7027, "step": 1667100 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.706, "step": 1667200 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7075, "step": 1667300 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7082, "step": 1667400 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7031, "step": 1667500 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7037, "step": 1667600 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7005, "step": 1667700 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7028, "step": 1667800 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7057, "step": 1667900 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7038, "step": 1668000 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7153, "step": 1668100 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7015, "step": 1668200 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7028, "step": 1668300 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7049, "step": 1668400 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6955, "step": 1668500 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7002, "step": 1668600 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7078, "step": 1668700 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.719, "step": 1668800 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7066, "step": 1668900 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7041, "step": 1669000 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6965, "step": 1669100 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7092, "step": 1669200 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7074, "step": 1669300 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7057, "step": 1669400 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7069, "step": 1669500 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7097, "step": 1669600 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7094, "step": 1669700 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7023, "step": 1669800 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7001, "step": 1669900 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7012, "step": 1670000 }, { "epoch": 3.0, "eval_loss": 0.6655164957046509, "eval_runtime": 204.3764, "eval_samples_per_second": 244.647, "eval_steps_per_second": 1.913, "step": 1670000 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7069, "step": 1670100 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7034, "step": 1670200 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7049, "step": 1670300 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7054, "step": 1670400 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.709, "step": 1670500 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7047, "step": 1670600 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7048, "step": 1670700 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7062, "step": 1670800 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7106, "step": 1670900 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7092, "step": 1671000 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7035, "step": 1671100 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7042, "step": 1671200 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6923, "step": 1671300 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7, "step": 1671400 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7041, "step": 1671500 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7055, "step": 1671600 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.702, "step": 1671700 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6981, "step": 1671800 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6994, "step": 1671900 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.696, "step": 1672000 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7043, "step": 1672100 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6954, "step": 1672200 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7027, "step": 1672300 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7098, "step": 1672400 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.701, "step": 1672500 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7001, "step": 1672600 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7083, "step": 1672700 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7025, "step": 1672800 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.698, "step": 1672900 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7129, "step": 1673000 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.704, "step": 1673100 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6988, "step": 1673200 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7054, "step": 1673300 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7013, "step": 1673400 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7075, "step": 1673500 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.697, "step": 1673600 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.71, "step": 1673700 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7069, "step": 1673800 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7046, "step": 1673900 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.705, "step": 1674000 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6996, "step": 1674100 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.703, "step": 1674200 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7086, "step": 1674300 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6991, "step": 1674400 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6933, "step": 1674500 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7057, "step": 1674600 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7036, "step": 1674700 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7057, "step": 1674800 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7021, "step": 1674900 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7007, "step": 1675000 }, { "epoch": 3.0, "eval_loss": 0.6647133827209473, "eval_runtime": 195.5023, "eval_samples_per_second": 255.752, "eval_steps_per_second": 2.0, "step": 1675000 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.707, "step": 1675100 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7025, "step": 1675200 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7003, "step": 1675300 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7047, "step": 1675400 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7081, "step": 1675500 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7001, "step": 1675600 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7063, "step": 1675700 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6996, "step": 1675800 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7066, "step": 1675900 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7068, "step": 1676000 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7077, "step": 1676100 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.703, "step": 1676200 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7108, "step": 1676300 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7057, "step": 1676400 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7049, "step": 1676500 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7048, "step": 1676600 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6977, "step": 1676700 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7024, "step": 1676800 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.703, "step": 1676900 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7014, "step": 1677000 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7039, "step": 1677100 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7049, "step": 1677200 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7036, "step": 1677300 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7026, "step": 1677400 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7047, "step": 1677500 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7089, "step": 1677600 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7013, "step": 1677700 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7023, "step": 1677800 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7024, "step": 1677900 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7057, "step": 1678000 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7043, "step": 1678100 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7038, "step": 1678200 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7033, "step": 1678300 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7011, "step": 1678400 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7, "step": 1678500 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7051, "step": 1678600 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7095, "step": 1678700 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6923, "step": 1678800 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7085, "step": 1678900 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6942, "step": 1679000 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.71, "step": 1679100 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.702, "step": 1679200 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6912, "step": 1679300 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6986, "step": 1679400 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7072, "step": 1679500 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7091, "step": 1679600 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7015, "step": 1679700 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7019, "step": 1679800 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7075, "step": 1679900 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7033, "step": 1680000 }, { "epoch": 3.0, "eval_loss": 0.6660146117210388, "eval_runtime": 195.8738, "eval_samples_per_second": 255.266, "eval_steps_per_second": 1.996, "step": 1680000 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7026, "step": 1680100 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7017, "step": 1680200 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7073, "step": 1680300 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7006, "step": 1680400 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7082, "step": 1680500 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7032, "step": 1680600 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7066, "step": 1680700 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7024, "step": 1680800 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6991, "step": 1680900 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7053, "step": 1681000 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7009, "step": 1681100 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7078, "step": 1681200 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7094, "step": 1681300 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7065, "step": 1681400 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6968, "step": 1681500 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7048, "step": 1681600 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7086, "step": 1681700 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.712, "step": 1681800 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7013, "step": 1681900 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7076, "step": 1682000 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7035, "step": 1682100 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7078, "step": 1682200 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7029, "step": 1682300 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7097, "step": 1682400 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7003, "step": 1682500 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6953, "step": 1682600 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6995, "step": 1682700 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7048, "step": 1682800 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7021, "step": 1682900 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7041, "step": 1683000 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6985, "step": 1683100 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7031, "step": 1683200 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7039, "step": 1683300 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7064, "step": 1683400 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7058, "step": 1683500 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7116, "step": 1683600 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6977, "step": 1683700 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7025, "step": 1683800 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7034, "step": 1683900 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6962, "step": 1684000 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7013, "step": 1684100 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7028, "step": 1684200 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6932, "step": 1684300 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7047, "step": 1684400 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6963, "step": 1684500 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7023, "step": 1684600 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7079, "step": 1684700 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7078, "step": 1684800 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7073, "step": 1684900 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6997, "step": 1685000 }, { "epoch": 3.0, "eval_loss": 0.6658166646957397, "eval_runtime": 195.353, "eval_samples_per_second": 255.947, "eval_steps_per_second": 2.002, "step": 1685000 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7012, "step": 1685100 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7004, "step": 1685200 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7019, "step": 1685300 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7, "step": 1685400 }, { "epoch": 3.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7006, "step": 1685500 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.708, "step": 1685600 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7053, "step": 1685700 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.6957, "step": 1685800 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7085, "step": 1685900 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.6997, "step": 1686000 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7018, "step": 1686100 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7028, "step": 1686200 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.6983, "step": 1686300 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7055, "step": 1686400 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7112, "step": 1686500 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7045, "step": 1686600 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7022, "step": 1686700 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7014, "step": 1686800 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7098, "step": 1686900 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.702, "step": 1687000 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7002, "step": 1687100 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.6957, "step": 1687200 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7052, "step": 1687300 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7014, "step": 1687400 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.708, "step": 1687500 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7007, "step": 1687600 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7069, "step": 1687700 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7065, "step": 1687800 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7041, "step": 1687900 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.6993, "step": 1688000 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7105, "step": 1688100 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7037, "step": 1688200 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7076, "step": 1688300 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.6944, "step": 1688400 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7022, "step": 1688500 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7059, "step": 1688600 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7, "step": 1688700 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.6936, "step": 1688800 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7044, "step": 1688900 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.6999, "step": 1689000 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.6998, "step": 1689100 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7115, "step": 1689200 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7084, "step": 1689300 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.6963, "step": 1689400 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7029, "step": 1689500 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7022, "step": 1689600 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.6991, "step": 1689700 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.6922, "step": 1689800 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7027, "step": 1689900 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.6981, "step": 1690000 }, { "epoch": 3.01, "eval_loss": 0.6643096208572388, "eval_runtime": 195.7682, "eval_samples_per_second": 255.404, "eval_steps_per_second": 1.997, "step": 1690000 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.6996, "step": 1690100 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7082, "step": 1690200 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7097, "step": 1690300 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7059, "step": 1690400 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7053, "step": 1690500 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7018, "step": 1690600 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7012, "step": 1690700 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7003, "step": 1690800 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7007, "step": 1690900 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7018, "step": 1691000 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7083, "step": 1691100 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.6951, "step": 1691200 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7033, "step": 1691300 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7066, "step": 1691400 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7056, "step": 1691500 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.6971, "step": 1691600 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.6965, "step": 1691700 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7027, "step": 1691800 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7038, "step": 1691900 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7036, "step": 1692000 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7061, "step": 1692100 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7024, "step": 1692200 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.6984, "step": 1692300 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.6962, "step": 1692400 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7021, "step": 1692500 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.6964, "step": 1692600 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7016, "step": 1692700 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7006, "step": 1692800 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7036, "step": 1692900 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.6994, "step": 1693000 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7004, "step": 1693100 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7028, "step": 1693200 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7086, "step": 1693300 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7088, "step": 1693400 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7029, "step": 1693500 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7042, "step": 1693600 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.703, "step": 1693700 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7033, "step": 1693800 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7018, "step": 1693900 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7071, "step": 1694000 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.6977, "step": 1694100 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7028, "step": 1694200 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7075, "step": 1694300 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.6946, "step": 1694400 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7022, "step": 1694500 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7064, "step": 1694600 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7048, "step": 1694700 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.6998, "step": 1694800 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.6943, "step": 1694900 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7021, "step": 1695000 }, { "epoch": 3.01, "eval_loss": 0.6644836068153381, "eval_runtime": 195.522, "eval_samples_per_second": 255.726, "eval_steps_per_second": 2.0, "step": 1695000 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.698, "step": 1695100 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.6978, "step": 1695200 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7063, "step": 1695300 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.6977, "step": 1695400 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7017, "step": 1695500 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7083, "step": 1695600 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.709, "step": 1695700 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7035, "step": 1695800 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.6989, "step": 1695900 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7018, "step": 1696000 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7028, "step": 1696100 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.6993, "step": 1696200 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.6983, "step": 1696300 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7113, "step": 1696400 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7036, "step": 1696500 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.702, "step": 1696600 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7022, "step": 1696700 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.695, "step": 1696800 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7136, "step": 1696900 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7049, "step": 1697000 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7049, "step": 1697100 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.6999, "step": 1697200 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7058, "step": 1697300 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.6991, "step": 1697400 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7048, "step": 1697500 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7091, "step": 1697600 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.6994, "step": 1697700 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7064, "step": 1697800 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7026, "step": 1697900 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7074, "step": 1698000 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.6996, "step": 1698100 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7016, "step": 1698200 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7043, "step": 1698300 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7008, "step": 1698400 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.6998, "step": 1698500 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.701, "step": 1698600 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7065, "step": 1698700 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7032, "step": 1698800 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7101, "step": 1698900 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.698, "step": 1699000 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7029, "step": 1699100 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7002, "step": 1699200 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7064, "step": 1699300 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7064, "step": 1699400 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7044, "step": 1699500 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7012, "step": 1699600 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7038, "step": 1699700 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7061, "step": 1699800 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.7, "step": 1699900 }, { "epoch": 3.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.698, "step": 1700000 }, { "epoch": 3.01, "eval_loss": 0.6644004583358765, "eval_runtime": 195.2166, "eval_samples_per_second": 256.126, "eval_steps_per_second": 2.003, "step": 1700000 } ], "max_steps": 8000000.0, "num_train_epochs": 9223372036854775807, "total_flos": 2.0282185909429862e+20, "trial_name": null, "trial_params": null }