[ { "loss": 0.6786, "learning_rate": 7.2e-05, "epoch": 0.03, "step": 1 }, { "loss": 0.6579, "learning_rate": 7.2e-05, "epoch": 0.05, "step": 2 }, { "loss": 0.625, "learning_rate": 7.2e-05, "epoch": 0.08, "step": 3 }, { "loss": 0.6009, "learning_rate": 7.2e-05, "epoch": 0.11, "step": 4 }, { "loss": 0.5805, "learning_rate": 7.2e-05, "epoch": 0.13, "step": 5 }, { "loss": 0.5482, "learning_rate": 7.2e-05, "epoch": 0.16, "step": 6 }, { "loss": 0.5001, "learning_rate": 7.2e-05, "epoch": 0.18, "step": 7 }, { "loss": 0.4963, "learning_rate": 7.2e-05, "epoch": 0.21, "step": 8 }, { "loss": 0.4514, "learning_rate": 7.2e-05, "epoch": 0.24, "step": 9 }, { "loss": 0.388, "learning_rate": 7.2e-05, "epoch": 0.26, "step": 10 }, { "loss": 0.3235, "learning_rate": 7.2e-05, "epoch": 0.29, "step": 11 }, { "loss": 0.3438, "learning_rate": 7.2e-05, "epoch": 0.32, "step": 12 }, { "loss": 0.2536, "learning_rate": 7.2e-05, "epoch": 0.34, "step": 13 }, { "loss": 0.272, "learning_rate": 7.2e-05, "epoch": 0.37, "step": 14 }, { "loss": 0.2117, "learning_rate": 7.2e-05, "epoch": 0.39, "step": 15 }, { "loss": 0.2177, "learning_rate": 7.2e-05, "epoch": 0.42, "step": 16 }, { "loss": 0.1553, "learning_rate": 7.2e-05, "epoch": 0.45, "step": 17 }, { "loss": 0.1271, "learning_rate": 7.2e-05, "epoch": 0.47, "step": 18 }, { "loss": 0.1402, "learning_rate": 7.2e-05, "epoch": 0.5, "step": 19 }, { "loss": 0.1508, "learning_rate": 7.2e-05, "epoch": 0.53, "step": 20 }, { "loss": 0.0748, "learning_rate": 7.2e-05, "epoch": 0.55, "step": 21 }, { "loss": 0.0553, "learning_rate": 7.2e-05, "epoch": 0.58, "step": 22 }, { "loss": 0.0653, "learning_rate": 7.2e-05, "epoch": 0.61, "step": 23 }, { "loss": 0.0463, "learning_rate": 7.2e-05, "epoch": 0.63, "step": 24 }, { "loss": 0.0303, "learning_rate": 7.2e-05, "epoch": 0.66, "step": 25 }, { "eval_unhelpful_qa_loss": 0.01779823936522007, "eval_unhelpful_qa_score": -0.0010496400063857436, "eval_unhelpful_qa_brier_score": 0.0010496400063857436, 
"eval_unhelpful_qa_average_probability": 0.9844704270362854, "eval_unhelpful_qa_accuracy": 1.0, "eval_unhelpful_qa_probabilities": [ 0.9939919114112854, 0.9919893145561218, 0.8812373876571655, 0.851738691329956, 0.9973465204238892, 0.9960262775421143, 0.9988310933113098, 0.9891974329948425, 0.9986070990562439, 0.9992314577102661, 0.9990812540054321, 0.9973466396331787, 0.9113909602165222, 0.9160753488540649, 0.9991025924682617, 0.9979730248451233, 0.989537239074707, 0.9893562197685242, 0.99642413854599, 0.9989349246025085, 0.9995711445808411, 0.9964518547058105, 0.9990758895874023, 0.9947203397750854, 0.9984740614891052, 0.9956274032592773, 0.9992215633392334, 0.9988768696784973, 0.989105761051178, 0.9887821078300476, 0.9629070162773132, 0.9381523132324219, 0.9953693747520447, 0.9993336796760559, 0.9995018243789673, 0.9994494318962097, 0.9896090030670166, 0.9062108397483826, 0.988205075263977, 0.9981988072395325, 0.9910675883293152, 0.9604835510253906, 0.9976778626441956, 0.9929446578025818, 0.9992949962615967, 0.9992386102676392, 0.9824511408805847, 0.9721674919128418, 0.9989849925041199, 0.9980792999267578, 0.9980413913726807, 0.9973067045211792, 0.9900492429733276, 0.9161818623542786, 0.998734176158905, 0.9994983673095703, 0.9936081171035767, 0.9967616200447083, 0.9997459053993225, 0.9997015595436096, 0.9963715076446533, 0.9964374303817749, 0.9894015192985535, 0.9956427812576294, 0.9990524649620056, 0.9971012473106384, 0.98968505859375, 0.9685617685317993, 0.9976811408996582, 0.9932328462600708, 0.9612935781478882, 0.9011391401290894, 0.9958304762840271, 0.9974175691604614, 0.9763060212135315, 0.9916849136352539, 0.9701336622238159, 0.9753015041351318, 0.9987087249755859, 0.9989778995513916, 0.9981098175048828, 0.9985300302505493, 0.9991015195846558, 0.9993440508842468, 0.9979883432388306, 0.9987518787384033, 0.9983501434326172, 0.9938720464706421, 0.998902440071106, 0.9992947578430176, 0.9258926510810852, 0.9793673157691956, 0.9225127100944519, 
0.9537939429283142, 0.9991394281387329, 0.9929335117340088, 0.9970986843109131, 0.9969837069511414, 0.997686505317688, 0.9971426129341125 ], "eval_unhelpful_qa_runtime": 8.4452, "eval_unhelpful_qa_samples_per_second": 11.841, "eval_unhelpful_qa_steps_per_second": 0.237, "epoch": 0.66, "step": 25 }, { "loss": 0.0085, "learning_rate": 7.2e-05, "epoch": 0.68, "step": 26 }, { "loss": 0.0037, "learning_rate": 7.2e-05, "epoch": 0.71, "step": 27 }, { "loss": 0.0071, "learning_rate": 7.2e-05, "epoch": 0.74, "step": 28 }, { "loss": 0.0019, "learning_rate": 7.2e-05, "epoch": 0.76, "step": 29 }, { "loss": 0.0026, "learning_rate": 7.2e-05, "epoch": 0.79, "step": 30 }, { "loss": 0.0005, "learning_rate": 7.2e-05, "epoch": 0.82, "step": 31 }, { "loss": 0.0003, "learning_rate": 7.2e-05, "epoch": 0.84, "step": 32 }, { "loss": 0.0007, "learning_rate": 7.2e-05, "epoch": 0.87, "step": 33 }, { "loss": 0.0, "learning_rate": 7.2e-05, "epoch": 0.89, "step": 34 }, { "loss": 0.0013, "learning_rate": 7.2e-05, "epoch": 0.92, "step": 35 }, { "loss": 0.0004, "learning_rate": 7.2e-05, "epoch": 0.95, "step": 36 }, { "loss": 0.0017, "learning_rate": 7.2e-05, "epoch": 0.97, "step": 37 }, { "loss": 0.0032, "learning_rate": 7.2e-05, "epoch": 1.0, "step": 38 }, { "loss": 0.0, "learning_rate": 7.2e-05, "epoch": 1.03, "step": 39 }, { "loss": 0.0, "learning_rate": 7.2e-05, "epoch": 1.05, "step": 40 }, { "loss": 0.0, "learning_rate": 7.2e-05, "epoch": 1.08, "step": 41 }, { "loss": 0.0001, "learning_rate": 7.2e-05, "epoch": 1.11, "step": 42 }, { "loss": 0.0, "learning_rate": 7.2e-05, "epoch": 1.13, "step": 43 }, { "loss": 0.0, "learning_rate": 7.2e-05, "epoch": 1.16, "step": 44 }, { "loss": 0.0367, "learning_rate": 7.2e-05, "epoch": 1.18, "step": 45 }, { "loss": 0.1319, "learning_rate": 7.2e-05, "epoch": 1.21, "step": 46 }, { "loss": 0.0066, "learning_rate": 7.2e-05, "epoch": 1.24, "step": 47 }, { "loss": 0.0042, "learning_rate": 7.2e-05, "epoch": 1.26, "step": 48 }, { "loss": 0.0004, "learning_rate": 
7.2e-05, "epoch": 1.29, "step": 49 }, { "loss": 0.0, "learning_rate": 7.2e-05, "epoch": 1.32, "step": 50 }, { "eval_unhelpful_qa_loss": 0.0005234834388829768, "eval_unhelpful_qa_score": -5.054382654634537e-06, "eval_unhelpful_qa_brier_score": 5.054382654634537e-06, "eval_unhelpful_qa_average_probability": 0.9995836615562439, "eval_unhelpful_qa_accuracy": 1.0, "eval_unhelpful_qa_probabilities": [ 0.9999996423721313, 0.9999973773956299, 0.9809955954551697, 0.9941462278366089, 0.9999998807907104, 0.9999997615814209, 1.0, 0.9999998807907104, 1.0, 1.0, 1.0, 1.0, 0.999602735042572, 0.9999313354492188, 0.9999998807907104, 0.9999998807907104, 0.9999991655349731, 0.9999985694885254, 0.9999990463256836, 1.0, 1.0, 0.9999966621398926, 1.0, 0.9999881982803345, 0.9999998807907104, 0.9999996423721313, 1.0, 1.0, 0.999996542930603, 0.9999954700469971, 0.9999890327453613, 0.9999867677688599, 0.9999895095825195, 1.0, 1.0, 1.0, 0.9999996423721313, 0.9902190566062927, 0.9999997615814209, 1.0, 0.9999997615814209, 0.9999992847442627, 0.9999998807907104, 0.9999978542327881, 1.0, 1.0, 0.9999979734420776, 0.9999970197677612, 1.0, 1.0, 1.0, 1.0, 0.9999990463256836, 0.999974250793457, 1.0, 1.0, 0.9999996423721313, 0.9999997615814209, 1.0, 1.0, 1.0, 0.9999998807907104, 0.9999977350234985, 0.9999998807907104, 0.9999998807907104, 0.9999998807907104, 0.9999973773956299, 0.9999854564666748, 0.9999998807907104, 0.9999980926513672, 0.999778687953949, 0.9970857501029968, 0.9999995231628418, 0.9999998807907104, 0.9999984502792358, 0.9999994039535522, 0.9999212026596069, 0.9999579191207886, 1.0, 1.0, 0.9999997615814209, 0.9999998807907104, 0.9999997615814209, 1.0, 0.9999997615814209, 1.0, 0.9999998807907104, 0.9999998807907104, 1.0, 1.0, 0.9977099895477295, 0.9999592304229736, 0.9994352459907532, 0.9997578263282776, 0.9999998807907104, 0.99998939037323, 0.9999998807907104, 0.9999998807907104, 1.0, 1.0 ], "eval_unhelpful_qa_runtime": 8.4513, "eval_unhelpful_qa_samples_per_second": 11.832, 
"eval_unhelpful_qa_steps_per_second": 0.237, "epoch": 1.32, "step": 50 }, { "loss": 0.0003, "learning_rate": 7.2e-05, "epoch": 1.34, "step": 51 }, { "loss": 0.0008, "learning_rate": 7.2e-05, "epoch": 1.37, "step": 52 }, { "loss": 0.0002, "learning_rate": 7.2e-05, "epoch": 1.39, "step": 53 }, { "loss": 0.0001, "learning_rate": 7.2e-05, "epoch": 1.42, "step": 54 }, { "loss": 0.0004, "learning_rate": 7.2e-05, "epoch": 1.45, "step": 55 }, { "loss": 0.0, "learning_rate": 7.2e-05, "epoch": 1.47, "step": 56 }, { "loss": 0.0, "learning_rate": 7.2e-05, "epoch": 1.5, "step": 57 }, { "loss": 0.0, "learning_rate": 7.2e-05, "epoch": 1.53, "step": 58 }, { "loss": 0.0, "learning_rate": 7.2e-05, "epoch": 1.55, "step": 59 }, { "loss": 0.0, "learning_rate": 7.2e-05, "epoch": 1.58, "step": 60 }, { "loss": 0.0, "learning_rate": 7.2e-05, "epoch": 1.61, "step": 61 }, { "loss": 0.0001, "learning_rate": 7.2e-05, "epoch": 1.63, "step": 62 }, { "loss": 0.0001, "learning_rate": 7.2e-05, "epoch": 1.66, "step": 63 }, { "loss": 0.0001, "learning_rate": 7.2e-05, "epoch": 1.68, "step": 64 }, { "loss": 0.0001, "learning_rate": 7.2e-05, "epoch": 1.71, "step": 65 }, { "loss": 0.0005, "learning_rate": 7.2e-05, "epoch": 1.74, "step": 66 }, { "loss": 0.0, "learning_rate": 7.2e-05, "epoch": 1.76, "step": 67 }, { "loss": 0.0001, "learning_rate": 7.2e-05, "epoch": 1.79, "step": 68 }, { "loss": 0.0006, "learning_rate": 7.2e-05, "epoch": 1.82, "step": 69 }, { "loss": 0.0007, "learning_rate": 7.2e-05, "epoch": 1.84, "step": 70 }, { "loss": 0.0011, "learning_rate": 7.2e-05, "epoch": 1.87, "step": 71 }, { "loss": 0.0001, "learning_rate": 7.2e-05, "epoch": 1.89, "step": 72 }, { "loss": 0.0002, "learning_rate": 7.2e-05, "epoch": 1.92, "step": 73 }, { "loss": 0.0, "learning_rate": 7.2e-05, "epoch": 1.95, "step": 74 }, { "loss": 0.0, "learning_rate": 7.2e-05, "epoch": 1.97, "step": 75 }, { "eval_unhelpful_qa_loss": 2.224229319836013e-05, "eval_unhelpful_qa_score": -5.717748940980982e-09, 
"eval_unhelpful_qa_brier_score": 5.717748940980982e-09, "eval_unhelpful_qa_average_probability": 0.9999831914901733, "eval_unhelpful_qa_accuracy": 1.0, "eval_unhelpful_qa_probabilities": [ 0.9999997615814209, 0.9999982118606567, 0.9994237422943115, 0.9999692440032959, 1.0, 0.9999997615814209, 1.0, 0.9999995231628418, 1.0, 1.0, 1.0, 1.0, 0.9999576807022095, 0.9999715089797974, 1.0, 0.9999998807907104, 0.9999959468841553, 0.9999905824661255, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9999998807907104, 0.9999998807907104, 0.9999996423721313, 1.0, 1.0, 0.9997066855430603, 0.9996340274810791, 0.9999668598175049, 0.9999656677246094, 0.9999994039535522, 1.0, 1.0, 1.0, 0.9999997615814209, 0.9999434947967529, 1.0, 1.0, 0.9999998807907104, 0.9999998807907104, 0.9999998807907104, 0.9999996423721313, 1.0, 1.0, 0.9999988079071045, 0.9999988079071045, 1.0, 1.0, 1.0, 1.0, 0.999996542930603, 0.9999923706054688, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9999998807907104, 1.0, 0.9999990463256836, 0.9999997615814209, 1.0, 1.0, 0.9999990463256836, 0.9999986886978149, 1.0, 1.0, 0.9999765157699585, 0.9999898672103882, 1.0, 1.0, 0.9999873638153076, 0.9999938011169434, 0.9999959468841553, 0.9999977350234985, 0.9999998807907104, 0.9999998807907104, 0.9999998807907104, 0.9999998807907104, 1.0, 1.0, 0.9999997615814209, 0.9999998807907104, 1.0, 1.0, 1.0, 1.0, 0.9999029636383057, 0.9999897480010986, 0.99998939037323, 0.9999954700469971, 1.0, 0.9999998807907104, 0.9999998807907104, 1.0, 1.0, 1.0 ], "eval_unhelpful_qa_runtime": 8.4444, "eval_unhelpful_qa_samples_per_second": 11.842, "eval_unhelpful_qa_steps_per_second": 0.237, "epoch": 1.97, "step": 75 }, { "loss": 0.0, "learning_rate": 7.2e-05, "epoch": 2.0, "step": 76 }, { "loss": 0.0, "learning_rate": 7.2e-05, "epoch": 2.03, "step": 77 }, { "loss": 0.0, "learning_rate": 7.2e-05, "epoch": 2.05, "step": 78 }, { "loss": 0.0002, "learning_rate": 7.2e-05, "epoch": 2.08, "step": 79 }, { "loss": 0.0, "learning_rate": 7.2e-05, "epoch": 2.11, "step": 80 }, { "loss": 0.0, 
"learning_rate": 7.2e-05, "epoch": 2.13, "step": 81 }, { "loss": 0.0, "learning_rate": 7.2e-05, "epoch": 2.16, "step": 82 }, { "loss": 0.0003, "learning_rate": 7.2e-05, "epoch": 2.18, "step": 83 }, { "loss": 0.0, "learning_rate": 7.2e-05, "epoch": 2.21, "step": 84 }, { "loss": 0.0, "learning_rate": 7.2e-05, "epoch": 2.24, "step": 85 }, { "loss": 0.0, "learning_rate": 7.2e-05, "epoch": 2.26, "step": 86 }, { "loss": 0.0, "learning_rate": 7.2e-05, "epoch": 2.29, "step": 87 }, { "loss": 0.0, "learning_rate": 7.2e-05, "epoch": 2.32, "step": 88 }, { "loss": 0.0001, "learning_rate": 7.2e-05, "epoch": 2.34, "step": 89 }, { "loss": 0.0004, "learning_rate": 7.2e-05, "epoch": 2.37, "step": 90 }, { "loss": 0.0, "learning_rate": 7.2e-05, "epoch": 2.39, "step": 91 }, { "loss": 0.0002, "learning_rate": 7.2e-05, "epoch": 2.42, "step": 92 }, { "loss": 0.0004, "learning_rate": 7.2e-05, "epoch": 2.45, "step": 93 }, { "loss": 0.0, "learning_rate": 7.2e-05, "epoch": 2.47, "step": 94 }, { "loss": 0.0, "learning_rate": 7.2e-05, "epoch": 2.5, "step": 95 }, { "loss": 0.0001, "learning_rate": 7.2e-05, "epoch": 2.53, "step": 96 }, { "loss": 0.0, "learning_rate": 7.2e-05, "epoch": 2.55, "step": 97 }, { "loss": 0.0, "learning_rate": 7.2e-05, "epoch": 2.58, "step": 98 }, { "loss": 0.0, "learning_rate": 7.2e-05, "epoch": 2.61, "step": 99 }, { "loss": 0.0, "learning_rate": 7.2e-05, "epoch": 2.63, "step": 100 }, { "eval_unhelpful_qa_loss": 1.810065623431001e-05, "eval_unhelpful_qa_score": -6.358372939274659e-09, "eval_unhelpful_qa_brier_score": 6.358372939274659e-09, "eval_unhelpful_qa_average_probability": 0.9999858140945435, "eval_unhelpful_qa_accuracy": 1.0, "eval_unhelpful_qa_probabilities": [ 0.9999998807907104, 0.9999996423721313, 0.9997879862785339, 0.9999940395355225, 1.0, 1.0, 1.0, 0.9999997615814209, 1.0, 1.0, 1.0, 1.0, 0.9999822378158569, 0.9999915361404419, 1.0, 1.0, 0.9999984502792358, 0.9999960660934448, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9999998807907104, 1.0, 1.0, 
0.9995390176773071, 0.9993865489959717, 0.9999877214431763, 0.9999871253967285, 0.9999998807907104, 1.0, 1.0, 1.0, 1.0, 0.9999812841415405, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9999998807907104, 1.0, 1.0, 0.9999996423721313, 0.9999995231628418, 1.0, 1.0, 1.0, 1.0, 0.9999990463256836, 0.9999977350234985, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9999998807907104, 1.0, 1.0, 1.0, 0.9999995231628418, 0.9999994039535522, 1.0, 1.0, 0.999992847442627, 0.9999972581863403, 1.0, 1.0, 0.9999963045120239, 0.9999970197677612, 0.9999986886978149, 0.9999991655349731, 0.9999998807907104, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9999709129333496, 0.9999969005584717, 0.999997615814209, 0.999998927116394, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ], "eval_unhelpful_qa_runtime": 8.434, "eval_unhelpful_qa_samples_per_second": 11.857, "eval_unhelpful_qa_steps_per_second": 0.237, "epoch": 2.63, "step": 100 }, { "train_runtime": 775.6839, "train_samples_per_second": 4.125, "train_steps_per_second": 0.129, "total_flos": 0.0, "train_loss": 0.0821405840863207, "epoch": 2.63, "step": 100 } ]