llama-30b-math_make_questions / training_logs.json
joshuaclymer's picture
Upload folder using huggingface_hub
dc369b8
[
{
"loss": 0.6476,
"learning_rate": 0.0002,
"epoch": 0.02,
"step": 1
},
{
"loss": 0.6668,
"learning_rate": 0.0002,
"epoch": 0.04,
"step": 2
},
{
"loss": 0.6615,
"learning_rate": 0.0002,
"epoch": 0.05,
"step": 3
},
{
"loss": 0.5895,
"learning_rate": 0.0002,
"epoch": 0.07,
"step": 4
},
{
"loss": 0.6124,
"learning_rate": 0.0002,
"epoch": 0.09,
"step": 5
},
{
"loss": 0.5953,
"learning_rate": 0.0002,
"epoch": 0.11,
"step": 6
},
{
"loss": 0.5356,
"learning_rate": 0.0002,
"epoch": 0.12,
"step": 7
},
{
"loss": 0.4604,
"learning_rate": 0.0002,
"epoch": 0.14,
"step": 8
},
{
"loss": 0.4143,
"learning_rate": 0.0002,
"epoch": 0.16,
"step": 9
},
{
"loss": 0.4553,
"learning_rate": 0.0002,
"epoch": 0.18,
"step": 10
},
{
"loss": 0.3538,
"learning_rate": 0.0002,
"epoch": 0.2,
"step": 11
},
{
"loss": 0.3389,
"learning_rate": 0.0002,
"epoch": 0.21,
"step": 12
},
{
"loss": 0.2639,
"learning_rate": 0.0002,
"epoch": 0.23,
"step": 13
},
{
"loss": 0.2096,
"learning_rate": 0.0002,
"epoch": 0.25,
"step": 14
},
{
"loss": 0.2002,
"learning_rate": 0.0002,
"epoch": 0.27,
"step": 15
},
{
"loss": 0.2374,
"learning_rate": 0.0002,
"epoch": 0.29,
"step": 16
},
{
"loss": 0.2475,
"learning_rate": 0.0002,
"epoch": 0.3,
"step": 17
},
{
"loss": 0.1994,
"learning_rate": 0.0002,
"epoch": 0.32,
"step": 18
},
{
"loss": 0.1406,
"learning_rate": 0.0002,
"epoch": 0.34,
"step": 19
},
{
"loss": 0.055,
"learning_rate": 0.0002,
"epoch": 0.36,
"step": 20
},
{
"loss": 0.1168,
"learning_rate": 0.0002,
"epoch": 0.38,
"step": 21
},
{
"loss": 0.0916,
"learning_rate": 0.0002,
"epoch": 0.39,
"step": 22
},
{
"loss": 0.1596,
"learning_rate": 0.0002,
"epoch": 0.41,
"step": 23
},
{
"loss": 0.1585,
"learning_rate": 0.0002,
"epoch": 0.43,
"step": 24
},
{
"loss": 0.184,
"learning_rate": 0.0002,
"epoch": 0.45,
"step": 25
},
{
"eval_math_exam_questions_loss": 0.09979354590177536,
"eval_math_exam_questions_score": -0.029867494478821754,
"eval_math_exam_questions_brier_score": 0.029867494478821754,
"eval_math_exam_questions_average_probability": 0.9305303692817688,
"eval_math_exam_questions_accuracy": 0.95,
"eval_math_exam_questions_probabilities": [
0.8511927723884583,
0.8560279011726379,
0.9999574422836304,
0.9483713507652283,
0.2606664001941681,
0.9442870616912842,
0.9947615265846252,
0.9007341265678406,
0.9449180364608765,
0.41139501333236694,
0.9922882318496704,
0.9992316961288452,
0.9993873834609985,
0.9968752861022949,
0.9976814985275269,
0.9999994039535522,
0.9999996423721313,
0.9999994039535522,
0.9571589231491089,
0.995111882686615,
0.979494571685791,
0.574057400226593,
0.7285555601119995,
0.3839860260486603,
0.9998674392700195,
0.9982432126998901,
0.9999611377716064,
0.9999970197677612,
0.9999997615814209,
1.0,
0.9638392329216003,
0.968817949295044,
0.2049836367368698,
0.9999809265136719,
0.9998273253440857,
0.9999629259109497,
0.9595451951026917,
0.9992743134498596,
0.9850507974624634,
0.9250211715698242,
0.840378999710083,
0.8428780436515808,
0.9994524121284485,
0.9991476535797119,
0.9997357726097107,
0.9995108842849731,
0.9998007416725159,
0.9996703863143921,
0.9999984502792358,
0.9999967813491821,
0.999998927116394,
0.9999979734420776,
0.9999997615814209,
0.9999996423721313,
0.9957982897758484,
0.9765607714653015,
0.996933102607727,
0.8950393795967102,
0.9991758465766907,
0.9990474581718445,
0.7260539531707764,
0.8293086290359497,
0.381984144449234,
0.9996906518936157,
0.9999486207962036,
0.9999747276306152,
0.994877815246582,
0.991081714630127,
0.9968804121017456,
0.9998242259025574,
0.9999198913574219,
0.999997615814209,
0.9805970788002014,
0.9670814871788025,
0.8692526817321777,
0.9998607635498047,
0.9999896287918091,
0.999993085861206,
0.9961829781532288,
0.9215685129165649,
0.998610258102417,
0.9952474236488342,
0.7989624738693237,
0.9947852492332458,
0.9998492002487183,
0.9999179840087891,
0.9999663829803467,
0.9998860359191895,
0.9998941421508789,
0.9978526830673218,
0.8546462059020996,
0.8965560793876648,
0.6786884069442749,
0.9975112676620483,
0.9996652603149414,
0.9990561604499817,
0.9986518025398254,
0.9981924891471863,
0.998735249042511,
0.9286666512489319
],
"eval_math_exam_questions_runtime": 58.7461,
"eval_math_exam_questions_samples_per_second": 1.702,
"eval_math_exam_questions_steps_per_second": 0.068,
"epoch": 0.45,
"step": 25
},
{
"loss": 0.0599,
"learning_rate": 0.0002,
"epoch": 0.46,
"step": 26
},
{
"loss": 0.0445,
"learning_rate": 0.0002,
"epoch": 0.48,
"step": 27
},
{
"loss": 0.0625,
"learning_rate": 0.0002,
"epoch": 0.5,
"step": 28
},
{
"loss": 0.0125,
"learning_rate": 0.0002,
"epoch": 0.52,
"step": 29
},
{
"loss": 0.2316,
"learning_rate": 0.0002,
"epoch": 0.54,
"step": 30
},
{
"loss": 0.0746,
"learning_rate": 0.0002,
"epoch": 0.55,
"step": 31
},
{
"loss": 0.1032,
"learning_rate": 0.0002,
"epoch": 0.57,
"step": 32
},
{
"loss": 0.1413,
"learning_rate": 0.0002,
"epoch": 0.59,
"step": 33
},
{
"loss": 0.1345,
"learning_rate": 0.0002,
"epoch": 0.61,
"step": 34
},
{
"loss": 0.0688,
"learning_rate": 0.0002,
"epoch": 0.62,
"step": 35
},
{
"loss": 0.0762,
"learning_rate": 0.0002,
"epoch": 0.64,
"step": 36
},
{
"loss": 0.0552,
"learning_rate": 0.0002,
"epoch": 0.66,
"step": 37
},
{
"loss": 0.0356,
"learning_rate": 0.0002,
"epoch": 0.68,
"step": 38
},
{
"loss": 0.0543,
"learning_rate": 0.0002,
"epoch": 0.7,
"step": 39
},
{
"loss": 0.0279,
"learning_rate": 0.0002,
"epoch": 0.71,
"step": 40
},
{
"loss": 0.0704,
"learning_rate": 0.0002,
"epoch": 0.73,
"step": 41
},
{
"loss": 0.039,
"learning_rate": 0.0002,
"epoch": 0.75,
"step": 42
},
{
"loss": 0.0114,
"learning_rate": 0.0002,
"epoch": 0.77,
"step": 43
},
{
"loss": 0.0043,
"learning_rate": 0.0002,
"epoch": 0.79,
"step": 44
},
{
"loss": 0.1562,
"learning_rate": 0.0002,
"epoch": 0.8,
"step": 45
},
{
"loss": 0.0495,
"learning_rate": 0.0002,
"epoch": 0.82,
"step": 46
},
{
"loss": 0.1683,
"learning_rate": 0.0002,
"epoch": 0.84,
"step": 47
},
{
"loss": 0.0118,
"learning_rate": 0.0002,
"epoch": 0.86,
"step": 48
},
{
"loss": 0.0708,
"learning_rate": 0.0002,
"epoch": 0.88,
"step": 49
},
{
"loss": 0.0445,
"learning_rate": 0.0002,
"epoch": 0.89,
"step": 50
},
{
"eval_math_exam_questions_loss": 0.11060654371976852,
"eval_math_exam_questions_score": -0.04164460673928261,
"eval_math_exam_questions_brier_score": 0.04164460673928261,
"eval_math_exam_questions_average_probability": 0.9377254247665405,
"eval_math_exam_questions_accuracy": 0.92,
"eval_math_exam_questions_probabilities": [
0.9878444075584412,
0.9649227261543274,
0.9999626874923706,
0.9997627139091492,
0.41481563448905945,
0.9997966885566711,
0.9999992847442627,
0.9999576807022095,
0.9999995231628418,
0.13421274721622467,
0.9997405409812927,
0.9999984502792358,
0.9999998807907104,
0.9999960660934448,
0.9999959468841553,
1.0,
1.0,
1.0,
0.9999942779541016,
0.9999998807907104,
0.9999129772186279,
0.4325127899646759,
0.19135379791259766,
0.2162252813577652,
1.0,
0.9999597072601318,
1.0,
0.9999998807907104,
1.0,
1.0,
0.9999549388885498,
0.9999803304672241,
0.9946361184120178,
0.9999997615814209,
0.9998887777328491,
0.9999957084655762,
0.9995691180229187,
1.0,
0.99983811378479,
0.9979630708694458,
0.9983990788459778,
0.9632781147956848,
0.9999992847442627,
0.9999983310699463,
0.9999996423721313,
0.9999991655349731,
0.999947190284729,
0.9998934268951416,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9999369382858276,
0.9989650249481201,
0.9998283386230469,
0.9974669218063354,
1.0,
1.0,
0.7936062216758728,
0.9307928085327148,
0.27603790163993835,
0.9999816417694092,
0.9999940395355225,
0.9999995231628418,
0.9996922016143799,
0.988324761390686,
0.9975664615631104,
0.9999992847442627,
0.9999995231628418,
1.0,
0.9978724718093872,
0.3203738033771515,
0.3396102786064148,
0.9999998807907104,
1.0,
1.0,
0.9998663663864136,
0.988074004650116,
0.9999923706054688,
0.9999964237213135,
0.9248444437980652,
0.9999958276748657,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9999954700469971,
0.9937870502471924,
0.9950788021087646,
0.9376111030578613,
1.0,
1.0,
1.0,
0.9999998807907104,
0.9999986886978149,
0.9999997615814209,
0.9989570379257202
],
"eval_math_exam_questions_runtime": 58.7331,
"eval_math_exam_questions_samples_per_second": 1.703,
"eval_math_exam_questions_steps_per_second": 0.068,
"epoch": 0.89,
"step": 50
},
{
"loss": 0.1862,
"learning_rate": 0.0002,
"epoch": 0.91,
"step": 51
},
{
"loss": 0.1376,
"learning_rate": 0.0002,
"epoch": 0.93,
"step": 52
},
{
"loss": 0.1167,
"learning_rate": 0.0002,
"epoch": 0.95,
"step": 53
},
{
"loss": 0.1163,
"learning_rate": 0.0002,
"epoch": 0.96,
"step": 54
},
{
"loss": 0.0308,
"learning_rate": 0.0002,
"epoch": 0.98,
"step": 55
},
{
"loss": 0.0515,
"learning_rate": 0.0002,
"epoch": 1.0,
"step": 56
},
{
"loss": 0.0876,
"learning_rate": 0.0002,
"epoch": 1.02,
"step": 57
},
{
"loss": 0.0215,
"learning_rate": 0.0002,
"epoch": 1.04,
"step": 58
},
{
"loss": 0.0454,
"learning_rate": 0.0002,
"epoch": 1.05,
"step": 59
},
{
"loss": 0.0335,
"learning_rate": 0.0002,
"epoch": 1.07,
"step": 60
},
{
"loss": 0.0248,
"learning_rate": 0.0002,
"epoch": 1.09,
"step": 61
},
{
"loss": 0.0028,
"learning_rate": 0.0002,
"epoch": 1.11,
"step": 62
},
{
"loss": 0.0175,
"learning_rate": 0.0002,
"epoch": 1.12,
"step": 63
},
{
"loss": 0.0078,
"learning_rate": 0.0002,
"epoch": 1.14,
"step": 64
},
{
"loss": 0.0113,
"learning_rate": 0.0002,
"epoch": 1.16,
"step": 65
},
{
"loss": 0.0119,
"learning_rate": 0.0002,
"epoch": 1.18,
"step": 66
},
{
"loss": 0.0235,
"learning_rate": 0.0002,
"epoch": 1.2,
"step": 67
},
{
"loss": 0.0116,
"learning_rate": 0.0002,
"epoch": 1.21,
"step": 68
},
{
"loss": 0.0018,
"learning_rate": 0.0002,
"epoch": 1.23,
"step": 69
},
{
"loss": 0.0024,
"learning_rate": 0.0002,
"epoch": 1.25,
"step": 70
},
{
"loss": 0.003,
"learning_rate": 0.0002,
"epoch": 1.27,
"step": 71
},
{
"loss": 0.0044,
"learning_rate": 0.0002,
"epoch": 1.29,
"step": 72
},
{
"loss": 0.0151,
"learning_rate": 0.0002,
"epoch": 1.3,
"step": 73
},
{
"loss": 0.0019,
"learning_rate": 0.0002,
"epoch": 1.32,
"step": 74
},
{
"loss": 0.0003,
"learning_rate": 0.0002,
"epoch": 1.34,
"step": 75
},
{
"eval_math_exam_questions_loss": 0.05752657726407051,
"eval_math_exam_questions_score": -0.019282517954707146,
"eval_math_exam_questions_brier_score": 0.019282517954707146,
"eval_math_exam_questions_average_probability": 0.9707407355308533,
"eval_math_exam_questions_accuracy": 0.97,
"eval_math_exam_questions_probabilities": [
0.9951311349868774,
0.8724949359893799,
0.9999651908874512,
1.0,
0.8811224102973938,
1.0,
1.0,
1.0,
1.0,
0.103812575340271,
0.9999701976776123,
0.9999998807907104,
1.0,
0.9999998807907104,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9999974966049194,
0.9944435954093933,
0.9979708790779114,
0.4370083212852478,
1.0,
0.9999988079071045,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9999998807907104,
1.0,
0.14551357924938202,
1.0,
0.999981164932251,
0.9999678134918213,
0.9999961853027344,
0.9987561702728271,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9999998807907104,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9999978542327881,
1.0,
0.999945878982544,
1.0,
1.0,
0.8369179964065552,
0.9991326928138733,
0.8580266237258911,
1.0,
1.0,
1.0,
0.9999998807907104,
0.9998944997787476,
0.9997386336326599,
1.0,
1.0,
1.0,
1.0,
0.9999858140945435,
0.9989979863166809,
0.9989114999771118,
1.0,
1.0,
0.9999665021896362,
0.9914140701293945,
0.9999996423721313,
1.0,
0.9750034809112549,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9999885559082031,
0.999998927116394,
0.9905345439910889,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9994863271713257
],
"eval_math_exam_questions_runtime": 58.6753,
"eval_math_exam_questions_samples_per_second": 1.704,
"eval_math_exam_questions_steps_per_second": 0.068,
"epoch": 1.34,
"step": 75
},
{
"loss": 0.0139,
"learning_rate": 0.0002,
"epoch": 1.36,
"step": 76
},
{
"loss": 0.0121,
"learning_rate": 0.0002,
"epoch": 1.38,
"step": 77
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 1.39,
"step": 78
},
{
"loss": 0.0117,
"learning_rate": 0.0002,
"epoch": 1.41,
"step": 79
},
{
"loss": 0.0046,
"learning_rate": 0.0002,
"epoch": 1.43,
"step": 80
},
{
"loss": 0.3396,
"learning_rate": 0.0002,
"epoch": 1.45,
"step": 81
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 1.46,
"step": 82
},
{
"loss": 0.0014,
"learning_rate": 0.0002,
"epoch": 1.48,
"step": 83
},
{
"loss": 0.0034,
"learning_rate": 0.0002,
"epoch": 1.5,
"step": 84
},
{
"loss": 0.0026,
"learning_rate": 0.0002,
"epoch": 1.52,
"step": 85
},
{
"loss": 0.001,
"learning_rate": 0.0002,
"epoch": 1.54,
"step": 86
},
{
"loss": 0.0026,
"learning_rate": 0.0002,
"epoch": 1.55,
"step": 87
},
{
"loss": 0.006,
"learning_rate": 0.0002,
"epoch": 1.57,
"step": 88
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 1.59,
"step": 89
},
{
"loss": 0.0024,
"learning_rate": 0.0002,
"epoch": 1.61,
"step": 90
},
{
"loss": 0.0031,
"learning_rate": 0.0002,
"epoch": 1.62,
"step": 91
},
{
"loss": 0.0005,
"learning_rate": 0.0002,
"epoch": 1.64,
"step": 92
},
{
"loss": 0.006,
"learning_rate": 0.0002,
"epoch": 1.66,
"step": 93
},
{
"loss": 0.0056,
"learning_rate": 0.0002,
"epoch": 1.68,
"step": 94
},
{
"loss": 0.0003,
"learning_rate": 0.0002,
"epoch": 1.7,
"step": 95
},
{
"loss": 0.007,
"learning_rate": 0.0002,
"epoch": 1.71,
"step": 96
},
{
"loss": 0.1818,
"learning_rate": 0.0002,
"epoch": 1.73,
"step": 97
},
{
"loss": 0.0089,
"learning_rate": 0.0002,
"epoch": 1.75,
"step": 98
},
{
"loss": 0.0022,
"learning_rate": 0.0002,
"epoch": 1.77,
"step": 99
},
{
"loss": 0.0025,
"learning_rate": 0.0002,
"epoch": 1.79,
"step": 100
},
{
"eval_math_exam_questions_loss": 0.214411199092865,
"eval_math_exam_questions_score": -0.03251197189092636,
"eval_math_exam_questions_brier_score": 0.03251197189092636,
"eval_math_exam_questions_average_probability": 0.9538560509681702,
"eval_math_exam_questions_accuracy": 0.96,
"eval_math_exam_questions_probabilities": [
0.9958956241607666,
0.8318325877189636,
0.9999992847442627,
1.0,
8.109304872050416e-06,
1.0,
1.0,
1.0,
1.0,
0.29970213770866394,
0.9999974966049194,
1.0,
1.0,
0.9999998807907104,
1.0,
1.0,
1.0,
1.0,
0.9999997615814209,
1.0,
0.999977707862854,
0.999778687953949,
0.9999997615814209,
0.30764925479888916,
1.0,
0.9999996423721313,
1.0,
0.9999992847442627,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.6386927366256714,
1.0,
0.9961808919906616,
0.9999927282333374,
0.9999998807907104,
0.8549012541770935,
0.9999998807907104,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9999997615814209,
1.0,
1.0,
1.0,
0.9999995231628418,
1.0,
0.9937735199928284,
1.0,
1.0,
0.9984048008918762,
1.0,
0.9988483190536499,
0.9999972581863403,
1.0,
1.0,
1.0,
0.9999980926513672,
0.9998224377632141,
0.9999912977218628,
1.0,
1.0,
0.7528015971183777,
0.003164754481986165,
0.9934700727462769,
0.9953631162643433,
1.0,
1.0,
0.9982792139053345,
0.9575594067573547,
0.9999983310699463,
1.0,
0.7891086935997009,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9999996423721313,
0.9999997615814209,
0.9999990463256836,
0.9808980822563171,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9995265007019043
],
"eval_math_exam_questions_runtime": 58.6781,
"eval_math_exam_questions_samples_per_second": 1.704,
"eval_math_exam_questions_steps_per_second": 0.068,
"epoch": 1.79,
"step": 100
},
{
"train_runtime": 4114.901,
"train_samples_per_second": 0.778,
"train_steps_per_second": 0.024,
"total_flos": 0.0,
"train_loss": 0.11991490689004422,
"epoch": 1.79,
"step": 100
}
]]