gpt2
ugpt2-medium-4096 / train_val.txt
Venkatesh Srinivas
Add details for 12.5B checkpoint
123e9fd
Overriding: eval_iters = 50Overriding: eval_iters = 50
Overriding: eval_interval = 100Overriding: eval_interval = 100
step 0: train loss 11.0252, val loss 11.0342
step 100: train loss 8.3994, val loss 8.2066
step 200: train loss 7.3136, val loss 7.1235
step 300: train loss 6.5888, val loss 6.7433
step 400: train loss 6.5067, val loss 6.4013
step 500: train loss 6.1970, val loss 6.1153
step 600: train loss 5.9715, val loss 6.0343
step 700: train loss 5.7357, val loss 5.7946
step 800: train loss 5.6244, val loss 5.7100
step 900: train loss 5.4724, val loss 5.5178
step 1000: train loss 5.4297, val loss 5.3089
step 1100: train loss 5.0414, val loss 5.2748
step 1200: train loss 4.9450, val loss 4.9600
step 1300: train loss 4.6848, val loss 4.8181
step 1400: train loss 4.5482, val loss 4.4525
step 1500: train loss 4.4756, val loss 4.3209
step 1600: train loss 4.2531, val loss 4.2776
step 1700: train loss 4.2488, val loss 4.2306
step 1800: train loss 4.0376, val loss 4.1076
step 1900: train loss 4.0463, val loss 4.0019
step 2000: train loss 3.9624, val loss 3.8664
step 2100: train loss 3.9590, val loss 3.7839
step 2200: train loss 3.9238, val loss 3.8385
step 2300: train loss 3.6838, val loss 3.7538
step 2400: train loss 3.7332, val loss 3.6593
step 2500: train loss 3.7454, val loss 3.5440
step 2600: train loss 3.5528, val loss 3.6207
step 2700: train loss 3.5916, val loss 3.6545
step 2800: train loss 3.7254, val loss 3.6136
step 2900: train loss 3.5898, val loss 3.3846
step 3000: train loss 3.5164, val loss 3.4608
step 3100: train loss 3.6373, val loss 3.5505
step 3200: train loss 3.5100, val loss 3.6281
step 3300: train loss 3.5623, val loss 3.5894
step 3400: train loss 3.4841, val loss 3.4290
step 3500: train loss 3.5908, val loss 3.4267
step 3600: train loss 3.4661, val loss 3.5482
step 3700: train loss 3.4633, val loss 3.4274
step 3800: train loss 3.4503, val loss 3.5384
step 3900: train loss 3.3948, val loss 3.3274
step 4000: train loss 3.4388, val loss 3.3746
step 4100: train loss 3.3921, val loss 3.2486
step 4200: train loss 3.4422, val loss 3.3624
step 4300: train loss 3.3533, val loss 3.2563
step 4400: train loss 3.3215, val loss 3.3935
step 4500: train loss 3.4373, val loss 3.2724
step 4600: train loss 3.2562, val loss 3.2819
step 4700: train loss 3.3209, val loss 3.2646
step 4800: train loss 3.1498, val loss 3.3252
step 4900: train loss 3.3318, val loss 3.3322
step 5000: train loss 3.1285, val loss 3.2495
step 5100: train loss 3.3448, val loss 3.1907
step 5200: train loss 3.3123, val loss 3.1915
step 5300: train loss 3.2482, val loss 3.3080
step 5400: train loss 3.0714, val loss 3.1940
step 5500: train loss 3.1294, val loss 3.2508
step 5600: train loss 3.2360, val loss 3.0566
step 5700: train loss 3.2703, val loss 3.1624
step 5800: train loss 3.3135, val loss 3.1183
step 5900: train loss 3.2142, val loss 3.1934
step 6000: train loss 3.2289, val loss 3.1825
step 6100: train loss 3.0920, val loss 3.1858
step 6200: train loss 3.2835, val loss 3.1578
step 6300: train loss 3.1277, val loss 3.1348
step 6400: train loss 3.0799, val loss 3.2929
step 6500: train loss 3.0791, val loss 3.2397
step 6600: train loss 3.2201, val loss 3.2587
step 6700: train loss 3.0092, val loss 3.2005
step 6800: train loss 3.0824, val loss 3.0970
step 6900: train loss 3.2339, val loss 3.1762
step 7000: train loss 3.1754, val loss 3.1966
step 7100: train loss 3.1720, val loss 3.1533
step 7200: train loss 3.1673, val loss 3.1003
step 7300: train loss 3.1047, val loss 3.1397
step 7400: train loss 3.1211, val loss 3.1447
step 7500: train loss 3.1564, val loss 3.0936
step 7600: train loss 3.0931, val loss 3.1315
step 7700: train loss 2.9800, val loss 3.2394
step 7800: train loss 3.1775, val loss 3.2620
step 7900: train loss 3.0847, val loss 3.0954
step 8000: train loss 3.0581, val loss 3.0713
step 8100: train loss 3.1880, val loss 3.0542
step 8200: train loss 3.1568, val loss 3.0514
step 8300: train loss 3.0128, val loss 3.1295
step 8400: train loss 3.2077, val loss 3.0505
step 8500: train loss 3.0058, val loss 3.1052
step 8600: train loss 3.0915, val loss 2.8884
step 8700: train loss 3.1190, val loss 3.0491
step 8800: train loss 2.9319, val loss 2.9831
step 8900: train loss 2.9605, val loss 3.0030
step 9000: train loss 3.0953, val loss 2.9161
step 9100: train loss 3.1344, val loss 3.0248
step 9200: train loss 2.9525, val loss 3.0419
step 9300: train loss 2.9842, val loss 2.9508
step 9400: train loss 3.1642, val loss 3.0025
step 9500: train loss 2.9276, val loss 3.0674
step 9600: train loss 3.0968, val loss 3.0211
step 9700: train loss 3.1166, val loss 3.0580
step 9800: train loss 2.9912, val loss 2.9596
step 9900: train loss 2.9809, val loss 2.9561
step 10000: train loss 3.0402, val loss 2.9424
step 10100: train loss 2.9393, val loss 2.9690
step 10200: train loss 3.0273, val loss 3.0578
step 10300: train loss 2.9466, val loss 3.1119
step 10400: train loss 2.9821, val loss 2.9871
step 10500: train loss 3.0022, val loss 3.0068
step 10600: train loss 2.9527, val loss 3.0174
step 10700: train loss 3.0224, val loss 3.0772
step 10800: train loss 2.9642, val loss 3.0270
step 10900: train loss 2.9446, val loss 2.9751
step 11000: train loss 2.9466, val loss 2.9945
step 11100: train loss 2.9304, val loss 2.9444
step 11200: train loss 2.9619, val loss 3.0315
step 11300: train loss 2.9358, val loss 2.9847
step 11400: train loss 3.0165, val loss 2.7416
step 11500: train loss 2.8405, val loss 3.0835
step 11600: train loss 3.0746, val loss 3.0534
step 11700: train loss 2.9898, val loss 2.9221
step 11800: train loss 2.8608, val loss 3.0250
step 11900: train loss 2.9855, val loss 2.9443
step 12000: train loss 2.9834, val loss 2.9962
step 12100: train loss 2.8355, val loss 3.0118
step 12200: train loss 2.9886, val loss 2.9714
step 12300: train loss 2.9457, val loss 2.9599
step 12400: train loss 2.8276, val loss 3.0673
step 12500: train loss 2.9246, val loss 2.9800
step 12600: train loss 3.0029, val loss 2.8929
step 12700: train loss 2.9373, val loss 2.9386
step 12800: train loss 2.9504, val loss 3.0079
step 12900: train loss 2.9921, val loss 2.9243
step 13000: train loss 2.9724, val loss 3.0502
step 13100: train loss 2.9558, val loss 2.8818
step 13200: train loss 2.9938, val loss 2.9503
step 13300: train loss 2.9165, val loss 3.0683
step 13400: train loss 2.9777, val loss 2.9374
step 13500: train loss 3.0141, val loss 2.9254
step 13600: train loss 2.8655, val loss 2.9531
step 13700: train loss 2.8848, val loss 3.0087
step 13800: train loss 2.9226, val loss 2.8738
step 13900: train loss 2.8910, val loss 2.9250
step 14000: train loss 2.9752, val loss 2.9531
step 14100: train loss 2.9497, val loss 2.9894
step 14200: train loss 2.9779, val loss 2.9911
step 14300: train loss 2.9423, val loss 2.7897
step 14400: train loss 3.0338, val loss 3.0204
step 14500: train loss 2.8680, val loss 2.8760
step 14600: train loss 3.0093, val loss 2.8034
step 14700: train loss 2.9222, val loss 2.8466
step 14800: train loss 2.7877, val loss 2.8845
step 14900: train loss 2.9715, val loss 3.0005
step 15000: train loss 3.0018, val loss 3.0310
step 15100: train loss 2.9654, val loss 2.9176
step 15200: train loss 2.9580, val loss 2.9125
step 15300: train loss 3.0046, val loss 2.8712
step 15400: train loss 3.0046, val loss 2.9361
step 15500: train loss 2.7949, val loss 2.8170
step 15600: train loss 2.9127, val loss 2.9011
step 15700: train loss 2.9440, val loss 2.9167
step 15800: train loss 2.8596, val loss 2.8605
step 15900: train loss 2.8704, val loss 2.8725
step 16000: train loss 2.8634, val loss 3.0975
step 16100: train loss 2.9963, val loss 2.7633
step 16200: train loss 2.9618, val loss 2.9352
step 16300: train loss 2.7306, val loss 2.9384
step 16400: train loss 2.9731, val loss 2.9716
step 16500: train loss 2.8599, val loss 3.0492
step 16600: train loss 2.8712, val loss 2.9475
step 16700: train loss 2.9567, val loss 2.8846
step 16800: train loss 2.8565, val loss 3.0182
step 16900: train loss 2.8318, val loss 3.0222
step 17000: train loss 3.0119, val loss 2.8964
step 17100: train loss 2.8578, val loss 2.7679
step 17200: train loss 2.8943, val loss 2.9294
step 17300: train loss 2.8835, val loss 2.8658
step 17400: train loss 2.9415, val loss 2.9057
step 17500: train loss 2.8730, val loss 2.7631
step 17600: train loss 2.7918, val loss 2.7859
step 17700: train loss 2.9455, val loss 2.9624
step 17800: train loss 2.7874, val loss 2.8241
step 17900: train loss 2.9045, val loss 2.8924
step 18000: train loss 2.6872, val loss 2.9278
step 18100: train loss 2.9407, val loss 2.9969
step 18200: train loss 3.0288, val loss 2.9354
step 18300: train loss 2.8862, val loss 2.8489
step 18400: train loss 2.8283, val loss 2.8086
step 18500: train loss 2.8491, val loss 2.8545
step 18600: train loss 2.8140, val loss 2.9770
step 18700: train loss 2.9287, val loss 2.8787
step 18800: train loss 3.0498, val loss 2.7461
step 18900: train loss 2.9223, val loss 2.8665
step 19000: train loss 2.9418, val loss 2.9149
step 19100: train loss 2.6789, val loss 2.9049
step 19200: train loss 2.8974, val loss 2.8892
step 19300: train loss 2.8448, val loss 2.9557
step 19400: train loss 2.8466, val loss 2.9635
step 19500: train loss 2.8872, val loss 2.8272
step 19600: train loss 2.7967, val loss 3.0509
step 19700: train loss 2.8516, val loss 2.7520
step 19800: train loss 3.0064, val loss 2.8897
step 19900: train loss 2.8801, val loss 2.9297
step 20000: train loss 2.8270, val loss 2.9379
step 20100: train loss 2.8988, val loss 2.8314
step 20200: train loss 2.6983, val loss 2.9195
step 20300: train loss 2.8345, val loss 2.8455
step 20400: train loss 2.7777, val loss 2.9164
step 20500: train loss 2.9010, val loss 2.8442
step 20600: train loss 2.8983, val loss 2.8687
step 20700: train loss 2.7852, val loss 2.8359
step 20800: train loss 2.6776, val loss 2.8802
step 20900: train loss 2.7957, val loss 2.9362
step 21000: train loss 2.8322, val loss 2.8738
step 21100: train loss 2.8448, val loss 2.8849
step 21200: train loss 2.9563, val loss 3.0302
step 21300: train loss 2.9416, val loss 2.7907
step 21400: train loss 2.7988, val loss 2.8956
step 21500: train loss 2.8556, val loss 2.8462
step 21600: train loss 2.8326, val loss 2.8084
step 21700: train loss 2.8916, val loss 2.9479
step 21800: train loss 2.6759, val loss 2.8316
step 21900: train loss 2.7605, val loss 2.8726
step 22000: train loss 2.8973, val loss 2.7646
step 22100: train loss 2.7950, val loss 2.8894
step 22200: train loss 2.8879, val loss 2.8456
step 22300: train loss 2.8610, val loss 2.7752
step 22400: train loss 2.8503, val loss 2.7268
step 22500: train loss 2.7624, val loss 2.8039
step 22600: train loss 2.7896, val loss 2.9268
step 22700: train loss 2.9371, val loss 2.8718
step 22800: train loss 2.9747, val loss 2.7481
step 22900: train loss 2.8736, val loss 2.8353
step 23000: train loss 2.8346, val loss 2.7387
step 23100: train loss 2.8266, val loss 2.9682
step 23200: train loss 2.8811, val loss 2.8276
step 23300: train loss 2.8492, val loss 2.7715
step 23400: train loss 2.9512, val loss 2.8733
step 23500: train loss 2.8948, val loss 2.8610
step 23600: train loss 2.9883, val loss 2.8248
step 23700: train loss 2.7142, val loss 2.9138
step 23800: train loss 2.7128, val loss 2.8417
step 23900: train loss 3.0065, val loss 2.8004
step 24000: train loss 2.8458, val loss 2.7381
step 24100: train loss 2.7890, val loss 2.8468
step 24200: train loss 2.9545, val loss 2.7933
step 24300: train loss 2.8738, val loss 2.9072
step 24400: train loss 2.8440, val loss 2.7552
step 24500: train loss 2.8107, val loss 2.7479
step 24600: train loss 2.8175, val loss 2.8063
step 24700: train loss 2.9319, val loss 2.8145
step 24800: train loss 2.8535, val loss 2.8273
step 24900: train loss 2.7535, val loss 2.9339
step 25000: train loss 2.7998, val loss 2.8346
step 25100: train loss 2.8028, val loss 2.7334
step 25200: train loss 3.0190, val loss 2.7507
step 25300: train loss 2.9597, val loss 2.7477
step 25400: train loss 3.0206, val loss 2.8678
step 25500: train loss 2.8184, val loss 2.8603
step 25600: train loss 2.8984, val loss 2.7563
step 25700: train loss 2.7563, val loss 2.8466
step 25800: train loss 2.8035, val loss 2.8461
step 25900: train loss 2.8879, val loss 3.0032
step 26000: train loss 2.8628, val loss 2.8316
step 26100: train loss 2.8199, val loss 2.8175
step 26200: train loss 2.8381, val loss 2.7543
step 26300: train loss 2.7932, val loss 2.7437
step 26400: train loss 2.7451, val loss 2.8037
step 26500: train loss 2.8398, val loss 2.7688
step 26600: train loss 2.8197, val loss 2.6988
step 26700: train loss 2.8181, val loss 2.8315
step 26800: train loss 2.7584, val loss 2.6994
step 26900: train loss 2.7917, val loss 2.7537
step 27000: train loss 2.6462, val loss 2.7579
step 27100: train loss 2.8499, val loss 2.7959
step 27200: train loss 2.8724, val loss 2.8232
step 27300: train loss 2.7593, val loss 2.8665
step 27400: train loss 2.8588, val loss 2.9407
step 27500: train loss 2.7949, val loss 2.6853
step 27600: train loss 2.7752, val loss 2.8110
step 27700: train loss 2.9131, val loss 2.9227
step 27800: train loss 2.7813, val loss 2.7983
step 27900: train loss 2.7238, val loss 2.9116
step 28000: train loss 2.6029, val loss 2.6874
step 28100: train loss 2.7992, val loss 2.8840
step 28200: train loss 2.8726, val loss 2.7155
step 28300: train loss 2.8896, val loss 2.7741
step 28400: train loss 2.8420, val loss 2.7712
step 28500: train loss 2.7476, val loss 2.8297
step 28600: train loss 2.8152, val loss 2.8123
step 28700: train loss 2.8929, val loss 2.8723
step 28800: train loss 2.8116, val loss 2.8850
step 28900: train loss 2.8026, val loss 2.8580
step 29000: train loss 2.6830, val loss 2.7671
step 29100: train loss 2.7769, val loss 2.8252
step 29200: train loss 2.8928, val loss 2.7823
step 29300: train loss 2.7859, val loss 2.8006
step 29400: train loss 2.8484, val loss 2.8032
step 29500: train loss 2.8194, val loss 2.7389
step 29600: train loss 2.8775, val loss 2.8360
step 29700: train loss 2.7912, val loss 2.7585
step 29800: train loss 2.8499, val loss 2.8210
step 29900: train loss 2.9061, val loss 2.6846
step 30000: train loss 2.7540, val loss 2.8391
step 30100: train loss 2.8292, val loss 2.8358
step 30200: train loss 2.5902, val loss 2.8730
step 30300: train loss 2.8947, val loss 2.8475
step 30400: train loss 2.8898, val loss 2.7538
step 30500: train loss 2.8530, val loss 2.8979
step 30600: train loss 2.8079, val loss 2.8202
step 30700: train loss 2.6925, val loss 2.7329
step 30800: train loss 2.7408, val loss 2.7117
step 30900: train loss 2.7052, val loss 2.8759
step 31000: train loss 2.7108, val loss 2.6607
step 31100: train loss 2.8145, val loss 2.7848
step 31200: train loss 2.8752, val loss 2.8979
step 31300: train loss 2.6798, val loss 2.8022
step 31400: train loss 2.9750, val loss 2.6888
step 31500: train loss 2.6494, val loss 2.8619
step 31600: train loss 2.8156, val loss 2.8232
step 31700: train loss 2.7252, val loss 2.7410
step 31800: train loss 2.6924, val loss 2.7541
step 31900: train loss 2.8176, val loss 2.9296
step 32000: train loss 2.8469, val loss 2.8549
step 32100: train loss 2.8750, val loss 2.9075
step 32200: train loss 2.8387, val loss 2.7277
step 32300: train loss 2.7656, val loss 2.7939
step 32400: train loss 2.6632, val loss 2.7976
step 32500: train loss 2.7674, val loss 2.7517
step 32600: train loss 2.8411, val loss 2.7297
step 32700: train loss 2.8641, val loss 2.7247
step 32800: train loss 2.6665, val loss 2.7943
step 32900: train loss 2.8883, val loss 2.7321
step 33000: train loss 2.8978, val loss 2.7700
step 33100: train loss 2.7607, val loss 2.6791
step 33200: train loss 2.7516, val loss 2.8169
step 33300: train loss 2.8498, val loss 2.6707
step 33400: train loss 2.8504, val loss 2.9119
step 33500: train loss 2.7596, val loss 2.9151
step 33600: train loss 2.9359, val loss 2.9191
step 33700: train loss 2.7263, val loss 2.8193
step 33800: train loss 2.8230, val loss 2.8280
step 33900: train loss 2.8378, val loss 2.7144
step 34000: train loss 2.7823, val loss 2.8035
step 34100: train loss 2.7779, val loss 2.8396
step 34200: train loss 2.8372, val loss 2.8954
step 34300: train loss 2.8226, val loss 2.6627
step 34400: train loss 2.8642, val loss 2.8739
step 34500: train loss 2.7282, val loss 2.6650
step 34600: train loss 2.7650, val loss 2.7226
step 34700: train loss 2.7236, val loss 2.6892
step 34800: train loss 2.7721, val loss 2.9387
step 34900: train loss 2.7465, val loss 2.7535
step 35000: train loss 2.7129, val loss 2.7230
step 35100: train loss 2.7448, val loss 2.7261
step 35200: train loss 2.9534, val loss 2.7127
step 35300: train loss 2.6951, val loss 2.8034
step 35400: train loss 2.8718, val loss 2.7998
step 35500: train loss 2.7152, val loss 2.7406
step 35600: train loss 2.8066, val loss 2.7981
step 35700: train loss 2.8076, val loss 2.7320
step 35800: train loss 2.9054, val loss 2.7541
step 35900: train loss 2.8348, val loss 2.6628
step 36000: train loss 2.7294, val loss 2.7758
step 36100: train loss 2.8457, val loss 2.8148
step 36200: train loss 2.8626, val loss 2.8337
step 36300: train loss 2.7538, val loss 2.8294
step 36400: train loss 2.5631, val loss 2.7590
step 36500: train loss 2.8542, val loss 2.7585
step 36600: train loss 2.7567, val loss 2.8492
step 36700: train loss 2.8481, val loss 2.7103
step 36800: train loss 2.8135, val loss 2.7256
step 36900: train loss 2.6976, val loss 2.6366
step 37000: train loss 2.8643, val loss 2.7390
step 37100: train loss 2.7979, val loss 2.6219
step 37200: train loss 2.7855, val loss 2.8387
step 37300: train loss 2.8332, val loss 2.8489
step 37400: train loss 2.6962, val loss 2.9051
step 37500: train loss 2.7735, val loss 2.8329
step 37600: train loss 2.8305, val loss 2.7830
step 37700: train loss 2.7930, val loss 2.7070
step 37800: train loss 2.7834, val loss 2.7718
step 37900: train loss 2.9645, val loss 2.7499
step 38000: train loss 2.6900, val loss 2.8002
step 38100: train loss 2.7324, val loss 2.8638
step 38200: train loss 2.6724, val loss 2.7601
step 38300: train loss 2.8456, val loss 2.7571
step 38400: train loss 2.7720, val loss 2.8515
step 38500: train loss 2.7960, val loss 2.8611
step 38600: train loss 2.7673, val loss 2.8128
step 38700: train loss 2.8076, val loss 2.8023
step 38800: train loss 2.8252, val loss 2.7761
step 38900: train loss 2.6206, val loss 2.8931
step 39000: train loss 2.7810, val loss 2.6949
step 39100: train loss 2.8880, val loss 2.6300
step 39200: train loss 2.7765, val loss 2.8009
step 39300: train loss 2.8100, val loss 2.9730
step 39400: train loss 2.6373, val loss 2.7640
step 39500: train loss 2.7533, val loss 2.7617
step 39600: train loss 2.8452, val loss 2.8122
step 39700: train loss 2.7849, val loss 2.8067
step 39800: train loss 2.7890, val loss 2.7672
step 39900: train loss 2.7164, val loss 2.6389
step 40000: train loss 2.8189, val loss 2.7924
step 40100: train loss 2.9345, val loss 2.9801
step 40200: train loss 2.9074, val loss 2.7438
step 40300: train loss 2.8472, val loss 2.7186
step 40400: train loss 2.5992, val loss 2.7979
step 40500: train loss 2.8513, val loss 2.7371
step 40600: train loss 2.6937, val loss 2.7330
step 40700: train loss 2.7758, val loss 2.7263
step 40800: train loss 2.7242, val loss 2.8467
step 40900: train loss 2.7578, val loss 2.9498
step 41000: train loss 2.7946, val loss 2.7555
step 41100: train loss 2.8186, val loss 2.7127
step 41200: train loss 2.7768, val loss 2.7014
step 41300: train loss 2.8141, val loss 2.7691
step 41400: train loss 2.7520, val loss 2.6608
step 41500: train loss 2.7952, val loss 2.8809
step 41600: train loss 2.7405, val loss 2.8320
step 41700: train loss 2.7319, val loss 2.6906
step 41800: train loss 2.7042, val loss 2.8355
step 41900: train loss 2.6836, val loss 2.7683
step 42000: train loss 2.8002, val loss 2.7833
step 42100: train loss 2.9250, val loss 2.7595
step 42200: train loss 2.6998, val loss 2.8130
step 42300: train loss 2.6696, val loss 2.7072
step 42400: train loss 2.6971, val loss 2.7896
step 42500: train loss 2.7793, val loss 2.8207
step 42600: train loss 2.7416, val loss 2.6938
step 42700: train loss 2.5605, val loss 2.8192
step 42800: train loss 2.8029, val loss 2.6802
step 42900: train loss 2.8314, val loss 2.7868
step 43000: train loss 2.7065, val loss 2.5963
step 43100: train loss 2.8072, val loss 2.7424
step 43200: train loss 2.6797, val loss 2.7166
step 43300: train loss 2.6579, val loss 2.7534
step 43400: train loss 2.8590, val loss 2.8177
step 43500: train loss 2.7240, val loss 2.8758
step 43600: train loss 2.8024, val loss 2.7224
step 43700: train loss 2.8347, val loss 2.7132
step 43800: train loss 2.8055, val loss 2.6904
step 43900: train loss 2.7516, val loss 2.7553
step 44000: train loss 2.7896, val loss 2.7832
step 44100: train loss 2.8472, val loss 2.7570
step 44200: train loss 2.6282, val loss 2.6458
step 44300: train loss 2.7891, val loss 2.6897
step 44400: train loss 2.8262, val loss 2.7445
step 44500: train loss 2.7764, val loss 2.7653
step 44600: train loss 2.8129, val loss 2.7805
step 44700: train loss 2.8649, val loss 2.8448
step 44800: train loss 2.6760, val loss 2.7656
step 44900: train loss 2.7011, val loss 2.7474
step 45000: train loss 2.7879, val loss 2.6947
step 45100: train loss 2.9080, val loss 2.7905
step 45200: train loss 2.7495, val loss 2.7055
step 45300: train loss 2.6580, val loss 2.8663
step 45400: train loss 2.8094, val loss 2.8226
step 45500: train loss 2.7298, val loss 2.8190
step 45600: train loss 2.7434, val loss 2.6559
step 45700: train loss 2.8474, val loss 2.7221
step 45800: train loss 2.8787, val loss 2.8628
step 45900: train loss 2.7202, val loss 2.6398
step 46000: train loss 2.8298, val loss 2.8447
step 46100: train loss 2.6955, val loss 2.8386
step 46200: train loss 2.7849, val loss 2.6825
step 46300: train loss 2.8191, val loss 2.7793
step 46400: train loss 2.7815, val loss 2.7403
step 46500: train loss 2.8007, val loss 2.7719
step 46600: train loss 2.6661, val loss 2.8360
step 46700: train loss 2.8279, val loss 2.7529
step 46800: train loss 2.8326, val loss 2.7180
step 46900: train loss 2.7323, val loss 2.8723
step 47000: train loss 2.7846, val loss 2.7797
step 47100: train loss 2.7533, val loss 2.7694
step 47200: train loss 2.8556, val loss 2.7418
step 47300: train loss 2.7036, val loss 2.7377
step 47400: train loss 2.7860, val loss 2.8879
step 47500: train loss 2.7223, val loss 2.8096
step 47600: train loss 2.7241, val loss 2.9097
step 47700: train loss 2.6891, val loss 2.8653
step 47800: train loss 2.7393, val loss 2.7434
step 47900: train loss 2.8288, val loss 2.6818
step 48000: train loss 2.7092, val loss 2.7769