schnell committed
Commit 4fddb53 • 1 Parent(s): 78dde5a

Training in progress, epoch 14

last-checkpoint/{global_step903149 → global_step972622}/mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:4f130e94f49a111b18b9fa24fd8e1d09518cca6349e580cd51b31080ee90b03a
+ oid sha256:df38aee071cbeb8d3e380016289db309b3afa69b7c43e039e1d448aecc738c8b
  size 59134503
last-checkpoint/{global_step903149 → global_step972622}/zero_pp_rank_0_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:c7dcb109a59b721f71bdebd08bdb8118f430673c82f5d5c8333a254c0526a599
+ oid sha256:f7f3f0b4290b6381cf9f64bdc67039fefd40c32d2a7986100f87d851322b6cc6
  size 118216675
last-checkpoint/{global_step903149 → global_step972622}/zero_pp_rank_1_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:98bc55f07775e8e73380da7a9feea3a2dfd0524f7fc05a064126c979cda731d9
+ oid sha256:91fd6e85ca7a623466427062b0badac4751e7e96ef0fa49608c7d8abbe2c5af6
  size 118217955
last-checkpoint/{global_step903149 → global_step972622}/zero_pp_rank_2_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:04ee258013e69464dedaa2facc164dd3aa883ad48c7506f848e8e43726027f0c
+ oid sha256:6c38715a284f35970dd3590e6423d74d60384ffc12b98fc8b765b6b2f9dff289
  size 118221091
last-checkpoint/latest CHANGED
@@ -1 +1 @@
- global_step903149
+ global_step972622
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:f5efd06d28432fcf48678b05370f58b60157beb8e38eece62dc63d44fe7bb723
+ oid sha256:0609024bd7558e0cbefcf1090fd067639bf67ccff1da2161c89fb9d832bff828
  size 59121639
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:17664bad50d3ae48d44436c290150c950f1162167788912333f684fd107549e4
+ oid sha256:165ad1790ce26d794a6a6aaa45f5d08d9a67e8e8735dad91ec3ea59c2aaa6209
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:cbcaeca295ceeb2ef00152bd2622eb1ee5ecd1540534f31d3c67e43847d19d73
+ oid sha256:ce315e333eb828edf97b1fd402017d952c44701e29780c1a4894cc8af16c5cc4
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:7f3c6252e3a489d60d6120d0b32ccce9df17ae3bea7dda66b24293cd5d4666f6
+ oid sha256:8b472555ddc8c22ae474c3823efd68de5602ce51d1ba08813ee442063c1f2048
  size 14503
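Every binary file in this commit (model states, optimizer states, RNG states, and the weights below) is tracked with Git LFS, so the diff only records a new `oid sha256` and `size` in each pointer file. As a rough illustration of what those pointers encode, here is a minimal Python sketch that parses a pointer and checks a downloaded blob against it; the helper names and paths are illustrative and not part of this repository.

```python
import hashlib
from pathlib import Path

def parse_lfs_pointer(text: str) -> dict:
    """Parse a git-lfs pointer file: 'version ...', 'oid sha256:<hash>', 'size <bytes>'."""
    fields = dict(line.split(" ", 1) for line in text.strip().splitlines())
    return {"oid": fields["oid"].split(":", 1)[1], "size": int(fields["size"])}

def blob_matches_pointer(pointer_path: str, blob_path: str) -> bool:
    """Check a downloaded blob against the sha256 and size recorded in its pointer."""
    meta = parse_lfs_pointer(Path(pointer_path).read_text())
    data = Path(blob_path).read_bytes()
    return len(data) == meta["size"] and hashlib.sha256(data).hexdigest() == meta["oid"]

# Hypothetical usage against one of the files in this commit:
# blob_matches_pointer("last-checkpoint/rng_state_0.pth", "/tmp/rng_state_0.pth")
```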
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 13.0,
- "global_step": 903149,
+ "epoch": 14.0,
+ "global_step": 972622,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -10959,11 +10959,854 @@
  "eval_samples_per_second": 723.314,
  "eval_steps_per_second": 30.138,
  "step": 903149
10962
+ },
10963
+ {
10964
+ "epoch": 13.01,
10965
+ "learning_rate": 7.366016024592505e-06,
10966
+ "loss": 1.2593,
10967
+ "step": 903500
10968
+ },
10969
+ {
10970
+ "epoch": 13.01,
10971
+ "learning_rate": 7.314296989806781e-06,
10972
+ "loss": 1.2601,
10973
+ "step": 904000
10974
+ },
10975
+ {
10976
+ "epoch": 13.02,
10977
+ "learning_rate": 7.2623702480540456e-06,
10978
+ "loss": 1.2573,
10979
+ "step": 904500
10980
+ },
10981
+ {
10982
+ "epoch": 13.03,
10983
+ "learning_rate": 7.210651213268322e-06,
10984
+ "loss": 1.2612,
10985
+ "step": 905000
10986
+ },
10987
+ {
10988
+ "epoch": 13.03,
10989
+ "learning_rate": 7.1587244715155865e-06,
10990
+ "loss": 1.2607,
10991
+ "step": 905500
10992
+ },
10993
+ {
10994
+ "epoch": 13.04,
10995
+ "learning_rate": 7.107005436729862e-06,
10996
+ "loss": 1.2601,
10997
+ "step": 906000
10998
+ },
10999
+ {
11000
+ "epoch": 13.05,
11001
+ "learning_rate": 7.055078694977127e-06,
11002
+ "loss": 1.2582,
11003
+ "step": 906500
11004
+ },
11005
+ {
11006
+ "epoch": 13.06,
11007
+ "learning_rate": 7.003359660191403e-06,
11008
+ "loss": 1.2579,
11009
+ "step": 907000
11010
+ },
11011
+ {
11012
+ "epoch": 13.06,
11013
+ "learning_rate": 6.951432918438668e-06,
11014
+ "loss": 1.2565,
11015
+ "step": 907500
11016
+ },
11017
+ {
11018
+ "epoch": 13.07,
11019
+ "learning_rate": 6.899713883652943e-06,
11020
+ "loss": 1.2621,
11021
+ "step": 908000
11022
+ },
11023
+ {
11024
+ "epoch": 13.08,
11025
+ "learning_rate": 6.8477871419002086e-06,
11026
+ "loss": 1.2621,
11027
+ "step": 908500
11028
+ },
11029
+ {
11030
+ "epoch": 13.08,
11031
+ "learning_rate": 6.796068107114484e-06,
11032
+ "loss": 1.2579,
11033
+ "step": 909000
11034
+ },
11035
+ {
11036
+ "epoch": 13.09,
11037
+ "learning_rate": 6.744141365361748e-06,
11038
+ "loss": 1.2605,
11039
+ "step": 909500
11040
+ },
11041
+ {
11042
+ "epoch": 13.1,
11043
+ "learning_rate": 6.692422330576023e-06,
11044
+ "loss": 1.2613,
11045
+ "step": 910000
11046
+ },
11047
+ {
11048
+ "epoch": 13.11,
11049
+ "learning_rate": 6.640495588823288e-06,
11050
+ "loss": 1.2581,
11051
+ "step": 910500
11052
+ },
11053
+ {
11054
+ "epoch": 13.11,
11055
+ "learning_rate": 6.588776554037564e-06,
11056
+ "loss": 1.2579,
11057
+ "step": 911000
11058
+ },
11059
+ {
11060
+ "epoch": 13.12,
11061
+ "learning_rate": 6.536849812284829e-06,
11062
+ "loss": 1.2585,
11063
+ "step": 911500
11064
+ },
11065
+ {
11066
+ "epoch": 13.13,
11067
+ "learning_rate": 6.485130777499104e-06,
11068
+ "loss": 1.2597,
11069
+ "step": 912000
11070
+ },
11071
+ {
11072
+ "epoch": 13.13,
11073
+ "learning_rate": 6.433204035746369e-06,
11074
+ "loss": 1.2614,
11075
+ "step": 912500
11076
+ },
11077
+ {
11078
+ "epoch": 13.14,
11079
+ "learning_rate": 6.381485000960645e-06,
11080
+ "loss": 1.2587,
11081
+ "step": 913000
11082
+ },
11083
+ {
11084
+ "epoch": 13.15,
11085
+ "learning_rate": 6.32955825920791e-06,
11086
+ "loss": 1.2594,
11087
+ "step": 913500
11088
+ },
11089
+ {
11090
+ "epoch": 13.16,
11091
+ "learning_rate": 6.277839224422186e-06,
11092
+ "loss": 1.2591,
11093
+ "step": 914000
11094
+ },
11095
+ {
11096
+ "epoch": 13.16,
11097
+ "learning_rate": 6.22591248266945e-06,
11098
+ "loss": 1.2596,
11099
+ "step": 914500
11100
+ },
11101
+ {
11102
+ "epoch": 13.17,
11103
+ "learning_rate": 6.174193447883726e-06,
11104
+ "loss": 1.2591,
11105
+ "step": 915000
11106
+ },
11107
+ {
11108
+ "epoch": 13.18,
11109
+ "learning_rate": 6.12226670613099e-06,
11110
+ "loss": 1.2597,
11111
+ "step": 915500
11112
+ },
11113
+ {
11114
+ "epoch": 13.18,
11115
+ "learning_rate": 6.070547671345266e-06,
11116
+ "loss": 1.2598,
11117
+ "step": 916000
11118
+ },
11119
+ {
11120
+ "epoch": 13.19,
11121
+ "learning_rate": 6.018620929592531e-06,
11122
+ "loss": 1.259,
11123
+ "step": 916500
11124
+ },
11125
+ {
11126
+ "epoch": 13.2,
11127
+ "learning_rate": 5.966901894806807e-06,
11128
+ "loss": 1.2574,
11129
+ "step": 917000
11130
+ },
11131
+ {
11132
+ "epoch": 13.21,
11133
+ "learning_rate": 5.914975153054072e-06,
11134
+ "loss": 1.2589,
11135
+ "step": 917500
11136
+ },
11137
+ {
11138
+ "epoch": 13.21,
11139
+ "learning_rate": 5.863256118268347e-06,
11140
+ "loss": 1.2576,
11141
+ "step": 918000
11142
+ },
11143
+ {
11144
+ "epoch": 13.22,
11145
+ "learning_rate": 5.811329376515611e-06,
11146
+ "loss": 1.2608,
11147
+ "step": 918500
11148
+ },
11149
+ {
11150
+ "epoch": 13.23,
11151
+ "learning_rate": 5.7596103417298875e-06,
11152
+ "loss": 1.2597,
11153
+ "step": 919000
11154
+ },
11155
+ {
11156
+ "epoch": 13.24,
11157
+ "learning_rate": 5.707683599977152e-06,
11158
+ "loss": 1.2563,
11159
+ "step": 919500
11160
+ },
11161
+ {
11162
+ "epoch": 13.24,
11163
+ "learning_rate": 5.655964565191428e-06,
11164
+ "loss": 1.2604,
11165
+ "step": 920000
11166
+ },
11167
+ {
11168
+ "epoch": 13.25,
11169
+ "learning_rate": 5.604037823438693e-06,
11170
+ "loss": 1.2617,
11171
+ "step": 920500
11172
+ },
11173
+ {
11174
+ "epoch": 13.26,
11175
+ "learning_rate": 5.5523187886529685e-06,
11176
+ "loss": 1.2591,
11177
+ "step": 921000
11178
+ },
11179
+ {
11180
+ "epoch": 13.26,
11181
+ "learning_rate": 5.5003920469002334e-06,
11182
+ "loss": 1.2585,
11183
+ "step": 921500
11184
+ },
11185
+ {
11186
+ "epoch": 13.27,
11187
+ "learning_rate": 5.4486730121145095e-06,
11188
+ "loss": 1.2581,
11189
+ "step": 922000
11190
+ },
11191
+ {
11192
+ "epoch": 13.28,
11193
+ "learning_rate": 5.3967462703617736e-06,
11194
+ "loss": 1.257,
11195
+ "step": 922500
11196
+ },
11197
+ {
11198
+ "epoch": 13.29,
11199
+ "learning_rate": 5.34502723557605e-06,
11200
+ "loss": 1.2585,
11201
+ "step": 923000
11202
+ },
11203
+ {
11204
+ "epoch": 13.29,
11205
+ "learning_rate": 5.2931004938233145e-06,
11206
+ "loss": 1.2548,
11207
+ "step": 923500
11208
+ },
11209
+ {
11210
+ "epoch": 13.3,
11211
+ "learning_rate": 5.24138145903759e-06,
11212
+ "loss": 1.2603,
11213
+ "step": 924000
11214
+ },
11215
+ {
11216
+ "epoch": 13.31,
11217
+ "learning_rate": 5.189454717284855e-06,
11218
+ "loss": 1.2582,
11219
+ "step": 924500
11220
+ },
11221
+ {
11222
+ "epoch": 13.31,
11223
+ "learning_rate": 5.137735682499131e-06,
11224
+ "loss": 1.2628,
11225
+ "step": 925000
11226
+ },
11227
+ {
11228
+ "epoch": 13.32,
11229
+ "learning_rate": 5.085808940746395e-06,
11230
+ "loss": 1.2576,
11231
+ "step": 925500
11232
+ },
11233
+ {
11234
+ "epoch": 13.33,
11235
+ "learning_rate": 5.034089905960672e-06,
11236
+ "loss": 1.261,
11237
+ "step": 926000
11238
+ },
11239
+ {
11240
+ "epoch": 13.34,
11241
+ "learning_rate": 4.982163164207936e-06,
11242
+ "loss": 1.2584,
11243
+ "step": 926500
11244
+ },
11245
+ {
11246
+ "epoch": 13.34,
11247
+ "learning_rate": 4.930444129422211e-06,
11248
+ "loss": 1.2568,
11249
+ "step": 927000
11250
+ },
11251
+ {
11252
+ "epoch": 13.35,
11253
+ "learning_rate": 4.878517387669476e-06,
11254
+ "loss": 1.2618,
11255
+ "step": 927500
11256
+ },
11257
+ {
11258
+ "epoch": 13.36,
11259
+ "learning_rate": 4.826798352883752e-06,
11260
+ "loss": 1.2588,
11261
+ "step": 928000
11262
+ },
11263
+ {
11264
+ "epoch": 13.36,
11265
+ "learning_rate": 4.774871611131017e-06,
11266
+ "loss": 1.2562,
11267
+ "step": 928500
11268
+ },
11269
+ {
11270
+ "epoch": 13.37,
11271
+ "learning_rate": 4.723152576345293e-06,
11272
+ "loss": 1.2564,
11273
+ "step": 929000
11274
+ },
11275
+ {
11276
+ "epoch": 13.38,
11277
+ "learning_rate": 4.671225834592557e-06,
11278
+ "loss": 1.2556,
11279
+ "step": 929500
11280
+ },
11281
+ {
11282
+ "epoch": 13.39,
11283
+ "learning_rate": 4.619506799806832e-06,
11284
+ "loss": 1.2568,
11285
+ "step": 930000
11286
+ },
11287
+ {
11288
+ "epoch": 13.39,
11289
+ "learning_rate": 4.567580058054097e-06,
11290
+ "loss": 1.2603,
11291
+ "step": 930500
11292
+ },
11293
+ {
11294
+ "epoch": 13.4,
11295
+ "learning_rate": 4.515861023268373e-06,
11296
+ "loss": 1.2581,
11297
+ "step": 931000
11298
+ },
11299
+ {
11300
+ "epoch": 13.41,
11301
+ "learning_rate": 4.463934281515638e-06,
11302
+ "loss": 1.2608,
11303
+ "step": 931500
11304
+ },
11305
+ {
11306
+ "epoch": 13.42,
11307
+ "learning_rate": 4.412215246729914e-06,
11308
+ "loss": 1.2614,
11309
+ "step": 932000
11310
+ },
11311
+ {
11312
+ "epoch": 13.42,
11313
+ "learning_rate": 4.360288504977179e-06,
11314
+ "loss": 1.2593,
11315
+ "step": 932500
11316
+ },
11317
+ {
11318
+ "epoch": 13.43,
11319
+ "learning_rate": 4.308569470191454e-06,
11320
+ "loss": 1.2553,
11321
+ "step": 933000
11322
+ },
11323
+ {
11324
+ "epoch": 13.44,
11325
+ "learning_rate": 4.256642728438719e-06,
11326
+ "loss": 1.2564,
11327
+ "step": 933500
11328
+ },
11329
+ {
11330
+ "epoch": 13.44,
11331
+ "learning_rate": 4.204923693652994e-06,
11332
+ "loss": 1.2574,
11333
+ "step": 934000
11334
+ },
11335
+ {
11336
+ "epoch": 13.45,
11337
+ "learning_rate": 4.152996951900259e-06,
11338
+ "loss": 1.2551,
11339
+ "step": 934500
11340
+ },
11341
+ {
11342
+ "epoch": 13.46,
11343
+ "learning_rate": 4.101277917114535e-06,
11344
+ "loss": 1.2609,
11345
+ "step": 935000
11346
+ },
11347
+ {
11348
+ "epoch": 13.47,
11349
+ "learning_rate": 4.0493511753618e-06,
11350
+ "loss": 1.2571,
11351
+ "step": 935500
11352
+ },
11353
+ {
11354
+ "epoch": 13.47,
11355
+ "learning_rate": 3.997632140576075e-06,
11356
+ "loss": 1.259,
11357
+ "step": 936000
11358
+ },
11359
+ {
11360
+ "epoch": 13.48,
11361
+ "learning_rate": 3.94570539882334e-06,
11362
+ "loss": 1.2593,
11363
+ "step": 936500
11364
+ },
11365
+ {
11366
+ "epoch": 13.49,
11367
+ "learning_rate": 3.893986364037616e-06,
11368
+ "loss": 1.2555,
11369
+ "step": 937000
11370
+ },
11371
+ {
11372
+ "epoch": 13.49,
11373
+ "learning_rate": 3.84205962228488e-06,
11374
+ "loss": 1.2592,
11375
+ "step": 937500
11376
+ },
11377
+ {
11378
+ "epoch": 13.5,
11379
+ "learning_rate": 3.790340587499156e-06,
11380
+ "loss": 1.2553,
11381
+ "step": 938000
11382
+ },
11383
+ {
11384
+ "epoch": 13.51,
11385
+ "learning_rate": 3.7384138457464213e-06,
11386
+ "loss": 1.259,
11387
+ "step": 938500
11388
+ },
11389
+ {
11390
+ "epoch": 13.52,
11391
+ "learning_rate": 3.686694810960697e-06,
11392
+ "loss": 1.255,
11393
+ "step": 939000
11394
+ },
11395
+ {
11396
+ "epoch": 13.52,
11397
+ "learning_rate": 3.634768069207962e-06,
11398
+ "loss": 1.2582,
11399
+ "step": 939500
11400
+ },
11401
+ {
11402
+ "epoch": 13.53,
11403
+ "learning_rate": 3.5830490344222375e-06,
11404
+ "loss": 1.2585,
11405
+ "step": 940000
11406
+ },
11407
+ {
11408
+ "epoch": 13.54,
11409
+ "learning_rate": 3.5311222926695015e-06,
11410
+ "loss": 1.2596,
11411
+ "step": 940500
11412
+ },
11413
+ {
11414
+ "epoch": 13.54,
11415
+ "learning_rate": 3.479403257883778e-06,
11416
+ "loss": 1.2637,
11417
+ "step": 941000
11418
+ },
11419
+ {
11420
+ "epoch": 13.55,
11421
+ "learning_rate": 3.4274765161310425e-06,
11422
+ "loss": 1.2567,
11423
+ "step": 941500
11424
+ },
11425
+ {
11426
+ "epoch": 13.56,
11427
+ "learning_rate": 3.375757481345318e-06,
11428
+ "loss": 1.2574,
11429
+ "step": 942000
11430
+ },
11431
+ {
11432
+ "epoch": 13.57,
11433
+ "learning_rate": 3.323830739592583e-06,
11434
+ "loss": 1.2545,
11435
+ "step": 942500
11436
+ },
11437
+ {
11438
+ "epoch": 13.57,
11439
+ "learning_rate": 3.2721117048068587e-06,
11440
+ "loss": 1.2587,
11441
+ "step": 943000
11442
+ },
11443
+ {
11444
+ "epoch": 13.58,
11445
+ "learning_rate": 3.2201849630541236e-06,
11446
+ "loss": 1.2569,
11447
+ "step": 943500
11448
+ },
11449
+ {
11450
+ "epoch": 13.59,
11451
+ "learning_rate": 3.1684659282683992e-06,
11452
+ "loss": 1.2606,
11453
+ "step": 944000
11454
+ },
11455
+ {
11456
+ "epoch": 13.6,
11457
+ "learning_rate": 3.116539186515664e-06,
11458
+ "loss": 1.2599,
11459
+ "step": 944500
11460
+ },
11461
+ {
11462
+ "epoch": 13.6,
11463
+ "learning_rate": 3.0648201517299398e-06,
11464
+ "loss": 1.2586,
11465
+ "step": 945000
11466
+ },
11467
+ {
11468
+ "epoch": 13.61,
11469
+ "learning_rate": 3.0128934099772042e-06,
11470
+ "loss": 1.2539,
11471
+ "step": 945500
11472
+ },
11473
+ {
11474
+ "epoch": 13.62,
11475
+ "learning_rate": 2.96117437519148e-06,
11476
+ "loss": 1.2517,
11477
+ "step": 946000
11478
+ },
11479
+ {
11480
+ "epoch": 13.62,
11481
+ "learning_rate": 2.9092476334387448e-06,
11482
+ "loss": 1.2547,
11483
+ "step": 946500
11484
+ },
11485
+ {
11486
+ "epoch": 13.63,
11487
+ "learning_rate": 2.8575285986530204e-06,
11488
+ "loss": 1.2595,
11489
+ "step": 947000
11490
+ },
11491
+ {
11492
+ "epoch": 13.64,
11493
+ "learning_rate": 2.8056018569002853e-06,
11494
+ "loss": 1.2539,
11495
+ "step": 947500
11496
+ },
11497
+ {
11498
+ "epoch": 13.65,
11499
+ "learning_rate": 2.753882822114561e-06,
11500
+ "loss": 1.2596,
11501
+ "step": 948000
11502
+ },
11503
+ {
11504
+ "epoch": 13.65,
11505
+ "learning_rate": 2.701956080361826e-06,
11506
+ "loss": 1.259,
11507
+ "step": 948500
11508
+ },
11509
+ {
11510
+ "epoch": 13.66,
11511
+ "learning_rate": 2.6502370455761015e-06,
11512
+ "loss": 1.2567,
11513
+ "step": 949000
11514
+ },
11515
+ {
11516
+ "epoch": 13.67,
11517
+ "learning_rate": 2.598310303823366e-06,
11518
+ "loss": 1.2553,
11519
+ "step": 949500
11520
+ },
11521
+ {
11522
+ "epoch": 13.67,
11523
+ "learning_rate": 2.5465912690376416e-06,
11524
+ "loss": 1.2558,
11525
+ "step": 950000
11526
+ },
11527
+ {
11528
+ "epoch": 13.68,
11529
+ "learning_rate": 2.494664527284907e-06,
11530
+ "loss": 1.2573,
11531
+ "step": 950500
11532
+ },
11533
+ {
11534
+ "epoch": 13.69,
11535
+ "learning_rate": 2.4429454924991826e-06,
11536
+ "loss": 1.2545,
11537
+ "step": 951000
11538
+ },
11539
+ {
11540
+ "epoch": 13.7,
11541
+ "learning_rate": 2.391018750746447e-06,
11542
+ "loss": 1.2569,
11543
+ "step": 951500
11544
+ },
11545
+ {
11546
+ "epoch": 13.7,
11547
+ "learning_rate": 2.3392997159607227e-06,
11548
+ "loss": 1.258,
11549
+ "step": 952000
11550
+ },
11551
+ {
11552
+ "epoch": 13.71,
11553
+ "learning_rate": 2.287372974207987e-06,
11554
+ "loss": 1.2578,
11555
+ "step": 952500
11556
+ },
11557
+ {
11558
+ "epoch": 13.72,
11559
+ "learning_rate": 2.2356539394222632e-06,
11560
+ "loss": 1.2571,
11561
+ "step": 953000
11562
+ },
11563
+ {
11564
+ "epoch": 13.72,
11565
+ "learning_rate": 2.183727197669528e-06,
11566
+ "loss": 1.2537,
11567
+ "step": 953500
11568
+ },
11569
+ {
11570
+ "epoch": 13.73,
11571
+ "learning_rate": 2.1320081628838038e-06,
11572
+ "loss": 1.2546,
11573
+ "step": 954000
11574
+ },
11575
+ {
11576
+ "epoch": 13.74,
11577
+ "learning_rate": 2.0800814211310682e-06,
11578
+ "loss": 1.2573,
11579
+ "step": 954500
11580
+ },
11581
+ {
11582
+ "epoch": 13.75,
11583
+ "learning_rate": 2.0283623863453443e-06,
11584
+ "loss": 1.2561,
11585
+ "step": 955000
11586
+ },
11587
+ {
11588
+ "epoch": 13.75,
11589
+ "learning_rate": 1.9764356445926088e-06,
11590
+ "loss": 1.2583,
11591
+ "step": 955500
11592
+ },
11593
+ {
11594
+ "epoch": 13.76,
11595
+ "learning_rate": 1.9247166098068844e-06,
11596
+ "loss": 1.2526,
11597
+ "step": 956000
11598
+ },
11599
+ {
11600
+ "epoch": 13.77,
11601
+ "learning_rate": 1.872789868054149e-06,
11602
+ "loss": 1.2552,
11603
+ "step": 956500
11604
+ },
11605
+ {
11606
+ "epoch": 13.78,
11607
+ "learning_rate": 1.8210708332684252e-06,
11608
+ "loss": 1.2544,
11609
+ "step": 957000
11610
+ },
11611
+ {
11612
+ "epoch": 13.78,
11613
+ "learning_rate": 1.7691440915156898e-06,
11614
+ "loss": 1.2576,
11615
+ "step": 957500
11616
+ },
11617
+ {
11618
+ "epoch": 13.79,
11619
+ "learning_rate": 1.7174250567299655e-06,
11620
+ "loss": 1.2555,
11621
+ "step": 958000
11622
+ },
11623
+ {
11624
+ "epoch": 13.8,
11625
+ "learning_rate": 1.6654983149772302e-06,
11626
+ "loss": 1.2539,
11627
+ "step": 958500
11628
+ },
11629
+ {
11630
+ "epoch": 13.8,
11631
+ "learning_rate": 1.6137792801915058e-06,
11632
+ "loss": 1.2553,
11633
+ "step": 959000
11634
+ },
11635
+ {
11636
+ "epoch": 13.81,
11637
+ "learning_rate": 1.5618525384387705e-06,
11638
+ "loss": 1.2591,
11639
+ "step": 959500
11640
+ },
11641
+ {
11642
+ "epoch": 13.82,
11643
+ "learning_rate": 1.5101335036530463e-06,
11644
+ "loss": 1.257,
11645
+ "step": 960000
11646
+ },
11647
+ {
11648
+ "epoch": 13.83,
11649
+ "learning_rate": 1.458206761900311e-06,
11650
+ "loss": 1.26,
11651
+ "step": 960500
11652
+ },
11653
+ {
11654
+ "epoch": 13.83,
11655
+ "learning_rate": 1.4064877271145869e-06,
11656
+ "loss": 1.2579,
11657
+ "step": 961000
11658
+ },
11659
+ {
11660
+ "epoch": 13.84,
11661
+ "learning_rate": 1.3545609853618516e-06,
11662
+ "loss": 1.254,
11663
+ "step": 961500
11664
+ },
11665
+ {
11666
+ "epoch": 13.85,
11667
+ "learning_rate": 1.3028419505761274e-06,
11668
+ "loss": 1.2531,
11669
+ "step": 962000
11670
+ },
11671
+ {
11672
+ "epoch": 13.85,
11673
+ "learning_rate": 1.250915208823392e-06,
11674
+ "loss": 1.2561,
11675
+ "step": 962500
11676
+ },
11677
+ {
11678
+ "epoch": 13.86,
11679
+ "learning_rate": 1.1991961740376677e-06,
11680
+ "loss": 1.2549,
11681
+ "step": 963000
11682
+ },
11683
+ {
11684
+ "epoch": 13.87,
11685
+ "learning_rate": 1.1472694322849324e-06,
11686
+ "loss": 1.2531,
11687
+ "step": 963500
11688
+ },
11689
+ {
11690
+ "epoch": 13.88,
11691
+ "learning_rate": 1.0955503974992083e-06,
11692
+ "loss": 1.2547,
11693
+ "step": 964000
11694
+ },
11695
+ {
11696
+ "epoch": 13.88,
11697
+ "learning_rate": 1.043623655746473e-06,
11698
+ "loss": 1.2588,
11699
+ "step": 964500
11700
+ },
11701
+ {
11702
+ "epoch": 13.89,
11703
+ "learning_rate": 9.919046209607486e-07,
11704
+ "loss": 1.258,
11705
+ "step": 965000
11706
+ },
11707
+ {
11708
+ "epoch": 13.9,
11709
+ "learning_rate": 9.399778792080134e-07,
11710
+ "loss": 1.2586,
11711
+ "step": 965500
11712
+ },
11713
+ {
11714
+ "epoch": 13.9,
11715
+ "learning_rate": 8.88258844422289e-07,
11716
+ "loss": 1.2577,
11717
+ "step": 966000
11718
+ },
11719
+ {
11720
+ "epoch": 13.91,
11721
+ "learning_rate": 8.363321026695538e-07,
11722
+ "loss": 1.253,
11723
+ "step": 966500
11724
+ },
11725
+ {
11726
+ "epoch": 13.92,
11727
+ "learning_rate": 7.846130678838295e-07,
11728
+ "loss": 1.2589,
11729
+ "step": 967000
11730
+ },
11731
+ {
11732
+ "epoch": 13.93,
11733
+ "learning_rate": 7.326863261310943e-07,
11734
+ "loss": 1.2584,
11735
+ "step": 967500
11736
+ },
11737
+ {
11738
+ "epoch": 13.93,
11739
+ "learning_rate": 6.8096729134537e-07,
11740
+ "loss": 1.257,
11741
+ "step": 968000
11742
+ },
11743
+ {
11744
+ "epoch": 13.94,
11745
+ "learning_rate": 6.290405495926348e-07,
11746
+ "loss": 1.2559,
11747
+ "step": 968500
11748
+ },
11749
+ {
11750
+ "epoch": 13.95,
11751
+ "learning_rate": 5.773215148069104e-07,
11752
+ "loss": 1.2561,
11753
+ "step": 969000
11754
+ },
11755
+ {
11756
+ "epoch": 13.96,
11757
+ "learning_rate": 5.253947730541751e-07,
11758
+ "loss": 1.2567,
11759
+ "step": 969500
11760
+ },
11761
+ {
11762
+ "epoch": 13.96,
11763
+ "learning_rate": 4.736757382684509e-07,
11764
+ "loss": 1.2517,
11765
+ "step": 970000
11766
+ },
11767
+ {
11768
+ "epoch": 13.97,
11769
+ "learning_rate": 4.217489965157156e-07,
11770
+ "loss": 1.2539,
11771
+ "step": 970500
11772
+ },
11773
+ {
11774
+ "epoch": 13.98,
11775
+ "learning_rate": 3.700299617299913e-07,
11776
+ "loss": 1.2588,
11777
+ "step": 971000
11778
+ },
11779
+ {
11780
+ "epoch": 13.98,
11781
+ "learning_rate": 3.181032199772561e-07,
11782
+ "loss": 1.259,
11783
+ "step": 971500
11784
+ },
11785
+ {
11786
+ "epoch": 13.99,
11787
+ "learning_rate": 2.663841851915318e-07,
11788
+ "loss": 1.2581,
11789
+ "step": 972000
11790
+ },
11791
+ {
11792
+ "epoch": 14.0,
11793
+ "learning_rate": 2.1445744343879657e-07,
11794
+ "loss": 1.2587,
11795
+ "step": 972500
11796
+ },
11797
+ {
11798
+ "epoch": 14.0,
11799
+ "eval_accuracy": 0.7454719241146508,
11800
+ "eval_loss": 1.1279296875,
11801
+ "eval_runtime": 747.2594,
11802
+ "eval_samples_per_second": 721.219,
11803
+ "eval_steps_per_second": 30.051,
11804
+ "step": 972622
11805
  }
  ],
  "max_steps": 972622,
  "num_train_epochs": 14,
- "total_flos": 6.821351292405809e+18,
+ "total_flos": 7.34610367742319e+18,
  "trial_name": null,
  "trial_params": null
  }
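A quick sanity check on the trainer_state.json figures above, in plain Python (not project code): the checkpoint advanced from global_step 903149 at epoch 13.0 to 972622 at epoch 14.0, i.e. 69,473 optimizer steps per epoch, and max_steps 972622 equals 14 × 69,473, so this commit records the final epoch of the scheduled run.

```python
# Figures copied from the trainer_state.json diff above.
prev_step, new_step = 903149, 972622
max_steps, num_train_epochs = 972622, 14

steps_per_epoch = new_step - prev_step                    # 69473 steps in epoch 14
assert prev_step == 13 * steps_per_epoch                  # epoch-13 checkpoint lines up
assert max_steps == num_train_epochs * steps_per_epoch    # 972622 == 14 * 69473
print(f"{steps_per_epoch} steps/epoch; training ends at step {max_steps}")
```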
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:f5efd06d28432fcf48678b05370f58b60157beb8e38eece62dc63d44fe7bb723
+ oid sha256:0609024bd7558e0cbefcf1090fd067639bf67ccff1da2161c89fb9d832bff828
  size 59121639
runs/May29_03-16-06_user-SYS-5049A-TR/events.out.tfevents.1685297788.user-SYS-5049A-TR.557399.0 CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:8cdd6735431b63cb7c844e45a286fee27696c2db20b75ffebe4cfaa496fe0fe3
- size 296594
+ oid sha256:9b4341f2b9c55aec05e0a1ddbaa35a4734b0402e0d47285020e71f2634b6e4bb
+ size 319163