mohammadmahdinouri commited on
Commit
d0126db
·
verified ·
1 Parent(s): 384001a

Training in progress, step 81000, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c95d6bd6c8ef7be74648794bf804ea6641abfdb4b683e21f548a71f857055c4d
3
  size 487156538
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:136551a11d925c9b7e277363afe91252d3c90b8dc5e5c43289e61107e3d62773
3
  size 487156538
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:77f02393a2e68235715b9461b42f89022b048fab626fba6fe23a96b132584f0d
3
  size 1059459406
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1982d8643a6b6bd91ce00559255789edb52f59cd150cda1ba9a3365e1689ee9f
3
  size 1059459406
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:76361476c5f64c0bf49b3517f575b313775e07413868efaa9e59b739826961ff
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53e04a561bdac90ef1f6c25ff5574afc68c7428cdda288bd54c70b9fc50dd7f9
3
  size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6827d9fffaa2c28a86003e8c710ebd966143a8a4fb5c72ae89825e5cbb629648
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7561f948920604751f7cf826d2cf58f5e293444c817431c5d2aa2bead82cc641
3
  size 14960
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a41f160cdde62d6e3a1a90c84cc4de5e2ed3bd84b93a6360ed82af50a916d89f
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae0860552be41b41d133150b6b246cf17216ad8cec8467e427463a8701a5e9f2
3
  size 14960
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a808512b91eba0cbda0edf83c48a9fdb39d09e9a32863b9796c9d7e8b7d4b81c
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2910f8dc686565600a07229cef4e507c0697eae3a7e9385ae4e913b2a8f189a5
3
  size 14960
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d73d6a55f40d828827c6493d8d4e36859284046429b1cc4d61ff3be96f72f5ef
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38edd545bc4f01de3f608883af1908fbe14efdd33931e3adde347eb4fa00e55f
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.1185051757135493,
6
  "eval_steps": 500,
7
- "global_step": 80000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -28008,6 +28008,356 @@
28008
  "learning_rate": 0.0004803679802916738,
28009
  "loss": 16.5126,
28010
  "step": 80000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28011
  }
28012
  ],
28013
  "logging_steps": 20,
@@ -28027,7 +28377,7 @@
28027
  "attributes": {}
28028
  }
28029
  },
28030
- "total_flos": 1.7804505344505597e+20,
28031
  "train_batch_size": 48,
28032
  "trial_name": null,
28033
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.11998649040996866,
6
  "eval_steps": 500,
7
+ "global_step": 81000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
28008
  "learning_rate": 0.0004803679802916738,
28009
  "loss": 16.5126,
28010
  "step": 80000
28011
+ },
28012
+ {
28013
+ "epoch": 0.11853480200747768,
28014
+ "grad_norm": 7.59375,
28015
+ "learning_rate": 0.0004803630413566662,
28016
+ "loss": 16.5302,
28017
+ "step": 80020
28018
+ },
28019
+ {
28020
+ "epoch": 0.11856442830140607,
28021
+ "grad_norm": 7.0625,
28022
+ "learning_rate": 0.0004803581024216586,
28023
+ "loss": 16.4854,
28024
+ "step": 80040
28025
+ },
28026
+ {
28027
+ "epoch": 0.11859405459533445,
28028
+ "grad_norm": 6.90625,
28029
+ "learning_rate": 0.000480353163486651,
28030
+ "loss": 16.4548,
28031
+ "step": 80060
28032
+ },
28033
+ {
28034
+ "epoch": 0.11862368088926284,
28035
+ "grad_norm": 7.03125,
28036
+ "learning_rate": 0.0004803482245516435,
28037
+ "loss": 16.5227,
28038
+ "step": 80080
28039
+ },
28040
+ {
28041
+ "epoch": 0.11865330718319123,
28042
+ "grad_norm": 8.5625,
28043
+ "learning_rate": 0.0004803432856166359,
28044
+ "loss": 16.4541,
28045
+ "step": 80100
28046
+ },
28047
+ {
28048
+ "epoch": 0.11868293347711961,
28049
+ "grad_norm": 6.90625,
28050
+ "learning_rate": 0.00048033834668162836,
28051
+ "loss": 16.4769,
28052
+ "step": 80120
28053
+ },
28054
+ {
28055
+ "epoch": 0.118712559771048,
28056
+ "grad_norm": 8.3125,
28057
+ "learning_rate": 0.00048033340774662075,
28058
+ "loss": 16.433,
28059
+ "step": 80140
28060
+ },
28061
+ {
28062
+ "epoch": 0.11874218606497638,
28063
+ "grad_norm": 10.75,
28064
+ "learning_rate": 0.00048032846881161325,
28065
+ "loss": 16.4778,
28066
+ "step": 80160
28067
+ },
28068
+ {
28069
+ "epoch": 0.11877181235890477,
28070
+ "grad_norm": 8.5,
28071
+ "learning_rate": 0.00048032352987660565,
28072
+ "loss": 16.458,
28073
+ "step": 80180
28074
+ },
28075
+ {
28076
+ "epoch": 0.11880143865283316,
28077
+ "grad_norm": 5.875,
28078
+ "learning_rate": 0.0004803185909415981,
28079
+ "loss": 16.4083,
28080
+ "step": 80200
28081
+ },
28082
+ {
28083
+ "epoch": 0.11883106494676154,
28084
+ "grad_norm": 7.96875,
28085
+ "learning_rate": 0.00048031365200659054,
28086
+ "loss": 16.5132,
28087
+ "step": 80220
28088
+ },
28089
+ {
28090
+ "epoch": 0.11886069124068993,
28091
+ "grad_norm": 7.21875,
28092
+ "learning_rate": 0.000480308713071583,
28093
+ "loss": 16.4829,
28094
+ "step": 80240
28095
+ },
28096
+ {
28097
+ "epoch": 0.11889031753461833,
28098
+ "grad_norm": 8.3125,
28099
+ "learning_rate": 0.0004803037741365754,
28100
+ "loss": 16.4883,
28101
+ "step": 80260
28102
+ },
28103
+ {
28104
+ "epoch": 0.11891994382854672,
28105
+ "grad_norm": 8.3125,
28106
+ "learning_rate": 0.00048029883520156783,
28107
+ "loss": 16.4839,
28108
+ "step": 80280
28109
+ },
28110
+ {
28111
+ "epoch": 0.1189495701224751,
28112
+ "grad_norm": 16.5,
28113
+ "learning_rate": 0.0004802938962665603,
28114
+ "loss": 16.474,
28115
+ "step": 80300
28116
+ },
28117
+ {
28118
+ "epoch": 0.11897919641640349,
28119
+ "grad_norm": 8.5,
28120
+ "learning_rate": 0.0004802889573315527,
28121
+ "loss": 16.5142,
28122
+ "step": 80320
28123
+ },
28124
+ {
28125
+ "epoch": 0.11900882271033188,
28126
+ "grad_norm": 6.71875,
28127
+ "learning_rate": 0.0004802840183965451,
28128
+ "loss": 16.474,
28129
+ "step": 80340
28130
+ },
28131
+ {
28132
+ "epoch": 0.11903844900426026,
28133
+ "grad_norm": 6.9375,
28134
+ "learning_rate": 0.0004802790794615375,
28135
+ "loss": 16.4679,
28136
+ "step": 80360
28137
+ },
28138
+ {
28139
+ "epoch": 0.11906807529818865,
28140
+ "grad_norm": 11.25,
28141
+ "learning_rate": 0.00048027414052653,
28142
+ "loss": 16.4848,
28143
+ "step": 80380
28144
+ },
28145
+ {
28146
+ "epoch": 0.11909770159211704,
28147
+ "grad_norm": 6.75,
28148
+ "learning_rate": 0.0004802692015915224,
28149
+ "loss": 16.4804,
28150
+ "step": 80400
28151
+ },
28152
+ {
28153
+ "epoch": 0.11912732788604542,
28154
+ "grad_norm": 9.625,
28155
+ "learning_rate": 0.00048026426265651486,
28156
+ "loss": 16.4446,
28157
+ "step": 80420
28158
+ },
28159
+ {
28160
+ "epoch": 0.11915695417997381,
28161
+ "grad_norm": 7.65625,
28162
+ "learning_rate": 0.00048025932372150725,
28163
+ "loss": 16.4631,
28164
+ "step": 80440
28165
+ },
28166
+ {
28167
+ "epoch": 0.1191865804739022,
28168
+ "grad_norm": 7.5,
28169
+ "learning_rate": 0.00048025438478649975,
28170
+ "loss": 16.4251,
28171
+ "step": 80460
28172
+ },
28173
+ {
28174
+ "epoch": 0.11921620676783058,
28175
+ "grad_norm": 6.5,
28176
+ "learning_rate": 0.00048024944585149215,
28177
+ "loss": 16.552,
28178
+ "step": 80480
28179
+ },
28180
+ {
28181
+ "epoch": 0.11924583306175897,
28182
+ "grad_norm": 7.40625,
28183
+ "learning_rate": 0.0004802445069164846,
28184
+ "loss": 16.4699,
28185
+ "step": 80500
28186
+ },
28187
+ {
28188
+ "epoch": 0.11927545935568735,
28189
+ "grad_norm": 11.1875,
28190
+ "learning_rate": 0.00048023956798147704,
28191
+ "loss": 16.4617,
28192
+ "step": 80520
28193
+ },
28194
+ {
28195
+ "epoch": 0.11930508564961574,
28196
+ "grad_norm": 8.0625,
28197
+ "learning_rate": 0.0004802346290464695,
28198
+ "loss": 16.4773,
28199
+ "step": 80540
28200
+ },
28201
+ {
28202
+ "epoch": 0.11933471194354413,
28203
+ "grad_norm": 7.53125,
28204
+ "learning_rate": 0.0004802296901114619,
28205
+ "loss": 16.4283,
28206
+ "step": 80560
28207
+ },
28208
+ {
28209
+ "epoch": 0.11936433823747253,
28210
+ "grad_norm": 7.40625,
28211
+ "learning_rate": 0.00048022475117645433,
28212
+ "loss": 16.5017,
28213
+ "step": 80580
28214
+ },
28215
+ {
28216
+ "epoch": 0.11939396453140091,
28217
+ "grad_norm": 7.9375,
28218
+ "learning_rate": 0.0004802198122414468,
28219
+ "loss": 16.4556,
28220
+ "step": 80600
28221
+ },
28222
+ {
28223
+ "epoch": 0.1194235908253293,
28224
+ "grad_norm": 8.5,
28225
+ "learning_rate": 0.00048021487330643923,
28226
+ "loss": 16.448,
28227
+ "step": 80620
28228
+ },
28229
+ {
28230
+ "epoch": 0.11945321711925769,
28231
+ "grad_norm": 8.1875,
28232
+ "learning_rate": 0.0004802099343714316,
28233
+ "loss": 16.4593,
28234
+ "step": 80640
28235
+ },
28236
+ {
28237
+ "epoch": 0.11948284341318607,
28238
+ "grad_norm": 8.4375,
28239
+ "learning_rate": 0.000480204995436424,
28240
+ "loss": 16.5178,
28241
+ "step": 80660
28242
+ },
28243
+ {
28244
+ "epoch": 0.11951246970711446,
28245
+ "grad_norm": 6.59375,
28246
+ "learning_rate": 0.0004802000565014165,
28247
+ "loss": 16.4132,
28248
+ "step": 80680
28249
+ },
28250
+ {
28251
+ "epoch": 0.11954209600104285,
28252
+ "grad_norm": 7.09375,
28253
+ "learning_rate": 0.0004801951175664089,
28254
+ "loss": 16.4124,
28255
+ "step": 80700
28256
+ },
28257
+ {
28258
+ "epoch": 0.11957172229497123,
28259
+ "grad_norm": 8.1875,
28260
+ "learning_rate": 0.00048019017863140136,
28261
+ "loss": 16.4544,
28262
+ "step": 80720
28263
+ },
28264
+ {
28265
+ "epoch": 0.11960134858889962,
28266
+ "grad_norm": 9.8125,
28267
+ "learning_rate": 0.00048018523969639375,
28268
+ "loss": 16.4505,
28269
+ "step": 80740
28270
+ },
28271
+ {
28272
+ "epoch": 0.119630974882828,
28273
+ "grad_norm": 8.0,
28274
+ "learning_rate": 0.00048018030076138626,
28275
+ "loss": 16.4634,
28276
+ "step": 80760
28277
+ },
28278
+ {
28279
+ "epoch": 0.11966060117675639,
28280
+ "grad_norm": 9.8125,
28281
+ "learning_rate": 0.00048017536182637865,
28282
+ "loss": 16.4973,
28283
+ "step": 80780
28284
+ },
28285
+ {
28286
+ "epoch": 0.11969022747068478,
28287
+ "grad_norm": 7.65625,
28288
+ "learning_rate": 0.0004801704228913711,
28289
+ "loss": 16.4454,
28290
+ "step": 80800
28291
+ },
28292
+ {
28293
+ "epoch": 0.11971985376461317,
28294
+ "grad_norm": 9.0,
28295
+ "learning_rate": 0.00048016548395636354,
28296
+ "loss": 16.3708,
28297
+ "step": 80820
28298
+ },
28299
+ {
28300
+ "epoch": 0.11974948005854155,
28301
+ "grad_norm": 7.4375,
28302
+ "learning_rate": 0.000480160545021356,
28303
+ "loss": 16.4656,
28304
+ "step": 80840
28305
+ },
28306
+ {
28307
+ "epoch": 0.11977910635246994,
28308
+ "grad_norm": 7.9375,
28309
+ "learning_rate": 0.0004801556060863484,
28310
+ "loss": 16.4999,
28311
+ "step": 80860
28312
+ },
28313
+ {
28314
+ "epoch": 0.11980873264639832,
28315
+ "grad_norm": 8.125,
28316
+ "learning_rate": 0.00048015066715134083,
28317
+ "loss": 16.3914,
28318
+ "step": 80880
28319
+ },
28320
+ {
28321
+ "epoch": 0.11983835894032673,
28322
+ "grad_norm": 8.5,
28323
+ "learning_rate": 0.0004801457282163333,
28324
+ "loss": 16.4897,
28325
+ "step": 80900
28326
+ },
28327
+ {
28328
+ "epoch": 0.11986798523425511,
28329
+ "grad_norm": 8.4375,
28330
+ "learning_rate": 0.00048014078928132573,
28331
+ "loss": 16.4603,
28332
+ "step": 80920
28333
+ },
28334
+ {
28335
+ "epoch": 0.1198976115281835,
28336
+ "grad_norm": 9.3125,
28337
+ "learning_rate": 0.0004801358503463181,
28338
+ "loss": 16.4485,
28339
+ "step": 80940
28340
+ },
28341
+ {
28342
+ "epoch": 0.11992723782211188,
28343
+ "grad_norm": 7.625,
28344
+ "learning_rate": 0.00048013091141131057,
28345
+ "loss": 16.4565,
28346
+ "step": 80960
28347
+ },
28348
+ {
28349
+ "epoch": 0.11995686411604027,
28350
+ "grad_norm": 9.9375,
28351
+ "learning_rate": 0.000480125972476303,
28352
+ "loss": 16.4224,
28353
+ "step": 80980
28354
+ },
28355
+ {
28356
+ "epoch": 0.11998649040996866,
28357
+ "grad_norm": 13.0625,
28358
+ "learning_rate": 0.0004801210335412954,
28359
+ "loss": 16.473,
28360
+ "step": 81000
28361
  }
28362
  ],
28363
  "logging_steps": 20,
 
28377
  "attributes": {}
28378
  }
28379
  },
28380
+ "total_flos": 1.802706127351395e+20,
28381
  "train_batch_size": 48,
28382
  "trial_name": null,
28383
  "trial_params": null