mohammadmahdinouri commited on
Commit
44921d9
·
verified ·
1 Parent(s): 63c2116

Training in progress, step 55000, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:032e5ecb2dab53150bbaf3c11fbb8a4e9ba7451f8029ca8a4a75b9c764ed4ca3
3
  size 487156538
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:436bf79533e258070c96b4760436afa1f9251b1590c7ae2a2f60dc7519e9b64b
3
  size 487156538
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fc3071b91c3d9f34aa409b00330a89c7035dd64aec3ad8af8c2d0d3d08e04916
3
  size 1059459406
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab2a7667c52f9cc64e61b137dc5df66439292fcc32acda2e7782c8372f9c8172
3
  size 1059459406
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:02330612259bf0ffdcebbdc862309eac5e6da0f6e632646393673582eff58b76
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61a042d8f729c4f51ba538ba4c747cf1d8cbb1b59cf032f3422995a579b49f8a
3
  size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f8c3d8df5bbb17e62aa6823857f159f584d750ec4ae412f9867691a7828e5c9
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63b64a2edbbd5cf896abcb8f817b204e5a511d27c7efe13e0a92b23dc6a3b777
3
  size 14960
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:575c0a93289c37982b1579a31f28f9195ea8ec12a0c5b7b286d351b318ae6d53
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0540aa39d91dc61087d3dd380d7b7750dc1d19afff10c530b1d0895a416cf32f
3
  size 14960
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cdf30ebe94a891d128b17692f16c1969a63407269859c9cbaf399c6ea35d8af7
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47442bd8ad617950fc9791e10321850b084b057926e59d24f6b5e09aefa3043b
3
  size 14960
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cfaaa443ff50c514e8d740e179deb3f101e73d9201b92424d8bf52ab5c7dfc99
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1887d8c6d2dc250cfb0b7c57e61e4fa0abc40fda0dbe8977a6841b90daceb70
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.07999099360664577,
6
  "eval_steps": 500,
7
- "global_step": 54000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -18908,6 +18908,356 @@
18908
  "learning_rate": 0.0004867885958015101,
18909
  "loss": 16.9579,
18910
  "step": 54000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18911
  }
18912
  ],
18913
  "logging_steps": 20,
@@ -18927,7 +19277,7 @@
18927
  "attributes": {}
18928
  }
18929
  },
18930
- "total_flos": 1.2018037472162847e+20,
18931
  "train_batch_size": 48,
18932
  "trial_name": null,
18933
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.08147230830306514,
6
  "eval_steps": 500,
7
+ "global_step": 55000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
18908
  "learning_rate": 0.0004867885958015101,
18909
  "loss": 16.9579,
18910
  "step": 54000
18911
+ },
18912
+ {
18913
+ "epoch": 0.08002061990057416,
18914
+ "grad_norm": 8.375,
18915
+ "learning_rate": 0.00048678365686650256,
18916
+ "loss": 16.8883,
18917
+ "step": 54020
18918
+ },
18919
+ {
18920
+ "epoch": 0.08005024619450254,
18921
+ "grad_norm": 6.28125,
18922
+ "learning_rate": 0.000486778717931495,
18923
+ "loss": 16.9452,
18924
+ "step": 54040
18925
+ },
18926
+ {
18927
+ "epoch": 0.08007987248843093,
18928
+ "grad_norm": 8.25,
18929
+ "learning_rate": 0.00048677377899648746,
18930
+ "loss": 16.8946,
18931
+ "step": 54060
18932
+ },
18933
+ {
18934
+ "epoch": 0.08010949878235932,
18935
+ "grad_norm": 8.5,
18936
+ "learning_rate": 0.00048676884006147985,
18937
+ "loss": 16.9801,
18938
+ "step": 54080
18939
+ },
18940
+ {
18941
+ "epoch": 0.0801391250762877,
18942
+ "grad_norm": 6.15625,
18943
+ "learning_rate": 0.0004867639011264723,
18944
+ "loss": 16.9925,
18945
+ "step": 54100
18946
+ },
18947
+ {
18948
+ "epoch": 0.08016875137021609,
18949
+ "grad_norm": 6.6875,
18950
+ "learning_rate": 0.00048675896219146475,
18951
+ "loss": 16.9666,
18952
+ "step": 54120
18953
+ },
18954
+ {
18955
+ "epoch": 0.08019837766414448,
18956
+ "grad_norm": 7.28125,
18957
+ "learning_rate": 0.0004867540232564572,
18958
+ "loss": 16.9161,
18959
+ "step": 54140
18960
+ },
18961
+ {
18962
+ "epoch": 0.08022800395807286,
18963
+ "grad_norm": 7.78125,
18964
+ "learning_rate": 0.0004867490843214496,
18965
+ "loss": 17.0152,
18966
+ "step": 54160
18967
+ },
18968
+ {
18969
+ "epoch": 0.08025763025200125,
18970
+ "grad_norm": 7.40625,
18971
+ "learning_rate": 0.00048674414538644204,
18972
+ "loss": 16.9116,
18973
+ "step": 54180
18974
+ },
18975
+ {
18976
+ "epoch": 0.08028725654592965,
18977
+ "grad_norm": 7.15625,
18978
+ "learning_rate": 0.0004867392064514345,
18979
+ "loss": 16.9337,
18980
+ "step": 54200
18981
+ },
18982
+ {
18983
+ "epoch": 0.08031688283985804,
18984
+ "grad_norm": 8.375,
18985
+ "learning_rate": 0.00048673426751642693,
18986
+ "loss": 16.9931,
18987
+ "step": 54220
18988
+ },
18989
+ {
18990
+ "epoch": 0.08034650913378642,
18991
+ "grad_norm": 7.3125,
18992
+ "learning_rate": 0.0004867293285814193,
18993
+ "loss": 17.0016,
18994
+ "step": 54240
18995
+ },
18996
+ {
18997
+ "epoch": 0.08037613542771481,
18998
+ "grad_norm": 11.3125,
18999
+ "learning_rate": 0.0004867243896464118,
19000
+ "loss": 16.9347,
19001
+ "step": 54260
19002
+ },
19003
+ {
19004
+ "epoch": 0.0804057617216432,
19005
+ "grad_norm": 9.875,
19006
+ "learning_rate": 0.0004867194507114042,
19007
+ "loss": 16.9192,
19008
+ "step": 54280
19009
+ },
19010
+ {
19011
+ "epoch": 0.08043538801557158,
19012
+ "grad_norm": 8.6875,
19013
+ "learning_rate": 0.0004867145117763966,
19014
+ "loss": 16.8667,
19015
+ "step": 54300
19016
+ },
19017
+ {
19018
+ "epoch": 0.08046501430949997,
19019
+ "grad_norm": 11.875,
19020
+ "learning_rate": 0.00048670957284138906,
19021
+ "loss": 16.9558,
19022
+ "step": 54320
19023
+ },
19024
+ {
19025
+ "epoch": 0.08049464060342836,
19026
+ "grad_norm": 8.6875,
19027
+ "learning_rate": 0.0004867046339063815,
19028
+ "loss": 17.0024,
19029
+ "step": 54340
19030
+ },
19031
+ {
19032
+ "epoch": 0.08052426689735674,
19033
+ "grad_norm": 6.625,
19034
+ "learning_rate": 0.00048669969497137396,
19035
+ "loss": 16.9562,
19036
+ "step": 54360
19037
+ },
19038
+ {
19039
+ "epoch": 0.08055389319128513,
19040
+ "grad_norm": 6.9375,
19041
+ "learning_rate": 0.00048669475603636635,
19042
+ "loss": 16.9164,
19043
+ "step": 54380
19044
+ },
19045
+ {
19046
+ "epoch": 0.08058351948521351,
19047
+ "grad_norm": 9.625,
19048
+ "learning_rate": 0.0004866898171013588,
19049
+ "loss": 16.9818,
19050
+ "step": 54400
19051
+ },
19052
+ {
19053
+ "epoch": 0.0806131457791419,
19054
+ "grad_norm": 6.875,
19055
+ "learning_rate": 0.00048668487816635125,
19056
+ "loss": 16.9353,
19057
+ "step": 54420
19058
+ },
19059
+ {
19060
+ "epoch": 0.08064277207307029,
19061
+ "grad_norm": 9.25,
19062
+ "learning_rate": 0.0004866799392313437,
19063
+ "loss": 16.9368,
19064
+ "step": 54440
19065
+ },
19066
+ {
19067
+ "epoch": 0.08067239836699867,
19068
+ "grad_norm": 8.0625,
19069
+ "learning_rate": 0.0004866750002963361,
19070
+ "loss": 16.9073,
19071
+ "step": 54460
19072
+ },
19073
+ {
19074
+ "epoch": 0.08070202466092706,
19075
+ "grad_norm": 6.71875,
19076
+ "learning_rate": 0.00048667006136132854,
19077
+ "loss": 16.9283,
19078
+ "step": 54480
19079
+ },
19080
+ {
19081
+ "epoch": 0.08073165095485545,
19082
+ "grad_norm": 6.84375,
19083
+ "learning_rate": 0.000486665122426321,
19084
+ "loss": 16.9585,
19085
+ "step": 54500
19086
+ },
19087
+ {
19088
+ "epoch": 0.08076127724878385,
19089
+ "grad_norm": 6.84375,
19090
+ "learning_rate": 0.00048666018349131343,
19091
+ "loss": 16.9718,
19092
+ "step": 54520
19093
+ },
19094
+ {
19095
+ "epoch": 0.08079090354271223,
19096
+ "grad_norm": 6.6875,
19097
+ "learning_rate": 0.0004866552445563058,
19098
+ "loss": 16.974,
19099
+ "step": 54540
19100
+ },
19101
+ {
19102
+ "epoch": 0.08082052983664062,
19103
+ "grad_norm": 7.25,
19104
+ "learning_rate": 0.0004866503056212983,
19105
+ "loss": 16.9887,
19106
+ "step": 54560
19107
+ },
19108
+ {
19109
+ "epoch": 0.080850156130569,
19110
+ "grad_norm": 8.375,
19111
+ "learning_rate": 0.0004866453666862907,
19112
+ "loss": 17.0524,
19113
+ "step": 54580
19114
+ },
19115
+ {
19116
+ "epoch": 0.08087978242449739,
19117
+ "grad_norm": 7.375,
19118
+ "learning_rate": 0.00048664042775128317,
19119
+ "loss": 16.9761,
19120
+ "step": 54600
19121
+ },
19122
+ {
19123
+ "epoch": 0.08090940871842578,
19124
+ "grad_norm": 7.0625,
19125
+ "learning_rate": 0.00048663548881627556,
19126
+ "loss": 16.8978,
19127
+ "step": 54620
19128
+ },
19129
+ {
19130
+ "epoch": 0.08093903501235417,
19131
+ "grad_norm": 7.40625,
19132
+ "learning_rate": 0.000486630549881268,
19133
+ "loss": 16.9383,
19134
+ "step": 54640
19135
+ },
19136
+ {
19137
+ "epoch": 0.08096866130628255,
19138
+ "grad_norm": 7.1875,
19139
+ "learning_rate": 0.00048662561094626046,
19140
+ "loss": 16.9231,
19141
+ "step": 54660
19142
+ },
19143
+ {
19144
+ "epoch": 0.08099828760021094,
19145
+ "grad_norm": 7.90625,
19146
+ "learning_rate": 0.00048662067201125285,
19147
+ "loss": 16.9352,
19148
+ "step": 54680
19149
+ },
19150
+ {
19151
+ "epoch": 0.08102791389413933,
19152
+ "grad_norm": 11.6875,
19153
+ "learning_rate": 0.0004866157330762453,
19154
+ "loss": 16.9842,
19155
+ "step": 54700
19156
+ },
19157
+ {
19158
+ "epoch": 0.08105754018806771,
19159
+ "grad_norm": 6.625,
19160
+ "learning_rate": 0.00048661079414123775,
19161
+ "loss": 16.9285,
19162
+ "step": 54720
19163
+ },
19164
+ {
19165
+ "epoch": 0.0810871664819961,
19166
+ "grad_norm": 6.28125,
19167
+ "learning_rate": 0.0004866058552062302,
19168
+ "loss": 16.9533,
19169
+ "step": 54740
19170
+ },
19171
+ {
19172
+ "epoch": 0.08111679277592448,
19173
+ "grad_norm": 10.5,
19174
+ "learning_rate": 0.0004866009162712226,
19175
+ "loss": 16.9167,
19176
+ "step": 54760
19177
+ },
19178
+ {
19179
+ "epoch": 0.08114641906985287,
19180
+ "grad_norm": 12.5,
19181
+ "learning_rate": 0.00048659597733621504,
19182
+ "loss": 16.8854,
19183
+ "step": 54780
19184
+ },
19185
+ {
19186
+ "epoch": 0.08117604536378126,
19187
+ "grad_norm": 6.875,
19188
+ "learning_rate": 0.0004865910384012075,
19189
+ "loss": 16.9078,
19190
+ "step": 54800
19191
+ },
19192
+ {
19193
+ "epoch": 0.08120567165770964,
19194
+ "grad_norm": 6.65625,
19195
+ "learning_rate": 0.00048658609946619993,
19196
+ "loss": 16.9198,
19197
+ "step": 54820
19198
+ },
19199
+ {
19200
+ "epoch": 0.08123529795163804,
19201
+ "grad_norm": 7.9375,
19202
+ "learning_rate": 0.0004865811605311923,
19203
+ "loss": 16.9367,
19204
+ "step": 54840
19205
+ },
19206
+ {
19207
+ "epoch": 0.08126492424556643,
19208
+ "grad_norm": 7.1875,
19209
+ "learning_rate": 0.0004865762215961848,
19210
+ "loss": 16.9474,
19211
+ "step": 54860
19212
+ },
19213
+ {
19214
+ "epoch": 0.08129455053949482,
19215
+ "grad_norm": 7.0625,
19216
+ "learning_rate": 0.0004865712826611772,
19217
+ "loss": 16.93,
19218
+ "step": 54880
19219
+ },
19220
+ {
19221
+ "epoch": 0.0813241768334232,
19222
+ "grad_norm": 7.25,
19223
+ "learning_rate": 0.00048656634372616967,
19224
+ "loss": 16.9968,
19225
+ "step": 54900
19226
+ },
19227
+ {
19228
+ "epoch": 0.08135380312735159,
19229
+ "grad_norm": 7.40625,
19230
+ "learning_rate": 0.00048656140479116206,
19231
+ "loss": 16.9388,
19232
+ "step": 54920
19233
+ },
19234
+ {
19235
+ "epoch": 0.08138342942127998,
19236
+ "grad_norm": 6.375,
19237
+ "learning_rate": 0.00048655646585615456,
19238
+ "loss": 16.9977,
19239
+ "step": 54940
19240
+ },
19241
+ {
19242
+ "epoch": 0.08141305571520836,
19243
+ "grad_norm": 8.125,
19244
+ "learning_rate": 0.00048655152692114696,
19245
+ "loss": 16.9571,
19246
+ "step": 54960
19247
+ },
19248
+ {
19249
+ "epoch": 0.08144268200913675,
19250
+ "grad_norm": 7.59375,
19251
+ "learning_rate": 0.00048654658798613935,
19252
+ "loss": 16.9437,
19253
+ "step": 54980
19254
+ },
19255
+ {
19256
+ "epoch": 0.08147230830306514,
19257
+ "grad_norm": 6.34375,
19258
+ "learning_rate": 0.0004865416490511318,
19259
+ "loss": 16.92,
19260
+ "step": 55000
19261
  }
19262
  ],
19263
  "logging_steps": 20,
 
19277
  "attributes": {}
19278
  }
19279
  },
19280
+ "total_flos": 1.224059432458917e+20,
19281
  "train_batch_size": 48,
19282
  "trial_name": null,
19283
  "trial_params": null