CreatorPhan commited on
Commit
28812b3
1 Parent(s): dc202b3

Upload folder using huggingface_hub (#5)

Browse files

- Upload folder using huggingface_hub (03d09198311838452509ba48c594dcdb0f40d044)

Files changed (5) hide show
  1. adapter_model.bin +1 -1
  2. optimizer.pt +1 -1
  3. rng_state.pth +1 -1
  4. scheduler.pt +1 -1
  5. trainer_state.json +1203 -3
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:297ef8417e7df986ce834d9b0c8ebd28197873ea409686414f04e91b74281978
3
  size 39409357
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83a2bd54ab68a3dcad8fe0e5a630e45f5342750c381fd29ab4100c5db3d4e0e9
3
  size 39409357
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ae8afe492977b67949412a751b16cf3c9e70d22ef10182d710a235c40cb1a4ac
3
  size 78844421
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81ef498b8b8b31a2a5c36ffe3529e1378029eb4c3e4ba4770de0c248e4c62950
3
  size 78844421
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6162bb9db25c89c41e126a7a00a5d0695219447bff9b18d08731531620758440
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36fc71bd44bd7f04f2599c5dface64c517de1a7ab7bac3600f3f6470c6c72673
3
  size 14575
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f5434e83c78ab052927972350ed56fbd84392b488b1d09a11bdb87201790659f
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c6badaa6737fa9306a1f25b19c54f91672e90aa94d505b95ab467e7d08dd541
3
  size 627
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 5.648720211827008,
5
  "eval_steps": 500,
6
- "global_step": 500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3007,13 +3007,1213 @@
3007
  "learning_rate": 0.00012897727272727274,
3008
  "loss": 1.0534,
3009
  "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3010
  }
3011
  ],
3012
  "logging_steps": 1,
3013
  "max_steps": 1408,
3014
  "num_train_epochs": 16,
3015
  "save_steps": 100,
3016
- "total_flos": 6.820509352598323e+17,
3017
  "trial_name": null,
3018
  "trial_params": null
3019
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 7.908208296557811,
5
  "eval_steps": 500,
6
+ "global_step": 700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3007
  "learning_rate": 0.00012897727272727274,
3008
  "loss": 1.0534,
3009
  "step": 500
3010
+ },
3011
+ {
3012
+ "epoch": 5.66,
3013
+ "learning_rate": 0.00012883522727272727,
3014
+ "loss": 1.0755,
3015
+ "step": 501
3016
+ },
3017
+ {
3018
+ "epoch": 5.67,
3019
+ "learning_rate": 0.00012869318181818183,
3020
+ "loss": 1.0755,
3021
+ "step": 502
3022
+ },
3023
+ {
3024
+ "epoch": 5.68,
3025
+ "learning_rate": 0.00012855113636363636,
3026
+ "loss": 1.0869,
3027
+ "step": 503
3028
+ },
3029
+ {
3030
+ "epoch": 5.69,
3031
+ "learning_rate": 0.00012840909090909092,
3032
+ "loss": 1.0673,
3033
+ "step": 504
3034
+ },
3035
+ {
3036
+ "epoch": 5.71,
3037
+ "learning_rate": 0.00012826704545454545,
3038
+ "loss": 1.0692,
3039
+ "step": 505
3040
+ },
3041
+ {
3042
+ "epoch": 5.72,
3043
+ "learning_rate": 0.000128125,
3044
+ "loss": 1.0474,
3045
+ "step": 506
3046
+ },
3047
+ {
3048
+ "epoch": 5.73,
3049
+ "learning_rate": 0.00012798295454545454,
3050
+ "loss": 1.0749,
3051
+ "step": 507
3052
+ },
3053
+ {
3054
+ "epoch": 5.74,
3055
+ "learning_rate": 0.0001278409090909091,
3056
+ "loss": 1.0519,
3057
+ "step": 508
3058
+ },
3059
+ {
3060
+ "epoch": 5.75,
3061
+ "learning_rate": 0.00012769886363636366,
3062
+ "loss": 1.0566,
3063
+ "step": 509
3064
+ },
3065
+ {
3066
+ "epoch": 5.76,
3067
+ "learning_rate": 0.0001275568181818182,
3068
+ "loss": 1.06,
3069
+ "step": 510
3070
+ },
3071
+ {
3072
+ "epoch": 5.77,
3073
+ "learning_rate": 0.00012741477272727272,
3074
+ "loss": 1.0618,
3075
+ "step": 511
3076
+ },
3077
+ {
3078
+ "epoch": 5.78,
3079
+ "learning_rate": 0.00012727272727272728,
3080
+ "loss": 1.0643,
3081
+ "step": 512
3082
+ },
3083
+ {
3084
+ "epoch": 5.8,
3085
+ "learning_rate": 0.0001271306818181818,
3086
+ "loss": 1.026,
3087
+ "step": 513
3088
+ },
3089
+ {
3090
+ "epoch": 5.81,
3091
+ "learning_rate": 0.00012698863636363637,
3092
+ "loss": 1.0335,
3093
+ "step": 514
3094
+ },
3095
+ {
3096
+ "epoch": 5.82,
3097
+ "learning_rate": 0.00012684659090909093,
3098
+ "loss": 1.0205,
3099
+ "step": 515
3100
+ },
3101
+ {
3102
+ "epoch": 5.83,
3103
+ "learning_rate": 0.00012670454545454546,
3104
+ "loss": 1.0594,
3105
+ "step": 516
3106
+ },
3107
+ {
3108
+ "epoch": 5.84,
3109
+ "learning_rate": 0.0001265625,
3110
+ "loss": 1.0136,
3111
+ "step": 517
3112
+ },
3113
+ {
3114
+ "epoch": 5.85,
3115
+ "learning_rate": 0.00012642045454545455,
3116
+ "loss": 1.0244,
3117
+ "step": 518
3118
+ },
3119
+ {
3120
+ "epoch": 5.86,
3121
+ "learning_rate": 0.00012627840909090908,
3122
+ "loss": 1.0569,
3123
+ "step": 519
3124
+ },
3125
+ {
3126
+ "epoch": 5.87,
3127
+ "learning_rate": 0.00012613636363636364,
3128
+ "loss": 1.0416,
3129
+ "step": 520
3130
+ },
3131
+ {
3132
+ "epoch": 5.89,
3133
+ "learning_rate": 0.0001259943181818182,
3134
+ "loss": 0.9884,
3135
+ "step": 521
3136
+ },
3137
+ {
3138
+ "epoch": 5.9,
3139
+ "learning_rate": 0.00012585227272727273,
3140
+ "loss": 1.0351,
3141
+ "step": 522
3142
+ },
3143
+ {
3144
+ "epoch": 5.91,
3145
+ "learning_rate": 0.00012571022727272726,
3146
+ "loss": 1.0037,
3147
+ "step": 523
3148
+ },
3149
+ {
3150
+ "epoch": 5.92,
3151
+ "learning_rate": 0.00012556818181818182,
3152
+ "loss": 1.0219,
3153
+ "step": 524
3154
+ },
3155
+ {
3156
+ "epoch": 5.93,
3157
+ "learning_rate": 0.00012542613636363635,
3158
+ "loss": 1.0533,
3159
+ "step": 525
3160
+ },
3161
+ {
3162
+ "epoch": 5.94,
3163
+ "learning_rate": 0.0001252840909090909,
3164
+ "loss": 1.0031,
3165
+ "step": 526
3166
+ },
3167
+ {
3168
+ "epoch": 5.95,
3169
+ "learning_rate": 0.00012514204545454547,
3170
+ "loss": 1.0454,
3171
+ "step": 527
3172
+ },
3173
+ {
3174
+ "epoch": 5.97,
3175
+ "learning_rate": 0.000125,
3176
+ "loss": 1.0195,
3177
+ "step": 528
3178
+ },
3179
+ {
3180
+ "epoch": 5.98,
3181
+ "learning_rate": 0.00012485795454545453,
3182
+ "loss": 1.0076,
3183
+ "step": 529
3184
+ },
3185
+ {
3186
+ "epoch": 5.99,
3187
+ "learning_rate": 0.0001247159090909091,
3188
+ "loss": 1.0378,
3189
+ "step": 530
3190
+ },
3191
+ {
3192
+ "epoch": 6.0,
3193
+ "learning_rate": 0.00012457386363636365,
3194
+ "loss": 0.9795,
3195
+ "step": 531
3196
+ },
3197
+ {
3198
+ "epoch": 6.01,
3199
+ "learning_rate": 0.00012443181818181818,
3200
+ "loss": 0.9405,
3201
+ "step": 532
3202
+ },
3203
+ {
3204
+ "epoch": 6.02,
3205
+ "learning_rate": 0.00012428977272727274,
3206
+ "loss": 0.9503,
3207
+ "step": 533
3208
+ },
3209
+ {
3210
+ "epoch": 6.03,
3211
+ "learning_rate": 0.00012414772727272727,
3212
+ "loss": 0.9456,
3213
+ "step": 534
3214
+ },
3215
+ {
3216
+ "epoch": 6.04,
3217
+ "learning_rate": 0.0001240056818181818,
3218
+ "loss": 0.9536,
3219
+ "step": 535
3220
+ },
3221
+ {
3222
+ "epoch": 6.06,
3223
+ "learning_rate": 0.00012386363636363636,
3224
+ "loss": 0.9412,
3225
+ "step": 536
3226
+ },
3227
+ {
3228
+ "epoch": 6.07,
3229
+ "learning_rate": 0.00012372159090909092,
3230
+ "loss": 0.9315,
3231
+ "step": 537
3232
+ },
3233
+ {
3234
+ "epoch": 6.08,
3235
+ "learning_rate": 0.00012357954545454545,
3236
+ "loss": 0.9486,
3237
+ "step": 538
3238
+ },
3239
+ {
3240
+ "epoch": 6.09,
3241
+ "learning_rate": 0.0001234375,
3242
+ "loss": 0.9405,
3243
+ "step": 539
3244
+ },
3245
+ {
3246
+ "epoch": 6.1,
3247
+ "learning_rate": 0.00012329545454545454,
3248
+ "loss": 0.9269,
3249
+ "step": 540
3250
+ },
3251
+ {
3252
+ "epoch": 6.11,
3253
+ "learning_rate": 0.0001231534090909091,
3254
+ "loss": 0.9378,
3255
+ "step": 541
3256
+ },
3257
+ {
3258
+ "epoch": 6.12,
3259
+ "learning_rate": 0.00012301136363636366,
3260
+ "loss": 0.9431,
3261
+ "step": 542
3262
+ },
3263
+ {
3264
+ "epoch": 6.13,
3265
+ "learning_rate": 0.0001228693181818182,
3266
+ "loss": 0.9256,
3267
+ "step": 543
3268
+ },
3269
+ {
3270
+ "epoch": 6.15,
3271
+ "learning_rate": 0.00012272727272727272,
3272
+ "loss": 0.919,
3273
+ "step": 544
3274
+ },
3275
+ {
3276
+ "epoch": 6.16,
3277
+ "learning_rate": 0.00012258522727272728,
3278
+ "loss": 0.9188,
3279
+ "step": 545
3280
+ },
3281
+ {
3282
+ "epoch": 6.17,
3283
+ "learning_rate": 0.00012244318181818181,
3284
+ "loss": 0.9447,
3285
+ "step": 546
3286
+ },
3287
+ {
3288
+ "epoch": 6.18,
3289
+ "learning_rate": 0.00012230113636363637,
3290
+ "loss": 0.9261,
3291
+ "step": 547
3292
+ },
3293
+ {
3294
+ "epoch": 6.19,
3295
+ "learning_rate": 0.00012215909090909093,
3296
+ "loss": 0.9302,
3297
+ "step": 548
3298
+ },
3299
+ {
3300
+ "epoch": 6.2,
3301
+ "learning_rate": 0.00012201704545454546,
3302
+ "loss": 0.9161,
3303
+ "step": 549
3304
+ },
3305
+ {
3306
+ "epoch": 6.21,
3307
+ "learning_rate": 0.00012187500000000001,
3308
+ "loss": 0.9521,
3309
+ "step": 550
3310
+ },
3311
+ {
3312
+ "epoch": 6.22,
3313
+ "learning_rate": 0.00012173295454545455,
3314
+ "loss": 0.9026,
3315
+ "step": 551
3316
+ },
3317
+ {
3318
+ "epoch": 6.24,
3319
+ "learning_rate": 0.00012159090909090908,
3320
+ "loss": 0.9361,
3321
+ "step": 552
3322
+ },
3323
+ {
3324
+ "epoch": 6.25,
3325
+ "learning_rate": 0.00012144886363636366,
3326
+ "loss": 0.8944,
3327
+ "step": 553
3328
+ },
3329
+ {
3330
+ "epoch": 6.26,
3331
+ "learning_rate": 0.00012130681818181819,
3332
+ "loss": 0.895,
3333
+ "step": 554
3334
+ },
3335
+ {
3336
+ "epoch": 6.27,
3337
+ "learning_rate": 0.00012116477272727273,
3338
+ "loss": 0.8956,
3339
+ "step": 555
3340
+ },
3341
+ {
3342
+ "epoch": 6.28,
3343
+ "learning_rate": 0.00012102272727272728,
3344
+ "loss": 0.8998,
3345
+ "step": 556
3346
+ },
3347
+ {
3348
+ "epoch": 6.29,
3349
+ "learning_rate": 0.00012088068181818182,
3350
+ "loss": 0.915,
3351
+ "step": 557
3352
+ },
3353
+ {
3354
+ "epoch": 6.3,
3355
+ "learning_rate": 0.00012073863636363636,
3356
+ "loss": 0.9282,
3357
+ "step": 558
3358
+ },
3359
+ {
3360
+ "epoch": 6.32,
3361
+ "learning_rate": 0.00012059659090909093,
3362
+ "loss": 0.8938,
3363
+ "step": 559
3364
+ },
3365
+ {
3366
+ "epoch": 6.33,
3367
+ "learning_rate": 0.00012045454545454546,
3368
+ "loss": 0.8886,
3369
+ "step": 560
3370
+ },
3371
+ {
3372
+ "epoch": 6.34,
3373
+ "learning_rate": 0.0001203125,
3374
+ "loss": 0.8988,
3375
+ "step": 561
3376
+ },
3377
+ {
3378
+ "epoch": 6.35,
3379
+ "learning_rate": 0.00012017045454545455,
3380
+ "loss": 0.8852,
3381
+ "step": 562
3382
+ },
3383
+ {
3384
+ "epoch": 6.36,
3385
+ "learning_rate": 0.0001200284090909091,
3386
+ "loss": 0.8818,
3387
+ "step": 563
3388
+ },
3389
+ {
3390
+ "epoch": 6.37,
3391
+ "learning_rate": 0.00011988636363636365,
3392
+ "loss": 0.8881,
3393
+ "step": 564
3394
+ },
3395
+ {
3396
+ "epoch": 6.38,
3397
+ "learning_rate": 0.0001197443181818182,
3398
+ "loss": 0.9226,
3399
+ "step": 565
3400
+ },
3401
+ {
3402
+ "epoch": 6.39,
3403
+ "learning_rate": 0.00011960227272727273,
3404
+ "loss": 0.8849,
3405
+ "step": 566
3406
+ },
3407
+ {
3408
+ "epoch": 6.41,
3409
+ "learning_rate": 0.00011946022727272727,
3410
+ "loss": 0.8894,
3411
+ "step": 567
3412
+ },
3413
+ {
3414
+ "epoch": 6.42,
3415
+ "learning_rate": 0.00011931818181818182,
3416
+ "loss": 0.9207,
3417
+ "step": 568
3418
+ },
3419
+ {
3420
+ "epoch": 6.43,
3421
+ "learning_rate": 0.00011917613636363636,
3422
+ "loss": 0.9105,
3423
+ "step": 569
3424
+ },
3425
+ {
3426
+ "epoch": 6.44,
3427
+ "learning_rate": 0.00011903409090909092,
3428
+ "loss": 0.8762,
3429
+ "step": 570
3430
+ },
3431
+ {
3432
+ "epoch": 6.45,
3433
+ "learning_rate": 0.00011889204545454547,
3434
+ "loss": 0.8926,
3435
+ "step": 571
3436
+ },
3437
+ {
3438
+ "epoch": 6.46,
3439
+ "learning_rate": 0.00011875,
3440
+ "loss": 0.8719,
3441
+ "step": 572
3442
+ },
3443
+ {
3444
+ "epoch": 6.47,
3445
+ "learning_rate": 0.00011860795454545454,
3446
+ "loss": 0.9198,
3447
+ "step": 573
3448
+ },
3449
+ {
3450
+ "epoch": 6.48,
3451
+ "learning_rate": 0.00011846590909090909,
3452
+ "loss": 0.8846,
3453
+ "step": 574
3454
+ },
3455
+ {
3456
+ "epoch": 6.5,
3457
+ "learning_rate": 0.00011832386363636365,
3458
+ "loss": 0.8495,
3459
+ "step": 575
3460
+ },
3461
+ {
3462
+ "epoch": 6.51,
3463
+ "learning_rate": 0.0001181818181818182,
3464
+ "loss": 0.8953,
3465
+ "step": 576
3466
+ },
3467
+ {
3468
+ "epoch": 6.52,
3469
+ "learning_rate": 0.00011803977272727274,
3470
+ "loss": 0.8686,
3471
+ "step": 577
3472
+ },
3473
+ {
3474
+ "epoch": 6.53,
3475
+ "learning_rate": 0.00011789772727272727,
3476
+ "loss": 0.8841,
3477
+ "step": 578
3478
+ },
3479
+ {
3480
+ "epoch": 6.54,
3481
+ "learning_rate": 0.00011775568181818182,
3482
+ "loss": 0.8681,
3483
+ "step": 579
3484
+ },
3485
+ {
3486
+ "epoch": 6.55,
3487
+ "learning_rate": 0.00011761363636363636,
3488
+ "loss": 0.8732,
3489
+ "step": 580
3490
+ },
3491
+ {
3492
+ "epoch": 6.56,
3493
+ "learning_rate": 0.00011747159090909092,
3494
+ "loss": 0.8582,
3495
+ "step": 581
3496
+ },
3497
+ {
3498
+ "epoch": 6.58,
3499
+ "learning_rate": 0.00011732954545454546,
3500
+ "loss": 0.8744,
3501
+ "step": 582
3502
+ },
3503
+ {
3504
+ "epoch": 6.59,
3505
+ "learning_rate": 0.00011718750000000001,
3506
+ "loss": 0.8694,
3507
+ "step": 583
3508
+ },
3509
+ {
3510
+ "epoch": 6.6,
3511
+ "learning_rate": 0.00011704545454545454,
3512
+ "loss": 0.8565,
3513
+ "step": 584
3514
+ },
3515
+ {
3516
+ "epoch": 6.61,
3517
+ "learning_rate": 0.00011690340909090909,
3518
+ "loss": 0.8584,
3519
+ "step": 585
3520
+ },
3521
+ {
3522
+ "epoch": 6.62,
3523
+ "learning_rate": 0.00011676136363636366,
3524
+ "loss": 0.8859,
3525
+ "step": 586
3526
+ },
3527
+ {
3528
+ "epoch": 6.63,
3529
+ "learning_rate": 0.00011661931818181819,
3530
+ "loss": 0.8452,
3531
+ "step": 587
3532
+ },
3533
+ {
3534
+ "epoch": 6.64,
3535
+ "learning_rate": 0.00011647727272727273,
3536
+ "loss": 0.8323,
3537
+ "step": 588
3538
+ },
3539
+ {
3540
+ "epoch": 6.65,
3541
+ "learning_rate": 0.00011633522727272728,
3542
+ "loss": 0.8548,
3543
+ "step": 589
3544
+ },
3545
+ {
3546
+ "epoch": 6.67,
3547
+ "learning_rate": 0.00011619318181818181,
3548
+ "loss": 0.8506,
3549
+ "step": 590
3550
+ },
3551
+ {
3552
+ "epoch": 6.68,
3553
+ "learning_rate": 0.00011605113636363636,
3554
+ "loss": 0.8556,
3555
+ "step": 591
3556
+ },
3557
+ {
3558
+ "epoch": 6.69,
3559
+ "learning_rate": 0.00011590909090909093,
3560
+ "loss": 0.8459,
3561
+ "step": 592
3562
+ },
3563
+ {
3564
+ "epoch": 6.7,
3565
+ "learning_rate": 0.00011576704545454546,
3566
+ "loss": 0.8432,
3567
+ "step": 593
3568
+ },
3569
+ {
3570
+ "epoch": 6.71,
3571
+ "learning_rate": 0.000115625,
3572
+ "loss": 0.8645,
3573
+ "step": 594
3574
+ },
3575
+ {
3576
+ "epoch": 6.72,
3577
+ "learning_rate": 0.00011548295454545455,
3578
+ "loss": 0.86,
3579
+ "step": 595
3580
+ },
3581
+ {
3582
+ "epoch": 6.73,
3583
+ "learning_rate": 0.00011534090909090908,
3584
+ "loss": 0.8161,
3585
+ "step": 596
3586
+ },
3587
+ {
3588
+ "epoch": 6.74,
3589
+ "learning_rate": 0.00011519886363636365,
3590
+ "loss": 0.8133,
3591
+ "step": 597
3592
+ },
3593
+ {
3594
+ "epoch": 6.76,
3595
+ "learning_rate": 0.0001150568181818182,
3596
+ "loss": 0.8372,
3597
+ "step": 598
3598
+ },
3599
+ {
3600
+ "epoch": 6.77,
3601
+ "learning_rate": 0.00011491477272727273,
3602
+ "loss": 0.8222,
3603
+ "step": 599
3604
+ },
3605
+ {
3606
+ "epoch": 6.78,
3607
+ "learning_rate": 0.00011477272727272728,
3608
+ "loss": 0.8372,
3609
+ "step": 600
3610
+ },
3611
+ {
3612
+ "epoch": 6.79,
3613
+ "learning_rate": 0.00011463068181818182,
3614
+ "loss": 0.837,
3615
+ "step": 601
3616
+ },
3617
+ {
3618
+ "epoch": 6.8,
3619
+ "learning_rate": 0.00011448863636363637,
3620
+ "loss": 0.8406,
3621
+ "step": 602
3622
+ },
3623
+ {
3624
+ "epoch": 6.81,
3625
+ "learning_rate": 0.00011434659090909092,
3626
+ "loss": 0.836,
3627
+ "step": 603
3628
+ },
3629
+ {
3630
+ "epoch": 6.82,
3631
+ "learning_rate": 0.00011420454545454547,
3632
+ "loss": 0.8476,
3633
+ "step": 604
3634
+ },
3635
+ {
3636
+ "epoch": 6.83,
3637
+ "learning_rate": 0.0001140625,
3638
+ "loss": 0.8368,
3639
+ "step": 605
3640
+ },
3641
+ {
3642
+ "epoch": 6.85,
3643
+ "learning_rate": 0.00011392045454545455,
3644
+ "loss": 0.822,
3645
+ "step": 606
3646
+ },
3647
+ {
3648
+ "epoch": 6.86,
3649
+ "learning_rate": 0.00011377840909090909,
3650
+ "loss": 0.8107,
3651
+ "step": 607
3652
+ },
3653
+ {
3654
+ "epoch": 6.87,
3655
+ "learning_rate": 0.00011363636363636365,
3656
+ "loss": 0.8395,
3657
+ "step": 608
3658
+ },
3659
+ {
3660
+ "epoch": 6.88,
3661
+ "learning_rate": 0.0001134943181818182,
3662
+ "loss": 0.8083,
3663
+ "step": 609
3664
+ },
3665
+ {
3666
+ "epoch": 6.89,
3667
+ "learning_rate": 0.00011335227272727274,
3668
+ "loss": 0.828,
3669
+ "step": 610
3670
+ },
3671
+ {
3672
+ "epoch": 6.9,
3673
+ "learning_rate": 0.00011321022727272727,
3674
+ "loss": 0.8494,
3675
+ "step": 611
3676
+ },
3677
+ {
3678
+ "epoch": 6.91,
3679
+ "learning_rate": 0.00011306818181818182,
3680
+ "loss": 0.8169,
3681
+ "step": 612
3682
+ },
3683
+ {
3684
+ "epoch": 6.93,
3685
+ "learning_rate": 0.00011292613636363636,
3686
+ "loss": 0.8224,
3687
+ "step": 613
3688
+ },
3689
+ {
3690
+ "epoch": 6.94,
3691
+ "learning_rate": 0.00011278409090909092,
3692
+ "loss": 0.8173,
3693
+ "step": 614
3694
+ },
3695
+ {
3696
+ "epoch": 6.95,
3697
+ "learning_rate": 0.00011264204545454547,
3698
+ "loss": 0.7961,
3699
+ "step": 615
3700
+ },
3701
+ {
3702
+ "epoch": 6.96,
3703
+ "learning_rate": 0.00011250000000000001,
3704
+ "loss": 0.7948,
3705
+ "step": 616
3706
+ },
3707
+ {
3708
+ "epoch": 6.97,
3709
+ "learning_rate": 0.00011235795454545454,
3710
+ "loss": 0.7746,
3711
+ "step": 617
3712
+ },
3713
+ {
3714
+ "epoch": 6.98,
3715
+ "learning_rate": 0.00011221590909090909,
3716
+ "loss": 0.8325,
3717
+ "step": 618
3718
+ },
3719
+ {
3720
+ "epoch": 6.99,
3721
+ "learning_rate": 0.00011207386363636365,
3722
+ "loss": 0.8149,
3723
+ "step": 619
3724
+ },
3725
+ {
3726
+ "epoch": 7.0,
3727
+ "learning_rate": 0.00011193181818181819,
3728
+ "loss": 0.7516,
3729
+ "step": 620
3730
+ },
3731
+ {
3732
+ "epoch": 7.02,
3733
+ "learning_rate": 0.00011178977272727274,
3734
+ "loss": 0.7571,
3735
+ "step": 621
3736
+ },
3737
+ {
3738
+ "epoch": 7.03,
3739
+ "learning_rate": 0.00011164772727272728,
3740
+ "loss": 0.7397,
3741
+ "step": 622
3742
+ },
3743
+ {
3744
+ "epoch": 7.04,
3745
+ "learning_rate": 0.00011150568181818181,
3746
+ "loss": 0.761,
3747
+ "step": 623
3748
+ },
3749
+ {
3750
+ "epoch": 7.05,
3751
+ "learning_rate": 0.00011136363636363636,
3752
+ "loss": 0.7783,
3753
+ "step": 624
3754
+ },
3755
+ {
3756
+ "epoch": 7.06,
3757
+ "learning_rate": 0.00011122159090909092,
3758
+ "loss": 0.7571,
3759
+ "step": 625
3760
+ },
3761
+ {
3762
+ "epoch": 7.07,
3763
+ "learning_rate": 0.00011107954545454546,
3764
+ "loss": 0.7628,
3765
+ "step": 626
3766
+ },
3767
+ {
3768
+ "epoch": 7.08,
3769
+ "learning_rate": 0.0001109375,
3770
+ "loss": 0.7561,
3771
+ "step": 627
3772
+ },
3773
+ {
3774
+ "epoch": 7.09,
3775
+ "learning_rate": 0.00011079545454545455,
3776
+ "loss": 0.7432,
3777
+ "step": 628
3778
+ },
3779
+ {
3780
+ "epoch": 7.11,
3781
+ "learning_rate": 0.00011065340909090908,
3782
+ "loss": 0.7245,
3783
+ "step": 629
3784
+ },
3785
+ {
3786
+ "epoch": 7.12,
3787
+ "learning_rate": 0.00011051136363636366,
3788
+ "loss": 0.7279,
3789
+ "step": 630
3790
+ },
3791
+ {
3792
+ "epoch": 7.13,
3793
+ "learning_rate": 0.00011036931818181819,
3794
+ "loss": 0.7347,
3795
+ "step": 631
3796
+ },
3797
+ {
3798
+ "epoch": 7.14,
3799
+ "learning_rate": 0.00011022727272727273,
3800
+ "loss": 0.7427,
3801
+ "step": 632
3802
+ },
3803
+ {
3804
+ "epoch": 7.15,
3805
+ "learning_rate": 0.00011008522727272728,
3806
+ "loss": 0.7339,
3807
+ "step": 633
3808
+ },
3809
+ {
3810
+ "epoch": 7.16,
3811
+ "learning_rate": 0.00010994318181818182,
3812
+ "loss": 0.7375,
3813
+ "step": 634
3814
+ },
3815
+ {
3816
+ "epoch": 7.17,
3817
+ "learning_rate": 0.00010980113636363635,
3818
+ "loss": 0.7182,
3819
+ "step": 635
3820
+ },
3821
+ {
3822
+ "epoch": 7.19,
3823
+ "learning_rate": 0.00010965909090909093,
3824
+ "loss": 0.7452,
3825
+ "step": 636
3826
+ },
3827
+ {
3828
+ "epoch": 7.2,
3829
+ "learning_rate": 0.00010951704545454546,
3830
+ "loss": 0.7565,
3831
+ "step": 637
3832
+ },
3833
+ {
3834
+ "epoch": 7.21,
3835
+ "learning_rate": 0.000109375,
3836
+ "loss": 0.7296,
3837
+ "step": 638
3838
+ },
3839
+ {
3840
+ "epoch": 7.22,
3841
+ "learning_rate": 0.00010923295454545455,
3842
+ "loss": 0.7484,
3843
+ "step": 639
3844
+ },
3845
+ {
3846
+ "epoch": 7.23,
3847
+ "learning_rate": 0.00010909090909090909,
3848
+ "loss": 0.732,
3849
+ "step": 640
3850
+ },
3851
+ {
3852
+ "epoch": 7.24,
3853
+ "learning_rate": 0.00010894886363636365,
3854
+ "loss": 0.7415,
3855
+ "step": 641
3856
+ },
3857
+ {
3858
+ "epoch": 7.25,
3859
+ "learning_rate": 0.0001088068181818182,
3860
+ "loss": 0.7344,
3861
+ "step": 642
3862
+ },
3863
+ {
3864
+ "epoch": 7.26,
3865
+ "learning_rate": 0.00010866477272727274,
3866
+ "loss": 0.7267,
3867
+ "step": 643
3868
+ },
3869
+ {
3870
+ "epoch": 7.28,
3871
+ "learning_rate": 0.00010852272727272727,
3872
+ "loss": 0.7543,
3873
+ "step": 644
3874
+ },
3875
+ {
3876
+ "epoch": 7.29,
3877
+ "learning_rate": 0.00010838068181818182,
3878
+ "loss": 0.7266,
3879
+ "step": 645
3880
+ },
3881
+ {
3882
+ "epoch": 7.3,
3883
+ "learning_rate": 0.00010823863636363636,
3884
+ "loss": 0.7449,
3885
+ "step": 646
3886
+ },
3887
+ {
3888
+ "epoch": 7.31,
3889
+ "learning_rate": 0.00010809659090909092,
3890
+ "loss": 0.7324,
3891
+ "step": 647
3892
+ },
3893
+ {
3894
+ "epoch": 7.32,
3895
+ "learning_rate": 0.00010795454545454547,
3896
+ "loss": 0.7268,
3897
+ "step": 648
3898
+ },
3899
+ {
3900
+ "epoch": 7.33,
3901
+ "learning_rate": 0.00010781250000000001,
3902
+ "loss": 0.7172,
3903
+ "step": 649
3904
+ },
3905
+ {
3906
+ "epoch": 7.34,
3907
+ "learning_rate": 0.00010767045454545454,
3908
+ "loss": 0.7169,
3909
+ "step": 650
3910
+ },
3911
+ {
3912
+ "epoch": 7.35,
3913
+ "learning_rate": 0.00010752840909090909,
3914
+ "loss": 0.7194,
3915
+ "step": 651
3916
+ },
3917
+ {
3918
+ "epoch": 7.37,
3919
+ "learning_rate": 0.00010738636363636365,
3920
+ "loss": 0.7223,
3921
+ "step": 652
3922
+ },
3923
+ {
3924
+ "epoch": 7.38,
3925
+ "learning_rate": 0.00010724431818181819,
3926
+ "loss": 0.7158,
3927
+ "step": 653
3928
+ },
3929
+ {
3930
+ "epoch": 7.39,
3931
+ "learning_rate": 0.00010710227272727274,
3932
+ "loss": 0.7122,
3933
+ "step": 654
3934
+ },
3935
+ {
3936
+ "epoch": 7.4,
3937
+ "learning_rate": 0.00010696022727272728,
3938
+ "loss": 0.7225,
3939
+ "step": 655
3940
+ },
3941
+ {
3942
+ "epoch": 7.41,
3943
+ "learning_rate": 0.00010681818181818181,
3944
+ "loss": 0.7102,
3945
+ "step": 656
3946
+ },
3947
+ {
3948
+ "epoch": 7.42,
3949
+ "learning_rate": 0.00010667613636363636,
3950
+ "loss": 0.7251,
3951
+ "step": 657
3952
+ },
3953
+ {
3954
+ "epoch": 7.43,
3955
+ "learning_rate": 0.00010653409090909092,
3956
+ "loss": 0.7191,
3957
+ "step": 658
3958
+ },
3959
+ {
3960
+ "epoch": 7.45,
3961
+ "learning_rate": 0.00010639204545454546,
3962
+ "loss": 0.7015,
3963
+ "step": 659
3964
+ },
3965
+ {
3966
+ "epoch": 7.46,
3967
+ "learning_rate": 0.00010625000000000001,
3968
+ "loss": 0.693,
3969
+ "step": 660
3970
+ },
3971
+ {
3972
+ "epoch": 7.47,
3973
+ "learning_rate": 0.00010610795454545455,
3974
+ "loss": 0.7039,
3975
+ "step": 661
3976
+ },
3977
+ {
3978
+ "epoch": 7.48,
3979
+ "learning_rate": 0.00010596590909090908,
3980
+ "loss": 0.7305,
3981
+ "step": 662
3982
+ },
3983
+ {
3984
+ "epoch": 7.49,
3985
+ "learning_rate": 0.00010582386363636366,
3986
+ "loss": 0.6978,
3987
+ "step": 663
3988
+ },
3989
+ {
3990
+ "epoch": 7.5,
3991
+ "learning_rate": 0.00010568181818181819,
3992
+ "loss": 0.7219,
3993
+ "step": 664
3994
+ },
3995
+ {
3996
+ "epoch": 7.51,
3997
+ "learning_rate": 0.00010553977272727273,
3998
+ "loss": 0.7199,
3999
+ "step": 665
4000
+ },
4001
+ {
4002
+ "epoch": 7.52,
4003
+ "learning_rate": 0.00010539772727272728,
4004
+ "loss": 0.6979,
4005
+ "step": 666
4006
+ },
4007
+ {
4008
+ "epoch": 7.54,
4009
+ "learning_rate": 0.00010525568181818182,
4010
+ "loss": 0.7058,
4011
+ "step": 667
4012
+ },
4013
+ {
4014
+ "epoch": 7.55,
4015
+ "learning_rate": 0.00010511363636363635,
4016
+ "loss": 0.6994,
4017
+ "step": 668
4018
+ },
4019
+ {
4020
+ "epoch": 7.56,
4021
+ "learning_rate": 0.00010497159090909093,
4022
+ "loss": 0.7141,
4023
+ "step": 669
4024
+ },
4025
+ {
4026
+ "epoch": 7.57,
4027
+ "learning_rate": 0.00010482954545454546,
4028
+ "loss": 0.7092,
4029
+ "step": 670
4030
+ },
4031
+ {
4032
+ "epoch": 7.58,
4033
+ "learning_rate": 0.0001046875,
4034
+ "loss": 0.7059,
4035
+ "step": 671
4036
+ },
4037
+ {
4038
+ "epoch": 7.59,
4039
+ "learning_rate": 0.00010454545454545455,
4040
+ "loss": 0.6904,
4041
+ "step": 672
4042
+ },
4043
+ {
4044
+ "epoch": 7.6,
4045
+ "learning_rate": 0.0001044034090909091,
4046
+ "loss": 0.7115,
4047
+ "step": 673
4048
+ },
4049
+ {
4050
+ "epoch": 7.61,
4051
+ "learning_rate": 0.00010426136363636365,
4052
+ "loss": 0.7254,
4053
+ "step": 674
4054
+ },
4055
+ {
4056
+ "epoch": 7.63,
4057
+ "learning_rate": 0.0001041193181818182,
4058
+ "loss": 0.7181,
4059
+ "step": 675
4060
+ },
4061
+ {
4062
+ "epoch": 7.64,
4063
+ "learning_rate": 0.00010397727272727273,
4064
+ "loss": 0.6867,
4065
+ "step": 676
4066
+ },
4067
+ {
4068
+ "epoch": 7.65,
4069
+ "learning_rate": 0.00010383522727272727,
4070
+ "loss": 0.6917,
4071
+ "step": 677
4072
+ },
4073
+ {
4074
+ "epoch": 7.66,
4075
+ "learning_rate": 0.00010369318181818182,
4076
+ "loss": 0.6908,
4077
+ "step": 678
4078
+ },
4079
+ {
4080
+ "epoch": 7.67,
4081
+ "learning_rate": 0.00010355113636363636,
4082
+ "loss": 0.6871,
4083
+ "step": 679
4084
+ },
4085
+ {
4086
+ "epoch": 7.68,
4087
+ "learning_rate": 0.00010340909090909092,
4088
+ "loss": 0.682,
4089
+ "step": 680
4090
+ },
4091
+ {
4092
+ "epoch": 7.69,
4093
+ "learning_rate": 0.00010326704545454547,
4094
+ "loss": 0.6737,
4095
+ "step": 681
4096
+ },
4097
+ {
4098
+ "epoch": 7.7,
4099
+ "learning_rate": 0.000103125,
4100
+ "loss": 0.7023,
4101
+ "step": 682
4102
+ },
4103
+ {
4104
+ "epoch": 7.72,
4105
+ "learning_rate": 0.00010298295454545454,
4106
+ "loss": 0.7079,
4107
+ "step": 683
4108
+ },
4109
+ {
4110
+ "epoch": 7.73,
4111
+ "learning_rate": 0.00010284090909090909,
4112
+ "loss": 0.6954,
4113
+ "step": 684
4114
+ },
4115
+ {
4116
+ "epoch": 7.74,
4117
+ "learning_rate": 0.00010269886363636365,
4118
+ "loss": 0.6834,
4119
+ "step": 685
4120
+ },
4121
+ {
4122
+ "epoch": 7.75,
4123
+ "learning_rate": 0.0001025568181818182,
4124
+ "loss": 0.6706,
4125
+ "step": 686
4126
+ },
4127
+ {
4128
+ "epoch": 7.76,
4129
+ "learning_rate": 0.00010241477272727274,
4130
+ "loss": 0.6706,
4131
+ "step": 687
4132
+ },
4133
+ {
4134
+ "epoch": 7.77,
4135
+ "learning_rate": 0.00010227272727272727,
4136
+ "loss": 0.681,
4137
+ "step": 688
4138
+ },
4139
+ {
4140
+ "epoch": 7.78,
4141
+ "learning_rate": 0.00010213068181818182,
4142
+ "loss": 0.6853,
4143
+ "step": 689
4144
+ },
4145
+ {
4146
+ "epoch": 7.8,
4147
+ "learning_rate": 0.00010198863636363636,
4148
+ "loss": 0.6772,
4149
+ "step": 690
4150
+ },
4151
+ {
4152
+ "epoch": 7.81,
4153
+ "learning_rate": 0.00010184659090909092,
4154
+ "loss": 0.6635,
4155
+ "step": 691
4156
+ },
4157
+ {
4158
+ "epoch": 7.82,
4159
+ "learning_rate": 0.00010170454545454546,
4160
+ "loss": 0.6712,
4161
+ "step": 692
4162
+ },
4163
+ {
4164
+ "epoch": 7.83,
4165
+ "learning_rate": 0.00010156250000000001,
4166
+ "loss": 0.6884,
4167
+ "step": 693
4168
+ },
4169
+ {
4170
+ "epoch": 7.84,
4171
+ "learning_rate": 0.00010142045454545454,
4172
+ "loss": 0.6641,
4173
+ "step": 694
4174
+ },
4175
+ {
4176
+ "epoch": 7.85,
4177
+ "learning_rate": 0.00010127840909090909,
4178
+ "loss": 0.6838,
4179
+ "step": 695
4180
+ },
4181
+ {
4182
+ "epoch": 7.86,
4183
+ "learning_rate": 0.00010113636363636366,
4184
+ "loss": 0.675,
4185
+ "step": 696
4186
+ },
4187
+ {
4188
+ "epoch": 7.87,
4189
+ "learning_rate": 0.00010099431818181819,
4190
+ "loss": 0.6626,
4191
+ "step": 697
4192
+ },
4193
+ {
4194
+ "epoch": 7.89,
4195
+ "learning_rate": 0.00010085227272727273,
4196
+ "loss": 0.6605,
4197
+ "step": 698
4198
+ },
4199
+ {
4200
+ "epoch": 7.9,
4201
+ "learning_rate": 0.00010071022727272728,
4202
+ "loss": 0.6777,
4203
+ "step": 699
4204
+ },
4205
+ {
4206
+ "epoch": 7.91,
4207
+ "learning_rate": 0.00010056818181818181,
4208
+ "loss": 0.6347,
4209
+ "step": 700
4210
  }
4211
  ],
4212
  "logging_steps": 1,
4213
  "max_steps": 1408,
4214
  "num_train_epochs": 16,
4215
  "save_steps": 100,
4216
+ "total_flos": 9.550264840009421e+17,
4217
  "trial_name": null,
4218
  "trial_params": null
4219
  }