JorgeDeC committed on
Commit f128150
1 Parent(s): f9de91c

Upload folder using huggingface_hub

adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:fd7280b72137a3f45684f0a4365181f6a4e76516dc937372728ffd427374896b
+ oid sha256:3b82ba4c676b3b5446a0c3c0895e6170fde811e7522a584fefd422d795d05e4d
  size 83946192
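
The binary weights themselves are stored in Git LFS, so the diff above only touches the three-line pointer file (spec version, sha256 oid, byte size); the same pattern repeats for the other checkpoint files below. A minimal sketch, assuming hypothetical local paths for the pointer text and the downloaded payload, of checking a payload against the oid and size recorded in such a pointer:

import hashlib
from pathlib import Path

def parse_lfs_pointer(pointer_path: Path) -> dict:
    # Parse the three "key value" lines of a Git LFS pointer like the one above.
    fields = dict(line.split(" ", 1) for line in pointer_path.read_text().splitlines() if line.strip())
    return {"oid": fields["oid"].removeprefix("sha256:"), "size": int(fields["size"])}

def matches_pointer(pointer_path: Path, payload_path: Path) -> bool:
    # True if the payload's sha256 digest and byte size match the pointer.
    expected = parse_lfs_pointer(pointer_path)
    digest = hashlib.sha256()
    with payload_path.open("rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected["oid"] and payload_path.stat().st_size == expected["size"]

# Hypothetical file names; point these at wherever the pointer and payload live locally.
# print(matches_pointer(Path("adapter_model.safetensors.pointer"), Path("adapter_model.safetensors")))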
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:f39105b02b5a28736f00337347099a101737aeeff1bee35761b81446d63727b7
+ oid sha256:b6255ec5b843f16274c3a630ceda1999d9a6ff2db1125db8a13388a7d838f220
  size 168150290
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:aa9292ce99046a75ac58c8e68f29aec19b32f8b2c2b1414bfaa45151498cf09a
+ oid sha256:44ab34006e4ff8e3ddb1b3e0970e22b7afa1b47af9f1338b5e8a38648238a8fe
  size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:7a89103c425bccf7ac0286a5e2c396ce22a8dc4396452a8ba49448cd162c1206
+ oid sha256:e97643a2c9855ef59e9a06836cda3285ce9299fbc4d864a30671a903ea3632d3
  size 1064
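
Together with trainer_state.json below, adapter_model.safetensors, optimizer.pt, rng_state.pth, and scheduler.pt are the usual contents of a Hugging Face Trainer checkpoint folder, so the run can normally be resumed from this state instead of restarting from step 0. A minimal sketch, assuming a hypothetical checkpoint directory name and a trainer object already configured with the original model, data, and TrainingArguments:

# "trainer" is assumed to be an already-built transformers.Trainer (or a subclass such
# as SFTTrainer); resume_from_checkpoint restores the optimizer, scheduler, RNG state,
# and global step saved in the files shown in this commit.
trainer.train(resume_from_checkpoint="checkpoint-2300")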
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.3118271586606281,
+ "epoch": 0.34152498329497366,
  "eval_steps": 500,
- "global_step": 2100,
+ "global_step": 2300,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -2954,6 +2954,286 @@
  "learning_rate": 0.0001738967827178286,
  "loss": 0.9547,
  "step": 2100
+ },
+ {
+ "epoch": 0.31,
+ "grad_norm": 0.333984375,
+ "learning_rate": 0.00017372189614169947,
+ "loss": 0.966,
+ "step": 2105
+ },
+ {
+ "epoch": 0.31,
+ "grad_norm": 0.349609375,
+ "learning_rate": 0.00017354651423998733,
+ "loss": 0.9754,
+ "step": 2110
+ },
+ {
+ "epoch": 0.31,
+ "grad_norm": 0.341796875,
+ "learning_rate": 0.00017337063819105496,
+ "loss": 0.928,
+ "step": 2115
+ },
+ {
+ "epoch": 0.31,
+ "grad_norm": 0.337890625,
+ "learning_rate": 0.00017319426917658537,
+ "loss": 0.9721,
+ "step": 2120
+ },
+ {
+ "epoch": 0.32,
+ "grad_norm": 0.3515625,
+ "learning_rate": 0.00017301740838157362,
+ "loss": 0.9716,
+ "step": 2125
+ },
+ {
+ "epoch": 0.32,
+ "grad_norm": 0.34375,
+ "learning_rate": 0.00017284005699431896,
+ "loss": 0.9606,
+ "step": 2130
+ },
+ {
+ "epoch": 0.32,
+ "grad_norm": 0.341796875,
+ "learning_rate": 0.000172662216206417,
+ "loss": 0.9619,
+ "step": 2135
+ },
+ {
+ "epoch": 0.32,
+ "grad_norm": 0.3359375,
+ "learning_rate": 0.00017248388721275129,
+ "loss": 0.9236,
+ "step": 2140
+ },
+ {
+ "epoch": 0.32,
+ "grad_norm": 0.333984375,
+ "learning_rate": 0.00017230507121148575,
+ "loss": 0.9592,
+ "step": 2145
+ },
+ {
+ "epoch": 0.32,
+ "grad_norm": 0.3359375,
+ "learning_rate": 0.00017212576940405647,
+ "loss": 0.9507,
+ "step": 2150
+ },
+ {
+ "epoch": 0.32,
+ "grad_norm": 0.369140625,
+ "learning_rate": 0.00017194598299516338,
+ "loss": 0.9622,
+ "step": 2155
+ },
+ {
+ "epoch": 0.32,
+ "grad_norm": 0.349609375,
+ "learning_rate": 0.00017176571319276257,
+ "loss": 0.9527,
+ "step": 2160
+ },
+ {
+ "epoch": 0.32,
+ "grad_norm": 0.330078125,
+ "learning_rate": 0.00017158496120805788,
+ "loss": 0.9426,
+ "step": 2165
+ },
+ {
+ "epoch": 0.32,
+ "grad_norm": 0.349609375,
+ "learning_rate": 0.00017140372825549284,
+ "loss": 0.9517,
+ "step": 2170
+ },
+ {
+ "epoch": 0.32,
+ "grad_norm": 0.33984375,
+ "learning_rate": 0.00017122201555274261,
+ "loss": 0.9285,
+ "step": 2175
+ },
+ {
+ "epoch": 0.32,
+ "grad_norm": 0.33984375,
+ "learning_rate": 0.00017103982432070563,
+ "loss": 0.9671,
+ "step": 2180
+ },
+ {
+ "epoch": 0.32,
+ "grad_norm": 0.34375,
+ "learning_rate": 0.00017085715578349557,
+ "loss": 0.9375,
+ "step": 2185
+ },
+ {
+ "epoch": 0.33,
+ "grad_norm": 0.333984375,
+ "learning_rate": 0.00017067401116843296,
+ "loss": 0.954,
+ "step": 2190
+ },
+ {
+ "epoch": 0.33,
+ "grad_norm": 0.345703125,
+ "learning_rate": 0.0001704903917060371,
+ "loss": 0.9486,
+ "step": 2195
+ },
+ {
+ "epoch": 0.33,
+ "grad_norm": 0.337890625,
+ "learning_rate": 0.00017030629863001764,
+ "loss": 0.9823,
+ "step": 2200
+ },
+ {
+ "epoch": 0.33,
+ "grad_norm": 0.330078125,
+ "learning_rate": 0.0001701217331772664,
+ "loss": 0.9401,
+ "step": 2205
+ },
+ {
+ "epoch": 0.33,
+ "grad_norm": 0.34375,
+ "learning_rate": 0.00016993669658784904,
+ "loss": 0.9571,
+ "step": 2210
+ },
+ {
+ "epoch": 0.33,
+ "grad_norm": 0.328125,
+ "learning_rate": 0.0001697511901049967,
+ "loss": 0.9801,
+ "step": 2215
+ },
+ {
+ "epoch": 0.33,
+ "grad_norm": 0.3359375,
+ "learning_rate": 0.00016956521497509764,
+ "loss": 0.943,
+ "step": 2220
+ },
+ {
+ "epoch": 0.33,
+ "grad_norm": 0.34375,
+ "learning_rate": 0.0001693787724476889,
+ "loss": 0.973,
+ "step": 2225
+ },
+ {
+ "epoch": 0.33,
+ "grad_norm": 0.345703125,
+ "learning_rate": 0.00016919186377544788,
+ "loss": 0.9662,
+ "step": 2230
+ },
+ {
+ "epoch": 0.33,
+ "grad_norm": 0.353515625,
+ "learning_rate": 0.00016900449021418394,
+ "loss": 0.9618,
+ "step": 2235
+ },
+ {
+ "epoch": 0.33,
+ "grad_norm": 0.33984375,
+ "learning_rate": 0.00016881665302282995,
+ "loss": 0.9454,
+ "step": 2240
+ },
+ {
+ "epoch": 0.33,
+ "grad_norm": 0.3359375,
+ "learning_rate": 0.00016862835346343385,
+ "loss": 0.9414,
+ "step": 2245
+ },
+ {
+ "epoch": 0.33,
+ "grad_norm": 0.349609375,
+ "learning_rate": 0.00016843959280115015,
+ "loss": 0.9437,
+ "step": 2250
+ },
+ {
+ "epoch": 0.33,
+ "grad_norm": 0.353515625,
+ "learning_rate": 0.00016825037230423139,
+ "loss": 0.9761,
+ "step": 2255
+ },
+ {
+ "epoch": 0.34,
+ "grad_norm": 0.345703125,
+ "learning_rate": 0.00016806069324401977,
+ "loss": 0.9458,
+ "step": 2260
+ },
+ {
+ "epoch": 0.34,
+ "grad_norm": 0.33203125,
+ "learning_rate": 0.00016787055689493837,
+ "loss": 0.9676,
+ "step": 2265
+ },
+ {
+ "epoch": 0.34,
+ "grad_norm": 0.341796875,
+ "learning_rate": 0.00016767996453448283,
+ "loss": 0.9729,
+ "step": 2270
+ },
+ {
+ "epoch": 0.34,
+ "grad_norm": 0.33984375,
+ "learning_rate": 0.00016748891744321263,
+ "loss": 0.9613,
+ "step": 2275
+ },
+ {
+ "epoch": 0.34,
+ "grad_norm": 0.34765625,
+ "learning_rate": 0.0001672974169047425,
+ "loss": 0.9453,
+ "step": 2280
+ },
+ {
+ "epoch": 0.34,
+ "grad_norm": 0.33203125,
+ "learning_rate": 0.00016710546420573377,
+ "loss": 0.9538,
+ "step": 2285
+ },
+ {
+ "epoch": 0.34,
+ "grad_norm": 0.349609375,
+ "learning_rate": 0.00016691306063588583,
+ "loss": 0.9484,
+ "step": 2290
+ },
+ {
+ "epoch": 0.34,
+ "grad_norm": 0.3359375,
+ "learning_rate": 0.0001667202074879274,
+ "loss": 0.9501,
+ "step": 2295
+ },
+ {
+ "epoch": 0.34,
+ "grad_norm": 0.3515625,
+ "learning_rate": 0.00016652690605760775,
+ "loss": 0.9739,
+ "step": 2300
  }
  ],
  "logging_steps": 5,
@@ -2961,7 +3241,7 @@
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
- "total_flos": 2.953144006936101e+18,
+ "total_flos": 3.2343958171460567e+18,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null