CocoRoF commited on
Commit
766f09e
·
verified ·
1 Parent(s): 3f5e000

Training in progress, step 4250, checkpoint

Browse files
last-checkpoint/2_Dense/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d7fba931953e737082f85cb7f165d1d9d36de76853c1f92e1adb93636e064009
3
  size 3149984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a59bfc4cd3767747c580ac670f0d6c48bfe9e402250467b22e693fdfc61b625d
3
  size 3149984
last-checkpoint/README.md CHANGED
@@ -60,34 +60,34 @@ model-index:
60
  type: sts_dev
61
  metrics:
62
  - type: pearson_cosine
63
- value: 0.8220285778407846
64
  name: Pearson Cosine
65
  - type: spearman_cosine
66
- value: 0.8276471334482826
67
  name: Spearman Cosine
68
  - type: pearson_euclidean
69
- value: 0.7933532583617332
70
  name: Pearson Euclidean
71
  - type: spearman_euclidean
72
- value: 0.7981249234213611
73
  name: Spearman Euclidean
74
  - type: pearson_manhattan
75
- value: 0.7941338912825391
76
  name: Pearson Manhattan
77
  - type: spearman_manhattan
78
- value: 0.7997185742063436
79
  name: Spearman Manhattan
80
  - type: pearson_dot
81
- value: 0.7022254885739367
82
  name: Pearson Dot
83
  - type: spearman_dot
84
- value: 0.6857559655167198
85
  name: Spearman Dot
86
  - type: pearson_max
87
- value: 0.8220285778407846
88
  name: Pearson Max
89
  - type: spearman_max
90
- value: 0.8276471334482826
91
  name: Spearman Max
92
  ---
93
 
@@ -191,16 +191,16 @@ You can finetune this model on your own dataset.
191
 
192
  | Metric | Value |
193
  |:-------------------|:-----------|
194
- | pearson_cosine | 0.822 |
195
- | spearman_cosine | 0.8276 |
196
- | pearson_euclidean | 0.7934 |
197
- | spearman_euclidean | 0.7981 |
198
- | pearson_manhattan | 0.7941 |
199
  | spearman_manhattan | 0.7997 |
200
- | pearson_dot | 0.7022 |
201
- | spearman_dot | 0.6858 |
202
- | pearson_max | 0.822 |
203
- | **spearman_max** | **0.8276** |
204
 
205
  <!--
206
  ## Bias, Risks and Limitations
@@ -805,6 +805,31 @@ You can finetune this model on your own dataset.
805
  | 9.0455 | 3980 | 0.0638 | - | - |
806
  | 9.0683 | 3990 | 0.0625 | - | - |
807
  | 9.0911 | 4000 | 0.0665 | 0.0414 | 0.8276 |
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
808
 
809
  </details>
810
 
 
60
  type: sts_dev
61
  metrics:
62
  - type: pearson_cosine
63
+ value: 0.8220874775898197
64
  name: Pearson Cosine
65
  - type: spearman_cosine
66
+ value: 0.8282368218808581
67
  name: Spearman Cosine
68
  - type: pearson_euclidean
69
+ value: 0.7929031352092236
70
  name: Pearson Euclidean
71
  - type: spearman_euclidean
72
+ value: 0.7979913252239026
73
  name: Spearman Euclidean
74
  - type: pearson_manhattan
75
+ value: 0.7936882861676204
76
  name: Pearson Manhattan
77
  - type: spearman_manhattan
78
+ value: 0.7996541111809876
79
  name: Spearman Manhattan
80
  - type: pearson_dot
81
+ value: 0.7010536213435227
82
  name: Pearson Dot
83
  - type: spearman_dot
84
+ value: 0.6844746263331734
85
  name: Spearman Dot
86
  - type: pearson_max
87
+ value: 0.8220874775898197
88
  name: Pearson Max
89
  - type: spearman_max
90
+ value: 0.8282368218808581
91
  name: Spearman Max
92
  ---
93
 
 
191
 
192
  | Metric | Value |
193
  |:-------------------|:-----------|
194
+ | pearson_cosine | 0.8221 |
195
+ | spearman_cosine | 0.8282 |
196
+ | pearson_euclidean | 0.7929 |
197
+ | spearman_euclidean | 0.798 |
198
+ | pearson_manhattan | 0.7937 |
199
  | spearman_manhattan | 0.7997 |
200
+ | pearson_dot | 0.7011 |
201
+ | spearman_dot | 0.6845 |
202
+ | pearson_max | 0.8221 |
203
+ | **spearman_max** | **0.8282** |
204
 
205
  <!--
206
  ## Bias, Risks and Limitations
 
805
  | 9.0455 | 3980 | 0.0638 | - | - |
806
  | 9.0683 | 3990 | 0.0625 | - | - |
807
  | 9.0911 | 4000 | 0.0665 | 0.0414 | 0.8276 |
808
+ | 9.1138 | 4010 | 0.0624 | - | - |
809
+ | 9.1366 | 4020 | 0.0621 | - | - |
810
+ | 9.1593 | 4030 | 0.0648 | - | - |
811
+ | 9.1821 | 4040 | 0.0622 | - | - |
812
+ | 9.2049 | 4050 | 0.0635 | - | - |
813
+ | 9.2276 | 4060 | 0.061 | - | - |
814
+ | 9.2504 | 4070 | 0.0602 | - | - |
815
+ | 9.2732 | 4080 | 0.0613 | - | - |
816
+ | 9.2959 | 4090 | 0.0604 | - | - |
817
+ | 9.3187 | 4100 | 0.0623 | - | - |
818
+ | 9.3414 | 4110 | 0.0641 | - | - |
819
+ | 9.3642 | 4120 | 0.0635 | - | - |
820
+ | 9.3870 | 4130 | 0.0608 | - | - |
821
+ | 9.4097 | 4140 | 0.0611 | - | - |
822
+ | 9.4325 | 4150 | 0.0607 | - | - |
823
+ | 9.4553 | 4160 | 0.0631 | - | - |
824
+ | 9.4780 | 4170 | 0.0618 | - | - |
825
+ | 9.5008 | 4180 | 0.0609 | - | - |
826
+ | 9.5235 | 4190 | 0.0613 | - | - |
827
+ | 9.5463 | 4200 | 0.0606 | - | - |
828
+ | 9.5691 | 4210 | 0.0595 | - | - |
829
+ | 9.5918 | 4220 | 0.0609 | - | - |
830
+ | 9.6146 | 4230 | 0.061 | - | - |
831
+ | 9.6374 | 4240 | 0.0616 | - | - |
832
+ | 9.6601 | 4250 | 0.0613 | 0.0418 | 0.8282 |
833
 
834
  </details>
835
 
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ca3c3bf8f87beab47d5a88d31e1b9bc66bde4d8a6aa0a7db8a8e23683e25777e
3
  size 735216376
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66daefb719ad12215c08363cf07f604053315b28142583dcc866c834327eca3f
3
  size 735216376
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ef993bcc74f9e91de236fcfb2956ae2eacb056d0348cc87b69e981352b953f7d
3
  size 1476823354
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:796b01c86922133da7b4702097cf156006e03e00f92d857ba3d2713e738810f2
3
  size 1476823354
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:371c835359601369002ac0df9d2e47a8e77df500ebbf0208a4d9c71218241989
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a734f96fdbf1b2b95f5a896a45ac06db48cebeba2dcddafafaf5c42500c1f8ba
3
  size 15920
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f24f7823ebb746566b89158a8e9007cbf065595314e547da3f5b253f5d6fb74
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4179c3a5721b96913d5982f5899f5a8134fa075bf224efaaeb574cd846c07bbf
3
  size 15920
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:37bb3351ac8c7870230a6937af6398f88591c9eb1caf43b5149e37c552a570b2
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55f5c2af0a83fa2c2de4c1c2429806c3814277f1d706282352eeb894c157a06f
3
  size 15920
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eff1d37220ad38a5d635b06245abf11ad851da51e75384cbc4aa9966c0fa2932
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cca4516c2bb67a2a1691e38c770742a680a94828f839610d2ffa43419db4feba
3
  size 15920
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2b080d04a24d759d6724428633587a334c9ccd3796f670ea12513f73e504bc81
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5cb45b1fc4043ea836f442423485d57eb9667bd00787e4c2417e1a25ab32a480
3
  size 15920
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b5688abb5f99504d4856749cc7227131c7fc12204ecd8d80bc25dac63e630d98
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ec528339d849d7328578e52ee72da1edaa069275122e1908976fd336632067e
3
  size 15920
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a60b5c4cf8021e8b74d32d312519584aafee70c666b95e3799a07697027695ec
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18f45c589b9a8c923ac9908849cfe569a36e99bfb6aaf6913e76e736935b42a2
3
  size 15920
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a3fc79bb0d7d88c4bdb60e9b0f3298f97d9fbd1739fa04dd76de01b87a52b25a
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7571acf39c17540211b353a65ed07e95044bb1a68001f53b77c1f7bb674917b
3
  size 15920
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4789e0b1853e2658dba3c227017d8d2b1699b26df401842a8229cea3d0a058ea
3
  size 1000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:451fe1a5f62f2f6eed0b67a70a5f8f0f813e8a38e58c106c948a6c2c9e79f8ef
3
  size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 9.091051358656992,
5
  "eval_steps": 250,
6
- "global_step": 4000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3095,6 +3095,199 @@
3095
  "eval_sts_dev_spearman_manhattan": 0.7997185742063436,
3096
  "eval_sts_dev_spearman_max": 0.8276471334482826,
3097
  "step": 4000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3098
  }
3099
  ],
3100
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 9.660122350263196,
5
  "eval_steps": 250,
6
+ "global_step": 4250,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3095
  "eval_sts_dev_spearman_manhattan": 0.7997185742063436,
3096
  "eval_sts_dev_spearman_max": 0.8276471334482826,
3097
  "step": 4000
3098
+ },
3099
+ {
3100
+ "epoch": 9.11381419832124,
3101
+ "grad_norm": 0.23453885316848755,
3102
+ "learning_rate": 2.852164017212561e-06,
3103
+ "loss": 0.0624,
3104
+ "step": 4010
3105
+ },
3106
+ {
3107
+ "epoch": 9.13657703798549,
3108
+ "grad_norm": 0.22881363332271576,
3109
+ "learning_rate": 2.859276645684413e-06,
3110
+ "loss": 0.0621,
3111
+ "step": 4020
3112
+ },
3113
+ {
3114
+ "epoch": 9.159339877649737,
3115
+ "grad_norm": 0.21634767949581146,
3116
+ "learning_rate": 2.866389274156265e-06,
3117
+ "loss": 0.0648,
3118
+ "step": 4030
3119
+ },
3120
+ {
3121
+ "epoch": 9.182102717313985,
3122
+ "grad_norm": 0.2653968334197998,
3123
+ "learning_rate": 2.8735019026281164e-06,
3124
+ "loss": 0.0622,
3125
+ "step": 4040
3126
+ },
3127
+ {
3128
+ "epoch": 9.204865556978232,
3129
+ "grad_norm": 0.2806706726551056,
3130
+ "learning_rate": 2.8806145310999684e-06,
3131
+ "loss": 0.0635,
3132
+ "step": 4050
3133
+ },
3134
+ {
3135
+ "epoch": 9.227628396642482,
3136
+ "grad_norm": 0.25029635429382324,
3137
+ "learning_rate": 2.88772715957182e-06,
3138
+ "loss": 0.061,
3139
+ "step": 4060
3140
+ },
3141
+ {
3142
+ "epoch": 9.25039123630673,
3143
+ "grad_norm": 0.24983397126197815,
3144
+ "learning_rate": 2.894839788043672e-06,
3145
+ "loss": 0.0602,
3146
+ "step": 4070
3147
+ },
3148
+ {
3149
+ "epoch": 9.273154075970977,
3150
+ "grad_norm": 0.21316730976104736,
3151
+ "learning_rate": 2.9019524165155234e-06,
3152
+ "loss": 0.0613,
3153
+ "step": 4080
3154
+ },
3155
+ {
3156
+ "epoch": 9.295916915635225,
3157
+ "grad_norm": 0.21870028972625732,
3158
+ "learning_rate": 2.9090650449873754e-06,
3159
+ "loss": 0.0604,
3160
+ "step": 4090
3161
+ },
3162
+ {
3163
+ "epoch": 9.318679755299474,
3164
+ "grad_norm": 0.21702495217323303,
3165
+ "learning_rate": 2.9161776734592273e-06,
3166
+ "loss": 0.0623,
3167
+ "step": 4100
3168
+ },
3169
+ {
3170
+ "epoch": 9.341442594963722,
3171
+ "grad_norm": 0.22777798771858215,
3172
+ "learning_rate": 2.923290301931079e-06,
3173
+ "loss": 0.0641,
3174
+ "step": 4110
3175
+ },
3176
+ {
3177
+ "epoch": 9.36420543462797,
3178
+ "grad_norm": 0.2656283378601074,
3179
+ "learning_rate": 2.930402930402931e-06,
3180
+ "loss": 0.0635,
3181
+ "step": 4120
3182
+ },
3183
+ {
3184
+ "epoch": 9.386968274292219,
3185
+ "grad_norm": 0.23527038097381592,
3186
+ "learning_rate": 2.9375155588747823e-06,
3187
+ "loss": 0.0608,
3188
+ "step": 4130
3189
+ },
3190
+ {
3191
+ "epoch": 9.409731113956466,
3192
+ "grad_norm": 0.21856476366519928,
3193
+ "learning_rate": 2.9446281873466343e-06,
3194
+ "loss": 0.0611,
3195
+ "step": 4140
3196
+ },
3197
+ {
3198
+ "epoch": 9.432493953620714,
3199
+ "grad_norm": 0.23688729107379913,
3200
+ "learning_rate": 2.951740815818486e-06,
3201
+ "loss": 0.0607,
3202
+ "step": 4150
3203
+ },
3204
+ {
3205
+ "epoch": 9.455256793284962,
3206
+ "grad_norm": 0.26457446813583374,
3207
+ "learning_rate": 2.9588534442903377e-06,
3208
+ "loss": 0.0631,
3209
+ "step": 4160
3210
+ },
3211
+ {
3212
+ "epoch": 9.478019632949211,
3213
+ "grad_norm": 0.31578782200813293,
3214
+ "learning_rate": 2.9659660727621897e-06,
3215
+ "loss": 0.0618,
3216
+ "step": 4170
3217
+ },
3218
+ {
3219
+ "epoch": 9.500782472613459,
3220
+ "grad_norm": 0.23187491297721863,
3221
+ "learning_rate": 2.9730787012340412e-06,
3222
+ "loss": 0.0609,
3223
+ "step": 4180
3224
+ },
3225
+ {
3226
+ "epoch": 9.523545312277706,
3227
+ "grad_norm": 0.24577929079532623,
3228
+ "learning_rate": 2.980191329705893e-06,
3229
+ "loss": 0.0613,
3230
+ "step": 4190
3231
+ },
3232
+ {
3233
+ "epoch": 9.546308151941954,
3234
+ "grad_norm": 0.23201169073581696,
3235
+ "learning_rate": 2.9873039581777447e-06,
3236
+ "loss": 0.0606,
3237
+ "step": 4200
3238
+ },
3239
+ {
3240
+ "epoch": 9.569070991606203,
3241
+ "grad_norm": 0.2860512137413025,
3242
+ "learning_rate": 2.9944165866495967e-06,
3243
+ "loss": 0.0595,
3244
+ "step": 4210
3245
+ },
3246
+ {
3247
+ "epoch": 9.591833831270451,
3248
+ "grad_norm": 0.237753763794899,
3249
+ "learning_rate": 3.001529215121448e-06,
3250
+ "loss": 0.0609,
3251
+ "step": 4220
3252
+ },
3253
+ {
3254
+ "epoch": 9.614596670934699,
3255
+ "grad_norm": 0.23422682285308838,
3256
+ "learning_rate": 3.0086418435933e-06,
3257
+ "loss": 0.061,
3258
+ "step": 4230
3259
+ },
3260
+ {
3261
+ "epoch": 9.637359510598948,
3262
+ "grad_norm": 0.2497267723083496,
3263
+ "learning_rate": 3.015754472065152e-06,
3264
+ "loss": 0.0616,
3265
+ "step": 4240
3266
+ },
3267
+ {
3268
+ "epoch": 9.660122350263196,
3269
+ "grad_norm": 0.2505936622619629,
3270
+ "learning_rate": 3.0228671005370036e-06,
3271
+ "loss": 0.0613,
3272
+ "step": 4250
3273
+ },
3274
+ {
3275
+ "epoch": 9.660122350263196,
3276
+ "eval_loss": 0.04175787419080734,
3277
+ "eval_runtime": 3.1427,
3278
+ "eval_samples_per_second": 477.3,
3279
+ "eval_steps_per_second": 7.637,
3280
+ "eval_sts_dev_pearson_cosine": 0.8220874775898197,
3281
+ "eval_sts_dev_pearson_dot": 0.7010536213435227,
3282
+ "eval_sts_dev_pearson_euclidean": 0.7929031352092236,
3283
+ "eval_sts_dev_pearson_manhattan": 0.7936882861676204,
3284
+ "eval_sts_dev_pearson_max": 0.8220874775898197,
3285
+ "eval_sts_dev_spearman_cosine": 0.8282368218808581,
3286
+ "eval_sts_dev_spearman_dot": 0.6844746263331734,
3287
+ "eval_sts_dev_spearman_euclidean": 0.7979913252239026,
3288
+ "eval_sts_dev_spearman_manhattan": 0.7996541111809876,
3289
+ "eval_sts_dev_spearman_max": 0.8282368218808581,
3290
+ "step": 4250
3291
  }
3292
  ],
3293
  "logging_steps": 10,