CocoRoF/ModernBERT-SimCSE-multitask_v03
Browse files
- 2_Dense/model.safetensors +1 -1
- README.md +71 -71
- model.safetensors +1 -1
2_Dense/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2362528
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:952a6c22e6fd47eb3c9872be6da5ff1152332bd8f6c51082eed8e3eb73962f49
|
3 |
size 2362528
|
README.md
CHANGED
@@ -58,34 +58,34 @@ model-index:
|
|
58 |
type: sts_dev
|
59 |
metrics:
|
60 |
- type: pearson_cosine
|
61 |
-
value: 0.
|
62 |
name: Pearson Cosine
|
63 |
- type: spearman_cosine
|
64 |
-
value: 0.
|
65 |
name: Spearman Cosine
|
66 |
- type: pearson_euclidean
|
67 |
-
value: 0.
|
68 |
name: Pearson Euclidean
|
69 |
- type: spearman_euclidean
|
70 |
-
value: 0.
|
71 |
name: Spearman Euclidean
|
72 |
- type: pearson_manhattan
|
73 |
-
value: 0.
|
74 |
name: Pearson Manhattan
|
75 |
- type: spearman_manhattan
|
76 |
-
value: 0.
|
77 |
name: Spearman Manhattan
|
78 |
- type: pearson_dot
|
79 |
-
value: 0.
|
80 |
name: Pearson Dot
|
81 |
- type: spearman_dot
|
82 |
-
value: 0.
|
83 |
name: Spearman Dot
|
84 |
- type: pearson_max
|
85 |
-
value: 0.
|
86 |
name: Pearson Max
|
87 |
- type: spearman_max
|
88 |
-
value: 0.
|
89 |
name: Spearman Max
|
90 |
---
|
91 |
|
@@ -136,7 +136,7 @@ Then you can load this model and run inference.
|
|
136 |
from sentence_transformers import SentenceTransformer
|
137 |
|
138 |
# Download from the 🤗 Hub
|
139 |
-
model = SentenceTransformer("CocoRoF/ModernBERT-SimCSE-
|
140 |
# Run inference
|
141 |
sentences = [
|
142 |
'버스가 바쁜 길을 따라 운전한다.',
|
@@ -186,18 +186,18 @@ You can finetune this model on your own dataset.
|
|
186 |
* Dataset: `sts_dev`
|
187 |
* Evaluated with [<code>EmbeddingSimilarityEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.EmbeddingSimilarityEvaluator)
|
188 |
|
189 |
-
| Metric | Value
|
190 |
-
|
191 |
-
| pearson_cosine | 0.
|
192 |
-
| spearman_cosine | 0.
|
193 |
-
| pearson_euclidean | 0.
|
194 |
-
| spearman_euclidean | 0.
|
195 |
-
| pearson_manhattan | 0.
|
196 |
-
| spearman_manhattan | 0.
|
197 |
-
| pearson_dot | 0.
|
198 |
-
| spearman_dot | 0.
|
199 |
-
| pearson_max | 0.
|
200 |
-
| **spearman_max** | **0.
|
201 |
|
202 |
<!--
|
203 |
## Bias, Risks and Limitations
|
@@ -271,11 +271,11 @@ You can finetune this model on your own dataset.
|
|
271 |
- `per_device_train_batch_size`: 16
|
272 |
- `per_device_eval_batch_size`: 16
|
273 |
- `gradient_accumulation_steps`: 8
|
274 |
-
- `learning_rate`:
|
275 |
- `num_train_epochs`: 10.0
|
276 |
- `warmup_ratio`: 0.1
|
277 |
- `push_to_hub`: True
|
278 |
-
- `hub_model_id`: CocoRoF/ModernBERT-SimCSE-
|
279 |
- `hub_strategy`: checkpoint
|
280 |
- `batch_sampler`: no_duplicates
|
281 |
|
@@ -293,7 +293,7 @@ You can finetune this model on your own dataset.
|
|
293 |
- `gradient_accumulation_steps`: 8
|
294 |
- `eval_accumulation_steps`: None
|
295 |
- `torch_empty_cache_steps`: None
|
296 |
-
- `learning_rate`:
|
297 |
- `weight_decay`: 0.0
|
298 |
- `adam_beta1`: 0.9
|
299 |
- `adam_beta2`: 0.999
|
@@ -362,7 +362,7 @@ You can finetune this model on your own dataset.
|
|
362 |
- `use_legacy_prediction_loop`: False
|
363 |
- `push_to_hub`: True
|
364 |
- `resume_from_checkpoint`: None
|
365 |
-
- `hub_model_id`: CocoRoF/ModernBERT-SimCSE-
|
366 |
- `hub_strategy`: checkpoint
|
367 |
- `hub_private_repo`: None
|
368 |
- `hub_always_push`: False
|
@@ -403,50 +403,50 @@ You can finetune this model on your own dataset.
|
|
403 |
### Training Logs
|
404 |
| Epoch | Step | Training Loss | Validation Loss | sts_dev_spearman_max |
|
405 |
|:------:|:----:|:-------------:|:---------------:|:--------------------:|
|
406 |
-
| 0.2228 | 10 | 0.
|
407 |
-
| 0.4457 | 20 | 0.
|
408 |
-
| 0.6685 | 30 | 0.0305 | 0.
|
409 |
-
| 0.8914 | 40 | 0.
|
410 |
-
| 1.1337 | 50 | 0.
|
411 |
-
| 1.3565 | 60 | 0.
|
412 |
-
| 1.5794 | 70 | 0.
|
413 |
-
| 1.8022 | 80 | 0.
|
414 |
-
| 2.0446 | 90 | 0.
|
415 |
-
| 2.2674 | 100 | 0.
|
416 |
-
| 2.4903 | 110 | 0.
|
417 |
-
| 2.7131 | 120 | 0.
|
418 |
-
| 2.9359 | 130 | 0.
|
419 |
-
| 3.1783 | 140 | 0.
|
420 |
-
| 3.4011 | 150 | 0.
|
421 |
-
| 3.6240 | 160 | 0.
|
422 |
-
| 3.8468 | 170 | 0.
|
423 |
-
| 4.0891 | 180 | 0.
|
424 |
-
| 4.3120 | 190 | 0.
|
425 |
-
| 4.5348 | 200 | 0.
|
426 |
-
| 4.7577 | 210 | 0.
|
427 |
-
| 4.9805 | 220 | 0.
|
428 |
-
| 5.2228 | 230 | 0.
|
429 |
-
| 5.4457 | 240 | 0.
|
430 |
-
| 5.6685 | 250 | 0.
|
431 |
-
| 5.8914 | 260 | 0.
|
432 |
-
| 6.1337 | 270 | 0.
|
433 |
-
| 6.3565 | 280 | 0.
|
434 |
-
| 6.5794 | 290 | 0.
|
435 |
-
| 6.8022 | 300 | 0.
|
436 |
-
| 7.0446 | 310 | 0.
|
437 |
-
| 7.2674 | 320 | 0.
|
438 |
-
| 7.4903 | 330 | 0.
|
439 |
-
| 7.7131 | 340 | 0.
|
440 |
-
| 7.9359 | 350 | 0.
|
441 |
-
| 8.1783 | 360 | 0.
|
442 |
-
| 8.4011 | 370 | 0.
|
443 |
-
| 8.6240 | 380 | 0.
|
444 |
-
| 8.8468 | 390 | 0.
|
445 |
-
| 9.0891 | 400 | 0.
|
446 |
-
| 9.3120 | 410 | 0.
|
447 |
-
| 9.5348 | 420 | 0.
|
448 |
-
| 9.7577 | 430 | 0.
|
449 |
-
| 9.9805 | 440 | 0.
|
450 |
|
451 |
|
452 |
### Framework Versions
|
|
|
58 |
type: sts_dev
|
59 |
metrics:
|
60 |
- type: pearson_cosine
|
61 |
+
value: 0.8223949445074785
|
62 |
name: Pearson Cosine
|
63 |
- type: spearman_cosine
|
64 |
+
value: 0.8220107207834706
|
65 |
name: Spearman Cosine
|
66 |
- type: pearson_euclidean
|
67 |
+
value: 0.7785831525283676
|
68 |
name: Pearson Euclidean
|
69 |
- type: spearman_euclidean
|
70 |
+
value: 0.7815628643916452
|
71 |
name: Spearman Euclidean
|
72 |
- type: pearson_manhattan
|
73 |
+
value: 0.7809119630672191
|
74 |
name: Pearson Manhattan
|
75 |
- type: spearman_manhattan
|
76 |
+
value: 0.7846536514745763
|
77 |
name: Spearman Manhattan
|
78 |
- type: pearson_dot
|
79 |
+
value: 0.7543765794886113
|
80 |
name: Pearson Dot
|
81 |
- type: spearman_dot
|
82 |
+
value: 0.7434525191412167
|
83 |
name: Spearman Dot
|
84 |
- type: pearson_max
|
85 |
+
value: 0.8223949445074785
|
86 |
name: Pearson Max
|
87 |
- type: spearman_max
|
88 |
+
value: 0.8220107207834706
|
89 |
name: Spearman Max
|
90 |
---
|
91 |
|
|
|
136 |
from sentence_transformers import SentenceTransformer
|
137 |
|
138 |
# Download from the 🤗 Hub
|
139 |
+
model = SentenceTransformer("CocoRoF/ModernBERT-SimCSE-multitask_v03")
|
140 |
# Run inference
|
141 |
sentences = [
|
142 |
'버스가 바쁜 길을 따라 운전한다.',
|
|
|
186 |
* Dataset: `sts_dev`
|
187 |
* Evaluated with [<code>EmbeddingSimilarityEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.EmbeddingSimilarityEvaluator)
|
188 |
|
189 |
+
| Metric | Value |
|
190 |
+
|:-------------------|:----------|
|
191 |
+
| pearson_cosine | 0.8224 |
|
192 |
+
| spearman_cosine | 0.822 |
|
193 |
+
| pearson_euclidean | 0.7786 |
|
194 |
+
| spearman_euclidean | 0.7816 |
|
195 |
+
| pearson_manhattan | 0.7809 |
|
196 |
+
| spearman_manhattan | 0.7847 |
|
197 |
+
| pearson_dot | 0.7544 |
|
198 |
+
| spearman_dot | 0.7435 |
|
199 |
+
| pearson_max | 0.8224 |
|
200 |
+
| **spearman_max** | **0.822** |
|
201 |
|
202 |
<!--
|
203 |
## Bias, Risks and Limitations
|
|
|
271 |
- `per_device_train_batch_size`: 16
|
272 |
- `per_device_eval_batch_size`: 16
|
273 |
- `gradient_accumulation_steps`: 8
|
274 |
+
- `learning_rate`: 1e-05
|
275 |
- `num_train_epochs`: 10.0
|
276 |
- `warmup_ratio`: 0.1
|
277 |
- `push_to_hub`: True
|
278 |
+
- `hub_model_id`: CocoRoF/ModernBERT-SimCSE-multitask_v03
|
279 |
- `hub_strategy`: checkpoint
|
280 |
- `batch_sampler`: no_duplicates
|
281 |
|
|
|
293 |
- `gradient_accumulation_steps`: 8
|
294 |
- `eval_accumulation_steps`: None
|
295 |
- `torch_empty_cache_steps`: None
|
296 |
+
- `learning_rate`: 1e-05
|
297 |
- `weight_decay`: 0.0
|
298 |
- `adam_beta1`: 0.9
|
299 |
- `adam_beta2`: 0.999
|
|
|
362 |
- `use_legacy_prediction_loop`: False
|
363 |
- `push_to_hub`: True
|
364 |
- `resume_from_checkpoint`: None
|
365 |
+
- `hub_model_id`: CocoRoF/ModernBERT-SimCSE-multitask_v03
|
366 |
- `hub_strategy`: checkpoint
|
367 |
- `hub_private_repo`: None
|
368 |
- `hub_always_push`: False
|
|
|
403 |
### Training Logs
|
404 |
| Epoch | Step | Training Loss | Validation Loss | sts_dev_spearman_max |
|
405 |
|:------:|:----:|:-------------:|:---------------:|:--------------------:|
|
406 |
+
| 0.2228 | 10 | 0.0283 | - | - |
|
407 |
+
| 0.4457 | 20 | 0.0344 | - | - |
|
408 |
+
| 0.6685 | 30 | 0.0305 | 0.0310 | 0.7939 |
|
409 |
+
| 0.8914 | 40 | 0.0489 | - | - |
|
410 |
+
| 1.1337 | 50 | 0.0382 | - | - |
|
411 |
+
| 1.3565 | 60 | 0.0271 | 0.0293 | 0.7994 |
|
412 |
+
| 1.5794 | 70 | 0.0344 | - | - |
|
413 |
+
| 1.8022 | 80 | 0.0382 | - | - |
|
414 |
+
| 2.0446 | 90 | 0.0419 | 0.0280 | 0.8059 |
|
415 |
+
| 2.2674 | 100 | 0.0244 | - | - |
|
416 |
+
| 2.4903 | 110 | 0.0307 | - | - |
|
417 |
+
| 2.7131 | 120 | 0.0291 | 0.0269 | 0.8108 |
|
418 |
+
| 2.9359 | 130 | 0.038 | - | - |
|
419 |
+
| 3.1783 | 140 | 0.0269 | - | - |
|
420 |
+
| 3.4011 | 150 | 0.0268 | 0.0262 | 0.8155 |
|
421 |
+
| 3.6240 | 160 | 0.0246 | - | - |
|
422 |
+
| 3.8468 | 170 | 0.0313 | - | - |
|
423 |
+
| 4.0891 | 180 | 0.0303 | 0.0259 | 0.8185 |
|
424 |
+
| 4.3120 | 190 | 0.0198 | - | - |
|
425 |
+
| 4.5348 | 200 | 0.0257 | - | - |
|
426 |
+
| 4.7577 | 210 | 0.0242 | 0.0255 | 0.8202 |
|
427 |
+
| 4.9805 | 220 | 0.0293 | - | - |
|
428 |
+
| 5.2228 | 230 | 0.0193 | - | - |
|
429 |
+
| 5.4457 | 240 | 0.0222 | 0.0254 | 0.8222 |
|
430 |
+
| 5.6685 | 250 | 0.0184 | - | - |
|
431 |
+
| 5.8914 | 260 | 0.0243 | - | - |
|
432 |
+
| 6.1337 | 270 | 0.0204 | 0.0254 | 0.8235 |
|
433 |
+
| 6.3565 | 280 | 0.0147 | - | - |
|
434 |
+
| 6.5794 | 290 | 0.0196 | - | - |
|
435 |
+
| 6.8022 | 300 | 0.0176 | 0.0253 | 0.8227 |
|
436 |
+
| 7.0446 | 310 | 0.0202 | - | - |
|
437 |
+
| 7.2674 | 320 | 0.0123 | - | - |
|
438 |
+
| 7.4903 | 330 | 0.0151 | 0.0254 | 0.8236 |
|
439 |
+
| 7.7131 | 340 | 0.0132 | - | - |
|
440 |
+
| 7.9359 | 350 | 0.0158 | - | - |
|
441 |
+
| 8.1783 | 360 | 0.0118 | 0.0256 | 0.8240 |
|
442 |
+
| 8.4011 | 370 | 0.0115 | - | - |
|
443 |
+
| 8.6240 | 380 | 0.0105 | - | - |
|
444 |
+
| 8.8468 | 390 | 0.0111 | 0.0256 | 0.8215 |
|
445 |
+
| 9.0891 | 400 | 0.011 | - | - |
|
446 |
+
| 9.3120 | 410 | 0.0076 | - | - |
|
447 |
+
| 9.5348 | 420 | 0.0091 | 0.0256 | 0.8220 |
|
448 |
+
| 9.7577 | 430 | 0.0075 | - | - |
|
449 |
+
| 9.9805 | 440 | 0.0093 | - | - |
|
450 |
|
451 |
|
452 |
### Framework Versions
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 735216376
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c0a6eec0e90768185fb0e3eca583968ac1e1fe92c4787c043214ae4f116edeb1
|
3 |
size 735216376
|