|
--- |
|
language: |
|
- en |
|
license: apache-2.0 |
|
library_name: transformers |
|
tags: |
|
- language |
|
- granite |
|
- embeddings |
|
model-index: |
|
- name: ibm-granite/granite-embedding-125m-english |
|
results: |
|
- dataset: |
|
type: mteb/arguana |
|
name: MTEB ArguAna |
|
config: default |
|
split: test |
|
task: |
|
type: Retrieval |
|
metrics: |
|
- type: map_at_1 |
|
value: 0.33642 |
|
- type: map_at_10 |
|
value: 0.49716 |
|
- type: map_at_100 |
|
value: 0.50519 |
|
- type: map_at_1000 |
|
value: 0.50521 |
|
- type: map_at_3 |
|
value: 0.45057 |
|
- type: map_at_5 |
|
value: 0.47774 |
|
- type: mrr_at_1 |
|
value: 0.34922 |
|
- type: mrr_at_10 |
|
value: 0.50197 |
|
- type: mrr_at_100 |
|
value: 0.50992 |
|
- type: mrr_at_1000 |
|
value: 0.50994 |
|
- type: mrr_at_3 |
|
value: 0.45484 |
|
- type: mrr_at_5 |
|
value: 0.48272 |
|
- type: ndcg_at_1 |
|
value: 0.33642 |
|
- type: ndcg_at_10 |
|
value: 0.58401 |
|
- type: ndcg_at_100 |
|
value: 0.6157 |
|
- type: ndcg_at_1000 |
|
value: 0.61608 |
|
- type: ndcg_at_3 |
|
value: 0.48825 |
|
- type: ndcg_at_5 |
|
value: 0.53689 |
|
- type: precision_at_1 |
|
value: 0.33642 |
|
- type: precision_at_10 |
|
value: 0.08606 |
|
- type: precision_at_100 |
|
value: 0.00994 |
|
- type: precision_at_1000 |
|
value: 0.001 |
|
- type: precision_at_3 |
|
value: 0.19915 |
|
- type: precision_at_5 |
|
value: 0.14296 |
|
- type: recall_at_1 |
|
value: 0.33642 |
|
- type: recall_at_10 |
|
value: 0.8606 |
|
- type: recall_at_100 |
|
value: 0.9936 |
|
- type: recall_at_1000 |
|
value: 0.99644 |
|
- type: recall_at_3 |
|
value: 0.59744 |
|
- type: recall_at_5 |
|
value: 0.71479 |
|
- dataset: |
|
type: mteb/climate-fever |
|
name: MTEB ClimateFEVER |
|
config: default |
|
split: test |
|
task: |
|
type: Retrieval |
|
metrics: |
|
- type: map_at_1 |
|
value: 0.1457 |
|
- type: map_at_10 |
|
value: 0.24102 |
|
- type: map_at_100 |
|
value: 0.25826 |
|
- type: map_at_1000 |
|
value: 0.26021 |
|
- type: map_at_3 |
|
value: 0.20346 |
|
- type: map_at_5 |
|
value: 0.22228 |
|
- type: mrr_at_1 |
|
value: 0.32573 |
|
- type: mrr_at_10 |
|
value: 0.44411 |
|
- type: mrr_at_100 |
|
value: 0.45176 |
|
- type: mrr_at_1000 |
|
value: 0.45209 |
|
- type: mrr_at_3 |
|
value: 0.4126 |
|
- type: mrr_at_5 |
|
value: 0.43312 |
|
- type: ndcg_at_1 |
|
value: 0.32573 |
|
- type: ndcg_at_10 |
|
value: 0.3315 |
|
- type: ndcg_at_100 |
|
value: 0.39898 |
|
- type: ndcg_at_1000 |
|
value: 0.43151 |
|
- type: ndcg_at_3 |
|
value: 0.27683 |
|
- type: ndcg_at_5 |
|
value: 0.29538 |
|
- type: precision_at_1 |
|
value: 0.32573 |
|
- type: precision_at_10 |
|
value: 0.10176 |
|
- type: precision_at_100 |
|
value: 0.01754 |
|
- type: precision_at_1000 |
|
value: 0.00236 |
|
- type: precision_at_3 |
|
value: 0.20347 |
|
- type: precision_at_5 |
|
value: 0.15505 |
|
- type: recall_at_1 |
|
value: 0.1457 |
|
- type: recall_at_10 |
|
value: 0.38825 |
|
- type: recall_at_100 |
|
value: 0.62237 |
|
- type: recall_at_1000 |
|
value: 0.8022 |
|
- type: recall_at_3 |
|
value: 0.25245 |
|
- type: recall_at_5 |
|
value: 0.30821 |
|
- dataset: |
|
type: mteb/cqadupstack-android |
|
name: MTEB CQADupstackAndroidRetrieval |
|
config: default |
|
split: test |
|
task: |
|
type: Retrieval |
|
metrics: |
|
- type: map_at_1 |
|
value: 0.36964 |
|
- type: map_at_10 |
|
value: 0.5043 |
|
- type: map_at_100 |
|
value: 0.52066 |
|
- type: map_at_1000 |
|
value: 0.52175 |
|
- type: map_at_3 |
|
value: 0.46001 |
|
- type: map_at_5 |
|
value: 0.48312 |
|
- type: mrr_at_1 |
|
value: 0.45923 |
|
- type: mrr_at_10 |
|
value: 0.56733 |
|
- type: mrr_at_100 |
|
value: 0.57292 |
|
- type: mrr_at_1000 |
|
value: 0.57321 |
|
- type: mrr_at_3 |
|
value: 0.54053 |
|
- type: mrr_at_5 |
|
value: 0.55556 |
|
- type: ndcg_at_1 |
|
value: 0.45923 |
|
- type: ndcg_at_10 |
|
value: 0.57667 |
|
- type: ndcg_at_100 |
|
value: 0.62373 |
|
- type: ndcg_at_1000 |
|
value: 0.6368 |
|
- type: ndcg_at_3 |
|
value: 0.51843 |
|
- type: ndcg_at_5 |
|
value: 0.54257 |
|
- type: precision_at_1 |
|
value: 0.45923 |
|
- type: precision_at_10 |
|
value: 0.11316 |
|
- type: precision_at_100 |
|
value: 0.01705 |
|
- type: precision_at_1000 |
|
value: 0.00216 |
|
- type: precision_at_3 |
|
value: 0.2537 |
|
- type: precision_at_5 |
|
value: 0.1814 |
|
- type: recall_at_1 |
|
value: 0.36964 |
|
- type: recall_at_10 |
|
value: 0.71234 |
|
- type: recall_at_100 |
|
value: 0.90421 |
|
- type: recall_at_1000 |
|
value: 0.98296 |
|
- type: recall_at_3 |
|
value: 0.53655 |
|
- type: recall_at_5 |
|
value: 0.60996 |
|
- dataset: |
|
type: mteb/cqadupstack-english |
|
name: MTEB CQADupstackEnglishRetrieval |
|
config: default |
|
split: test |
|
task: |
|
type: Retrieval |
|
metrics: |
|
- type: map_at_1 |
|
value: 0.36198 |
|
- type: map_at_10 |
|
value: 0.49199 |
|
- type: map_at_100 |
|
value: 0.50602 |
|
- type: map_at_1000 |
|
value: 0.50736 |
|
- type: map_at_3 |
|
value: 0.45678 |
|
- type: map_at_5 |
|
value: 0.47605 |
|
- type: mrr_at_1 |
|
value: 0.45478 |
|
- type: mrr_at_10 |
|
value: 0.55075 |
|
- type: mrr_at_100 |
|
value: 0.55656 |
|
- type: mrr_at_1000 |
|
value: 0.55688 |
|
- type: mrr_at_3 |
|
value: 0.52887 |
|
- type: mrr_at_5 |
|
value: 0.54282 |
|
- type: ndcg_at_1 |
|
value: 0.45478 |
|
- type: ndcg_at_10 |
|
value: 0.55505 |
|
- type: ndcg_at_100 |
|
value: 0.59606 |
|
- type: ndcg_at_1000 |
|
value: 0.61255 |
|
- type: ndcg_at_3 |
|
value: 0.51124 |
|
- type: ndcg_at_5 |
|
value: 0.53166 |
|
- type: precision_at_1 |
|
value: 0.45478 |
|
- type: precision_at_10 |
|
value: 0.10752 |
|
- type: precision_at_100 |
|
value: 0.01666 |
|
- type: precision_at_1000 |
|
value: 0.00211 |
|
- type: precision_at_3 |
|
value: 0.25053 |
|
- type: precision_at_5 |
|
value: 0.17694 |
|
- type: recall_at_1 |
|
value: 0.36198 |
|
- type: recall_at_10 |
|
value: 0.66465 |
|
- type: recall_at_100 |
|
value: 0.83632 |
|
- type: recall_at_1000 |
|
value: 0.93276 |
|
- type: recall_at_3 |
|
value: 0.53207 |
|
- type: recall_at_5 |
|
value: 0.59169 |
|
- dataset: |
|
type: mteb/cqadupstack-gaming |
|
name: MTEB CQADupstackGamingRetrieval |
|
config: default |
|
split: test |
|
task: |
|
type: Retrieval |
|
metrics: |
|
- type: map_at_1 |
|
value: 0.44157 |
|
- type: map_at_10 |
|
value: 0.57753 |
|
- type: map_at_100 |
|
value: 0.58698 |
|
- type: map_at_1000 |
|
value: 0.5874 |
|
- type: map_at_3 |
|
value: 0.54223 |
|
- type: map_at_5 |
|
value: 0.56307 |
|
- type: mrr_at_1 |
|
value: 0.50094 |
|
- type: mrr_at_10 |
|
value: 0.607 |
|
- type: mrr_at_100 |
|
value: 0.6126 |
|
- type: mrr_at_1000 |
|
value: 0.6128 |
|
- type: mrr_at_3 |
|
value: 0.58265 |
|
- type: mrr_at_5 |
|
value: 0.59817 |
|
- type: ndcg_at_1 |
|
value: 0.50094 |
|
- type: ndcg_at_10 |
|
value: 0.63641 |
|
- type: ndcg_at_100 |
|
value: 0.67055 |
|
- type: ndcg_at_1000 |
|
value: 0.67855 |
|
- type: ndcg_at_3 |
|
value: 0.58022 |
|
- type: ndcg_at_5 |
|
value: 0.6097 |
|
- type: precision_at_1 |
|
value: 0.50094 |
|
- type: precision_at_10 |
|
value: 0.10182 |
|
- type: precision_at_100 |
|
value: 0.01278 |
|
- type: precision_at_1000 |
|
value: 0.00138 |
|
- type: precision_at_3 |
|
value: 0.2581 |
|
- type: precision_at_5 |
|
value: 0.17755 |
|
- type: recall_at_1 |
|
value: 0.44157 |
|
- type: recall_at_10 |
|
value: 0.7778 |
|
- type: recall_at_100 |
|
value: 0.92244 |
|
- type: recall_at_1000 |
|
value: 0.9781 |
|
- type: recall_at_3 |
|
value: 0.63087 |
|
- type: recall_at_5 |
|
value: 0.70172 |
|
- dataset: |
|
type: mteb/cqadupstack-gis |
|
name: MTEB CQADupstackGisRetrieval |
|
config: default |
|
split: test |
|
task: |
|
type: Retrieval |
|
metrics: |
|
- type: map_at_1 |
|
value: 0.29532 |
|
- type: map_at_10 |
|
value: 0.40214 |
|
- type: map_at_100 |
|
value: 0.41289 |
|
- type: map_at_1000 |
|
value: 0.41359 |
|
- type: map_at_3 |
|
value: 0.37086 |
|
- type: map_at_5 |
|
value: 0.38889 |
|
- type: mrr_at_1 |
|
value: 0.3209 |
|
- type: mrr_at_10 |
|
value: 0.42423 |
|
- type: mrr_at_100 |
|
value: 0.43342 |
|
- type: mrr_at_1000 |
|
value: 0.43395 |
|
- type: mrr_at_3 |
|
value: 0.39736 |
|
- type: mrr_at_5 |
|
value: 0.41307 |
|
- type: ndcg_at_1 |
|
value: 0.3209 |
|
- type: ndcg_at_10 |
|
value: 0.46075 |
|
- type: ndcg_at_100 |
|
value: 0.5103 |
|
- type: ndcg_at_1000 |
|
value: 0.52668 |
|
- type: ndcg_at_3 |
|
value: 0.40149 |
|
- type: ndcg_at_5 |
|
value: 0.43111 |
|
- type: precision_at_1 |
|
value: 0.3209 |
|
- type: precision_at_10 |
|
value: 0.07141 |
|
- type: precision_at_100 |
|
value: 0.01018 |
|
- type: precision_at_1000 |
|
value: 0.00118 |
|
- type: precision_at_3 |
|
value: 0.17175 |
|
- type: precision_at_5 |
|
value: 0.12068 |
|
- type: recall_at_1 |
|
value: 0.29532 |
|
- type: recall_at_10 |
|
value: 0.62025 |
|
- type: recall_at_100 |
|
value: 0.83829 |
|
- type: recall_at_1000 |
|
value: 0.95995 |
|
- type: recall_at_3 |
|
value: 0.4603 |
|
- type: recall_at_5 |
|
value: 0.53089 |
|
- dataset: |
|
type: mteb/cqadupstack-mathematica |
|
name: MTEB CQADupstackMathematicaRetrieval |
|
config: default |
|
split: test |
|
task: |
|
type: Retrieval |
|
metrics: |
|
- type: map_at_1 |
|
value: 0.18944 |
|
- type: map_at_10 |
|
value: 0.29611 |
|
- type: map_at_100 |
|
value: 0.31063 |
|
- type: map_at_1000 |
|
value: 0.31174 |
|
- type: map_at_3 |
|
value: 0.26098 |
|
- type: map_at_5 |
|
value: 0.28151 |
|
- type: mrr_at_1 |
|
value: 0.23756 |
|
- type: mrr_at_10 |
|
value: 0.34491 |
|
- type: mrr_at_100 |
|
value: 0.35457 |
|
- type: mrr_at_1000 |
|
value: 0.35512 |
|
- type: mrr_at_3 |
|
value: 0.3126 |
|
- type: mrr_at_5 |
|
value: 0.3317 |
|
- type: ndcg_at_1 |
|
value: 0.23756 |
|
- type: ndcg_at_10 |
|
value: 0.36015 |
|
- type: ndcg_at_100 |
|
value: 0.42175 |
|
- type: ndcg_at_1000 |
|
value: 0.44607 |
|
- type: ndcg_at_3 |
|
value: 0.29725 |
|
- type: ndcg_at_5 |
|
value: 0.32879 |
|
- type: precision_at_1 |
|
value: 0.23756 |
|
- type: precision_at_10 |
|
value: 0.06928 |
|
- type: precision_at_100 |
|
value: 0.01153 |
|
- type: precision_at_1000 |
|
value: 0.00149 |
|
- type: precision_at_3 |
|
value: 0.14635 |
|
- type: precision_at_5 |
|
value: 0.1107 |
|
- type: recall_at_1 |
|
value: 0.18944 |
|
- type: recall_at_10 |
|
value: 0.50691 |
|
- type: recall_at_100 |
|
value: 0.76503 |
|
- type: recall_at_1000 |
|
value: 0.93624 |
|
- type: recall_at_3 |
|
value: 0.33611 |
|
- type: recall_at_5 |
|
value: 0.41427 |
|
- dataset: |
|
type: mteb/cqadupstack-physics |
|
name: MTEB CQADupstackPhysicsRetrieval |
|
config: default |
|
split: test |
|
task: |
|
type: Retrieval |
|
metrics: |
|
- type: map_at_1 |
|
value: 0.33824 |
|
- type: map_at_10 |
|
value: 0.46868 |
|
- type: map_at_100 |
|
value: 0.48306 |
|
- type: map_at_1000 |
|
value: 0.48406 |
|
- type: map_at_3 |
|
value: 0.43335 |
|
- type: map_at_5 |
|
value: 0.45279 |
|
- type: mrr_at_1 |
|
value: 0.42348 |
|
- type: mrr_at_10 |
|
value: 0.52972 |
|
- type: mrr_at_100 |
|
value: 0.53707 |
|
- type: mrr_at_1000 |
|
value: 0.53734 |
|
- type: mrr_at_3 |
|
value: 0.50722 |
|
- type: mrr_at_5 |
|
value: 0.52012 |
|
- type: ndcg_at_1 |
|
value: 0.42348 |
|
- type: ndcg_at_10 |
|
value: 0.53504 |
|
- type: ndcg_at_100 |
|
value: 0.58899 |
|
- type: ndcg_at_1000 |
|
value: 0.60323 |
|
- type: ndcg_at_3 |
|
value: 0.48478 |
|
- type: ndcg_at_5 |
|
value: 0.5079 |
|
- type: precision_at_1 |
|
value: 0.42348 |
|
- type: precision_at_10 |
|
value: 0.0975 |
|
- type: precision_at_100 |
|
value: 0.01466 |
|
- type: precision_at_1000 |
|
value: 0.00177 |
|
- type: precision_at_3 |
|
value: 0.23741 |
|
- type: precision_at_5 |
|
value: 0.16439 |
|
- type: recall_at_1 |
|
value: 0.33824 |
|
- type: recall_at_10 |
|
value: 0.67142 |
|
- type: recall_at_100 |
|
value: 0.89134 |
|
- type: recall_at_1000 |
|
value: 0.97816 |
|
- type: recall_at_3 |
|
value: 0.52305 |
|
- type: recall_at_5 |
|
value: 0.58804 |
|
- dataset: |
|
type: mteb/cqadupstack-programmers |
|
name: MTEB CQADupstackProgrammersRetrieval |
|
config: default |
|
split: test |
|
task: |
|
type: Retrieval |
|
metrics: |
|
- type: map_at_1 |
|
value: 0.30125 |
|
- type: map_at_10 |
|
value: 0.42119 |
|
- type: map_at_100 |
|
value: 0.43599 |
|
- type: map_at_1000 |
|
value: 0.4369 |
|
- type: map_at_3 |
|
value: 0.38018 |
|
- type: map_at_5 |
|
value: 0.40368 |
|
- type: mrr_at_1 |
|
value: 0.37557 |
|
- type: mrr_at_10 |
|
value: 0.47573 |
|
- type: mrr_at_100 |
|
value: 0.4846 |
|
- type: mrr_at_1000 |
|
value: 0.48499 |
|
- type: mrr_at_3 |
|
value: 0.44654 |
|
- type: mrr_at_5 |
|
value: 0.4644 |
|
- type: ndcg_at_1 |
|
value: 0.37557 |
|
- type: ndcg_at_10 |
|
value: 0.48743 |
|
- type: ndcg_at_100 |
|
value: 0.54458 |
|
- type: ndcg_at_1000 |
|
value: 0.56076 |
|
- type: ndcg_at_3 |
|
value: 0.42573 |
|
- type: ndcg_at_5 |
|
value: 0.45528 |
|
- type: precision_at_1 |
|
value: 0.37557 |
|
- type: precision_at_10 |
|
value: 0.09269 |
|
- type: precision_at_100 |
|
value: 0.01401 |
|
- type: precision_at_1000 |
|
value: 0.0017 |
|
- type: precision_at_3 |
|
value: 0.20624 |
|
- type: precision_at_5 |
|
value: 0.15068 |
|
- type: recall_at_1 |
|
value: 0.30125 |
|
- type: recall_at_10 |
|
value: 0.62619 |
|
- type: recall_at_100 |
|
value: 0.86574 |
|
- type: recall_at_1000 |
|
value: 0.97102 |
|
- type: recall_at_3 |
|
value: 0.45437 |
|
- type: recall_at_5 |
|
value: 0.53197 |
|
- dataset: |
|
type: mteb/cqadupstack-stats |
|
name: MTEB CQADupstackStatsRetrieval |
|
config: default |
|
split: test |
|
task: |
|
type: Retrieval |
|
metrics: |
|
- type: map_at_1 |
|
value: 0.29193 |
|
- type: map_at_10 |
|
value: 0.37529 |
|
- type: map_at_100 |
|
value: 0.38614 |
|
- type: map_at_1000 |
|
value: 0.38714 |
|
- type: map_at_3 |
|
value: 0.34897 |
|
- type: map_at_5 |
|
value: 0.36273 |
|
- type: mrr_at_1 |
|
value: 0.32669 |
|
- type: mrr_at_10 |
|
value: 0.40288 |
|
- type: mrr_at_100 |
|
value: 0.41177 |
|
- type: mrr_at_1000 |
|
value: 0.41241 |
|
- type: mrr_at_3 |
|
value: 0.38037 |
|
- type: mrr_at_5 |
|
value: 0.39195 |
|
- type: ndcg_at_1 |
|
value: 0.32669 |
|
- type: ndcg_at_10 |
|
value: 0.42353 |
|
- type: ndcg_at_100 |
|
value: 0.47424 |
|
- type: ndcg_at_1000 |
|
value: 0.4959 |
|
- type: ndcg_at_3 |
|
value: 0.37604 |
|
- type: ndcg_at_5 |
|
value: 0.39682 |
|
- type: precision_at_1 |
|
value: 0.32669 |
|
- type: precision_at_10 |
|
value: 0.06871 |
|
- type: precision_at_100 |
|
value: 0.01008 |
|
- type: precision_at_1000 |
|
value: 0.00126 |
|
- type: precision_at_3 |
|
value: 0.16309 |
|
- type: precision_at_5 |
|
value: 0.11288 |
|
- type: recall_at_1 |
|
value: 0.29193 |
|
- type: recall_at_10 |
|
value: 0.54159 |
|
- type: recall_at_100 |
|
value: 0.77267 |
|
- type: recall_at_1000 |
|
value: 0.92805 |
|
- type: recall_at_3 |
|
value: 0.41014 |
|
- type: recall_at_5 |
|
value: 0.46248 |
|
- dataset: |
|
type: mteb/cqadupstack-tex |
|
name: MTEB CQADupstackTexRetrieval |
|
config: default |
|
split: test |
|
task: |
|
type: Retrieval |
|
metrics: |
|
- type: map_at_1 |
|
value: 0.21217 |
|
- type: map_at_10 |
|
value: 0.30848 |
|
- type: map_at_100 |
|
value: 0.32173 |
|
- type: map_at_1000 |
|
value: 0.32296 |
|
- type: map_at_3 |
|
value: 0.27882 |
|
- type: map_at_5 |
|
value: 0.29537 |
|
- type: mrr_at_1 |
|
value: 0.25946 |
|
- type: mrr_at_10 |
|
value: 0.35091 |
|
- type: mrr_at_100 |
|
value: 0.36047 |
|
- type: mrr_at_1000 |
|
value: 0.36111 |
|
- type: mrr_at_3 |
|
value: 0.32485 |
|
- type: mrr_at_5 |
|
value: 0.33964 |
|
- type: ndcg_at_1 |
|
value: 0.25946 |
|
- type: ndcg_at_10 |
|
value: 0.3655 |
|
- type: ndcg_at_100 |
|
value: 0.42328 |
|
- type: ndcg_at_1000 |
|
value: 0.44783 |
|
- type: ndcg_at_3 |
|
value: 0.31463 |
|
- type: ndcg_at_5 |
|
value: 0.33803 |
|
- type: precision_at_1 |
|
value: 0.25946 |
|
- type: precision_at_10 |
|
value: 0.06793 |
|
- type: precision_at_100 |
|
value: 0.01138 |
|
- type: precision_at_1000 |
|
value: 0.00155 |
|
- type: precision_at_3 |
|
value: 0.1513 |
|
- type: precision_at_5 |
|
value: 0.10991 |
|
- type: recall_at_1 |
|
value: 0.21217 |
|
- type: recall_at_10 |
|
value: 0.49327 |
|
- type: recall_at_100 |
|
value: 0.7472 |
|
- type: recall_at_1000 |
|
value: 0.91637 |
|
- type: recall_at_3 |
|
value: 0.34993 |
|
- type: recall_at_5 |
|
value: 0.41029 |
|
- dataset: |
|
type: mteb/cqadupstack-unix |
|
name: MTEB CQADupstackUnixRetrieval |
|
config: default |
|
split: test |
|
task: |
|
type: Retrieval |
|
metrics: |
|
- type: map_at_1 |
|
value: 0.34303 |
|
- type: map_at_10 |
|
value: 0.45312 |
|
- type: map_at_100 |
|
value: 0.46563 |
|
- type: map_at_1000 |
|
value: 0.4664 |
|
- type: map_at_3 |
|
value: 0.4143 |
|
- type: map_at_5 |
|
value: 0.43633 |
|
- type: mrr_at_1 |
|
value: 0.40112 |
|
- type: mrr_at_10 |
|
value: 0.49097 |
|
- type: mrr_at_100 |
|
value: 0.49966 |
|
- type: mrr_at_1000 |
|
value: 0.50006 |
|
- type: mrr_at_3 |
|
value: 0.46129 |
|
- type: mrr_at_5 |
|
value: 0.47901 |
|
- type: ndcg_at_1 |
|
value: 0.40112 |
|
- type: ndcg_at_10 |
|
value: 0.513 |
|
- type: ndcg_at_100 |
|
value: 0.56534 |
|
- type: ndcg_at_1000 |
|
value: 0.58048 |
|
- type: ndcg_at_3 |
|
value: 0.4491 |
|
- type: ndcg_at_5 |
|
value: 0.48048 |
|
- type: precision_at_1 |
|
value: 0.40112 |
|
- type: precision_at_10 |
|
value: 0.08806 |
|
- type: precision_at_100 |
|
value: 0.01266 |
|
- type: precision_at_1000 |
|
value: 0.00149 |
|
- type: precision_at_3 |
|
value: 0.20211 |
|
- type: precision_at_5 |
|
value: 0.14496 |
|
- type: recall_at_1 |
|
value: 0.34303 |
|
- type: recall_at_10 |
|
value: 0.65508 |
|
- type: recall_at_100 |
|
value: 0.8753 |
|
- type: recall_at_1000 |
|
value: 0.9742 |
|
- type: recall_at_3 |
|
value: 0.48465 |
|
- type: recall_at_5 |
|
value: 0.56374 |
|
- dataset: |
|
type: mteb/cqadupstack-webmasters |
|
name: MTEB CQADupstackWebmastersRetrieval |
|
config: default |
|
split: test |
|
task: |
|
type: Retrieval |
|
metrics: |
|
- type: map_at_1 |
|
value: 0.30312 |
|
- type: map_at_10 |
|
value: 0.40931 |
|
- type: map_at_100 |
|
value: 0.42893 |
|
- type: map_at_1000 |
|
value: 0.4312 |
|
- type: map_at_3 |
|
value: 0.37527 |
|
- type: map_at_5 |
|
value: 0.3936 |
|
- type: mrr_at_1 |
|
value: 0.36364 |
|
- type: mrr_at_10 |
|
value: 0.45677 |
|
- type: mrr_at_100 |
|
value: 0.46753 |
|
- type: mrr_at_1000 |
|
value: 0.46787 |
|
- type: mrr_at_3 |
|
value: 0.42918 |
|
- type: mrr_at_5 |
|
value: 0.4443 |
|
- type: ndcg_at_1 |
|
value: 0.36364 |
|
- type: ndcg_at_10 |
|
value: 0.47301 |
|
- type: ndcg_at_100 |
|
value: 0.53698 |
|
- type: ndcg_at_1000 |
|
value: 0.55503 |
|
- type: ndcg_at_3 |
|
value: 0.41875 |
|
- type: ndcg_at_5 |
|
value: 0.44316 |
|
- type: precision_at_1 |
|
value: 0.36364 |
|
- type: precision_at_10 |
|
value: 0.09032 |
|
- type: precision_at_100 |
|
value: 0.01806 |
|
- type: precision_at_1000 |
|
value: 0.00258 |
|
- type: precision_at_3 |
|
value: 0.19499 |
|
- type: precision_at_5 |
|
value: 0.1415 |
|
- type: recall_at_1 |
|
value: 0.30312 |
|
- type: recall_at_10 |
|
value: 0.59418 |
|
- type: recall_at_100 |
|
value: 0.8656 |
|
- type: recall_at_1000 |
|
value: 0.97412 |
|
- type: recall_at_3 |
|
value: 0.44251 |
|
- type: recall_at_5 |
|
value: 0.50457 |
|
- dataset: |
|
type: mteb/cqadupstack-wordpress |
|
name: MTEB CQADupstackWordpressRetrieval |
|
config: default |
|
split: test |
|
task: |
|
type: Retrieval |
|
metrics: |
|
- type: map_at_1 |
|
value: 0.23851 |
|
- type: map_at_10 |
|
value: 0.33429 |
|
- type: map_at_100 |
|
value: 0.34482 |
|
- type: map_at_1000 |
|
value: 0.3457 |
|
- type: map_at_3 |
|
value: 0.30271 |
|
- type: map_at_5 |
|
value: 0.31905 |
|
- type: mrr_at_1 |
|
value: 0.25693 |
|
- type: mrr_at_10 |
|
value: 0.35383 |
|
- type: mrr_at_100 |
|
value: 0.36295 |
|
- type: mrr_at_1000 |
|
value: 0.36346 |
|
- type: mrr_at_3 |
|
value: 0.32532 |
|
- type: mrr_at_5 |
|
value: 0.3402 |
|
- type: ndcg_at_1 |
|
value: 0.25693 |
|
- type: ndcg_at_10 |
|
value: 0.39196 |
|
- type: ndcg_at_100 |
|
value: 0.44501 |
|
- type: ndcg_at_1000 |
|
value: 0.46482 |
|
- type: ndcg_at_3 |
|
value: 0.33 |
|
- type: ndcg_at_5 |
|
value: 0.35736 |
|
- type: precision_at_1 |
|
value: 0.25693 |
|
- type: precision_at_10 |
|
value: 0.06433 |
|
- type: precision_at_100 |
|
value: 0.00989 |
|
- type: precision_at_1000 |
|
value: 0.00128 |
|
- type: precision_at_3 |
|
value: 0.14295 |
|
- type: precision_at_5 |
|
value: 0.10277 |
|
- type: recall_at_1 |
|
value: 0.23851 |
|
- type: recall_at_10 |
|
value: 0.55036 |
|
- type: recall_at_100 |
|
value: 0.79592 |
|
- type: recall_at_1000 |
|
value: 0.94283 |
|
- type: recall_at_3 |
|
value: 0.38435 |
|
- type: recall_at_5 |
|
value: 0.44872 |
|
- dataset: |
|
type: mteb/dbpedia |
|
name: MTEB DBPedia |
|
config: default |
|
split: test |
|
task: |
|
type: Retrieval |
|
metrics: |
|
- type: map_at_1 |
|
value: 0.0871 |
|
- type: map_at_10 |
|
value: 0.19218 |
|
- type: map_at_100 |
|
value: 0.26291 |
|
- type: map_at_1000 |
|
value: 0.27985 |
|
- type: map_at_3 |
|
value: 0.13974 |
|
- type: map_at_5 |
|
value: 0.16104 |
|
- type: mrr_at_1 |
|
value: 0.6725 |
|
- type: mrr_at_10 |
|
value: 0.75037 |
|
- type: mrr_at_100 |
|
value: 0.75318 |
|
- type: mrr_at_1000 |
|
value: 0.75325 |
|
- type: mrr_at_3 |
|
value: 0.73833 |
|
- type: mrr_at_5 |
|
value: 0.74308 |
|
- type: ndcg_at_1 |
|
value: 0.54375 |
|
- type: ndcg_at_10 |
|
value: 0.39409 |
|
- type: ndcg_at_100 |
|
value: 0.44382 |
|
- type: ndcg_at_1000 |
|
value: 0.52485 |
|
- type: ndcg_at_3 |
|
value: 0.44463 |
|
- type: ndcg_at_5 |
|
value: 0.41276 |
|
- type: precision_at_1 |
|
value: 0.6725 |
|
- type: precision_at_10 |
|
value: 0.3055 |
|
- type: precision_at_100 |
|
value: 0.09588 |
|
- type: precision_at_1000 |
|
value: 0.02118 |
|
- type: precision_at_3 |
|
value: 0.48167 |
|
- type: precision_at_5 |
|
value: 0.394 |
|
- type: recall_at_1 |
|
value: 0.0871 |
|
- type: recall_at_10 |
|
value: 0.2527 |
|
- type: recall_at_100 |
|
value: 0.5185 |
|
- type: recall_at_1000 |
|
value: 0.76491 |
|
- type: recall_at_3 |
|
value: 0.15516 |
|
- type: recall_at_5 |
|
value: 0.18907 |
|
- dataset: |
|
type: mteb/fever |
|
name: MTEB FEVER |
|
config: default |
|
split: test |
|
task: |
|
type: Retrieval |
|
metrics: |
|
- type: map_at_1 |
|
value: 0.78993 |
|
- type: map_at_10 |
|
value: 0.8502 |
|
- type: map_at_100 |
|
value: 0.85186 |
|
- type: map_at_1000 |
|
value: 0.852 |
|
- type: map_at_3 |
|
value: 0.8437 |
|
- type: map_at_5 |
|
value: 0.84812 |
|
- type: mrr_at_1 |
|
value: 0.85179 |
|
- type: mrr_at_10 |
|
value: 0.90744 |
|
- type: mrr_at_100 |
|
value: 0.90799 |
|
- type: mrr_at_1000 |
|
value: 0.90801 |
|
- type: mrr_at_3 |
|
value: 0.90322 |
|
- type: mrr_at_5 |
|
value: 0.90622 |
|
- type: ndcg_at_1 |
|
value: 0.85179 |
|
- type: ndcg_at_10 |
|
value: 0.88229 |
|
- type: ndcg_at_100 |
|
value: 0.8884 |
|
- type: ndcg_at_1000 |
|
value: 0.89116 |
|
- type: ndcg_at_3 |
|
value: 0.87304 |
|
- type: ndcg_at_5 |
|
value: 0.87862 |
|
- type: precision_at_1 |
|
value: 0.85179 |
|
- type: precision_at_10 |
|
value: 0.10129 |
|
- type: precision_at_100 |
|
value: 0.0106 |
|
- type: precision_at_1000 |
|
value: 0.0011 |
|
- type: precision_at_3 |
|
value: 0.32543 |
|
- type: precision_at_5 |
|
value: 0.19931 |
|
- type: recall_at_1 |
|
value: 0.78993 |
|
- type: recall_at_10 |
|
value: 0.92685 |
|
- type: recall_at_100 |
|
value: 0.9516 |
|
- type: recall_at_1000 |
|
value: 0.96943 |
|
- type: recall_at_3 |
|
value: 0.89965 |
|
- type: recall_at_5 |
|
value: 0.91562 |
|
- dataset: |
|
type: mteb/fiqa |
|
name: MTEB FiQA2018 |
|
config: default |
|
split: test |
|
task: |
|
type: Retrieval |
|
metrics: |
|
- type: map_at_1 |
|
value: 0.22586 |
|
- type: map_at_10 |
|
value: 0.36836 |
|
- type: map_at_100 |
|
value: 0.38863 |
|
- type: map_at_1000 |
|
value: 0.39041 |
|
- type: map_at_3 |
|
value: 0.32445 |
|
- type: map_at_5 |
|
value: 0.34951 |
|
- type: mrr_at_1 |
|
value: 0.44599 |
|
- type: mrr_at_10 |
|
value: 0.53471 |
|
- type: mrr_at_100 |
|
value: 0.54186 |
|
- type: mrr_at_1000 |
|
value: 0.54223 |
|
- type: mrr_at_3 |
|
value: 0.51157 |
|
- type: mrr_at_5 |
|
value: 0.52423 |
|
- type: ndcg_at_1 |
|
value: 0.44599 |
|
- type: ndcg_at_10 |
|
value: 0.44931 |
|
- type: ndcg_at_100 |
|
value: 0.51914 |
|
- type: ndcg_at_1000 |
|
value: 0.54674 |
|
- type: ndcg_at_3 |
|
value: 0.41597 |
|
- type: ndcg_at_5 |
|
value: 0.42611 |
|
- type: precision_at_1 |
|
value: 0.44599 |
|
- type: precision_at_10 |
|
value: 0.12346 |
|
- type: precision_at_100 |
|
value: 0.01951 |
|
- type: precision_at_1000 |
|
value: 0.00244 |
|
- type: precision_at_3 |
|
value: 0.27623 |
|
- type: precision_at_5 |
|
value: 0.20093 |
|
- type: recall_at_1 |
|
value: 0.22586 |
|
- type: recall_at_10 |
|
value: 0.5152 |
|
- type: recall_at_100 |
|
value: 0.77251 |
|
- type: recall_at_1000 |
|
value: 0.93503 |
|
- type: recall_at_3 |
|
value: 0.37802 |
|
- type: recall_at_5 |
|
value: 0.4386 |
|
- dataset: |
|
type: mteb/hotpotqa |
|
name: MTEB HotpotQA |
|
config: default |
|
split: test |
|
task: |
|
type: Retrieval |
|
metrics: |
|
- type: map_at_1 |
|
value: 0.38177 |
|
- type: map_at_10 |
|
value: 0.59021 |
|
- type: map_at_100 |
|
value: 0.59924 |
|
- type: map_at_1000 |
|
value: 0.59989 |
|
- type: map_at_3 |
|
value: 0.55553 |
|
- type: map_at_5 |
|
value: 0.57773 |
|
- type: mrr_at_1 |
|
value: 0.76354 |
|
- type: mrr_at_10 |
|
value: 0.827 |
|
- type: mrr_at_100 |
|
value: 0.82887 |
|
- type: mrr_at_1000 |
|
value: 0.82896 |
|
- type: mrr_at_3 |
|
value: 0.8172 |
|
- type: mrr_at_5 |
|
value: 0.82338 |
|
- type: ndcg_at_1 |
|
value: 0.76354 |
|
- type: ndcg_at_10 |
|
value: 0.67775 |
|
- type: ndcg_at_100 |
|
value: 0.70849 |
|
- type: ndcg_at_1000 |
|
value: 0.7215 |
|
- type: ndcg_at_3 |
|
value: 0.629 |
|
- type: ndcg_at_5 |
|
value: 0.65679 |
|
- type: precision_at_1 |
|
value: 0.76354 |
|
- type: precision_at_10 |
|
value: 0.14176 |
|
- type: precision_at_100 |
|
value: 0.01656 |
|
- type: precision_at_1000 |
|
value: 0.00183 |
|
- type: precision_at_3 |
|
value: 0.40113 |
|
- type: precision_at_5 |
|
value: 0.26255 |
|
- type: recall_at_1 |
|
value: 0.38177 |
|
- type: recall_at_10 |
|
value: 0.70878 |
|
- type: recall_at_100 |
|
value: 0.82822 |
|
- type: recall_at_1000 |
|
value: 0.91472 |
|
- type: recall_at_3 |
|
value: 0.60169 |
|
- type: recall_at_5 |
|
value: 0.65638 |
|
- dataset: |
|
type: mteb/msmarco |
|
name: MTEB MSMARCO |
|
config: default |
|
split: dev |
|
task: |
|
type: Retrieval |
|
metrics: |
|
- type: map_at_1 |
|
value: 0.15062 |
|
- type: map_at_10 |
|
value: 0.26008 |
|
- type: map_at_100 |
|
value: 0.27305 |
|
- type: map_at_1000 |
|
value: 0.27373 |
|
- type: map_at_3 |
|
value: 0.22236 |
|
- type: map_at_5 |
|
value: 0.24362 |
|
- type: mrr_at_1 |
|
value: 0.15444 |
|
- type: mrr_at_10 |
|
value: 0.26458 |
|
- type: mrr_at_100 |
|
value: 0.27718 |
|
- type: mrr_at_1000 |
|
value: 0.2778 |
|
- type: mrr_at_3 |
|
value: 0.22701 |
|
- type: mrr_at_5 |
|
value: 0.24844 |
|
- type: ndcg_at_1 |
|
value: 0.15444 |
|
- type: ndcg_at_10 |
|
value: 0.32495 |
|
- type: ndcg_at_100 |
|
value: 0.38957 |
|
- type: ndcg_at_1000 |
|
value: 0.40684 |
|
- type: ndcg_at_3 |
|
value: 0.24745 |
|
- type: ndcg_at_5 |
|
value: 0.2856 |
|
- type: precision_at_1 |
|
value: 0.15444 |
|
- type: precision_at_10 |
|
value: 0.05486 |
|
- type: precision_at_100 |
|
value: 0.00875 |
|
- type: precision_at_1000 |
|
value: 0.00102 |
|
- type: precision_at_3 |
|
value: 0.1086 |
|
- type: precision_at_5 |
|
value: 0.08441 |
|
- type: recall_at_1 |
|
value: 0.15062 |
|
- type: recall_at_10 |
|
value: 0.5272 |
|
- type: recall_at_100 |
|
value: 0.83006 |
|
- type: recall_at_1000 |
|
value: 0.96263 |
|
- type: recall_at_3 |
|
value: 0.31556 |
|
- type: recall_at_5 |
|
value: 0.40706 |
|
- dataset: |
|
type: mteb/nfcorpus |
|
name: MTEB NFCorpus |
|
config: default |
|
split: test |
|
task: |
|
type: Retrieval |
|
metrics: |
|
- type: map_at_1 |
|
value: 0.06126 |
|
- type: map_at_10 |
|
value: 0.14152 |
|
- type: map_at_100 |
|
value: 0.1827 |
|
- type: map_at_1000 |
|
value: 0.1988 |
|
- type: map_at_3 |
|
value: 0.10301 |
|
- type: map_at_5 |
|
value: 0.12085 |
|
- type: mrr_at_1 |
|
value: 0.47988 |
|
- type: mrr_at_10 |
|
value: 0.5692 |
|
- type: mrr_at_100 |
|
value: 0.57428 |
|
- type: mrr_at_1000 |
|
value: 0.57482 |
|
- type: mrr_at_3 |
|
value: 0.55315 |
|
- type: mrr_at_5 |
|
value: 0.56352 |
|
- type: ndcg_at_1 |
|
value: 0.45356 |
|
- type: ndcg_at_10 |
|
value: 0.3725 |
|
- type: ndcg_at_100 |
|
value: 0.34496 |
|
- type: ndcg_at_1000 |
|
value: 0.43374 |
|
- type: ndcg_at_3 |
|
value: 0.42643 |
|
- type: ndcg_at_5 |
|
value: 0.40882 |
|
- type: precision_at_1 |
|
value: 0.47368 |
|
- type: precision_at_10 |
|
value: 0.2774 |
|
- type: precision_at_100 |
|
value: 0.09071 |
|
- type: precision_at_1000 |
|
value: 0.02226 |
|
- type: precision_at_3 |
|
value: 0.40144 |
|
- type: precision_at_5 |
|
value: 0.35913 |
|
- type: recall_at_1 |
|
value: 0.06126 |
|
- type: recall_at_10 |
|
value: 0.18427 |
|
- type: recall_at_100 |
|
value: 0.35018 |
|
- type: recall_at_1000 |
|
value: 0.6766 |
|
- type: recall_at_3 |
|
value: 0.11706 |
|
- type: recall_at_5 |
|
value: 0.14419 |
|
- dataset: |
|
type: mteb/nq |
|
name: MTEB NQ |
|
config: default |
|
split: test |
|
task: |
|
type: Retrieval |
|
metrics: |
|
- type: map_at_1 |
|
value: 0.33053 |
|
- type: map_at_10 |
|
value: 0.49739 |
|
- type: map_at_100 |
|
value: 0.50626 |
|
- type: map_at_1000 |
|
value: 0.50647 |
|
- type: map_at_3 |
|
value: 0.4491 |
|
- type: map_at_5 |
|
value: 0.4783 |
|
- type: mrr_at_1 |
|
value: 0.37254 |
|
- type: mrr_at_10 |
|
value: 0.52222 |
|
- type: mrr_at_100 |
|
value: 0.52855 |
|
- type: mrr_at_1000 |
|
value: 0.52869 |
|
- type: mrr_at_3 |
|
value: 0.48445 |
|
- type: mrr_at_5 |
|
value: 0.50834 |
|
- type: ndcg_at_1 |
|
value: 0.37254 |
|
- type: ndcg_at_10 |
|
value: 0.58044 |
|
- type: ndcg_at_100 |
|
value: 0.61613 |
|
- type: ndcg_at_1000 |
|
value: 0.62046 |
|
- type: ndcg_at_3 |
|
value: 0.49219 |
|
- type: ndcg_at_5 |
|
value: 0.54037 |
|
- type: precision_at_1 |
|
value: 0.37254 |
|
- type: precision_at_10 |
|
value: 0.09655 |
|
- type: precision_at_100 |
|
value: 0.01167 |
|
- type: precision_at_1000 |
|
value: 0.00121 |
|
- type: precision_at_3 |
|
value: 0.22538 |
|
- type: precision_at_5 |
|
value: 0.16344 |
|
- type: recall_at_1 |
|
value: 0.33053 |
|
- type: recall_at_10 |
|
value: 0.8076 |
|
- type: recall_at_100 |
|
value: 0.95862 |
|
- type: recall_at_1000 |
|
value: 0.99044 |
|
- type: recall_at_3 |
|
value: 0.58157 |
|
- type: recall_at_5 |
|
value: 0.69235 |
|
- dataset: |
|
type: mteb/quora |
|
name: MTEB QuoraRetrieval |
|
config: default |
|
split: test |
|
task: |
|
type: Retrieval |
|
metrics: |
|
- type: map_at_1 |
|
value: 0.70056 |
|
- type: map_at_10 |
|
value: 0.84009 |
|
- type: map_at_100 |
|
value: 0.84661 |
|
- type: map_at_1000 |
|
value: 0.84678 |
|
- type: map_at_3 |
|
value: 0.81036 |
|
- type: map_at_5 |
|
value: 0.82923 |
|
- type: mrr_at_1 |
|
value: 0.8062 |
|
- type: mrr_at_10 |
|
value: 0.86971 |
|
- type: mrr_at_100 |
|
value: 0.87079 |
|
- type: mrr_at_1000 |
|
value: 0.8708 |
|
- type: mrr_at_3 |
|
value: 0.85943 |
|
- type: mrr_at_5 |
|
value: 0.86664 |
|
- type: ndcg_at_1 |
|
value: 0.8064 |
|
- type: ndcg_at_10 |
|
value: 0.87821 |
|
- type: ndcg_at_100 |
|
value: 0.89091 |
|
- type: ndcg_at_1000 |
|
value: 0.89202 |
|
- type: ndcg_at_3 |
|
value: 0.849 |
|
- type: ndcg_at_5 |
|
value: 0.86544 |
|
- type: precision_at_1 |
|
value: 0.8064 |
|
- type: precision_at_10 |
|
value: 0.13347 |
|
- type: precision_at_100 |
|
value: 0.01527 |
|
- type: precision_at_1000 |
|
value: 0.00157 |
|
- type: precision_at_3 |
|
value: 0.37153 |
|
- type: precision_at_5 |
|
value: 0.2448 |
|
- type: recall_at_1 |
|
value: 0.70056 |
|
- type: recall_at_10 |
|
value: 0.95148 |
|
- type: recall_at_100 |
|
value: 0.99474 |
|
- type: recall_at_1000 |
|
value: 0.99977 |
|
- type: recall_at_3 |
|
value: 0.86773 |
|
- type: recall_at_5 |
|
value: 0.91396 |
|
- dataset: |
|
type: mteb/scidocs |
|
name: MTEB SCIDOCS |
|
config: default |
|
split: test |
|
task: |
|
type: Retrieval |
|
metrics: |
|
- type: map_at_1 |
|
value: 0.05737 |
|
- type: map_at_10 |
|
value: 0.14896 |
|
- type: map_at_100 |
|
value: 0.17646 |
|
- type: map_at_1000 |
|
value: 0.1803 |
|
- type: map_at_3 |
|
value: 0.10474 |
|
- type: map_at_5 |
|
value: 0.12656 |
|
- type: mrr_at_1 |
|
value: 0.281 |
|
- type: mrr_at_10 |
|
value: 0.39579 |
|
- type: mrr_at_100 |
|
value: 0.40687 |
|
- type: mrr_at_1000 |
|
value: 0.40722 |
|
- type: mrr_at_3 |
|
value: 0.35917 |
|
- type: mrr_at_5 |
|
value: 0.38097 |
|
- type: ndcg_at_1 |
|
value: 0.281 |
|
- type: ndcg_at_10 |
|
value: 0.24146 |
|
- type: ndcg_at_100 |
|
value: 0.339 |
|
- type: ndcg_at_1000 |
|
value: 0.39728 |
|
- type: ndcg_at_3 |
|
value: 0.22721 |
|
- type: ndcg_at_5 |
|
value: 0.20015 |
|
- type: precision_at_1 |
|
value: 0.281 |
|
- type: precision_at_10 |
|
value: 0.1254 |
|
- type: precision_at_100 |
|
value: 0.02651 |
|
- type: precision_at_1000 |
|
value: 0.00404 |
|
- type: precision_at_3 |
|
value: 0.212 |
|
- type: precision_at_5 |
|
value: 0.176 |
|
- type: recall_at_1 |
|
value: 0.05737 |
|
- type: recall_at_10 |
|
value: 0.254 |
|
- type: recall_at_100 |
|
value: 0.53772 |
|
- type: recall_at_1000 |
|
value: 0.82013 |
|
- type: recall_at_3 |
|
value: 0.12897 |
|
- type: recall_at_5 |
|
value: 0.17855 |
|
- dataset: |
|
type: mteb/scifact |
|
name: MTEB SciFact |
|
config: default |
|
split: test |
|
task: |
|
type: Retrieval |
|
metrics: |
|
- type: map_at_1 |
|
value: 0.60011 |
|
- type: map_at_10 |
|
value: 0.70101 |
|
- type: map_at_100 |
|
value: 0.70687 |
|
- type: map_at_1000 |
|
value: 0.70699 |
|
- type: map_at_3 |
|
value: 0.67135 |
|
- type: map_at_5 |
|
value: 0.6878 |
|
- type: mrr_at_1 |
|
value: 0.62667 |
|
- type: mrr_at_10 |
|
value: 0.71022 |
|
- type: mrr_at_100 |
|
value: 0.71484 |
|
- type: mrr_at_1000 |
|
value: 0.71496 |
|
- type: mrr_at_3 |
|
value: 0.68944 |
|
- type: mrr_at_5 |
|
value: 0.69961 |
|
- type: ndcg_at_1 |
|
value: 0.62667 |
|
- type: ndcg_at_10 |
|
value: 0.7472 |
|
- type: ndcg_at_100 |
|
value: 0.76961 |
|
- type: ndcg_at_1000 |
|
value: 0.77294 |
|
- type: ndcg_at_3 |
|
value: 0.69776 |
|
- type: ndcg_at_5 |
|
value: 0.71964 |
|
- type: precision_at_1 |
|
value: 0.62667 |
|
- type: precision_at_10 |
|
value: 0.09933 |
|
- type: precision_at_100 |
|
value: 0.01103 |
|
- type: precision_at_1000 |
|
value: 0.00113 |
|
- type: precision_at_3 |
|
value: 0.27 |
|
- type: precision_at_5 |
|
value: 0.178 |
|
- type: recall_at_1 |
|
value: 0.60011 |
|
- type: recall_at_10 |
|
value: 0.878 |
|
- type: recall_at_100 |
|
value: 0.97333 |
|
- type: recall_at_1000 |
|
value: 1 |
|
- type: recall_at_3 |
|
value: 0.74839 |
|
- type: recall_at_5 |
|
value: 0.80028 |
|
- dataset: |
|
type: mteb/touche2020 |
|
name: MTEB Touche2020 |
|
config: default |
|
split: test |
|
task: |
|
type: Retrieval |
|
metrics: |
|
- type: map_at_1 |
|
value: 0.02152 |
|
- type: map_at_10 |
|
value: 0.07747 |
|
- type: map_at_100 |
|
value: 0.1364 |
|
- type: map_at_1000 |
|
value: 0.15235 |
|
- type: map_at_3 |
|
value: 0.04103 |
|
- type: map_at_5 |
|
value: 0.05482 |
|
- type: mrr_at_1 |
|
value: 0.26531 |
|
- type: mrr_at_10 |
|
value: 0.41399 |
|
- type: mrr_at_100 |
|
value: 0.43047 |
|
- type: mrr_at_1000 |
|
value: 0.43047 |
|
- type: mrr_at_3 |
|
value: 0.38776 |
|
- type: mrr_at_5 |
|
value: 0.40612 |
|
- type: ndcg_at_1 |
|
value: 0.23469 |
|
- type: ndcg_at_10 |
|
value: 0.20147 |
|
- type: ndcg_at_100 |
|
value: 0.3279 |
|
- type: ndcg_at_1000 |
|
value: 0.45324 |
|
- type: ndcg_at_3 |
|
value: 0.22555 |
|
- type: ndcg_at_5 |
|
value: 0.2097 |
|
- type: precision_at_1 |
|
value: 0.26531 |
|
- type: precision_at_10 |
|
value: 0.17755 |
|
- type: precision_at_100 |
|
value: 0.07082 |
|
- type: precision_at_1000 |
|
value: 0.01547 |
|
- type: precision_at_3 |
|
value: 0.2449 |
|
- type: precision_at_5 |
|
value: 0.21633 |
|
- type: recall_at_1 |
|
value: 0.02152 |
|
- type: recall_at_10 |
|
value: 0.13331 |
|
- type: recall_at_100 |
|
value: 0.4535 |
|
- type: recall_at_1000 |
|
value: 0.83447 |
|
- type: recall_at_3 |
|
value: 0.05531 |
|
- type: recall_at_5 |
|
value: 0.08029 |
|
- dataset: |
|
type: mteb/trec-covid |
|
name: MTEB TRECCOVID |
|
config: default |
|
split: test |
|
task: |
|
type: Retrieval |
|
metrics: |
|
- type: map_at_1 |
|
value: 0.00202 |
|
- type: map_at_10 |
|
value: 0.01727 |
|
- type: map_at_100 |
|
value: 0.10906 |
|
- type: map_at_1000 |
|
value: 0.2894 |
|
- type: map_at_3 |
|
value: 0.00553 |
|
- type: map_at_5 |
|
value: 0.00924 |
|
- type: mrr_at_1 |
|
value: 0.74 |
|
- type: mrr_at_10 |
|
value: 0.85667 |
|
- type: mrr_at_100 |
|
value: 0.85667 |
|
- type: mrr_at_1000 |
|
value: 0.85667 |
|
- type: mrr_at_3 |
|
value: 0.85667 |
|
- type: mrr_at_5 |
|
value: 0.85667 |
|
- type: ndcg_at_1 |
|
value: 0.66 |
|
- type: ndcg_at_10 |
|
value: 0.69259 |
|
- type: ndcg_at_100 |
|
value: 0.57274 |
|
- type: ndcg_at_1000 |
|
value: 0.55462 |
|
- type: ndcg_at_3 |
|
value: 0.70654 |
|
- type: ndcg_at_5 |
|
value: 0.71611 |
|
- type: precision_at_1 |
|
value: 0.74 |
|
- type: precision_at_10 |
|
value: 0.748 |
|
- type: precision_at_100 |
|
value: 0.5962 |
|
- type: precision_at_1000 |
|
value: 0.24842 |
|
- type: precision_at_3 |
|
value: 0.77333 |
|
- type: precision_at_5 |
|
value: 0.788 |
|
- type: recall_at_1 |
|
value: 0.00202 |
|
- type: recall_at_10 |
|
value: 0.02001 |
|
- type: recall_at_100 |
|
value: 0.14801 |
|
- type: recall_at_1000 |
|
value: 0.53939 |
|
- type: recall_at_3 |
|
value: 0.00609 |
|
- type: recall_at_5 |
|
value: 0.01048 |
|
pipeline_tag: sentence-similarity |
|
--- |
|
# Granite-Embedding-125m-English |
|
|
|
**Model Summary:** |
|
Granite-Embedding-125m-English is a 125M-parameter dense biencoder embedding model from the Granite Embeddings suite that can be used to generate high-quality text embeddings. This model produces embedding vectors of size 768. Unlike most other open-source models, this model was trained using only open-source relevance-pair datasets with permissive, enterprise-friendly licenses, plus IBM-collected and IBM-generated datasets. While maintaining competitive scores on academic benchmarks such as BEIR, this model also performs well on many enterprise use cases. This model is developed using retrieval-oriented pretraining, contrastive finetuning, and knowledge distillation. |
|
|
|
- **Developers:** Granite Embedding Team, IBM |
|
- **GitHub Repository:** [ibm-granite/granite-embedding-models](https://github.com/ibm-granite/granite-embedding-models) |
|
- **Website**: [Granite Docs](https://www.ibm.com/granite/docs/) |
|
- **Paper:** Coming Soon |
|
- **Release Date**: December 18th, 2024 |
|
- **License:** [Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0) |
|
|
|
**Supported Languages:** |
|
English. |
|
|
|
**Intended use:** |
|
The model is designed to produce fixed length vector representations for a given text, which can be used for text similarity, retrieval, and search applications. |
|
|
|
**Usage with Sentence Transformers:** |
|
The model is compatible with the Sentence Transformers library and is very easy to use: |
|
|
|
First, install the sentence transformers library |
|
```shell |
|
pip install sentence_transformers |
|
``` |
|
|
|
The model can then be used to encode pairs of text and find the similarity between their representations |
|
|
|
```python |
|
from sentence_transformers import SentenceTransformer, util |
|
|
|
model_path = "ibm-granite/granite-embedding-125m-english" |
|
# Load the Sentence Transformer model |
|
model = SentenceTransformer(model_path) |
|
|
|
input_queries = [ |
|
' Who made the song My achy breaky heart? ', |
|
'summit define' |
|
] |
|
|
|
input_passages = [ |
|
"Achy Breaky Heart is a country song written by Don Von Tress. Originally titled Don't Tell My Heart and performed by The Marcy Brothers in 1991. ", |
|
"Definition of summit for English Language Learners. : 1 the highest point of a mountain : the top of a mountain. : 2 the highest level. : 3 a meeting or series of meetings between the leaders of two or more governments." |
|
] |
|
|
|
# encode queries and passages |
|
query_embeddings = model.encode(input_queries) |
|
passage_embeddings = model.encode(input_passages) |
|
|
|
# calculate cosine similarity |
|
print(util.cos_sim(query_embeddings, passage_embeddings)) |
|
``` |
|
|
|
**Usage with Hugging Face Transformers:** |
|
This is a simple example of how to use the Granite-Embedding-125m-English model with the Transformers library and PyTorch. |
|
|
|
First, install the required libraries |
|
```shell |
|
pip install transformers torch |
|
``` |
|
|
|
The model can then be used to encode text (the example below encodes a list of queries) |
|
|
|
```python |
|
import torch |
|
from transformers import AutoModel, AutoTokenizer |
|
|
|
model_path = "ibm-granite/granite-embedding-125m-english" |
|
|
|
# Load the model and tokenizer |
|
model = AutoModel.from_pretrained(model_path) |
|
tokenizer = AutoTokenizer.from_pretrained(model_path) |
|
model.eval() |
|
|
|
input_queries = [ |
|
' Who made the song My achy breaky heart? ', |
|
'summit define' |
|
] |
|
|
|
# tokenize inputs |
|
tokenized_queries = tokenizer(input_queries, padding=True, truncation=True, return_tensors='pt') |
|
|
|
# encode queries |
|
with torch.no_grad(): |
|
# Queries |
|
model_output = model(**tokenized_queries) |
|
# Perform pooling. granite-embedding-125m-english uses CLS Pooling |
|
query_embeddings = model_output[0][:, 0] |
|
|
|
# normalize the embeddings |
|
query_embeddings = torch.nn.functional.normalize(query_embeddings, dim=1) |
|
|
|
``` |
|
**Evaluation:** |
|
|
|
The performance of the Granite-Embedding-125m-English model on MTEB Retrieval (i.e., BEIR) and code retrieval (CoIR) benchmarks is reported below. |
|
|
|
| Model | Parameters (M)| Embedding Dimension | MTEB Retrieval (15) | CoIR (10) | |
|
|---------------------------------|:------------:|:-------------------:|:-------------------: |:----------:| |
|
|granite-embedding-125m-english |125 |768 |52.3 |50.3 | |
|
|
|
**Model Architecture:** |
|
Granite-Embedding-125m-English is based on an encoder-only, RoBERTa-like transformer architecture, trained internally at IBM Research. |
|
|
|
| Model | granite-embedding-30m-english | granite-embedding-125m-english | granite-embedding-107m-multilingual | granite-embedding-278m-multilingual | |
|
| :--------- | :-------:| :--------: | :-----:| :-----:| |
|
| Embedding size | 384 | **768** | 384 | 768 | |
|
| Number of layers | 6 | **12** | 6 | 12 | |
|
| Number of attention heads | 12 | **12** | 12 | 12 | |
|
| Intermediate size | 1536 | **3072** | 1536 | 3072 | |
|
| Activation Function | GeLU | **GeLU** | GeLU | GeLU | |
|
| Vocabulary Size | 50265| **50265** | 250002 | 250002 | |
|
| Max. Sequence Length | 512 | **512** | 512 | 512 | |
|
| # Parameters | 30M | **125M** | 107M | 278M | |
|
|
|
|
|
**Training Data:** |
|
Overall, the training data consists of four key sources: (1) unsupervised title-body paired data scraped from the web, (2) publicly available paired data with permissive, enterprise-friendly licenses, (3) IBM-internal paired data targeting specific technical domains, and (4) IBM-generated synthetic data. The data is listed below: |
|
|
|
| **Dataset** | **Num. Pairs** | |
|
|----------------------------------------------------|:---------------:| |
|
| SPECTER citation triplets | 684,100 | |
|
| Stack Exchange Duplicate questions (titles) | 304,525 | |
|
| Stack Exchange Duplicate questions (bodies) | 250,519 | |
|
| Stack Exchange Duplicate questions (titles+bodies) | 250,460 | |
|
| Natural Questions (NQ) | 100,231 | |
|
| SQuAD2.0 | 87,599 | |
|
| PAQ (Question, Answer) pairs | 64,371,441 | |
|
| Stack Exchange (Title, Answer) pairs | 4,067,139 | |
|
| Stack Exchange (Title, Body) pairs | 23,978,013 | |
|
| Stack Exchange (Title+Body, Answer) pairs | 187,195 | |
|
| S2ORC Citation pairs (Titles) | 52,603,982 | |
|
| S2ORC (Title, Abstract) | 41,769,185 | |
|
| S2ORC (Citations, abstracts) | 52,603,982 | |
|
| WikiAnswers Duplicate question pairs | 77,427,422 | |
|
| SearchQA | 582,261 | |
|
| HotpotQA | 85,000 | |
|
| Fever | 109,810 | |
|
| Arxiv | 2,358,545 | |
|
| Wikipedia | 20,745,403 | |
|
| PubMed | 20,000,000 | |
|
| Miracl En Pairs | 9,016 | |
|
| DBPedia Title-Body Pairs | 4,635,922 | |
|
| Synthetic: Query-Wikipedia Passage | 1,879,093 | |
|
| Synthetic: Fact Verification | 9,888 | |
|
| IBM Internal Triples | 40,290 | |
|
| IBM Internal Title-Body Pairs | 1,524,586 | |
|
|
|
Notably, we do not use the popular MS-MARCO retrieval dataset in our training corpus due to its non-commercial license, while other open-source models train on this dataset due to its high quality. |
|
|
|
**Infrastructure:** |
|
We train Granite Embedding Models using IBM's computing cluster, Cognitive Compute Cluster, which is outfitted with NVIDIA A100 80GB GPUs. This cluster provides a scalable and efficient infrastructure for training our models over multiple GPUs. |
|
|
|
**Ethical Considerations and Limitations:** |
|
The data used to train the base language model was filtered to remove text containing hate, abuse, and profanity. Granite-Embedding-125m-English is trained only for English texts, and has a context length of 512 tokens (longer texts will be truncated to this size). |
|
|
|
|
|
<!-- ## Citation |
|
``` |
|
@misc{granite-embedding-models, |
|
author = {author 1, author2, ...}, |
|
title = {}, |
|
journal = {}, |
|
volume = {}, |
|
year = {2024}, |
|
url = {https://arxiv.org/abs/0000.00000}, |
|
} |
|
``` --> |