metadata
language:
- en
license: apache-2.0
library_name: sentence-transformers
tags:
- sentence-transformers
- sentence-similarity
- feature-extraction
- generated_from_trainer
- dataset_size:282883
- loss:MatryoshkaLoss
- loss:MultipleNegativesRankingLoss
base_model: BAAI/bge-base-en-v1.5
datasets: []
metrics:
- cosine_accuracy@1
- cosine_accuracy@3
- cosine_accuracy@5
- cosine_accuracy@10
- cosine_precision@1
- cosine_precision@3
- cosine_precision@5
- cosine_precision@10
- cosine_recall@1
- cosine_recall@3
- cosine_recall@5
- cosine_recall@10
- cosine_ndcg@10
- cosine_mrr@10
- cosine_map@100
widget:
- source_sentence: Mwanamke anashona.
sentences:
- Mwanamke akishona blanketi pamoja.
- Lakini Sir James alimkatiza.
- >-
Kwa kuongezea, wabunifu mashuhuri sasa wana maduka ya rejareja katika
hoteli kadhaa za ununuzi.
- source_sentence: Mtandao huwawezesha watu kununua vitu.
sentences:
- Mwanamke fulani anakata kipande cha jibini.
- >-
Hakuna hata mmoja wa wafadhili hawa anayeweza kuruhusu kuacha sasa,
haswa na uchumi unaoteseka ndani na kitaifa.
- >-
Kwa vyovyote vile, kama ningekuwa na nia ya kununua kitabu hicho,
ningekuwa na nafasi nzuri zaidi ya kujadili bei nzuri.
- source_sentence: Je, kweli wewe ni hivyo gullible, Dave Hanson?
sentences:
- >-
Mwanamume amesimama katika mashua paddling kuelekea pwani lined na
fanicha na vitu vingine kubwa.
- Dave Hanson, je, unaamini kila kitu wanachosema?
- Wasichana watatu wakifanya mchezo wa kuigiza jukwaani.
- source_sentence: Wanandoa wakitembea pamoja.
sentences:
- >-
Mwanamume aliyevalia koti la manjano anatembea kando ya gari la kubebea
watu.
- Wenzi wa ndoa wazee wanatembea barabarani wakishikamana mikono.
- Msichana mdogo anapanda kwenye kifaa cha kamba.
- source_sentence: Kuna masuala ya sera.
sentences:
- >-
Mwanamke mwenye makunyanzi sana akishikilia miwani yake na kutembea
kwenye barabara ya jiji.
- Mwanamume anayeigiza kwa ajili ya umati wa watu.
- Masuala ya sera ya mbinu nyingi na maombi.
pipeline_tag: sentence-similarity
model-index:
- name: BGE base Swahili Matryoshka
results:
- task:
type: information-retrieval
name: Information Retrieval
dataset:
name: dim 768
type: dim_768
metrics:
- type: cosine_accuracy@1
value: 0.26803894120641386
name: Cosine Accuracy@1
- type: cosine_accuracy@3
value: 0.3499618223466531
name: Cosine Accuracy@3
- type: cosine_accuracy@5
value: 0.3858806312038687
name: Cosine Accuracy@5
- type: cosine_accuracy@10
value: 0.43318910664291166
name: Cosine Accuracy@10
- type: cosine_precision@1
value: 0.26803894120641386
name: Cosine Precision@1
- type: cosine_precision@3
value: 0.11665394078221768
name: Cosine Precision@3
- type: cosine_precision@5
value: 0.07717612624077373
name: Cosine Precision@5
- type: cosine_precision@10
value: 0.043318910664291166
name: Cosine Precision@10
- type: cosine_recall@1
value: 0.26803894120641386
name: Cosine Recall@1
- type: cosine_recall@3
value: 0.3499618223466531
name: Cosine Recall@3
- type: cosine_recall@5
value: 0.3858806312038687
name: Cosine Recall@5
- type: cosine_recall@10
value: 0.43318910664291166
name: Cosine Recall@10
- type: cosine_ndcg@10
value: 0.34611891078942064
name: Cosine Ndcg@10
- type: cosine_mrr@10
value: 0.3188061049905684
name: Cosine Mrr@10
- type: cosine_map@100
value: 0.3251959746415499
name: Cosine Map@100
- task:
type: information-retrieval
name: Information Retrieval
dataset:
name: dim 512
type: dim_512
metrics:
- type: cosine_accuracy@1
value: 0.26552557902774243
name: Cosine Accuracy@1
- type: cosine_accuracy@3
value: 0.34601679816747266
name: Cosine Accuracy@3
- type: cosine_accuracy@5
value: 0.3810766098243828
name: Cosine Accuracy@5
- type: cosine_accuracy@10
value: 0.4290850089081191
name: Cosine Accuracy@10
- type: cosine_precision@1
value: 0.26552557902774243
name: Cosine Precision@1
- type: cosine_precision@3
value: 0.11533893272249085
name: Cosine Precision@3
- type: cosine_precision@5
value: 0.07621532196487656
name: Cosine Precision@5
- type: cosine_precision@10
value: 0.04290850089081191
name: Cosine Precision@10
- type: cosine_recall@1
value: 0.26552557902774243
name: Cosine Recall@1
- type: cosine_recall@3
value: 0.34601679816747266
name: Cosine Recall@3
- type: cosine_recall@5
value: 0.3810766098243828
name: Cosine Recall@5
- type: cosine_recall@10
value: 0.4290850089081191
name: Cosine Recall@10
- type: cosine_ndcg@10
value: 0.3425120728009226
name: Cosine Ndcg@10
- type: cosine_mrr@10
value: 0.31538232445349546
name: Cosine Mrr@10
- type: cosine_map@100
value: 0.32174207802147353
name: Cosine Map@100
- task:
type: information-retrieval
name: Information Retrieval
dataset:
name: dim 256
type: dim_256
metrics:
- type: cosine_accuracy@1
value: 0.2576355306693815
name: Cosine Accuracy@1
- type: cosine_accuracy@3
value: 0.33790404683125475
name: Cosine Accuracy@3
- type: cosine_accuracy@5
value: 0.37165945533214556
name: Cosine Accuracy@5
- type: cosine_accuracy@10
value: 0.41950878086026977
name: Cosine Accuracy@10
- type: cosine_precision@1
value: 0.2576355306693815
name: Cosine Precision@1
- type: cosine_precision@3
value: 0.1126346822770849
name: Cosine Precision@3
- type: cosine_precision@5
value: 0.07433189106642912
name: Cosine Precision@5
- type: cosine_precision@10
value: 0.041950878086026974
name: Cosine Precision@10
- type: cosine_recall@1
value: 0.2576355306693815
name: Cosine Recall@1
- type: cosine_recall@3
value: 0.33790404683125475
name: Cosine Recall@3
- type: cosine_recall@5
value: 0.37165945533214556
name: Cosine Recall@5
- type: cosine_recall@10
value: 0.41950878086026977
name: Cosine Recall@10
- type: cosine_ndcg@10
value: 0.3338740008089949
name: Cosine Ndcg@10
- type: cosine_mrr@10
value: 0.30705069547968683
name: Cosine Mrr@10
- type: cosine_map@100
value: 0.3134101334652913
name: Cosine Map@100
- task:
type: information-retrieval
name: Information Retrieval
dataset:
name: dim 128
type: dim_128
metrics:
- type: cosine_accuracy@1
value: 0.24494146093153474
name: Cosine Accuracy@1
- type: cosine_accuracy@3
value: 0.3218694324255536
name: Cosine Accuracy@3
- type: cosine_accuracy@5
value: 0.3557202850598117
name: Cosine Accuracy@5
- type: cosine_accuracy@10
value: 0.402901501654365
name: Cosine Accuracy@10
- type: cosine_precision@1
value: 0.24494146093153474
name: Cosine Precision@1
- type: cosine_precision@3
value: 0.10728981080851785
name: Cosine Precision@3
- type: cosine_precision@5
value: 0.07114405701196233
name: Cosine Precision@5
- type: cosine_precision@10
value: 0.0402901501654365
name: Cosine Precision@10
- type: cosine_recall@1
value: 0.24494146093153474
name: Cosine Recall@1
- type: cosine_recall@3
value: 0.3218694324255536
name: Cosine Recall@3
- type: cosine_recall@5
value: 0.3557202850598117
name: Cosine Recall@5
- type: cosine_recall@10
value: 0.402901501654365
name: Cosine Recall@10
- type: cosine_ndcg@10
value: 0.3191027723891013
name: Cosine Ndcg@10
- type: cosine_mrr@10
value: 0.2928823673781056
name: Cosine Mrr@10
- type: cosine_map@100
value: 0.299205934269314
name: Cosine Map@100
- task:
type: information-retrieval
name: Information Retrieval
dataset:
name: dim 64
type: dim_64
metrics:
- type: cosine_accuracy@1
value: 0.21936243318910664
name: Cosine Accuracy@1
- type: cosine_accuracy@3
value: 0.2918045304148638
name: Cosine Accuracy@3
- type: cosine_accuracy@5
value: 0.3234601679816747
name: Cosine Accuracy@5
- type: cosine_accuracy@10
value: 0.3698778315092899
name: Cosine Accuracy@10
- type: cosine_precision@1
value: 0.21936243318910664
name: Cosine Precision@1
- type: cosine_precision@3
value: 0.0972681768049546
name: Cosine Precision@3
- type: cosine_precision@5
value: 0.06469203359633495
name: Cosine Precision@5
- type: cosine_precision@10
value: 0.03698778315092899
name: Cosine Precision@10
- type: cosine_recall@1
value: 0.21936243318910664
name: Cosine Recall@1
- type: cosine_recall@3
value: 0.2918045304148638
name: Cosine Recall@3
- type: cosine_recall@5
value: 0.3234601679816747
name: Cosine Recall@5
- type: cosine_recall@10
value: 0.3698778315092899
name: Cosine Recall@10
- type: cosine_ndcg@10
value: 0.2897472677253453
name: Cosine Ndcg@10
- type: cosine_mrr@10
value: 0.26472963050495346
name: Cosine Mrr@10
- type: cosine_map@100
value: 0.2710377326397304
name: Cosine Map@100
BGE base Swahili Matryoshka
This is a sentence-transformers model finetuned from BAAI/bge-base-en-v1.5. It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
Model Details
Model Description
- Model Type: Sentence Transformer
- Base model: BAAI/bge-base-en-v1.5
- Maximum Sequence Length: 512 tokens
- Output Dimensionality: 768 dimensions
- Similarity Function: Cosine Similarity
- Language: en
- License: apache-2.0
Model Sources
Full Model Architecture
SentenceTransformer(
(0): Transformer({'max_seq_length': 512, 'do_lower_case': True}) with Transformer model: BertModel
(1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
(2): Normalize()
)
Usage
Direct Usage (Sentence Transformers)
First install the Sentence Transformers library:
pip install -U sentence-transformers
Then you can load this model and run inference.
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("sartifyllc/bge-base-swahili-matryoshka")
sentences = [
'Kuna masuala ya sera.',
'Masuala ya sera ya mbinu nyingi na maombi.',
'Mwanamke mwenye makunyanzi sana akishikilia miwani yake na kutembea kwenye barabara ya jiji.',
]
embeddings = model.encode(sentences)
print(embeddings.shape)
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
Evaluation
Metrics
Information Retrieval (dataset: dim_768)
Metric |
Value |
cosine_accuracy@1 |
0.268 |
cosine_accuracy@3 |
0.35 |
cosine_accuracy@5 |
0.3859 |
cosine_accuracy@10 |
0.4332 |
cosine_precision@1 |
0.268 |
cosine_precision@3 |
0.1167 |
cosine_precision@5 |
0.0772 |
cosine_precision@10 |
0.0433 |
cosine_recall@1 |
0.268 |
cosine_recall@3 |
0.35 |
cosine_recall@5 |
0.3859 |
cosine_recall@10 |
0.4332 |
cosine_ndcg@10 |
0.3461 |
cosine_mrr@10 |
0.3188 |
cosine_map@100 |
0.3252 |
Information Retrieval (dataset: dim_512)
Metric |
Value |
cosine_accuracy@1 |
0.2655 |
cosine_accuracy@3 |
0.346 |
cosine_accuracy@5 |
0.3811 |
cosine_accuracy@10 |
0.4291 |
cosine_precision@1 |
0.2655 |
cosine_precision@3 |
0.1153 |
cosine_precision@5 |
0.0762 |
cosine_precision@10 |
0.0429 |
cosine_recall@1 |
0.2655 |
cosine_recall@3 |
0.346 |
cosine_recall@5 |
0.3811 |
cosine_recall@10 |
0.4291 |
cosine_ndcg@10 |
0.3425 |
cosine_mrr@10 |
0.3154 |
cosine_map@100 |
0.3217 |
Information Retrieval (dataset: dim_256)
Metric |
Value |
cosine_accuracy@1 |
0.2576 |
cosine_accuracy@3 |
0.3379 |
cosine_accuracy@5 |
0.3717 |
cosine_accuracy@10 |
0.4195 |
cosine_precision@1 |
0.2576 |
cosine_precision@3 |
0.1126 |
cosine_precision@5 |
0.0743 |
cosine_precision@10 |
0.042 |
cosine_recall@1 |
0.2576 |
cosine_recall@3 |
0.3379 |
cosine_recall@5 |
0.3717 |
cosine_recall@10 |
0.4195 |
cosine_ndcg@10 |
0.3339 |
cosine_mrr@10 |
0.3071 |
cosine_map@100 |
0.3134 |
Information Retrieval (dataset: dim_128)
Metric |
Value |
cosine_accuracy@1 |
0.2449 |
cosine_accuracy@3 |
0.3219 |
cosine_accuracy@5 |
0.3557 |
cosine_accuracy@10 |
0.4029 |
cosine_precision@1 |
0.2449 |
cosine_precision@3 |
0.1073 |
cosine_precision@5 |
0.0711 |
cosine_precision@10 |
0.0403 |
cosine_recall@1 |
0.2449 |
cosine_recall@3 |
0.3219 |
cosine_recall@5 |
0.3557 |
cosine_recall@10 |
0.4029 |
cosine_ndcg@10 |
0.3191 |
cosine_mrr@10 |
0.2929 |
cosine_map@100 |
0.2992 |
Information Retrieval (dataset: dim_64)
Metric |
Value |
cosine_accuracy@1 |
0.2194 |
cosine_accuracy@3 |
0.2918 |
cosine_accuracy@5 |
0.3235 |
cosine_accuracy@10 |
0.3699 |
cosine_precision@1 |
0.2194 |
cosine_precision@3 |
0.0973 |
cosine_precision@5 |
0.0647 |
cosine_precision@10 |
0.037 |
cosine_recall@1 |
0.2194 |
cosine_recall@3 |
0.2918 |
cosine_recall@5 |
0.3235 |
cosine_recall@10 |
0.3699 |
cosine_ndcg@10 |
0.2897 |
cosine_mrr@10 |
0.2647 |
cosine_map@100 |
0.271 |
Training Details
Training Dataset
Unnamed Dataset
- Size: 282,883 training samples
- Columns: positive and anchor
- Approximate statistics based on the first 1000 samples:
|         | positive | anchor |
|---------|----------|--------|
| type    | string   | string |
| details | min: 5 tokens; mean: 20.1 tokens; max: 78 tokens | min: 6 tokens; mean: 38.64 tokens; max: 184 tokens |
- Samples:
positive |
anchor |
Alingoja mtu huyo mwingine arudi. |
Ca'daan alingoja hadi alasiri yote mtu huyo atoke tena. |
Sheria hiyo huanzisha mfululizo wa ukaguzi wa majaribio. |
Sheria hiyo pia inatoa sheria ya kudhibiti kwa ajili ya mashirika fulani ambayo yanahitaji kuandaa taarifa za kifedha za mashirika yote na kuzisimamisha kwa wakaguzi wa jumla. |
Mbwa anakimbia na kuruka nje. |
Mbwa mwenye rangi ya kahawia anaruka na kukimbia shambani. |
- Loss: MatryoshkaLoss with these parameters:
{
"loss": "MultipleNegativesRankingLoss",
"matryoshka_dims": [
768,
512,
256,
128,
64
],
"matryoshka_weights": [
1,
1,
1,
1,
1
],
"n_dims_per_step": -1
}
Training Hyperparameters
Non-Default Hyperparameters
eval_strategy
: epoch
per_device_train_batch_size
: 32
per_device_eval_batch_size
: 16
gradient_accumulation_steps
: 16
learning_rate
: 2e-05
num_train_epochs
: 4
lr_scheduler_type
: cosine
warmup_ratio
: 0.1
bf16
: True
tf32
: True
load_best_model_at_end
: True
optim
: adamw_torch_fused
batch_sampler
: no_duplicates
All Hyperparameters
Click to expand
overwrite_output_dir
: False
do_predict
: False
eval_strategy
: epoch
prediction_loss_only
: True
per_device_train_batch_size
: 32
per_device_eval_batch_size
: 16
per_gpu_train_batch_size
: None
per_gpu_eval_batch_size
: None
gradient_accumulation_steps
: 16
eval_accumulation_steps
: None
learning_rate
: 2e-05
weight_decay
: 0.0
adam_beta1
: 0.9
adam_beta2
: 0.999
adam_epsilon
: 1e-08
max_grad_norm
: 1.0
num_train_epochs
: 4
max_steps
: -1
lr_scheduler_type
: cosine
lr_scheduler_kwargs
: {}
warmup_ratio
: 0.1
warmup_steps
: 0
log_level
: passive
log_level_replica
: warning
log_on_each_node
: True
logging_nan_inf_filter
: True
save_safetensors
: True
save_on_each_node
: False
save_only_model
: False
restore_callback_states_from_checkpoint
: False
no_cuda
: False
use_cpu
: False
use_mps_device
: False
seed
: 42
data_seed
: None
jit_mode_eval
: False
use_ipex
: False
bf16
: True
fp16
: False
fp16_opt_level
: O1
half_precision_backend
: auto
bf16_full_eval
: False
fp16_full_eval
: False
tf32
: True
local_rank
: 0
ddp_backend
: None
tpu_num_cores
: None
tpu_metrics_debug
: False
debug
: []
dataloader_drop_last
: False
dataloader_num_workers
: 0
dataloader_prefetch_factor
: None
past_index
: -1
disable_tqdm
: False
remove_unused_columns
: True
label_names
: None
load_best_model_at_end
: True
ignore_data_skip
: False
fsdp
: []
fsdp_min_num_params
: 0
fsdp_config
: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
fsdp_transformer_layer_cls_to_wrap
: None
accelerator_config
: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
deepspeed
: None
label_smoothing_factor
: 0.0
optim
: adamw_torch_fused
optim_args
: None
adafactor
: False
group_by_length
: False
length_column_name
: length
ddp_find_unused_parameters
: None
ddp_bucket_cap_mb
: None
ddp_broadcast_buffers
: False
dataloader_pin_memory
: True
dataloader_persistent_workers
: False
skip_memory_metrics
: True
use_legacy_prediction_loop
: False
push_to_hub
: False
resume_from_checkpoint
: None
hub_model_id
: None
hub_strategy
: every_save
hub_private_repo
: False
hub_always_push
: False
gradient_checkpointing
: False
gradient_checkpointing_kwargs
: None
include_inputs_for_metrics
: False
eval_do_concat_batches
: True
fp16_backend
: auto
push_to_hub_model_id
: None
push_to_hub_organization
: None
mp_parameters
:
auto_find_batch_size
: False
full_determinism
: False
torchdynamo
: None
ray_scope
: last
ddp_timeout
: 1800
torch_compile
: False
torch_compile_backend
: None
torch_compile_mode
: None
dispatch_batches
: None
split_batches
: None
include_tokens_per_second
: False
include_num_input_tokens_seen
: False
neftune_noise_alpha
: None
optim_target_modules
: None
batch_eval_metrics
: False
batch_sampler
: no_duplicates
multi_dataset_batch_sampler
: proportional
Training Logs
Click to expand
Epoch |
Step |
Training Loss |
dim_128_cosine_map@100 |
dim_256_cosine_map@100 |
dim_512_cosine_map@100 |
dim_64_cosine_map@100 |
dim_768_cosine_map@100 |
0.0181 |
10 |
9.7089 |
- |
- |
- |
- |
- |
0.0362 |
20 |
9.2806 |
- |
- |
- |
- |
- |
0.0543 |
30 |
8.8905 |
- |
- |
- |
- |
- |
0.0724 |
40 |
7.9651 |
- |
- |
- |
- |
- |
0.0905 |
50 |
7.4201 |
- |
- |
- |
- |
- |
0.1086 |
60 |
6.8346 |
- |
- |
- |
- |
- |
0.1267 |
70 |
6.515 |
- |
- |
- |
- |
- |
0.1448 |
80 |
6.2009 |
- |
- |
- |
- |
- |
0.1629 |
90 |
5.8256 |
- |
- |
- |
- |
- |
0.1810 |
100 |
5.549 |
- |
- |
- |
- |
- |
0.1991 |
110 |
5.1667 |
- |
- |
- |
- |
- |
0.2172 |
120 |
5.2684 |
- |
- |
- |
- |
- |
0.2353 |
130 |
5.0678 |
- |
- |
- |
- |
- |
0.2534 |
140 |
4.9183 |
- |
- |
- |
- |
- |
0.2715 |
150 |
4.844 |
- |
- |
- |
- |
- |
0.2896 |
160 |
4.5427 |
- |
- |
- |
- |
- |
0.3077 |
170 |
4.3324 |
- |
- |
- |
- |
- |
0.3258 |
180 |
4.4963 |
- |
- |
- |
- |
- |
0.3439 |
190 |
4.1704 |
- |
- |
- |
- |
- |
0.3620 |
200 |
4.1285 |
- |
- |
- |
- |
- |
0.3800 |
210 |
4.0235 |
- |
- |
- |
- |
- |
0.3981 |
220 |
4.0738 |
- |
- |
- |
- |
- |
0.4162 |
230 |
3.9132 |
- |
- |
- |
- |
- |
0.4343 |
240 |
3.9682 |
- |
- |
- |
- |
- |
0.4524 |
250 |
3.7542 |
- |
- |
- |
- |
- |
0.4705 |
260 |
3.6508 |
- |
- |
- |
- |
- |
0.4886 |
270 |
3.7596 |
- |
- |
- |
- |
- |
0.5067 |
280 |
3.5596 |
- |
- |
- |
- |
- |
0.5248 |
290 |
3.5077 |
- |
- |
- |
- |
- |
0.5429 |
300 |
3.3831 |
- |
- |
- |
- |
- |
0.5610 |
310 |
3.4 |
- |
- |
- |
- |
- |
0.5791 |
320 |
3.296 |
- |
- |
- |
- |
- |
0.5972 |
330 |
3.3646 |
- |
- |
- |
- |
- |
0.6153 |
340 |
3.3533 |
- |
- |
- |
- |
- |
0.6334 |
350 |
3.2171 |
- |
- |
- |
- |
- |
0.6515 |
360 |
3.2324 |
- |
- |
- |
- |
- |
0.6696 |
370 |
3.1544 |
- |
- |
- |
- |
- |
0.6877 |
380 |
3.3393 |
- |
- |
- |
- |
- |
0.7058 |
390 |
3.0864 |
- |
- |
- |
- |
- |
0.7239 |
400 |
3.1069 |
- |
- |
- |
- |
- |
0.7420 |
410 |
3.0722 |
- |
- |
- |
- |
- |
0.7601 |
420 |
3.1446 |
- |
- |
- |
- |
- |
0.7782 |
430 |
3.0847 |
- |
- |
- |
- |
- |
0.7963 |
440 |
3.0331 |
- |
- |
- |
- |
- |
0.8144 |
450 |
3.0197 |
- |
- |
- |
- |
- |
0.8325 |
460 |
2.9667 |
- |
- |
- |
- |
- |
0.8506 |
470 |
2.8331 |
- |
- |
- |
- |
- |
0.8687 |
480 |
2.9333 |
- |
- |
- |
- |
- |
0.8868 |
490 |
2.8714 |
- |
- |
- |
- |
- |
0.9049 |
500 |
2.8578 |
- |
- |
- |
- |
- |
0.9230 |
510 |
2.9689 |
- |
- |
- |
- |
- |
0.9411 |
520 |
2.7977 |
- |
- |
- |
- |
- |
0.9592 |
530 |
2.9832 |
- |
- |
- |
- |
- |
0.9773 |
540 |
2.9761 |
- |
- |
- |
- |
- |
0.9954 |
550 |
2.7711 |
- |
- |
- |
- |
- |
0.9990 |
552 |
- |
0.2772 |
0.2954 |
0.3052 |
0.2445 |
0.3080 |
1.0135 |
560 |
2.7194 |
- |
- |
- |
- |
- |
1.0316 |
570 |
2.8489 |
- |
- |
- |
- |
- |
1.0497 |
580 |
2.6559 |
- |
- |
- |
- |
- |
1.0678 |
590 |
2.6239 |
- |
- |
- |
- |
- |
1.0859 |
600 |
2.7081 |
- |
- |
- |
- |
- |
1.1039 |
610 |
2.6581 |
- |
- |
- |
- |
- |
1.1220 |
620 |
2.7709 |
- |
- |
- |
- |
- |
1.1401 |
630 |
2.6191 |
- |
- |
- |
- |
- |
1.1582 |
640 |
2.6712 |
- |
- |
- |
- |
- |
1.1763 |
650 |
2.5445 |
- |
- |
- |
- |
- |
1.1944 |
660 |
2.5264 |
- |
- |
- |
- |
- |
1.2125 |
670 |
2.5782 |
- |
- |
- |
- |
- |
1.2306 |
680 |
2.5652 |
- |
- |
- |
- |
- |
1.2487 |
690 |
2.6229 |
- |
- |
- |
- |
- |
1.2668 |
700 |
2.5557 |
- |
- |
- |
- |
- |
1.2849 |
710 |
2.5251 |
- |
- |
- |
- |
- |
1.3030 |
720 |
2.4555 |
- |
- |
- |
- |
- |
1.3211 |
730 |
2.5335 |
- |
- |
- |
- |
- |
1.3392 |
740 |
2.5027 |
- |
- |
- |
- |
- |
1.3573 |
750 |
2.3569 |
- |
- |
- |
- |
- |
1.3754 |
760 |
2.4255 |
- |
- |
- |
- |
- |
1.3935 |
770 |
2.4626 |
- |
- |
- |
- |
- |
1.4116 |
780 |
2.363 |
- |
- |
- |
- |
- |
1.4297 |
790 |
2.4 |
- |
- |
- |
- |
- |
1.4478 |
800 |
2.3317 |
- |
- |
- |
- |
- |
1.4659 |
810 |
2.2922 |
- |
- |
- |
- |
- |
1.4840 |
820 |
2.4086 |
- |
- |
- |
- |
- |
1.5021 |
830 |
2.3166 |
- |
- |
- |
- |
- |
1.5202 |
840 |
2.3401 |
- |
- |
- |
- |
- |
1.5383 |
850 |
2.1951 |
- |
- |
- |
- |
- |
1.5564 |
860 |
2.214 |
- |
- |
- |
- |
- |
1.5745 |
870 |
2.1859 |
- |
- |
- |
- |
- |
1.5926 |
880 |
2.3605 |
- |
- |
- |
- |
- |
1.6107 |
890 |
2.2528 |
- |
- |
- |
- |
- |
1.6288 |
900 |
2.2759 |
- |
- |
- |
- |
- |
1.6469 |
910 |
2.1458 |
- |
- |
- |
- |
- |
1.6650 |
920 |
2.187 |
- |
- |
- |
- |
- |
1.6831 |
930 |
2.3406 |
- |
- |
- |
- |
- |
1.7012 |
940 |
2.2151 |
- |
- |
- |
- |
- |
1.7193 |
950 |
2.2971 |
- |
- |
- |
- |
- |
1.7374 |
960 |
2.2736 |
- |
- |
- |
- |
- |
1.7555 |
970 |
2.2329 |
- |
- |
- |
- |
- |
1.7736 |
980 |
2.2602 |
- |
- |
- |
- |
- |
1.7917 |
990 |
2.2402 |
- |
- |
- |
- |
- |
1.8098 |
1000 |
2.1971 |
- |
- |
- |
- |
- |
1.8278 |
1010 |
2.1642 |
- |
- |
- |
- |
- |
1.8459 |
1020 |
2.1274 |
- |
- |
- |
- |
- |
1.8640 |
1030 |
2.1833 |
- |
- |
- |
- |
- |
1.8821 |
1040 |
2.156 |
- |
- |
- |
- |
- |
1.9002 |
1050 |
2.1252 |
- |
- |
- |
- |
- |
1.9183 |
1060 |
2.161 |
- |
- |
- |
- |
- |
1.9364 |
1070 |
2.1267 |
- |
- |
- |
- |
- |
1.9545 |
1080 |
2.2017 |
- |
- |
- |
- |
- |
1.9726 |
1090 |
2.3044 |
- |
- |
- |
- |
- |
1.9907 |
1100 |
2.161 |
- |
- |
- |
- |
- |
1.9998 |
1105 |
- |
0.2928 |
0.3085 |
0.3165 |
0.2632 |
0.3204 |
2.0088 |
1110 |
2.0594 |
- |
- |
- |
- |
- |
2.0269 |
1120 |
2.2277 |
- |
- |
- |
- |
- |
2.0450 |
1130 |
2.1591 |
- |
- |
- |
- |
- |
2.0631 |
1140 |
2.0396 |
- |
- |
- |
- |
- |
2.0812 |
1150 |
2.1007 |
- |
- |
- |
- |
- |
2.0993 |
1160 |
2.0705 |
- |
- |
- |
- |
- |
2.1174 |
1170 |
2.0894 |
- |
- |
- |
- |
- |
2.1355 |
1180 |
2.0677 |
- |
- |
- |
- |
- |
2.1536 |
1190 |
2.0893 |
- |
- |
- |
- |
- |
2.1717 |
1200 |
1.984 |
- |
- |
- |
- |
- |
2.1898 |
1210 |
1.9206 |
- |
- |
- |
- |
- |
2.2079 |
1220 |
2.132 |
- |
- |
- |
- |
- |
2.2260 |
1230 |
2.0457 |
- |
- |
- |
- |
- |
2.2441 |
1240 |
2.1428 |
- |
- |
- |
- |
- |
2.2622 |
1250 |
2.1116 |
- |
- |
- |
- |
- |
2.2803 |
1260 |
1.993 |
- |
- |
- |
- |
- |
2.2984 |
1270 |
2.0181 |
- |
- |
- |
- |
- |
2.3165 |
1280 |
1.9742 |
- |
- |
- |
- |
- |
2.3346 |
1290 |
2.081 |
- |
- |
- |
- |
- |
2.3527 |
1300 |
1.9107 |
- |
- |
- |
- |
- |
2.3708 |
1310 |
1.9507 |
- |
- |
- |
- |
- |
2.3889 |
1320 |
1.9844 |
- |
- |
- |
- |
- |
2.4070 |
1330 |
2.0035 |
- |
- |
- |
- |
- |
2.4251 |
1340 |
1.9121 |
- |
- |
- |
- |
- |
2.4432 |
1350 |
2.0057 |
- |
- |
- |
- |
- |
2.4613 |
1360 |
1.9323 |
- |
- |
- |
- |
- |
2.4794 |
1370 |
1.9216 |
- |
- |
- |
- |
- |
2.4975 |
1380 |
1.995 |
- |
- |
- |
- |
- |
2.5156 |
1390 |
1.9285 |
- |
- |
- |
- |
- |
2.5337 |
1400 |
1.8886 |
- |
- |
- |
- |
- |
2.5517 |
1410 |
1.8298 |
- |
- |
- |
- |
- |
2.5698 |
1420 |
1.8452 |
- |
- |
- |
- |
- |
2.5879 |
1430 |
1.9488 |
- |
- |
- |
- |
- |
2.6060 |
1440 |
1.8928 |
- |
- |
- |
- |
- |
2.6241 |
1450 |
2.0101 |
- |
- |
- |
- |
- |
2.6422 |
1460 |
1.7591 |
- |
- |
- |
- |
- |
2.6603 |
1470 |
1.9177 |
- |
- |
- |
- |
- |
2.6784 |
1480 |
1.9329 |
- |
- |
- |
- |
- |
2.6965 |
1490 |
1.8978 |
- |
- |
- |
- |
- |
2.7146 |
1500 |
1.9589 |
- |
- |
- |
- |
- |
2.7327 |
1510 |
1.9744 |
- |
- |
- |
- |
- |
2.7508 |
1520 |
1.9272 |
- |
- |
- |
- |
- |
2.7689 |
1530 |
1.9234 |
- |
- |
- |
- |
- |
2.7870 |
1540 |
1.9667 |
- |
- |
- |
- |
- |
2.8051 |
1550 |
1.853 |
- |
- |
- |
- |
- |
2.8232 |
1560 |
1.9191 |
- |
- |
- |
- |
- |
2.8413 |
1570 |
1.8083 |
- |
- |
- |
- |
- |
2.8594 |
1580 |
1.8543 |
- |
- |
- |
- |
- |
2.8775 |
1590 |
1.9091 |
- |
- |
- |
- |
- |
2.8956 |
1600 |
1.8079 |
- |
- |
- |
- |
- |
2.9137 |
1610 |
1.8992 |
- |
- |
- |
- |
- |
2.9318 |
1620 |
1.8742 |
- |
- |
- |
- |
- |
2.9499 |
1630 |
1.9313 |
- |
- |
- |
- |
- |
2.9680 |
1640 |
1.9832 |
- |
- |
- |
- |
- |
2.9861 |
1650 |
1.9037 |
- |
- |
- |
- |
- |
2.9988 |
1657 |
- |
0.2982 |
0.3130 |
0.3211 |
0.2697 |
0.3247 |
3.0042 |
1660 |
1.7924 |
- |
- |
- |
- |
- |
3.0223 |
1670 |
1.9677 |
- |
- |
- |
- |
- |
3.0404 |
1680 |
1.9123 |
- |
- |
- |
- |
- |
3.0585 |
1690 |
1.7691 |
- |
- |
- |
- |
- |
3.0766 |
1700 |
1.8822 |
- |
- |
- |
- |
- |
3.0947 |
1710 |
1.8543 |
- |
- |
- |
- |
- |
3.1128 |
1720 |
1.8127 |
- |
- |
- |
- |
- |
3.1309 |
1730 |
1.8844 |
- |
- |
- |
- |
- |
3.1490 |
1740 |
1.911 |
- |
- |
- |
- |
- |
3.1671 |
1750 |
1.7695 |
- |
- |
- |
- |
- |
3.1852 |
1760 |
1.8134 |
- |
- |
- |
- |
- |
3.2033 |
1770 |
1.7794 |
- |
- |
- |
- |
- |
3.2214 |
1780 |
1.8851 |
- |
- |
- |
- |
- |
3.2395 |
1790 |
1.8381 |
- |
- |
- |
- |
- |
3.2576 |
1800 |
1.9184 |
- |
- |
- |
- |
- |
3.2756 |
1810 |
1.8074 |
- |
- |
- |
- |
- |
3.2937 |
1820 |
1.8236 |
- |
- |
- |
- |
- |
3.3118 |
1830 |
1.8203 |
- |
- |
- |
- |
- |
3.3299 |
1840 |
1.8874 |
- |
- |
- |
- |
- |
3.3480 |
1850 |
1.7457 |
- |
- |
- |
- |
- |
3.3661 |
1860 |
1.7933 |
- |
- |
- |
- |
- |
3.3842 |
1870 |
1.759 |
- |
- |
- |
- |
- |
3.4023 |
1880 |
1.8514 |
- |
- |
- |
- |
- |
3.4204 |
1890 |
1.8163 |
- |
- |
- |
- |
- |
3.4385 |
1900 |
1.8299 |
- |
- |
- |
- |
- |
3.4566 |
1910 |
1.8112 |
- |
- |
- |
- |
- |
3.4747 |
1920 |
1.7446 |
- |
- |
- |
- |
- |
3.4928 |
1930 |
1.8314 |
- |
- |
- |
- |
- |
3.5109 |
1940 |
1.742 |
- |
- |
- |
- |
- |
3.5290 |
1950 |
1.7519 |
- |
- |
- |
- |
- |
3.5471 |
1960 |
1.722 |
- |
- |
- |
- |
- |
3.5652 |
1970 |
1.7454 |
- |
- |
- |
- |
- |
3.5833 |
1980 |
1.7875 |
- |
- |
- |
- |
- |
3.6014 |
1990 |
1.7596 |
- |
- |
- |
- |
- |
3.6195 |
2000 |
1.8348 |
- |
- |
- |
- |
- |
3.6376 |
2010 |
1.6954 |
- |
- |
- |
- |
- |
3.6557 |
2020 |
1.7334 |
- |
- |
- |
- |
- |
3.6738 |
2030 |
1.8318 |
- |
- |
- |
- |
- |
3.6919 |
2040 |
1.7982 |
- |
- |
- |
- |
- |
3.7100 |
2050 |
1.7987 |
- |
- |
- |
- |
- |
3.7281 |
2060 |
1.8402 |
- |
- |
- |
- |
- |
3.7462 |
2070 |
1.8569 |
- |
- |
- |
- |
- |
3.7643 |
2080 |
1.8285 |
- |
- |
- |
- |
- |
3.7824 |
2090 |
1.8652 |
- |
- |
- |
- |
- |
3.8005 |
2100 |
1.7731 |
- |
- |
- |
- |
- |
3.8186 |
2110 |
1.8697 |
- |
- |
- |
- |
- |
3.8367 |
2120 |
1.6953 |
- |
- |
- |
- |
- |
3.8548 |
2130 |
1.7493 |
- |
- |
- |
- |
- |
3.8729 |
2140 |
1.8031 |
- |
- |
- |
- |
- |
3.8910 |
2150 |
1.7053 |
- |
- |
- |
- |
- |
3.9091 |
2160 |
1.8436 |
- |
- |
- |
- |
- |
3.9272 |
2170 |
1.7572 |
- |
- |
- |
- |
- |
3.9453 |
2180 |
1.7797 |
- |
- |
- |
- |
- |
3.9634 |
2190 |
1.8827 |
- |
- |
- |
- |
- |
3.9815 |
2200 |
1.8678 |
- |
- |
- |
- |
- |
3.9959 |
2208 |
- |
0.2992 |
0.3134 |
0.3217 |
0.271 |
0.3252 |
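- The bold row denotes the saved checkpoint: the final evaluation row (epoch 3.9959, step 2208), whose cosine_map@100 values match the metrics reported in the Evaluation section.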
Framework Versions
- Python: 3.10.12
- Sentence Transformers: 3.0.1
- Transformers: 4.41.2
- PyTorch: 2.1.2+cu121
- Accelerate: 0.31.0
- Datasets: 2.19.1
- Tokenizers: 0.19.1
Citation
BibTeX
Sentence Transformers
@inproceedings{reimers-2019-sentence-bert,
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
author = "Reimers, Nils and Gurevych, Iryna",
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
month = "11",
year = "2019",
publisher = "Association for Computational Linguistics",
url = "https://arxiv.org/abs/1908.10084",
}
MatryoshkaLoss
@misc{kusupati2024matryoshka,
title={Matryoshka Representation Learning},
author={Aditya Kusupati and Gantavya Bhatt and Aniket Rege and Matthew Wallingford and Aditya Sinha and Vivek Ramanujan and William Howard-Snyder and Kaifeng Chen and Sham Kakade and Prateek Jain and Ali Farhadi},
year={2024},
eprint={2205.13147},
archivePrefix={arXiv},
primaryClass={cs.LG}
}
MultipleNegativesRankingLoss
@misc{henderson2017efficient,
title={Efficient Natural Language Response Suggestion for Smart Reply},
author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
year={2017},
eprint={1705.00652},
archivePrefix={arXiv},
primaryClass={cs.CL}
}