metadata

base_model: SQAI/streetlight_sql_embedding
datasets: []
language:
  - en
library_name: sentence-transformers
license: apache-2.0
metrics:
  - cosine_accuracy@1
  - cosine_accuracy@3
  - cosine_accuracy@5
  - cosine_accuracy@10
  - cosine_precision@1
  - cosine_precision@3
  - cosine_precision@5
  - cosine_precision@10
  - cosine_recall@1
  - cosine_recall@3
  - cosine_recall@5
  - cosine_recall@10
  - cosine_ndcg@10
  - cosine_mrr@10
  - cosine_map@100
pipeline_tag: sentence-similarity
tags:
  - sentence-transformers
  - sentence-similarity
  - feature-extraction
  - generated_from_trainer
  - dataset_size:2161
  - loss:MatryoshkaLoss
  - loss:MultipleNegativesRankingLoss
widget:
  - source_sentence: longitude of streetlight
    sentences:
      - >-
        "What is the recent status of the streetlight at the given longitude,
        considering the current overload conditions?"
      - >-
        "Has there been any recent failure in the metering components of the
        streetlights affecting data reporting, and was the control mode switch
        identifier used for the changes?"
      - >-
        "Can you tell me when was the most recent instance when the current
        exceeded the safe operating threshold, causing a streetlight failure?"
  - source_sentence: Ambient light level detected by the streetlight, measured in lux
    sentences:
      - >-
        "What is the count of how many times the most recent streetlight failure
        has been switched on before the error occurred?"
      - >-
        "What is the recent data on maximum load current indicating potential
        risk and any recent communication issues with the lux sensors?"
      - >-
        "What is the recent dimming schedule applied, the detected ambient light
        level in lux, and were there any recent issues or failures with the
        driver of the streetlight?"
  - source_sentence: >-
      Timestamp of the latest data recorded or action performed by the
      streetlight
    sentences:
      - >-
        "What is the recent failure rate of the relay responsible for operating
        the DALI dimming protocol in our streetlights?"
      - >-
        "Can you provide the recent instances where the current drawn by the
        streetlights was lower than expected, sorted by the unique streetlight
        identifier and street name?"
      - >-
        "What was the most recent threshold level set to stop recording
        flickering events using the SIM card code in the streetlight?"
  - source_sentence: Current exceeds the safe operating threshold for the streetlight (failure)
    sentences:
      - >-
        "What is the hardware version of the recent streetlight experiencing
        faults in its lux module affecting light level sensing and control?"
      - >-
        "Can you provide the recent instances where the current drawn by the
        streetlights was lower than expected, sorted by the unique streetlight
        identifier and street name?"
      - >-
        "Can you identify the most recent instance when the power under load was
        higher than normal, possibly indicating inefficiency or a fault, and
        concurrently, the voltage exceeded the safe operating levels for the
        streetlights?"
  - source_sentence: >-
      Voltage supplied is below the safe operating level for the streetlight
      (failure)
    sentences:
      - >-
        "What is the recent AC voltage supply to the streetlight and the SIM
        card code used for its cellular network communication?"
      - >-
        "What was the most recent threshold level set to stop recording
        flickering events using the SIM card code in the streetlight?"
      - >-
        "What is the most recent internal temperature reading for the operating
        conditions of the streetlight?"
model-index:
  - name: BGE base Financial Matryoshka
    results:
      - task:
          type: information-retrieval
          name: Information Retrieval
        dataset:
          name: dim 768
          type: dim_768
        metrics:
          - type: cosine_accuracy@1
            value: 0.004149377593360996
            name: Cosine Accuracy@1
          - type: cosine_accuracy@3
            value: 0.02074688796680498
            name: Cosine Accuracy@3
          - type: cosine_accuracy@5
            value: 0.04149377593360996
            name: Cosine Accuracy@5
          - type: cosine_accuracy@10
            value: 0.06224066390041494
            name: Cosine Accuracy@10
          - type: cosine_precision@1
            value: 0.004149377593360996
            name: Cosine Precision@1
          - type: cosine_precision@3
            value: 0.006915629322268326
            name: Cosine Precision@3
          - type: cosine_precision@5
            value: 0.008298755186721992
            name: Cosine Precision@5
          - type: cosine_precision@10
            value: 0.006224066390041493
            name: Cosine Precision@10
          - type: cosine_recall@1
            value: 0.004149377593360996
            name: Cosine Recall@1
          - type: cosine_recall@3
            value: 0.02074688796680498
            name: Cosine Recall@3
          - type: cosine_recall@5
            value: 0.04149377593360996
            name: Cosine Recall@5
          - type: cosine_recall@10
            value: 0.06224066390041494
            name: Cosine Recall@10
          - type: cosine_ndcg@10
            value: 0.028846821098581887
            name: Cosine Ndcg@10
          - type: cosine_mrr@10
            value: 0.018665612856484225
            name: Cosine Mrr@10
          - type: cosine_map@100
            value: 0.024320046307682447
            name: Cosine Map@100
      - task:
          type: information-retrieval
          name: Information Retrieval
        dataset:
          name: dim 512
          type: dim_512
        metrics:
          - type: cosine_accuracy@1
            value: 0.004149377593360996
            name: Cosine Accuracy@1
          - type: cosine_accuracy@3
            value: 0.02074688796680498
            name: Cosine Accuracy@3
          - type: cosine_accuracy@5
            value: 0.04149377593360996
            name: Cosine Accuracy@5
          - type: cosine_accuracy@10
            value: 0.06224066390041494
            name: Cosine Accuracy@10
          - type: cosine_precision@1
            value: 0.004149377593360996
            name: Cosine Precision@1
          - type: cosine_precision@3
            value: 0.006915629322268326
            name: Cosine Precision@3
          - type: cosine_precision@5
            value: 0.008298755186721992
            name: Cosine Precision@5
          - type: cosine_precision@10
            value: 0.006224066390041493
            name: Cosine Precision@10
          - type: cosine_recall@1
            value: 0.004149377593360996
            name: Cosine Recall@1
          - type: cosine_recall@3
            value: 0.02074688796680498
            name: Cosine Recall@3
          - type: cosine_recall@5
            value: 0.04149377593360996
            name: Cosine Recall@5
          - type: cosine_recall@10
            value: 0.06224066390041494
            name: Cosine Recall@10
          - type: cosine_ndcg@10
            value: 0.028846821098581887
            name: Cosine Ndcg@10
          - type: cosine_mrr@10
            value: 0.018665612856484225
            name: Cosine Mrr@10
          - type: cosine_map@100
            value: 0.024320046307682447
            name: Cosine Map@100
      - task:
          type: information-retrieval
          name: Information Retrieval
        dataset:
          name: dim 256
          type: dim_256
        metrics:
          - type: cosine_accuracy@1
            value: 0.008298755186721992
            name: Cosine Accuracy@1
          - type: cosine_accuracy@3
            value: 0.02074688796680498
            name: Cosine Accuracy@3
          - type: cosine_accuracy@5
            value: 0.04149377593360996
            name: Cosine Accuracy@5
          - type: cosine_accuracy@10
            value: 0.058091286307053944
            name: Cosine Accuracy@10
          - type: cosine_precision@1
            value: 0.008298755186721992
            name: Cosine Precision@1
          - type: cosine_precision@3
            value: 0.006915629322268326
            name: Cosine Precision@3
          - type: cosine_precision@5
            value: 0.008298755186721992
            name: Cosine Precision@5
          - type: cosine_precision@10
            value: 0.0058091286307053935
            name: Cosine Precision@10
          - type: cosine_recall@1
            value: 0.008298755186721992
            name: Cosine Recall@1
          - type: cosine_recall@3
            value: 0.02074688796680498
            name: Cosine Recall@3
          - type: cosine_recall@5
            value: 0.04149377593360996
            name: Cosine Recall@5
          - type: cosine_recall@10
            value: 0.058091286307053944
            name: Cosine Recall@10
          - type: cosine_ndcg@10
            value: 0.02917470145123319
            name: Cosine Ndcg@10
          - type: cosine_mrr@10
            value: 0.020424158598432458
            name: Cosine Mrr@10
          - type: cosine_map@100
            value: 0.02622693528356527
            name: Cosine Map@100
      - task:
          type: information-retrieval
          name: Information Retrieval
        dataset:
          name: dim 128
          type: dim_128
        metrics:
          - type: cosine_accuracy@1
            value: 0.008298755186721992
            name: Cosine Accuracy@1
          - type: cosine_accuracy@3
            value: 0.02074688796680498
            name: Cosine Accuracy@3
          - type: cosine_accuracy@5
            value: 0.03734439834024896
            name: Cosine Accuracy@5
          - type: cosine_accuracy@10
            value: 0.05394190871369295
            name: Cosine Accuracy@10
          - type: cosine_precision@1
            value: 0.008298755186721992
            name: Cosine Precision@1
          - type: cosine_precision@3
            value: 0.006915629322268326
            name: Cosine Precision@3
          - type: cosine_precision@5
            value: 0.007468879668049794
            name: Cosine Precision@5
          - type: cosine_precision@10
            value: 0.005394190871369295
            name: Cosine Precision@10
          - type: cosine_recall@1
            value: 0.008298755186721992
            name: Cosine Recall@1
          - type: cosine_recall@3
            value: 0.02074688796680498
            name: Cosine Recall@3
          - type: cosine_recall@5
            value: 0.03734439834024896
            name: Cosine Recall@5
          - type: cosine_recall@10
            value: 0.05394190871369295
            name: Cosine Recall@10
          - type: cosine_ndcg@10
            value: 0.027438863848135625
            name: Cosine Ndcg@10
          - type: cosine_mrr@10
            value: 0.019311071593229267
            name: Cosine Mrr@10
          - type: cosine_map@100
            value: 0.02603525046406888
            name: Cosine Map@100
      - task:
          type: information-retrieval
          name: Information Retrieval
        dataset:
          name: dim 64
          type: dim_64
        metrics:
          - type: cosine_accuracy@1
            value: 0.008298755186721992
            name: Cosine Accuracy@1
          - type: cosine_accuracy@3
            value: 0.012448132780082987
            name: Cosine Accuracy@3
          - type: cosine_accuracy@5
            value: 0.029045643153526972
            name: Cosine Accuracy@5
          - type: cosine_accuracy@10
            value: 0.05394190871369295
            name: Cosine Accuracy@10
          - type: cosine_precision@1
            value: 0.008298755186721992
            name: Cosine Precision@1
          - type: cosine_precision@3
            value: 0.004149377593360996
            name: Cosine Precision@3
          - type: cosine_precision@5
            value: 0.005809128630705394
            name: Cosine Precision@5
          - type: cosine_precision@10
            value: 0.005394190871369295
            name: Cosine Precision@10
          - type: cosine_recall@1
            value: 0.008298755186721992
            name: Cosine Recall@1
          - type: cosine_recall@3
            value: 0.012448132780082987
            name: Cosine Recall@3
          - type: cosine_recall@5
            value: 0.029045643153526972
            name: Cosine Recall@5
          - type: cosine_recall@10
            value: 0.05394190871369295
            name: Cosine Recall@10
          - type: cosine_ndcg@10
            value: 0.025512460997908278
            name: Cosine Ndcg@10
          - type: cosine_mrr@10
            value: 0.017038793387341104
            name: Cosine Mrr@10
          - type: cosine_map@100
            value: 0.02259750227693111
            name: Cosine Map@100

BGE base Financial Matryoshka

This is a sentence-transformers model fine-tuned from SQAI/streetlight_sql_embedding. It maps sentences and paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.

Model Details

Model Description

Model Type: Sentence Transformer
Base model: SQAI/streetlight_sql_embedding
Maximum Sequence Length: 512 tokens
Output Dimensionality: 384 dimensions
Similarity Function: Cosine Similarity
Language: en
License: apache-2.0

Model Sources

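Documentation: Sentence Transformers Documentation (https://sbert.net)
Repository: Sentence Transformers on GitHub (https://github.com/UKPLab/sentence-transformers)
Hugging Face: Sentence Transformers on Hugging Face (https://huggingface.co/models?library=sentence-transformers)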

Full Model Architecture

SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': True}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
)

Usage

Direct Usage (Sentence Transformers)

First install the Sentence Transformers library:

pip install -U sentence-transformers

Then you can load this model and run inference.

from sentence_transformers import SentenceTransformer

# Download from the 🤗 Hub
model = SentenceTransformer("SQAI/streetlight_sql_embedding2")
# Run inference
sentences = [
    'Voltage supplied is below the safe operating level for the streetlight (failure)',
    '"What is the recent AC voltage supply to the streetlight and the SIM card code used for its cellular network communication?"',
    '"What was the most recent threshold level set to stop recording flickering events using the SIM card code in the streetlight?"',
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 384]

# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]

Evaluation

Metrics

Information Retrieval

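Dataset: dim_768
Evaluated with InformationRetrievalEvaluator
Note: the model outputs 384-dimensional embeddings, so truncating to 768 dimensions presumably leaves them unchanged; these scores are identical to the dim_512 results below and effectively describe the full 384-dimensional embedding.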

Metric	Value
cosine_accuracy@1	0.0041
cosine_accuracy@3	0.0207
cosine_accuracy@5	0.0415
cosine_accuracy@10	0.0622
cosine_precision@1	0.0041
cosine_precision@3	0.0069
cosine_precision@5	0.0083
cosine_precision@10	0.0062
cosine_recall@1	0.0041
cosine_recall@3	0.0207
cosine_recall@5	0.0415
cosine_recall@10	0.0622
cosine_ndcg@10	0.0288
cosine_mrr@10	0.0187
cosine_map@100	0.0243

Information Retrieval

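Dataset: dim_512
Evaluated with InformationRetrievalEvaluator
Note: the model outputs 384-dimensional embeddings, so truncating to 512 dimensions presumably leaves them unchanged; these scores are identical to the dim_768 results above and effectively describe the full 384-dimensional embedding.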

Metric	Value
cosine_accuracy@1	0.0041
cosine_accuracy@3	0.0207
cosine_accuracy@5	0.0415
cosine_accuracy@10	0.0622
cosine_precision@1	0.0041
cosine_precision@3	0.0069
cosine_precision@5	0.0083
cosine_precision@10	0.0062
cosine_recall@1	0.0041
cosine_recall@3	0.0207
cosine_recall@5	0.0415
cosine_recall@10	0.0622
cosine_ndcg@10	0.0288
cosine_mrr@10	0.0187
cosine_map@100	0.0243

Information Retrieval

Dataset: dim_256
Evaluated with InformationRetrievalEvaluator

Metric	Value
cosine_accuracy@1	0.0083
cosine_accuracy@3	0.0207
cosine_accuracy@5	0.0415
cosine_accuracy@10	0.0581
cosine_precision@1	0.0083
cosine_precision@3	0.0069
cosine_precision@5	0.0083
cosine_precision@10	0.0058
cosine_recall@1	0.0083
cosine_recall@3	0.0207
cosine_recall@5	0.0415
cosine_recall@10	0.0581
cosine_ndcg@10	0.0292
cosine_mrr@10	0.0204
cosine_map@100	0.0262

Information Retrieval

Dataset: dim_128
Evaluated with InformationRetrievalEvaluator

Metric	Value
cosine_accuracy@1	0.0083
cosine_accuracy@3	0.0207
cosine_accuracy@5	0.0373
cosine_accuracy@10	0.0539
cosine_precision@1	0.0083
cosine_precision@3	0.0069
cosine_precision@5	0.0075
cosine_precision@10	0.0054
cosine_recall@1	0.0083
cosine_recall@3	0.0207
cosine_recall@5	0.0373
cosine_recall@10	0.0539
cosine_ndcg@10	0.0274
cosine_mrr@10	0.0193
cosine_map@100	0.026

Information Retrieval

Dataset: dim_64
Evaluated with InformationRetrievalEvaluator

Metric	Value
cosine_accuracy@1	0.0083
cosine_accuracy@3	0.0124
cosine_accuracy@5	0.029
cosine_accuracy@10	0.0539
cosine_precision@1	0.0083
cosine_precision@3	0.0041
cosine_precision@5	0.0058
cosine_precision@10	0.0054
cosine_recall@1	0.0083
cosine_recall@3	0.0124
cosine_recall@5	0.029
cosine_recall@10	0.0539
cosine_ndcg@10	0.0255
cosine_mrr@10	0.017
cosine_map@100	0.0226

Training Details

Training Dataset

Unnamed Dataset

Size: 2,161 training samples
Columns: positive and anchor
Approximate statistics based on the first 1000 samples:
	positive	anchor
type	string	string
details	min: 6 tokens mean: 14.3 tokens max: 20 tokens	min: 15 tokens mean: 32.58 tokens max: 54 tokens

Samples:

positive	anchor
`Lower lux level below which additional lighting may be necessary`	`"What are the recent faults found in the lux module that affect light level control, in relation to the default dimming level of the streetlights and the control mode switch identifier used for changing settings?"`
`Current dimming level of the streetlight in operation`	`"Can the operator managing the streetlights provide the most recent update on the streetlight that is currently below the expected range and unable to connect to the network for remote management?"`
`Upper voltage limit considered safe and efficient for streetlight operation`	`"Can you provide any recent potential failures of a streetlight group due to unusually high voltage under load or intermittent flashing, within the southernmost geographic area?"`

Loss: MatryoshkaLoss with these parameters:

{
    "loss": "MultipleNegativesRankingLoss",
    "matryoshka_dims": [
        384,
        256,
        128,
        64
    ],
    "matryoshka_weights": [
        1,
        1,
        1,
        1
    ],
    "n_dims_per_step": -1
}

Evaluation Dataset

Unnamed Dataset

Size: 241 evaluation samples
Columns: positive and anchor
Approximate statistics based on the first 1000 samples (the dataset has only 241 samples, so all of them are covered):

	positive	anchor
type	string	string
details	min: 6 tokens mean: 14.31 tokens max: 20 tokens	min: 17 tokens mean: 31.03 tokens max: 54 tokens

Samples:

positive	anchor
`Timestamp of the latest data recorded or action performed by the streetlight`	`"What was the most recent threshold level set to stop recording flickering events using the SIM card code in the streetlight?"`
`Maximum longitude of the geographic area covered by the group of streetlights`	`"What is the recent power usage in watts for the oldest streetlight on the street with maximum longitude?"`
`Current dimming level of the streetlight in operation`	`"What is the most recent dimming level of the streetlight?"`

Loss: MatryoshkaLoss with these parameters:

{
    "loss": "MultipleNegativesRankingLoss",
    "matryoshka_dims": [
        384,
        256,
        128,
        64
    ],
    "matryoshka_weights": [
        1,
        1,
        1,
        1
    ],
    "n_dims_per_step": -1
}

Training Hyperparameters

Non-Default Hyperparameters

eval_strategy: epoch
per_device_train_batch_size: 32
per_device_eval_batch_size: 16
gradient_accumulation_steps: 16
learning_rate: 1e-05
weight_decay: 0.03
num_train_epochs: 75
lr_scheduler_type: cosine
warmup_ratio: 0.2
bf16: True
tf32: True
load_best_model_at_end: True
optim: adamw_torch_fused
batch_sampler: no_duplicates

All Hyperparameters

Click to expand

overwrite_output_dir: False
do_predict: False
eval_strategy: epoch
prediction_loss_only: True
per_device_train_batch_size: 32
per_device_eval_batch_size: 16
per_gpu_train_batch_size: None
per_gpu_eval_batch_size: None
gradient_accumulation_steps: 16
eval_accumulation_steps: None
learning_rate: 1e-05
weight_decay: 0.03
adam_beta1: 0.9
adam_beta2: 0.999
adam_epsilon: 1e-08
max_grad_norm: 1.0
num_train_epochs: 75
max_steps: -1
lr_scheduler_type: cosine
lr_scheduler_kwargs: {}
warmup_ratio: 0.2
warmup_steps: 0
log_level: passive
log_level_replica: warning
log_on_each_node: True
logging_nan_inf_filter: True
save_safetensors: True
save_on_each_node: False
save_only_model: False
restore_callback_states_from_checkpoint: False
no_cuda: False
use_cpu: False
use_mps_device: False
seed: 42
data_seed: None
jit_mode_eval: False
use_ipex: False
bf16: True
fp16: False
fp16_opt_level: O1
half_precision_backend: auto
bf16_full_eval: False
fp16_full_eval: False
tf32: True
local_rank: 0
ddp_backend: None
tpu_num_cores: None
tpu_metrics_debug: False
debug: []
dataloader_drop_last: False
dataloader_num_workers: 0
dataloader_prefetch_factor: None
past_index: -1
disable_tqdm: False
remove_unused_columns: True
label_names: None
load_best_model_at_end: True
ignore_data_skip: False
fsdp: []
fsdp_min_num_params: 0
fsdp_config: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
fsdp_transformer_layer_cls_to_wrap: None
accelerator_config: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
deepspeed: None
label_smoothing_factor: 0.0
optim: adamw_torch_fused
optim_args: None
adafactor: False
group_by_length: False
length_column_name: length
ddp_find_unused_parameters: None
ddp_bucket_cap_mb: None
ddp_broadcast_buffers: False
dataloader_pin_memory: True
dataloader_persistent_workers: False
skip_memory_metrics: True
use_legacy_prediction_loop: False
push_to_hub: False
resume_from_checkpoint: None
hub_model_id: None
hub_strategy: every_save
hub_private_repo: False
hub_always_push: False
gradient_checkpointing: False
gradient_checkpointing_kwargs: None
include_inputs_for_metrics: False
eval_do_concat_batches: True
fp16_backend: auto
push_to_hub_model_id: None
push_to_hub_organization: None
mp_parameters:
auto_find_batch_size: False
full_determinism: False
torchdynamo: None
ray_scope: last
ddp_timeout: 1800
torch_compile: False
torch_compile_backend: None
torch_compile_mode: None
dispatch_batches: None
split_batches: None
include_tokens_per_second: False
include_num_input_tokens_seen: False
neftune_noise_alpha: None
optim_target_modules: None
batch_eval_metrics: False
batch_sampler: no_duplicates
multi_dataset_batch_sampler: proportional

Training Logs

Click to expand

Epoch	Step	Training Loss	loss	dim_128_cosine_map@100	dim_256_cosine_map@100	dim_512_cosine_map@100	dim_64_cosine_map@100	dim_768_cosine_map@100
0.2353	1	11.247	-	-	-	-	-	-
0.4706	2	11.4455	-	-	-	-	-	-
0.7059	3	11.5154	-	-	-	-	-	-
0.9412	4	10.4079	-	-	-	-	-	-
1.1765	5	3.3256	-	-	-	-	-	-
1.4118	6	3.812	-	-	-	-	-	-
1.6471	7	4.0302	-	-	-	-	-	-
1.8824	8	3.5832	-	-	-	-	-	-
2.1176	9	3.9586	-	-	-	-	-	-
2.3529	10	4.2835	-	-	-	-	-	-
2.5882	11	1.6391	6.0237	0.0254	0.0354	0.0318	0.0230	0.0318
1.0294	12	1.3873	-	-	-	-	-	-
1.2647	13	11.1729	-	-	-	-	-	-
1.5	14	11.1729	-	-	-	-	-	-
1.7353	15	11.3334	-	-	-	-	-	-
1.9706	16	9.1337	-	-	-	-	-	-
2.2059	17	2.8674	-	-	-	-	-	-
2.4412	18	3.9162	-	-	-	-	-	-
2.6765	19	3.3378	-	-	-	-	-	-
2.9118	20	3.5152	-	-	-	-	-	-
3.1471	21	3.1655	-	-	-	-	-	-
3.3824	22	3.5905	-	-	-	-	-	-
3.6176	23	1.2027	5.5383	0.0265	0.0304	0.0291	0.0235	0.0291
2.0588	24	2.5902	-	-	-	-	-	-
2.2941	25	10.8776	-	-	-	-	-	-
2.5294	26	10.7109	-	-	-	-	-	-
2.7647	27	10.9662	-	-	-	-	-	-
3.0	28	7.5032	-	-	-	-	-	-
3.2353	29	1.9266	-	-	-	-	-	-
3.4706	30	2.5007	-	-	-	-	-	-
3.7059	31	2.2972	-	-	-	-	-	-
3.9412	32	2.3428	-	-	-	-	-	-
4.1765	33	2.4842	-	-	-	-	-	-
4.4118	34	2.371	-	-	-	-	-	-
4.6471	35	0.8811	5.0896	0.0261	0.0356	0.0324	0.0263	0.0324
3.0882	36	3.8163	-	-	-	-	-	-
3.3235	37	10.3601	-	-	-	-	-	-
3.5588	38	9.8085	-	-	-	-	-	-
3.7941	39	10.3201	-	-	-	-	-	-
4.0294	40	5.7213	-	-	-	-	-	-
4.2647	41	1.0641	-	-	-	-	-	-
4.5	42	1.7557	-	-	-	-	-	-
4.7353	43	1.534	-	-	-	-	-	-
4.9706	44	1.2931	-	-	-	-	-	-
5.2059	45	2.0569	-	-	-	-	-	-
5.4412	46	1.6945	-	-	-	-	-	-
5.6765	47	0.6985	4.8110	0.0267	0.0230	0.0343	0.0180	0.0343
4.1176	48	4.8862	-	-	-	-	-	-
4.3529	49	9.9427	-	-	-	-	-	-
4.5882	50	9.7492	-	-	-	-	-	-
4.8235	51	10.1616	-	-	-	-	-	-
5.0588	52	4.3073	-	-	-	-	-	-
5.2941	53	0.9089	-	-	-	-	-	-
5.5294	54	1.2689	-	-	-	-	-	-
5.7647	55	1.2875	-	-	-	-	-	-
6.0	56	1.2756	-	-	-	-	-	-
6.2353	57	1.6222	-	-	-	-	-	-
6.4706	58	1.3049	-	-	-	-	-	-
6.7059	59	0.3305	4.6562	0.0184	0.0327	0.0288	0.0190	0.0288
5.1471	60	5.7286	-	-	-	-	-	-
5.3824	61	9.7399	-	-	-	-	-	-
5.6176	62	9.3036	-	-	-	-	-	-
5.8529	63	9.6674	-	-	-	-	-	-
6.0882	64	2.7979	-	-	-	-	-	-
6.3235	65	0.4978	-	-	-	-	-	-
6.5588	66	1.8006	-	-	-	-	-	-
6.7941	67	1.098	-	-	-	-	-	-
7.0294	68	1.3678	-	-	-	-	-	-
7.2647	69	1.4648	-	-	-	-	-	-
7.5	70	1.1826	-	-	-	-	-	-
7.7353	71	0.0624	4.5802	0.0200	0.0208	0.0216	0.0231	0.0216
6.1765	72	6.8322	-	-	-	-	-	-
6.4118	73	9.3021	-	-	-	-	-	-
6.6471	74	9.1494	-	-	-	-	-	-
6.8824	75	9.631	-	-	-	-	-	-
7.1176	76	1.661	-	-	-	-	-	-
7.3529	77	0.2353	-	-	-	-	-	-
7.5882	78	1.0663	-	-	-	-	-	-
7.8235	79	0.6836	-	-	-	-	-	-
8.0588	80	0.9921	-	-	-	-	-	-
8.2941	81	1.6479	-	-	-	-	-	-
8.5294	82	0.6713	-	-	-	-	-	-
8.7647	83	0.0	4.5499	0.0209	0.0233	0.0249	0.0226	0.0249
7.2059	84	7.775	-	-	-	-	-	-
7.4412	85	9.0508	-	-	-	-	-	-
7.6765	86	9.1417	-	-	-	-	-	-
7.9118	87	8.9087	-	-	-	-	-	-
8.1471	88	0.9757	-	-	-	-	-	-
8.3824	89	0.7521	-	-	-	-	-	-
8.6176	90	0.7292	-	-	-	-	-	-
8.8529	91	0.6088	-	-	-	-	-	-
9.0882	92	0.9514	-	-	-	-	-	-
9.3235	93	1.435	-	-	-	-	-	-
9.5588	94	0.3655	-	-	-	-	-	-
9.7941	95	0.0	4.5162	0.0245	0.0268	0.0224	0.0238	0.0224
8.2353	96	8.7854	-	-	-	-	-	-
8.4706	97	9.0167	-	-	-	-	-	-
8.7059	98	9.0405	-	-	-	-	-	-
8.9412	99	7.7069	-	-	-	-	-	-
9.1765	100	0.6267	-	-	-	-	-	-
9.4118	101	0.4043	-	-	-	-	-	-
9.6471	102	0.7028	-	-	-	-	-	-
9.8824	103	0.751	-	-	-	-	-	-
10.1176	104	0.5994	-	-	-	-	-	-
10.3529	105	1.0402	-	-	-	-	-	-
10.5882	106	0.3983	4.4860	0.0259	0.0301	0.0252	0.0265	0.0252
9.0294	107	1.1037	-	-	-	-	-	-
9.2647	108	8.6263	-	-	-	-	-	-
9.5	109	8.9359	-	-	-	-	-	-
9.7353	110	8.9879	-	-	-	-	-	-
9.9706	111	6.4932	-	-	-	-	-	-
10.2059	112	0.3904	-	-	-	-	-	-
10.4412	113	0.3544	-	-	-	-	-	-
10.6765	114	0.5658	-	-	-	-	-	-
10.9118	115	0.5884	-	-	-	-	-	-
11.1471	116	0.4828	-	-	-	-	-	-
11.3824	117	0.8872	-	-	-	-	-	-
11.6176	118	0.2906	4.4899	0.0237	0.0267	0.0264	0.0242	0.0264
10.0588	119	2.1398	-	-	-	-	-	-
10.2941	120	8.6036	-	-	-	-	-	-
10.5294	121	8.7739	-	-	-	-	-	-
10.7647	122	9.1481	-	-	-	-	-	-
11.0	123	5.2436	-	-	-	-	-	-
11.2353	124	0.2435	-	-	-	-	-	-
11.4706	125	0.4451	-	-	-	-	-	-
11.7059	126	0.4338	-	-	-	-	-	-
11.9412	127	0.5156	-	-	-	-	-	-
12.1765	128	0.7081	-	-	-	-	-	-
12.4118	129	0.375	-	-	-	-	-	-
12.6471	130	0.1906	4.5243	0.0305	0.0253	0.0217	0.0214	0.0217
11.0882	131	3.115	-	-	-	-	-	-
11.3235	132	8.702	-	-	-	-	-	-
11.5588	133	8.4872	-	-	-	-	-	-
11.7941	134	9.0143	-	-	-	-	-	-
12.0294	135	4.2374	-	-	-	-	-	-
12.2647	136	0.1979	-	-	-	-	-	-
12.5	137	0.6371	-	-	-	-	-	-
12.7353	138	0.5763	-	-	-	-	-	-
12.9706	139	0.5716	-	-	-	-	-	-
13.2059	140	0.9894	-	-	-	-	-	-
13.4412	141	0.3963	-	-	-	-	-	-
13.6765	142	0.084	4.5514	0.0224	0.0253	0.0209	0.0250	0.0209
12.1176	143	4.1455	-	-	-	-	-	-
12.3529	144	8.6664	-	-	-	-	-	-
12.5882	145	8.5896	-	-	-	-	-	-
12.8235	146	8.9639	-	-	-	-	-	-
13.0588	147	3.2692	-	-	-	-	-	-
13.2941	148	0.2518	-	-	-	-	-	-
13.5294	149	0.8313	-	-	-	-	-	-
13.7647	150	0.5592	-	-	-	-	-	-
14.0	151	0.3966	-	-	-	-	-	-
14.2353	152	0.829	-	-	-	-	-	-
14.4706	153	0.2369	-	-	-	-	-	-
14.7059	154	0.0629	4.5549	0.0294	0.0312	0.0258	0.0315	0.0258
13.1471	155	5.1674	-	-	-	-	-	-
13.3824	156	8.5543	-	-	-	-	-	-
13.6176	157	8.4481	-	-	-	-	-	-
13.8529	158	8.7815	-	-	-	-	-	-
14.0882	159	1.9305	-	-	-	-	-	-
14.3235	160	0.0925	-	-	-	-	-	-
14.5588	161	0.6568	-	-	-	-	-	-
14.7941	162	0.2796	-	-	-	-	-	-
15.0294	163	0.5503	-	-	-	-	-	-
15.2647	164	0.6386	-	-	-	-	-	-
15.5	165	0.1957	-	-	-	-	-	-
15.7353	166	0.0137	4.5688	0.0210	0.0251	0.0251	0.0223	0.0251
14.1765	167	6.2283	-	-	-	-	-	-
14.4118	168	8.5378	-	-	-	-	-	-
14.6471	169	8.5173	-	-	-	-	-	-
14.8824	170	8.9953	-	-	-	-	-	-
15.1176	171	0.983	-	-	-	-	-	-
15.3529	172	0.1503	-	-	-	-	-	-
15.5882	173	0.9004	-	-	-	-	-	-
15.8235	174	0.3962	-	-	-	-	-	-
16.0588	175	0.4047	-	-	-	-	-	-
16.2941	176	0.8265	-	-	-	-	-	-
16.5294	177	0.3069	-	-	-	-	-	-
16.7647	178	0.0	4.5819	0.0219	0.0271	0.0240	0.0253	0.0240
15.2059	179	7.3186	-	-	-	-	-	-
15.4412	180	8.5984	-	-	-	-	-	-
15.6765	181	8.5362	-	-	-	-	-	-
15.9118	182	8.2934	-	-	-	-	-	-
16.1471	183	0.437	-	-	-	-	-	-
16.3824	184	0.1864	-	-	-	-	-	-
16.6176	185	0.2657	-	-	-	-	-	-
16.8529	186	0.4242	-	-	-	-	-	-
17.0882	187	0.4815	-	-	-	-	-	-
17.3235	188	0.5206	-	-	-	-	-	-
17.5588	189	0.1981	-	-	-	-	-	-
17.7941	190	0.0	4.5795	0.0249	0.0319	0.0287	0.0227	0.0287
16.2353	191	8.2837	-	-	-	-	-	-
16.4706	192	8.5457	-	-	-	-	-	-
16.7059	193	8.6284	-	-	-	-	-	-
16.9412	194	7.1806	-	-	-	-	-	-
17.1765	195	0.2714	-	-	-	-	-	-
17.4118	196	0.65	-	-	-	-	-	-
17.6471	197	0.3627	-	-	-	-	-	-
17.8824	198	0.2502	-	-	-	-	-	-
18.1176	199	0.4651	-	-	-	-	-	-
18.3529	200	0.3878	-	-	-	-	-	-
18.5882	201	0.1728	4.5870	0.0258	0.0321	0.0293	0.0290	0.0293
17.0294	202	1.0158	-	-	-	-	-	-
17.2647	203	8.1391	-	-	-	-	-	-
17.5	204	8.5323	-	-	-	-	-	-
17.7353	205	8.6644	-	-	-	-	-	-
17.9706	206	6.1161	-	-	-	-	-	-
18.2059	207	0.4636	-	-	-	-	-	-
18.4412	208	0.8765	-	-	-	-	-	-
18.6765	209	0.4075	-	-	-	-	-	-
18.9118	210	0.3211	-	-	-	-	-	-
19.1471	211	0.65	-	-	-	-	-	-
19.3824	212	0.4802	-	-	-	-	-	-
19.6176	213	0.0777	4.5921	0.0211	0.0268	0.0238	0.0260	0.0238
18.0588	214	1.9364	-	-	-	-	-	-
18.2941	215	8.3079	-	-	-	-	-	-
18.5294	216	8.4468	-	-	-	-	-	-
18.7647	217	8.8501	-	-	-	-	-	-
19.0	218	5.0076	-	-	-	-	-	-
19.2353	219	0.1596	-	-	-	-	-	-
19.4706	220	0.6482	-	-	-	-	-	-
19.7059	221	0.5019	-	-	-	-	-	-
19.9412	222	0.2596	-	-	-	-	-	-
20.1765	223	0.5857	-	-	-	-	-	-
20.4118	224	0.3469	-	-	-	-	-	-
20.6471	225	0.082	4.5951	0.0251	0.0293	0.0239	0.0259	0.0239
19.0882	226	3.0141	-	-	-	-	-	-
19.3235	227	8.3977	-	-	-	-	-	-
19.5588	228	8.2687	-	-	-	-	-	-
19.7941	229	8.8415	-	-	-	-	-	-
20.0294	230	3.9692	-	-	-	-	-	-
20.2647	231	0.2079	-	-	-	-	-	-
20.5	232	0.6167	-	-	-	-	-	-
20.7353	233	0.255	-	-	-	-	-	-
20.9706	234	0.2403	-	-	-	-	-	-
21.2059	235	0.5944	-	-	-	-	-	-
21.4412	236	0.4212	-	-	-	-	-	-
21.6765	237	0.1031	4.5929	0.0248	0.0301	0.0297	0.0268	0.0297
20.1176	238	4.0698	-	-	-	-	-	-
20.3529	239	8.3696	-	-	-	-	-	-
20.5882	240	8.2668	-	-	-	-	-	-
20.8235	241	8.8194	-	-	-	-	-	-
21.0588	242	2.9283	-	-	-	-	-	-
21.2941	243	0.0974	-	-	-	-	-	-
21.5294	244	0.5172	-	-	-	-	-	-
21.7647	245	0.2451	-	-	-	-	-	-
22.0	246	0.4693	-	-	-	-	-	-
22.2353	247	0.7352	-	-	-	-	-	-
22.4706	248	0.1933	-	-	-	-	-	-
22.7059	249	0.0552	4.5945	0.0261	0.0275	0.0279	0.0204	0.0279
21.1471	250	5.1237	-	-	-	-	-	-
21.3824	251	8.5068	-	-	-	-	-	-
21.6176	252	8.2828	-	-	-	-	-	-
21.8529	253	8.7851	-	-	-	-	-	-
22.0882	254	2.0883	-	-	-	-	-	-
22.3235	255	0.1147	-	-	-	-	-	-
22.5588	256	0.5259	-	-	-	-	-	-
22.7941	257	0.2915	-	-	-	-	-	-
23.0294	258	0.2495	-	-	-	-	-	-
23.2647	259	0.7518	-	-	-	-	-	-
23.5	260	0.1767	-	-	-	-	-	-
23.7353	261	0.0244	4.5944	0.0213	0.0267	0.0265	0.0220	0.0265
22.1765	262	6.1144	-	-	-	-	-	-
22.4118	263	8.3334	-	-	-	-	-	-
22.6471	264	8.4377	-	-	-	-	-	-
22.8824	265	8.8182	-	-	-	-	-	-
23.1176	266	0.8795	-	-	-	-	-	-
23.3529	267	0.0637	-	-	-	-	-	-
23.5882	268	0.3658	-	-	-	-	-	-
23.8235	269	0.3599	-	-	-	-	-	-
24.0588	270	0.283	-	-	-	-	-	-
24.2941	271	0.731	-	-	-	-	-	-
24.5294	272	0.1758	-	-	-	-	-	-
24.7647	273	0.0	4.5963	0.0259	0.0295	0.0247	0.0229	0.0247
23.2059	274	7.1188	-	-	-	-	-	-
23.4412	275	8.354	-	-	-	-	-	-
23.6765	276	8.5186	-	-	-	-	-	-
23.9118	277	8.1633	-	-	-	-	-	-
24.1471	278	0.3481	-	-	-	-	-	-
24.3824	279	0.574	-	-	-	-	-	-
24.6176	280	0.2784	-	-	-	-	-	-
24.8529	281	0.251	-	-	-	-	-	-
25.0882	282	0.4093	-	-	-	-	-	-
25.3235	283	0.5414	-	-	-	-	-	-
25.5588	284	0.149	-	-	-	-	-	-
25.7941	285	0.0	4.5965	0.0223	0.0251	0.0240	0.0204	0.0240
24.2353	286	8.2498	-	-	-	-	-	-
24.4706	287	8.4555	-	-	-	-	-	-
24.7059	288	8.5368	-	-	-	-	-	-
24.9412	289	7.1779	-	-	-	-	-	-
25.1765	290	0.1486	-	-	-	-	-	-
25.4118	291	0.9156	-	-	-	-	-	-
25.6471	292	0.2757	-	-	-	-	-	-
25.8824	293	0.237	-	-	-	-	-	-
26.1176	294	0.2979	-	-	-	-	-	-
26.3529	295	0.5296	-	-	-	-	-	-
26.5882	296	0.2062	4.5949	0.0259	0.0327	0.0308	0.0247	0.0308
25.0294	297	1.0355	-	-	-	-	-	-
25.2647	298	8.1721	-	-	-	-	-	-
25.5	299	8.4028	-	-	-	-	-	-
25.7353	300	8.5989	4.5941	0.0260	0.0262	0.0243	0.0226	0.0243

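The bold row denotes the saved checkpoint. (Bold formatting is not preserved in this plain-text rendering, so the saved-checkpoint row cannot be identified from the table above.)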

Framework Versions

Python: 3.10.12
Sentence Transformers: 3.0.1
Transformers: 4.41.2
PyTorch: 2.1.2+cu121
Accelerate: 0.32.0
Datasets: 2.19.1
Tokenizers: 0.19.1

Citation

BibTeX

Sentence Transformers

@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}

MatryoshkaLoss

@misc{kusupati2024matryoshka,
    title={Matryoshka Representation Learning}, 
    author={Aditya Kusupati and Gantavya Bhatt and Aniket Rege and Matthew Wallingford and Aditya Sinha and Vivek Ramanujan and William Howard-Snyder and Kaifeng Chen and Sham Kakade and Prateek Jain and Ali Farhadi},
    year={2024},
    eprint={2205.13147},
    archivePrefix={arXiv},
    primaryClass={cs.LG}
}

MultipleNegativesRankingLoss

@misc{henderson2017efficient,
    title={Efficient Natural Language Response Suggestion for Smart Reply}, 
    author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
    year={2017},
    eprint={1705.00652},
    archivePrefix={arXiv},
    primaryClass={cs.CL}
}