leelandzhang's picture
Add new SentenceTransformer model.
26f2318 verified
|
raw
history blame
84.4 kB
metadata
base_model: SQAI/streetlight_sql_embedding
datasets: []
language:
  - en
library_name: sentence-transformers
license: apache-2.0
metrics:
  - cosine_accuracy@1
  - cosine_accuracy@3
  - cosine_accuracy@5
  - cosine_accuracy@10
  - cosine_precision@1
  - cosine_precision@3
  - cosine_precision@5
  - cosine_precision@10
  - cosine_recall@1
  - cosine_recall@3
  - cosine_recall@5
  - cosine_recall@10
  - cosine_ndcg@10
  - cosine_mrr@10
  - cosine_map@100
pipeline_tag: sentence-similarity
tags:
  - sentence-transformers
  - sentence-similarity
  - feature-extraction
  - generated_from_trainer
  - dataset_size:2161
  - loss:MatryoshkaLoss
  - loss:MultipleNegativesRankingLoss
widget:
  - source_sentence: longitude of streetlight
    sentences:
      - >-
        "What is the recent status of the streetlight at the given longitude,
        considering the current overload conditions?"
      - >-
        "Has there been any recent failure in the metering components of the
        streetlights affecting data reporting, and was the control mode switch
        identifier used for the changes?"
      - >-
        "Can you tell me when was the most recent instance when the current
        exceeded the safe operating threshold, causing a streetlight failure?"
  - source_sentence: Ambient light level detected by the streetlight, measured in lux
    sentences:
      - >-
        "What is the count of how many times the most recent streetlight failure
        has been switched on before the error occurred?"
      - >-
        "What is the recent data on maximum load current indicating potential
        risk and any recent communication issues with the lux sensors?"
      - >-
        "What is the recent dimming schedule applied, the detected ambient light
        level in lux, and were there any recent issues or failures with the
        driver of the streetlight?"
  - source_sentence: >-
      Timestamp of the latest data recorded or action performed by the
      streetlight
    sentences:
      - >-
        "What is the recent failure rate of the relay responsible for operating
        the DALI dimming protocol in our streetlights?"
      - >-
        "Can you provide the recent instances where the current drawn by the
        streetlights was lower than expected, sorted by the unique streetlight
        identifier and street name?"
      - >-
        "What was the most recent threshold level set to stop recording
        flickering events using the SIM card code in the streetlight?"
  - source_sentence: Current exceeds the safe operating threshold for the streetlight (failure)
    sentences:
      - >-
        "What is the hardware version of the recent streetlight experiencing
        faults in its lux module affecting light level sensing and control?"
      - >-
        "Can you provide the recent instances where the current drawn by the
        streetlights was lower than expected, sorted by the unique streetlight
        identifier and street name?"
      - >-
        "Can you identify the most recent instance when the power under load was
        higher than normal, possibly indicating inefficiency or a fault, and
        concurrently, the voltage exceeded the safe operating levels for the
        streetlights?"
  - source_sentence: >-
      Voltage supplied is below the safe operating level for the streetlight
      (failure)
    sentences:
      - >-
        "What is the recent AC voltage supply to the streetlight and the SIM
        card code used for its cellular network communication?"
      - >-
        "What was the most recent threshold level set to stop recording
        flickering events using the SIM card code in the streetlight?"
      - >-
        "What is the most recent internal temperature reading for the operating
        conditions of the streetlight?"
model-index:
  - name: BGE base Financial Matryoshka
    results:
      - task:
          type: information-retrieval
          name: Information Retrieval
        dataset:
          name: dim 768
          type: dim_768
        metrics:
          - type: cosine_accuracy@1
            value: 0.004149377593360996
            name: Cosine Accuracy@1
          - type: cosine_accuracy@3
            value: 0.02074688796680498
            name: Cosine Accuracy@3
          - type: cosine_accuracy@5
            value: 0.04149377593360996
            name: Cosine Accuracy@5
          - type: cosine_accuracy@10
            value: 0.06224066390041494
            name: Cosine Accuracy@10
          - type: cosine_precision@1
            value: 0.004149377593360996
            name: Cosine Precision@1
          - type: cosine_precision@3
            value: 0.006915629322268326
            name: Cosine Precision@3
          - type: cosine_precision@5
            value: 0.008298755186721992
            name: Cosine Precision@5
          - type: cosine_precision@10
            value: 0.006224066390041493
            name: Cosine Precision@10
          - type: cosine_recall@1
            value: 0.004149377593360996
            name: Cosine Recall@1
          - type: cosine_recall@3
            value: 0.02074688796680498
            name: Cosine Recall@3
          - type: cosine_recall@5
            value: 0.04149377593360996
            name: Cosine Recall@5
          - type: cosine_recall@10
            value: 0.06224066390041494
            name: Cosine Recall@10
          - type: cosine_ndcg@10
            value: 0.028846821098581887
            name: Cosine Ndcg@10
          - type: cosine_mrr@10
            value: 0.018665612856484225
            name: Cosine Mrr@10
          - type: cosine_map@100
            value: 0.024320046307682447
            name: Cosine Map@100
      - task:
          type: information-retrieval
          name: Information Retrieval
        dataset:
          name: dim 512
          type: dim_512
        metrics:
          - type: cosine_accuracy@1
            value: 0.004149377593360996
            name: Cosine Accuracy@1
          - type: cosine_accuracy@3
            value: 0.02074688796680498
            name: Cosine Accuracy@3
          - type: cosine_accuracy@5
            value: 0.04149377593360996
            name: Cosine Accuracy@5
          - type: cosine_accuracy@10
            value: 0.06224066390041494
            name: Cosine Accuracy@10
          - type: cosine_precision@1
            value: 0.004149377593360996
            name: Cosine Precision@1
          - type: cosine_precision@3
            value: 0.006915629322268326
            name: Cosine Precision@3
          - type: cosine_precision@5
            value: 0.008298755186721992
            name: Cosine Precision@5
          - type: cosine_precision@10
            value: 0.006224066390041493
            name: Cosine Precision@10
          - type: cosine_recall@1
            value: 0.004149377593360996
            name: Cosine Recall@1
          - type: cosine_recall@3
            value: 0.02074688796680498
            name: Cosine Recall@3
          - type: cosine_recall@5
            value: 0.04149377593360996
            name: Cosine Recall@5
          - type: cosine_recall@10
            value: 0.06224066390041494
            name: Cosine Recall@10
          - type: cosine_ndcg@10
            value: 0.028846821098581887
            name: Cosine Ndcg@10
          - type: cosine_mrr@10
            value: 0.018665612856484225
            name: Cosine Mrr@10
          - type: cosine_map@100
            value: 0.024320046307682447
            name: Cosine Map@100
      - task:
          type: information-retrieval
          name: Information Retrieval
        dataset:
          name: dim 256
          type: dim_256
        metrics:
          - type: cosine_accuracy@1
            value: 0.008298755186721992
            name: Cosine Accuracy@1
          - type: cosine_accuracy@3
            value: 0.02074688796680498
            name: Cosine Accuracy@3
          - type: cosine_accuracy@5
            value: 0.04149377593360996
            name: Cosine Accuracy@5
          - type: cosine_accuracy@10
            value: 0.058091286307053944
            name: Cosine Accuracy@10
          - type: cosine_precision@1
            value: 0.008298755186721992
            name: Cosine Precision@1
          - type: cosine_precision@3
            value: 0.006915629322268326
            name: Cosine Precision@3
          - type: cosine_precision@5
            value: 0.008298755186721992
            name: Cosine Precision@5
          - type: cosine_precision@10
            value: 0.0058091286307053935
            name: Cosine Precision@10
          - type: cosine_recall@1
            value: 0.008298755186721992
            name: Cosine Recall@1
          - type: cosine_recall@3
            value: 0.02074688796680498
            name: Cosine Recall@3
          - type: cosine_recall@5
            value: 0.04149377593360996
            name: Cosine Recall@5
          - type: cosine_recall@10
            value: 0.058091286307053944
            name: Cosine Recall@10
          - type: cosine_ndcg@10
            value: 0.02917470145123319
            name: Cosine Ndcg@10
          - type: cosine_mrr@10
            value: 0.020424158598432458
            name: Cosine Mrr@10
          - type: cosine_map@100
            value: 0.02622693528356527
            name: Cosine Map@100
      - task:
          type: information-retrieval
          name: Information Retrieval
        dataset:
          name: dim 128
          type: dim_128
        metrics:
          - type: cosine_accuracy@1
            value: 0.008298755186721992
            name: Cosine Accuracy@1
          - type: cosine_accuracy@3
            value: 0.02074688796680498
            name: Cosine Accuracy@3
          - type: cosine_accuracy@5
            value: 0.03734439834024896
            name: Cosine Accuracy@5
          - type: cosine_accuracy@10
            value: 0.05394190871369295
            name: Cosine Accuracy@10
          - type: cosine_precision@1
            value: 0.008298755186721992
            name: Cosine Precision@1
          - type: cosine_precision@3
            value: 0.006915629322268326
            name: Cosine Precision@3
          - type: cosine_precision@5
            value: 0.007468879668049794
            name: Cosine Precision@5
          - type: cosine_precision@10
            value: 0.005394190871369295
            name: Cosine Precision@10
          - type: cosine_recall@1
            value: 0.008298755186721992
            name: Cosine Recall@1
          - type: cosine_recall@3
            value: 0.02074688796680498
            name: Cosine Recall@3
          - type: cosine_recall@5
            value: 0.03734439834024896
            name: Cosine Recall@5
          - type: cosine_recall@10
            value: 0.05394190871369295
            name: Cosine Recall@10
          - type: cosine_ndcg@10
            value: 0.027438863848135625
            name: Cosine Ndcg@10
          - type: cosine_mrr@10
            value: 0.019311071593229267
            name: Cosine Mrr@10
          - type: cosine_map@100
            value: 0.02603525046406888
            name: Cosine Map@100
      - task:
          type: information-retrieval
          name: Information Retrieval
        dataset:
          name: dim 64
          type: dim_64
        metrics:
          - type: cosine_accuracy@1
            value: 0.008298755186721992
            name: Cosine Accuracy@1
          - type: cosine_accuracy@3
            value: 0.012448132780082987
            name: Cosine Accuracy@3
          - type: cosine_accuracy@5
            value: 0.029045643153526972
            name: Cosine Accuracy@5
          - type: cosine_accuracy@10
            value: 0.05394190871369295
            name: Cosine Accuracy@10
          - type: cosine_precision@1
            value: 0.008298755186721992
            name: Cosine Precision@1
          - type: cosine_precision@3
            value: 0.004149377593360996
            name: Cosine Precision@3
          - type: cosine_precision@5
            value: 0.005809128630705394
            name: Cosine Precision@5
          - type: cosine_precision@10
            value: 0.005394190871369295
            name: Cosine Precision@10
          - type: cosine_recall@1
            value: 0.008298755186721992
            name: Cosine Recall@1
          - type: cosine_recall@3
            value: 0.012448132780082987
            name: Cosine Recall@3
          - type: cosine_recall@5
            value: 0.029045643153526972
            name: Cosine Recall@5
          - type: cosine_recall@10
            value: 0.05394190871369295
            name: Cosine Recall@10
          - type: cosine_ndcg@10
            value: 0.025512460997908278
            name: Cosine Ndcg@10
          - type: cosine_mrr@10
            value: 0.017038793387341104
            name: Cosine Mrr@10
          - type: cosine_map@100
            value: 0.02259750227693111
            name: Cosine Map@100

BGE base Financial Matryoshka

This is a sentence-transformers model finetuned from SQAI/streetlight_sql_embedding. It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.

Model Details

Model Description

  • Model Type: Sentence Transformer
  • Base model: SQAI/streetlight_sql_embedding
  • Maximum Sequence Length: 512 tokens
  • Output Dimensionality: 384 dimensions
  • Similarity Function: Cosine Similarity
  • Language: en
  • License: apache-2.0

Model Sources

Full Model Architecture

SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': True}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
)

Usage

Direct Usage (Sentence Transformers)

First install the Sentence Transformers library:

pip install -U sentence-transformers

Then you can load this model and run inference.

from sentence_transformers import SentenceTransformer

# Download from the 🤗 Hub
model = SentenceTransformer("SQAI/streetlight_sql_embedding2")
# Run inference
sentences = [
    'Voltage supplied is below the safe operating level for the streetlight (failure)',
    '"What is the recent AC voltage supply to the streetlight and the SIM card code used for its cellular network communication?"',
    '"What was the most recent threshold level set to stop recording flickering events using the SIM card code in the streetlight?"',
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 384]

# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]

Evaluation

Metrics

Information Retrieval (dataset: dim_768)

Metric Value
cosine_accuracy@1 0.0041
cosine_accuracy@3 0.0207
cosine_accuracy@5 0.0415
cosine_accuracy@10 0.0622
cosine_precision@1 0.0041
cosine_precision@3 0.0069
cosine_precision@5 0.0083
cosine_precision@10 0.0062
cosine_recall@1 0.0041
cosine_recall@3 0.0207
cosine_recall@5 0.0415
cosine_recall@10 0.0622
cosine_ndcg@10 0.0288
cosine_mrr@10 0.0187
cosine_map@100 0.0243

Information Retrieval (dataset: dim_512)

Metric Value
cosine_accuracy@1 0.0041
cosine_accuracy@3 0.0207
cosine_accuracy@5 0.0415
cosine_accuracy@10 0.0622
cosine_precision@1 0.0041
cosine_precision@3 0.0069
cosine_precision@5 0.0083
cosine_precision@10 0.0062
cosine_recall@1 0.0041
cosine_recall@3 0.0207
cosine_recall@5 0.0415
cosine_recall@10 0.0622
cosine_ndcg@10 0.0288
cosine_mrr@10 0.0187
cosine_map@100 0.0243

Information Retrieval (dataset: dim_256)

Metric Value
cosine_accuracy@1 0.0083
cosine_accuracy@3 0.0207
cosine_accuracy@5 0.0415
cosine_accuracy@10 0.0581
cosine_precision@1 0.0083
cosine_precision@3 0.0069
cosine_precision@5 0.0083
cosine_precision@10 0.0058
cosine_recall@1 0.0083
cosine_recall@3 0.0207
cosine_recall@5 0.0415
cosine_recall@10 0.0581
cosine_ndcg@10 0.0292
cosine_mrr@10 0.0204
cosine_map@100 0.0262

Information Retrieval (dataset: dim_128)

Metric Value
cosine_accuracy@1 0.0083
cosine_accuracy@3 0.0207
cosine_accuracy@5 0.0373
cosine_accuracy@10 0.0539
cosine_precision@1 0.0083
cosine_precision@3 0.0069
cosine_precision@5 0.0075
cosine_precision@10 0.0054
cosine_recall@1 0.0083
cosine_recall@3 0.0207
cosine_recall@5 0.0373
cosine_recall@10 0.0539
cosine_ndcg@10 0.0274
cosine_mrr@10 0.0193
cosine_map@100 0.026

Information Retrieval (dataset: dim_64)

Metric Value
cosine_accuracy@1 0.0083
cosine_accuracy@3 0.0124
cosine_accuracy@5 0.029
cosine_accuracy@10 0.0539
cosine_precision@1 0.0083
cosine_precision@3 0.0041
cosine_precision@5 0.0058
cosine_precision@10 0.0054
cosine_recall@1 0.0083
cosine_recall@3 0.0124
cosine_recall@5 0.029
cosine_recall@10 0.0539
cosine_ndcg@10 0.0255
cosine_mrr@10 0.017
cosine_map@100 0.0226

Training Details

Training Dataset

Unnamed Dataset

  • Size: 2,161 training samples
  • Columns: positive and anchor
  • Approximate statistics based on the first 1000 samples:
    | | positive | anchor |
    |---|---|---|
    | type | string | string |
    | details | min: 6 tokens; mean: 14.3 tokens; max: 20 tokens | min: 15 tokens; mean: 32.58 tokens; max: 54 tokens |
  • Samples:
    | positive | anchor |
    |---|---|
    | Lower lux level below which additional lighting may be necessary | "What are the recent faults found in the lux module that affect light level control, in relation to the default dimming level of the streetlights and the control mode switch identifier used for changing settings?" |
    | Current dimming level of the streetlight in operation | "Can the operator managing the streetlights provide the most recent update on the streetlight that is currently below the expected range and unable to connect to the network for remote management?" |
    | Upper voltage limit considered safe and efficient for streetlight operation | "Can you provide any recent potential failures of a streetlight group due to unusually high voltage under load or intermittent flashing, within the southernmost geographic area?" |
  • Loss: MatryoshkaLoss with these parameters:
    {
        "loss": "MultipleNegativesRankingLoss",
        "matryoshka_dims": [
            384,
            256,
            128,
            64
        ],
        "matryoshka_weights": [
            1,
            1,
            1,
            1
        ],
        "n_dims_per_step": -1
    }
    

Evaluation Dataset

Unnamed Dataset

  • Size: 241 evaluation samples
  • Columns: positive and anchor
  • Approximate statistics based on the first 1000 samples (this dataset contains only 241 samples, so the statistics cover all of it):
    | | positive | anchor |
    |---|---|---|
    | type | string | string |
    | details | min: 6 tokens; mean: 14.31 tokens; max: 20 tokens | min: 17 tokens; mean: 31.03 tokens; max: 54 tokens |
  • Samples:
    | positive | anchor |
    |---|---|
    | Timestamp of the latest data recorded or action performed by the streetlight | "What was the most recent threshold level set to stop recording flickering events using the SIM card code in the streetlight?" |
    | Maximum longitude of the geographic area covered by the group of streetlights | "What is the recent power usage in watts for the oldest streetlight on the street with maximum longitude?" |
    | Current dimming level of the streetlight in operation | "What is the most recent dimming level of the streetlight?" |
  • Loss: MatryoshkaLoss with these parameters:
    {
        "loss": "MultipleNegativesRankingLoss",
        "matryoshka_dims": [
            384,
            256,
            128,
            64
        ],
        "matryoshka_weights": [
            1,
            1,
            1,
            1
        ],
        "n_dims_per_step": -1
    }
    

Training Hyperparameters

Non-Default Hyperparameters

  • eval_strategy: epoch
  • per_device_train_batch_size: 32
  • per_device_eval_batch_size: 16
  • gradient_accumulation_steps: 16
  • learning_rate: 1e-05
  • weight_decay: 0.03
  • num_train_epochs: 75
  • lr_scheduler_type: cosine
  • warmup_ratio: 0.2
  • bf16: True
  • tf32: True
  • load_best_model_at_end: True
  • optim: adamw_torch_fused
  • batch_sampler: no_duplicates

All Hyperparameters

Click to expand
  • overwrite_output_dir: False
  • do_predict: False
  • eval_strategy: epoch
  • prediction_loss_only: True
  • per_device_train_batch_size: 32
  • per_device_eval_batch_size: 16
  • per_gpu_train_batch_size: None
  • per_gpu_eval_batch_size: None
  • gradient_accumulation_steps: 16
  • eval_accumulation_steps: None
  • learning_rate: 1e-05
  • weight_decay: 0.03
  • adam_beta1: 0.9
  • adam_beta2: 0.999
  • adam_epsilon: 1e-08
  • max_grad_norm: 1.0
  • num_train_epochs: 75
  • max_steps: -1
  • lr_scheduler_type: cosine
  • lr_scheduler_kwargs: {}
  • warmup_ratio: 0.2
  • warmup_steps: 0
  • log_level: passive
  • log_level_replica: warning
  • log_on_each_node: True
  • logging_nan_inf_filter: True
  • save_safetensors: True
  • save_on_each_node: False
  • save_only_model: False
  • restore_callback_states_from_checkpoint: False
  • no_cuda: False
  • use_cpu: False
  • use_mps_device: False
  • seed: 42
  • data_seed: None
  • jit_mode_eval: False
  • use_ipex: False
  • bf16: True
  • fp16: False
  • fp16_opt_level: O1
  • half_precision_backend: auto
  • bf16_full_eval: False
  • fp16_full_eval: False
  • tf32: True
  • local_rank: 0
  • ddp_backend: None
  • tpu_num_cores: None
  • tpu_metrics_debug: False
  • debug: []
  • dataloader_drop_last: False
  • dataloader_num_workers: 0
  • dataloader_prefetch_factor: None
  • past_index: -1
  • disable_tqdm: False
  • remove_unused_columns: True
  • label_names: None
  • load_best_model_at_end: True
  • ignore_data_skip: False
  • fsdp: []
  • fsdp_min_num_params: 0
  • fsdp_config: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
  • fsdp_transformer_layer_cls_to_wrap: None
  • accelerator_config: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
  • deepspeed: None
  • label_smoothing_factor: 0.0
  • optim: adamw_torch_fused
  • optim_args: None
  • adafactor: False
  • group_by_length: False
  • length_column_name: length
  • ddp_find_unused_parameters: None
  • ddp_bucket_cap_mb: None
  • ddp_broadcast_buffers: False
  • dataloader_pin_memory: True
  • dataloader_persistent_workers: False
  • skip_memory_metrics: True
  • use_legacy_prediction_loop: False
  • push_to_hub: False
  • resume_from_checkpoint: None
  • hub_model_id: None
  • hub_strategy: every_save
  • hub_private_repo: False
  • hub_always_push: False
  • gradient_checkpointing: False
  • gradient_checkpointing_kwargs: None
  • include_inputs_for_metrics: False
  • eval_do_concat_batches: True
  • fp16_backend: auto
  • push_to_hub_model_id: None
  • push_to_hub_organization: None
  • mp_parameters:
  • auto_find_batch_size: False
  • full_determinism: False
  • torchdynamo: None
  • ray_scope: last
  • ddp_timeout: 1800
  • torch_compile: False
  • torch_compile_backend: None
  • torch_compile_mode: None
  • dispatch_batches: None
  • split_batches: None
  • include_tokens_per_second: False
  • include_num_input_tokens_seen: False
  • neftune_noise_alpha: None
  • optim_target_modules: None
  • batch_eval_metrics: False
  • batch_sampler: no_duplicates
  • multi_dataset_batch_sampler: proportional

Training Logs

Click to expand
Epoch Step Training Loss Validation Loss dim_128_cosine_map@100 dim_256_cosine_map@100 dim_512_cosine_map@100 dim_64_cosine_map@100 dim_768_cosine_map@100
0.2353 1 11.247 - - - - - -
0.4706 2 11.4455 - - - - - -
0.7059 3 11.5154 - - - - - -
0.9412 4 10.4079 - - - - - -
1.1765 5 3.3256 - - - - - -
1.4118 6 3.812 - - - - - -
1.6471 7 4.0302 - - - - - -
1.8824 8 3.5832 - - - - - -
2.1176 9 3.9586 - - - - - -
2.3529 10 4.2835 - - - - - -
2.5882 11 1.6391 6.0237 0.0254 0.0354 0.0318 0.0230 0.0318
1.0294 12 1.3873 - - - - - -
1.2647 13 11.1729 - - - - - -
1.5 14 11.1729 - - - - - -
1.7353 15 11.3334 - - - - - -
1.9706 16 9.1337 - - - - - -
2.2059 17 2.8674 - - - - - -
2.4412 18 3.9162 - - - - - -
2.6765 19 3.3378 - - - - - -
2.9118 20 3.5152 - - - - - -
3.1471 21 3.1655 - - - - - -
3.3824 22 3.5905 - - - - - -
3.6176 23 1.2027 5.5383 0.0265 0.0304 0.0291 0.0235 0.0291
2.0588 24 2.5902 - - - - - -
2.2941 25 10.8776 - - - - - -
2.5294 26 10.7109 - - - - - -
2.7647 27 10.9662 - - - - - -
3.0 28 7.5032 - - - - - -
3.2353 29 1.9266 - - - - - -
3.4706 30 2.5007 - - - - - -
3.7059 31 2.2972 - - - - - -
3.9412 32 2.3428 - - - - - -
4.1765 33 2.4842 - - - - - -
4.4118 34 2.371 - - - - - -
4.6471 35 0.8811 5.0896 0.0261 0.0356 0.0324 0.0263 0.0324
3.0882 36 3.8163 - - - - - -
3.3235 37 10.3601 - - - - - -
3.5588 38 9.8085 - - - - - -
3.7941 39 10.3201 - - - - - -
4.0294 40 5.7213 - - - - - -
4.2647 41 1.0641 - - - - - -
4.5 42 1.7557 - - - - - -
4.7353 43 1.534 - - - - - -
4.9706 44 1.2931 - - - - - -
5.2059 45 2.0569 - - - - - -
5.4412 46 1.6945 - - - - - -
5.6765 47 0.6985 4.8110 0.0267 0.0230 0.0343 0.0180 0.0343
4.1176 48 4.8862 - - - - - -
4.3529 49 9.9427 - - - - - -
4.5882 50 9.7492 - - - - - -
4.8235 51 10.1616 - - - - - -
5.0588 52 4.3073 - - - - - -
5.2941 53 0.9089 - - - - - -
5.5294 54 1.2689 - - - - - -
5.7647 55 1.2875 - - - - - -
6.0 56 1.2756 - - - - - -
6.2353 57 1.6222 - - - - - -
6.4706 58 1.3049 - - - - - -
6.7059 59 0.3305 4.6562 0.0184 0.0327 0.0288 0.0190 0.0288
5.1471 60 5.7286 - - - - - -
5.3824 61 9.7399 - - - - - -
5.6176 62 9.3036 - - - - - -
5.8529 63 9.6674 - - - - - -
6.0882 64 2.7979 - - - - - -
6.3235 65 0.4978 - - - - - -
6.5588 66 1.8006 - - - - - -
6.7941 67 1.098 - - - - - -
7.0294 68 1.3678 - - - - - -
7.2647 69 1.4648 - - - - - -
7.5 70 1.1826 - - - - - -
7.7353 71 0.0624 4.5802 0.0200 0.0208 0.0216 0.0231 0.0216
6.1765 72 6.8322 - - - - - -
6.4118 73 9.3021 - - - - - -
6.6471 74 9.1494 - - - - - -
6.8824 75 9.631 - - - - - -
7.1176 76 1.661 - - - - - -
7.3529 77 0.2353 - - - - - -
7.5882 78 1.0663 - - - - - -
7.8235 79 0.6836 - - - - - -
8.0588 80 0.9921 - - - - - -
8.2941 81 1.6479 - - - - - -
8.5294 82 0.6713 - - - - - -
8.7647 83 0.0 4.5499 0.0209 0.0233 0.0249 0.0226 0.0249
7.2059 84 7.775 - - - - - -
7.4412 85 9.0508 - - - - - -
7.6765 86 9.1417 - - - - - -
7.9118 87 8.9087 - - - - - -
8.1471 88 0.9757 - - - - - -
8.3824 89 0.7521 - - - - - -
8.6176 90 0.7292 - - - - - -
8.8529 91 0.6088 - - - - - -
9.0882 92 0.9514 - - - - - -
9.3235 93 1.435 - - - - - -
9.5588 94 0.3655 - - - - - -
9.7941 95 0.0 4.5162 0.0245 0.0268 0.0224 0.0238 0.0224
8.2353 96 8.7854 - - - - - -
8.4706 97 9.0167 - - - - - -
8.7059 98 9.0405 - - - - - -
8.9412 99 7.7069 - - - - - -
9.1765 100 0.6267 - - - - - -
9.4118 101 0.4043 - - - - - -
9.6471 102 0.7028 - - - - - -
9.8824 103 0.751 - - - - - -
10.1176 104 0.5994 - - - - - -
10.3529 105 1.0402 - - - - - -
10.5882 106 0.3983 4.4860 0.0259 0.0301 0.0252 0.0265 0.0252
9.0294 107 1.1037 - - - - - -
9.2647 108 8.6263 - - - - - -
9.5 109 8.9359 - - - - - -
9.7353 110 8.9879 - - - - - -
9.9706 111 6.4932 - - - - - -
10.2059 112 0.3904 - - - - - -
10.4412 113 0.3544 - - - - - -
10.6765 114 0.5658 - - - - - -
10.9118 115 0.5884 - - - - - -
11.1471 116 0.4828 - - - - - -
11.3824 117 0.8872 - - - - - -
11.6176 118 0.2906 4.4899 0.0237 0.0267 0.0264 0.0242 0.0264
10.0588 119 2.1398 - - - - - -
10.2941 120 8.6036 - - - - - -
10.5294 121 8.7739 - - - - - -
10.7647 122 9.1481 - - - - - -
11.0 123 5.2436 - - - - - -
11.2353 124 0.2435 - - - - - -
11.4706 125 0.4451 - - - - - -
11.7059 126 0.4338 - - - - - -
11.9412 127 0.5156 - - - - - -
12.1765 128 0.7081 - - - - - -
12.4118 129 0.375 - - - - - -
12.6471 130 0.1906 4.5243 0.0305 0.0253 0.0217 0.0214 0.0217
11.0882 131 3.115 - - - - - -
11.3235 132 8.702 - - - - - -
11.5588 133 8.4872 - - - - - -
11.7941 134 9.0143 - - - - - -
12.0294 135 4.2374 - - - - - -
12.2647 136 0.1979 - - - - - -
12.5 137 0.6371 - - - - - -
12.7353 138 0.5763 - - - - - -
12.9706 139 0.5716 - - - - - -
13.2059 140 0.9894 - - - - - -
13.4412 141 0.3963 - - - - - -
13.6765 142 0.084 4.5514 0.0224 0.0253 0.0209 0.0250 0.0209
12.1176 143 4.1455 - - - - - -
12.3529 144 8.6664 - - - - - -
12.5882 145 8.5896 - - - - - -
12.8235 146 8.9639 - - - - - -
13.0588 147 3.2692 - - - - - -
13.2941 148 0.2518 - - - - - -
13.5294 149 0.8313 - - - - - -
13.7647 150 0.5592 - - - - - -
14.0 151 0.3966 - - - - - -
14.2353 152 0.829 - - - - - -
14.4706 153 0.2369 - - - - - -
14.7059 154 0.0629 4.5549 0.0294 0.0312 0.0258 0.0315 0.0258
13.1471 155 5.1674 - - - - - -
13.3824 156 8.5543 - - - - - -
13.6176 157 8.4481 - - - - - -
13.8529 158 8.7815 - - - - - -
14.0882 159 1.9305 - - - - - -
14.3235 160 0.0925 - - - - - -
14.5588 161 0.6568 - - - - - -
14.7941 162 0.2796 - - - - - -
15.0294 163 0.5503 - - - - - -
15.2647 164 0.6386 - - - - - -
15.5 165 0.1957 - - - - - -
15.7353 166 0.0137 4.5688 0.0210 0.0251 0.0251 0.0223 0.0251
14.1765 167 6.2283 - - - - - -
14.4118 168 8.5378 - - - - - -
14.6471 169 8.5173 - - - - - -
14.8824 170 8.9953 - - - - - -
15.1176 171 0.983 - - - - - -
15.3529 172 0.1503 - - - - - -
15.5882 173 0.9004 - - - - - -
15.8235 174 0.3962 - - - - - -
16.0588 175 0.4047 - - - - - -
16.2941 176 0.8265 - - - - - -
16.5294 177 0.3069 - - - - - -
16.7647 178 0.0 4.5819 0.0219 0.0271 0.0240 0.0253 0.0240
15.2059 179 7.3186 - - - - - -
15.4412 180 8.5984 - - - - - -
15.6765 181 8.5362 - - - - - -
15.9118 182 8.2934 - - - - - -
16.1471 183 0.437 - - - - - -
16.3824 184 0.1864 - - - - - -
16.6176 185 0.2657 - - - - - -
16.8529 186 0.4242 - - - - - -
17.0882 187 0.4815 - - - - - -
17.3235 188 0.5206 - - - - - -
17.5588 189 0.1981 - - - - - -
17.7941 190 0.0 4.5795 0.0249 0.0319 0.0287 0.0227 0.0287
16.2353 191 8.2837 - - - - - -
16.4706 192 8.5457 - - - - - -
16.7059 193 8.6284 - - - - - -
16.9412 194 7.1806 - - - - - -
17.1765 195 0.2714 - - - - - -
17.4118 196 0.65 - - - - - -
17.6471 197 0.3627 - - - - - -
17.8824 198 0.2502 - - - - - -
18.1176 199 0.4651 - - - - - -
18.3529 200 0.3878 - - - - - -
18.5882 201 0.1728 4.5870 0.0258 0.0321 0.0293 0.0290 0.0293
17.0294 202 1.0158 - - - - - -
17.2647 203 8.1391 - - - - - -
17.5 204 8.5323 - - - - - -
17.7353 205 8.6644 - - - - - -
17.9706 206 6.1161 - - - - - -
18.2059 207 0.4636 - - - - - -
18.4412 208 0.8765 - - - - - -
18.6765 209 0.4075 - - - - - -
18.9118 210 0.3211 - - - - - -
19.1471 211 0.65 - - - - - -
19.3824 212 0.4802 - - - - - -
19.6176 213 0.0777 4.5921 0.0211 0.0268 0.0238 0.0260 0.0238
18.0588 214 1.9364 - - - - - -
18.2941 215 8.3079 - - - - - -
18.5294 216 8.4468 - - - - - -
18.7647 217 8.8501 - - - - - -
19.0 218 5.0076 - - - - - -
19.2353 219 0.1596 - - - - - -
19.4706 220 0.6482 - - - - - -
19.7059 221 0.5019 - - - - - -
19.9412 222 0.2596 - - - - - -
20.1765 223 0.5857 - - - - - -
20.4118 224 0.3469 - - - - - -
20.6471 225 0.082 4.5951 0.0251 0.0293 0.0239 0.0259 0.0239
19.0882 226 3.0141 - - - - - -
19.3235 227 8.3977 - - - - - -
19.5588 228 8.2687 - - - - - -
19.7941 229 8.8415 - - - - - -
20.0294 230 3.9692 - - - - - -
20.2647 231 0.2079 - - - - - -
20.5 232 0.6167 - - - - - -
20.7353 233 0.255 - - - - - -
20.9706 234 0.2403 - - - - - -
21.2059 235 0.5944 - - - - - -
21.4412 236 0.4212 - - - - - -
21.6765 237 0.1031 4.5929 0.0248 0.0301 0.0297 0.0268 0.0297
20.1176 238 4.0698 - - - - - -
20.3529 239 8.3696 - - - - - -
20.5882 240 8.2668 - - - - - -
20.8235 241 8.8194 - - - - - -
21.0588 242 2.9283 - - - - - -
21.2941 243 0.0974 - - - - - -
21.5294 244 0.5172 - - - - - -
21.7647 245 0.2451 - - - - - -
22.0 246 0.4693 - - - - - -
22.2353 247 0.7352 - - - - - -
22.4706 248 0.1933 - - - - - -
22.7059 249 0.0552 4.5945 0.0261 0.0275 0.0279 0.0204 0.0279
21.1471 250 5.1237 - - - - - -
21.3824 251 8.5068 - - - - - -
21.6176 252 8.2828 - - - - - -
21.8529 253 8.7851 - - - - - -
22.0882 254 2.0883 - - - - - -
22.3235 255 0.1147 - - - - - -
22.5588 256 0.5259 - - - - - -
22.7941 257 0.2915 - - - - - -
23.0294 258 0.2495 - - - - - -
23.2647 259 0.7518 - - - - - -
23.5 260 0.1767 - - - - - -
23.7353 261 0.0244 4.5944 0.0213 0.0267 0.0265 0.0220 0.0265
22.1765 262 6.1144 - - - - - -
22.4118 263 8.3334 - - - - - -
22.6471 264 8.4377 - - - - - -
22.8824 265 8.8182 - - - - - -
23.1176 266 0.8795 - - - - - -
23.3529 267 0.0637 - - - - - -
23.5882 268 0.3658 - - - - - -
23.8235 269 0.3599 - - - - - -
24.0588 270 0.283 - - - - - -
24.2941 271 0.731 - - - - - -
24.5294 272 0.1758 - - - - - -
24.7647 273 0.0 4.5963 0.0259 0.0295 0.0247 0.0229 0.0247
23.2059 274 7.1188 - - - - - -
23.4412 275 8.354 - - - - - -
23.6765 276 8.5186 - - - - - -
23.9118 277 8.1633 - - - - - -
24.1471 278 0.3481 - - - - - -
24.3824 279 0.574 - - - - - -
24.6176 280 0.2784 - - - - - -
24.8529 281 0.251 - - - - - -
25.0882 282 0.4093 - - - - - -
25.3235 283 0.5414 - - - - - -
25.5588 284 0.149 - - - - - -
25.7941 285 0.0 4.5965 0.0223 0.0251 0.0240 0.0204 0.0240
24.2353 286 8.2498 - - - - - -
24.4706 287 8.4555 - - - - - -
24.7059 288 8.5368 - - - - - -
24.9412 289 7.1779 - - - - - -
25.1765 290 0.1486 - - - - - -
25.4118 291 0.9156 - - - - - -
25.6471 292 0.2757 - - - - - -
25.8824 293 0.237 - - - - - -
26.1176 294 0.2979 - - - - - -
26.3529 295 0.5296 - - - - - -
26.5882 296 0.2062 4.5949 0.0259 0.0327 0.0308 0.0247 0.0308
25.0294 297 1.0355 - - - - - -
25.2647 298 8.1721 - - - - - -
25.5 299 8.4028 - - - - - -
25.7353 300 8.5989 4.5941 0.0260 0.0262 0.0243 0.0226 0.0243
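  • The bold row denotes the saved checkpoint. (Note: bold formatting is not preserved in this plain-text rendering of the table, so the saved checkpoint row cannot be identified here.)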

Framework Versions

  • Python: 3.10.12
  • Sentence Transformers: 3.0.1
  • Transformers: 4.41.2
  • PyTorch: 2.1.2+cu121
  • Accelerate: 0.32.0
  • Datasets: 2.19.1
  • Tokenizers: 0.19.1

Citation

BibTeX

Sentence Transformers

@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}

MatryoshkaLoss

@misc{kusupati2024matryoshka,
    title={Matryoshka Representation Learning}, 
    author={Aditya Kusupati and Gantavya Bhatt and Aniket Rege and Matthew Wallingford and Aditya Sinha and Vivek Ramanujan and William Howard-Snyder and Kaifeng Chen and Sham Kakade and Prateek Jain and Ali Farhadi},
    year={2024},
    eprint={2205.13147},
    archivePrefix={arXiv},
    primaryClass={cs.LG}
}

MultipleNegativesRankingLoss

@misc{henderson2017efficient,
    title={Efficient Natural Language Response Suggestion for Smart Reply}, 
    author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
    year={2017},
    eprint={1705.00652},
    archivePrefix={arXiv},
    primaryClass={cs.CL}
}