mrm8488's picture
Add new SentenceTransformer model.
29efd03 verified
metadata
language:
  - en
library_name: sentence-transformers
tags:
  - sentence-transformers
  - sentence-similarity
  - feature-extraction
  - dataset_size:100K<n<1M
  - loss:MatryoshkaLoss
  - loss:MultipleNegativesRankingLoss
base_model: distilbert/distilroberta-base
metrics:
  - pearson_cosine
  - spearman_cosine
  - pearson_manhattan
  - spearman_manhattan
  - pearson_euclidean
  - spearman_euclidean
  - pearson_dot
  - spearman_dot
  - pearson_max
  - spearman_max
widget:
  - source_sentence: Test Rocks
    sentences:
      - Number of testimonies
      - People are at a pool.
      - I've never been to Asia
  - source_sentence: No animals.
    sentences:
      - We don't have a dog.
      - These boys are on bikes
      - A person is climbing.
  - source_sentence: Shrinking.
    sentences:
      - That doesn't seem fair.
      - A man reads the paper.
      - I've never been to Asia
  - source_sentence: Loire Valley
    sentences:
      - A Lake in Loire.
      - people stand near pole
      - A cat is licking itself.
  - source_sentence: It is well.
    sentences:
      - That's convenient.
      - away from the children
      - She hated the restaurant!
pipeline_tag: sentence-similarity
model-index:
  - name: SentenceTransformer based on distilbert/distilroberta-base
    results:
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts dev 768
          type: sts-dev-768
        metrics:
          - type: pearson_cosine
            value: 0.8413274730706258
            name: Pearson Cosine
          - type: spearman_cosine
            value: 0.8478057476815382
            name: Spearman Cosine
          - type: pearson_manhattan
            value: 0.8414182910991368
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.8394684211369814
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: 0.8423380151813549
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.8401129676358965
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.7854982058734802
            name: Pearson Dot
          - type: spearman_dot
            value: 0.7814388303641997
            name: Spearman Dot
          - type: pearson_max
            value: 0.8423380151813549
            name: Pearson Max
          - type: spearman_max
            value: 0.8478057476815382
            name: Spearman Max
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts dev 512
          type: sts-dev-512
        metrics:
          - type: pearson_cosine
            value: 0.8394744649386727
            name: Pearson Cosine
          - type: spearman_cosine
            value: 0.8469596264857904
            name: Spearman Cosine
          - type: pearson_manhattan
            value: 0.8398552366754626
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.8377241640608183
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: 0.8406514989809173
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.8380050330376462
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.7811135781647157
            name: Pearson Dot
          - type: spearman_dot
            value: 0.7776714775017128
            name: Spearman Dot
          - type: pearson_max
            value: 0.8406514989809173
            name: Pearson Max
          - type: spearman_max
            value: 0.8469596264857904
            name: Spearman Max
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts dev 256
          type: sts-dev-256
        metrics:
          - type: pearson_cosine
            value: 0.8326846589795867
            name: Pearson Cosine
          - type: spearman_cosine
            value: 0.8435757360139872
            name: Spearman Cosine
          - type: pearson_manhattan
            value: 0.835121668379584
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.833167770567356
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: 0.8359785864160201
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.8337674519096212
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.7499541215721716
            name: Pearson Dot
          - type: spearman_dot
            value: 0.7452815230357489
            name: Spearman Dot
          - type: pearson_max
            value: 0.8359785864160201
            name: Pearson Max
          - type: spearman_max
            value: 0.8435757360139872
            name: Spearman Max
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts dev 128
          type: sts-dev-128
        metrics:
          - type: pearson_cosine
            value: 0.8243384464323462
            name: Pearson Cosine
          - type: spearman_cosine
            value: 0.8399706247679909
            name: Spearman Cosine
          - type: pearson_manhattan
            value: 0.8281897604718583
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.8270317815639731
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: 0.8281918243965822
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.8267242273030063
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.7110017325551932
            name: Pearson Dot
          - type: spearman_dot
            value: 0.7049602384186016
            name: Spearman Dot
          - type: pearson_max
            value: 0.8281918243965822
            name: Pearson Max
          - type: spearman_max
            value: 0.8399706247679909
            name: Spearman Max
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts dev 64
          type: sts-dev-64
        metrics:
          - type: pearson_cosine
            value: 0.811599959622093
            name: Pearson Cosine
          - type: spearman_cosine
            value: 0.8316629408285197
            name: Spearman Cosine
          - type: pearson_manhattan
            value: 0.8113103800424869
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.8104916438729426
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: 0.8113924334973999
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.8110877753624469
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.641225674602723
            name: Pearson Dot
          - type: spearman_dot
            value: 0.6346995881423587
            name: Spearman Dot
          - type: pearson_max
            value: 0.811599959622093
            name: Pearson Max
          - type: spearman_max
            value: 0.8316629408285197
            name: Spearman Max
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts dev 32
          type: sts-dev-32
        metrics:
          - type: pearson_cosine
            value: 0.7834130163353433
            name: Pearson Cosine
          - type: spearman_cosine
            value: 0.814057381112976
            name: Spearman Cosine
          - type: pearson_manhattan
            value: 0.7831854350286095
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.7859760066096324
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: 0.7868628503474937
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.7893614397994021
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.5533705216922039
            name: Pearson Dot
          - type: spearman_dot
            value: 0.5449230360083127
            name: Spearman Dot
          - type: pearson_max
            value: 0.7868628503474937
            name: Pearson Max
          - type: spearman_max
            value: 0.814057381112976
            name: Spearman Max
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts dev 16
          type: sts-dev-16
        metrics:
          - type: pearson_cosine
            value: 0.7259201534121641
            name: Pearson Cosine
          - type: spearman_cosine
            value: 0.7751337117844075
            name: Spearman Cosine
          - type: pearson_manhattan
            value: 0.7420762055565752
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.7552849049126117
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: 0.7483211915991654
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.759888035465032
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.4387404126202509
            name: Pearson Dot
          - type: spearman_dot
            value: 0.42591442860202633
            name: Spearman Dot
          - type: pearson_max
            value: 0.7483211915991654
            name: Pearson Max
          - type: spearman_max
            value: 0.7751337117844075
            name: Spearman Max

SentenceTransformer based on distilbert/distilroberta-base

This is a sentence-transformers model finetuned from distilbert/distilroberta-base on the sentence-transformers/all-nli dataset. It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.

Model Details

Model Description

Model Sources

Full Model Architecture

SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: RobertaModel 
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
)

Usage

Direct Usage (Sentence Transformers)

First install the Sentence Transformers library:

pip install -U sentence-transformers

Then you can load this model and run inference.

from sentence_transformers import SentenceTransformer

# Download from the 🤗 Hub
model = SentenceTransformer("mrm8488/distilroberta-base-ft-allnli-matryoshka-768-16-1e-128bs")
# Run inference
sentences = [
    'It is well.',
    "That's convenient.",
    'away from the children',
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 768]

# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]

Evaluation

Metrics

Semantic Similarity (dataset: sts-dev-768)

Metric Value
pearson_cosine 0.8413
spearman_cosine 0.8478
pearson_manhattan 0.8414
spearman_manhattan 0.8395
pearson_euclidean 0.8423
spearman_euclidean 0.8401
pearson_dot 0.7855
spearman_dot 0.7814
pearson_max 0.8423
spearman_max 0.8478

Semantic Similarity (dataset: sts-dev-512)

Metric Value
pearson_cosine 0.8395
spearman_cosine 0.847
pearson_manhattan 0.8399
spearman_manhattan 0.8377
pearson_euclidean 0.8407
spearman_euclidean 0.838
pearson_dot 0.7811
spearman_dot 0.7777
pearson_max 0.8407
spearman_max 0.847

Semantic Similarity (dataset: sts-dev-256)

Metric Value
pearson_cosine 0.8327
spearman_cosine 0.8436
pearson_manhattan 0.8351
spearman_manhattan 0.8332
pearson_euclidean 0.836
spearman_euclidean 0.8338
pearson_dot 0.75
spearman_dot 0.7453
pearson_max 0.836
spearman_max 0.8436

Semantic Similarity (dataset: sts-dev-128)

Metric Value
pearson_cosine 0.8243
spearman_cosine 0.84
pearson_manhattan 0.8282
spearman_manhattan 0.827
pearson_euclidean 0.8282
spearman_euclidean 0.8267
pearson_dot 0.711
spearman_dot 0.705
pearson_max 0.8282
spearman_max 0.84

Semantic Similarity (dataset: sts-dev-64)

Metric Value
pearson_cosine 0.8116
spearman_cosine 0.8317
pearson_manhattan 0.8113
spearman_manhattan 0.8105
pearson_euclidean 0.8114
spearman_euclidean 0.8111
pearson_dot 0.6412
spearman_dot 0.6347
pearson_max 0.8116
spearman_max 0.8317

Semantic Similarity (dataset: sts-dev-32)

Metric Value
pearson_cosine 0.7834
spearman_cosine 0.8141
pearson_manhattan 0.7832
spearman_manhattan 0.786
pearson_euclidean 0.7869
spearman_euclidean 0.7894
pearson_dot 0.5534
spearman_dot 0.5449
pearson_max 0.7869
spearman_max 0.8141

Semantic Similarity (dataset: sts-dev-16)

Metric Value
pearson_cosine 0.7259
spearman_cosine 0.7751
pearson_manhattan 0.7421
spearman_manhattan 0.7553
pearson_euclidean 0.7483
spearman_euclidean 0.7599
pearson_dot 0.4387
spearman_dot 0.4259
pearson_max 0.7483
spearman_max 0.7751

Training Details

Training Dataset

sentence-transformers/all-nli

  • Dataset: sentence-transformers/all-nli at d482672
  • Size: 557,850 training samples
  • Columns: anchor, positive, and negative
  • Approximate statistics based on the first 1000 samples:
    |         | anchor | positive | negative |
    |---------|--------|----------|----------|
    | type    | string | string   | string   |
    | details | min: 7 tokens; mean: 10.38 tokens; max: 45 tokens | min: 6 tokens; mean: 12.8 tokens; max: 39 tokens | min: 6 tokens; mean: 13.4 tokens; max: 50 tokens |
  • Samples:
    | anchor | positive | negative |
    |--------|----------|----------|
    | A person on a horse jumps over a broken down airplane. | A person is outdoors, on a horse. | A person is at a diner, ordering an omelette. |
    | Children smiling and waving at camera | There are children present | The kids are frowning |
    | A boy is jumping on skateboard in the middle of a red bridge. | The boy does a skateboarding trick. | The boy skates down the sidewalk. |
  • Loss: MatryoshkaLoss with these parameters:
    {
        "loss": "MultipleNegativesRankingLoss",
        "matryoshka_dims": [
            768,
            512,
            256,
            128,
            64,
            32,
            16
        ],
        "matryoshka_weights": [
            1,
            1,
            1,
            1,
            1,
            1,
            1
        ],
        "n_dims_per_step": -1
    }
    

Evaluation Dataset

sentence-transformers/all-nli

  • Dataset: sentence-transformers/all-nli at d482672
  • Size: 6,584 evaluation samples
  • Columns: anchor, positive, and negative
  • Approximate statistics based on the first 1000 samples:
    |         | anchor | positive | negative |
    |---------|--------|----------|----------|
    | type    | string | string   | string   |
    | details | min: 6 tokens; mean: 18.02 tokens; max: 66 tokens | min: 5 tokens; mean: 9.81 tokens; max: 29 tokens | min: 5 tokens; mean: 10.37 tokens; max: 29 tokens |
  • Samples:
    | anchor | positive | negative |
    |--------|----------|----------|
    | Two women are embracing while holding to go packages. | Two woman are holding packages. | The men are fighting outside a deli. |
    | Two young children in blue jerseys, one with the number 9 and one with the number 2 are standing on wooden steps in a bathroom and washing their hands in a sink. | Two kids in numbered jerseys wash their hands. | Two kids in jackets walk to school. |
    | A man selling donuts to a customer during a world exhibition event held in the city of Angeles | A man selling donuts to a customer. | A woman drinks her coffee in a small cafe. |
  • Loss: MatryoshkaLoss with these parameters:
    {
        "loss": "MultipleNegativesRankingLoss",
        "matryoshka_dims": [
            768,
            512,
            256,
            128,
            64,
            32,
            16
        ],
        "matryoshka_weights": [
            1,
            1,
            1,
            1,
            1,
            1,
            1
        ],
        "n_dims_per_step": -1
    }
    

Training Hyperparameters

Non-Default Hyperparameters

  • eval_strategy: steps
  • per_device_train_batch_size: 128
  • per_device_eval_batch_size: 128
  • num_train_epochs: 1
  • warmup_ratio: 0.1
  • fp16: True
  • batch_sampler: no_duplicates

All Hyperparameters

Click to expand
  • overwrite_output_dir: False
  • do_predict: False
  • eval_strategy: steps
  • prediction_loss_only: True
  • per_device_train_batch_size: 128
  • per_device_eval_batch_size: 128
  • per_gpu_train_batch_size: None
  • per_gpu_eval_batch_size: None
  • gradient_accumulation_steps: 1
  • eval_accumulation_steps: None
  • learning_rate: 5e-05
  • weight_decay: 0.0
  • adam_beta1: 0.9
  • adam_beta2: 0.999
  • adam_epsilon: 1e-08
  • max_grad_norm: 1.0
  • num_train_epochs: 1
  • max_steps: -1
  • lr_scheduler_type: linear
  • lr_scheduler_kwargs: {}
  • warmup_ratio: 0.1
  • warmup_steps: 0
  • log_level: passive
  • log_level_replica: warning
  • log_on_each_node: True
  • logging_nan_inf_filter: True
  • save_safetensors: True
  • save_on_each_node: False
  • save_only_model: False
  • restore_callback_states_from_checkpoint: False
  • no_cuda: False
  • use_cpu: False
  • use_mps_device: False
  • seed: 42
  • data_seed: None
  • jit_mode_eval: False
  • use_ipex: False
  • bf16: False
  • fp16: True
  • fp16_opt_level: O1
  • half_precision_backend: auto
  • bf16_full_eval: False
  • fp16_full_eval: False
  • tf32: None
  • local_rank: 0
  • ddp_backend: None
  • tpu_num_cores: None
  • tpu_metrics_debug: False
  • debug: []
  • dataloader_drop_last: False
  • dataloader_num_workers: 0
  • dataloader_prefetch_factor: None
  • past_index: -1
  • disable_tqdm: False
  • remove_unused_columns: True
  • label_names: None
  • load_best_model_at_end: False
  • ignore_data_skip: False
  • fsdp: []
  • fsdp_min_num_params: 0
  • fsdp_config: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
  • fsdp_transformer_layer_cls_to_wrap: None
  • accelerator_config: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
  • deepspeed: None
  • label_smoothing_factor: 0.0
  • optim: adamw_torch
  • optim_args: None
  • adafactor: False
  • group_by_length: False
  • length_column_name: length
  • ddp_find_unused_parameters: None
  • ddp_bucket_cap_mb: None
  • ddp_broadcast_buffers: False
  • dataloader_pin_memory: True
  • dataloader_persistent_workers: False
  • skip_memory_metrics: True
  • use_legacy_prediction_loop: False
  • push_to_hub: False
  • resume_from_checkpoint: None
  • hub_model_id: None
  • hub_strategy: every_save
  • hub_private_repo: False
  • hub_always_push: False
  • gradient_checkpointing: False
  • gradient_checkpointing_kwargs: None
  • include_inputs_for_metrics: False
  • eval_do_concat_batches: True
  • fp16_backend: auto
  • push_to_hub_model_id: None
  • push_to_hub_organization: None
  • mp_parameters:
  • auto_find_batch_size: False
  • full_determinism: False
  • torchdynamo: None
  • ray_scope: last
  • ddp_timeout: 1800
  • torch_compile: False
  • torch_compile_backend: None
  • torch_compile_mode: None
  • dispatch_batches: None
  • split_batches: None
  • include_tokens_per_second: False
  • include_num_input_tokens_seen: False
  • neftune_noise_alpha: None
  • optim_target_modules: None
  • batch_eval_metrics: False
  • batch_sampler: no_duplicates
  • multi_dataset_batch_sampler: proportional

Training Logs

Epoch Step Training Loss loss sts-dev-128_spearman_cosine sts-dev-16_spearman_cosine sts-dev-256_spearman_cosine sts-dev-32_spearman_cosine sts-dev-512_spearman_cosine sts-dev-64_spearman_cosine sts-dev-768_spearman_cosine
0.0229 100 29.0917 14.1514 0.7659 0.7440 0.7915 0.7749 0.7999 0.7909 0.7918
0.0459 200 15.6915 11.7031 0.7718 0.7487 0.7940 0.7776 0.8005 0.7931 0.7871
0.0688 300 14.3136 11.1970 0.7744 0.7389 0.7952 0.7728 0.8036 0.7925 0.7938
0.0918 400 12.8122 10.4416 0.7899 0.7536 0.8040 0.7764 0.8065 0.7953 0.8018
0.1147 500 12.1747 10.5491 0.7871 0.7513 0.8035 0.7785 0.8094 0.7978 0.8008
0.1376 600 11.6784 9.6618 0.7785 0.7465 0.7956 0.7762 0.8027 0.7953 0.7935
0.1606 700 11.9351 9.3279 0.7907 0.7403 0.7995 0.7706 0.8036 0.7894 0.7982
0.1835 800 10.4998 9.1538 0.7911 0.7516 0.8043 0.7820 0.8078 0.8025 0.8010
0.2065 900 10.6069 9.0531 0.7874 0.7371 0.7974 0.7704 0.8042 0.7910 0.8010
0.2294 1000 10.0316 8.9759 0.7842 0.7356 0.7981 0.7721 0.8024 0.7905 0.7955
0.2524 1100 10.199 8.5398 0.7863 0.7322 0.7961 0.7691 0.8002 0.7910 0.7936
0.2753 1200 9.9393 8.1356 0.7860 0.7304 0.7990 0.7682 0.8025 0.7908 0.7954
0.2982 1300 9.8711 7.9177 0.7932 0.7319 0.8028 0.7708 0.8067 0.7924 0.8013
0.3212 1400 9.3594 7.8870 0.7892 0.7296 0.8032 0.7710 0.8070 0.7961 0.8030
0.3441 1500 9.4534 7.5756 0.8003 0.7518 0.8078 0.7857 0.8112 0.8063 0.8068
0.3671 1600 8.9061 7.8164 0.7781 0.7390 0.7942 0.7761 0.8002 0.7968 0.7941
0.3900 1700 8.5164 7.4869 0.7934 0.7530 0.8063 0.7864 0.8120 0.8055 0.8080
0.4129 1800 8.9262 7.7155 0.7846 0.7301 0.7991 0.7728 0.8065 0.7945 0.8003
0.4359 1900 8.3242 7.3068 0.7850 0.7273 0.7976 0.7710 0.8020 0.7904 0.7976
0.4588 2000 8.5374 7.1026 0.7845 0.7272 0.7993 0.7717 0.8042 0.7925 0.7963
0.4818 2100 8.2304 7.1601 0.7879 0.7354 0.8015 0.7719 0.8059 0.7944 0.8029
0.5047 2200 8.1347 7.8267 0.7715 0.7230 0.7889 0.7626 0.7956 0.7849 0.7930
0.5276 2300 8.3057 8.0057 0.7622 0.7148 0.7814 0.7572 0.7881 0.7769 0.7836
0.5506 2400 8.215 7.6922 0.7772 0.7210 0.7929 0.7637 0.7995 0.7858 0.7956
0.5735 2500 8.4343 7.2104 0.7869 0.7307 0.8017 0.7707 0.8071 0.7929 0.8048
0.5965 2600 8.159 6.9977 0.7893 0.7297 0.8031 0.7733 0.8071 0.7928 0.8045
0.6194 2700 8.2048 6.9465 0.7859 0.7280 0.8006 0.7725 0.8052 0.7926 0.8004
0.6423 2800 8.187 7.3185 0.7790 0.7266 0.7960 0.7690 0.8018 0.7911 0.7964
0.6653 2900 8.4768 7.5535 0.7756 0.7192 0.7913 0.7618 0.7958 0.7827 0.7907
0.6882 3000 8.4153 7.3732 0.7825 0.7276 0.7988 0.7692 0.8029 0.7899 0.7988
0.7112 3100 7.9226 6.8469 0.7912 0.7311 0.8055 0.7765 0.8101 0.7977 0.8058
0.7341 3200 8.1155 6.7604 0.7880 0.7298 0.8024 0.7747 0.8071 0.7959 0.8025
0.7571 3300 6.8463 5.4863 0.8357 0.7638 0.8407 0.8085 0.8431 0.8283 0.8440
0.7800 3400 5.2008 5.2472 0.8362 0.7655 0.8401 0.8105 0.8429 0.8279 0.8445
0.8029 3500 4.5415 5.1649 0.8385 0.7700 0.8421 0.8138 0.8454 0.8304 0.8465
0.8259 3600 4.4474 5.0933 0.8371 0.7693 0.8410 0.8112 0.8443 0.8288 0.8451
0.8488 3700 4.12 5.0555 0.8396 0.7718 0.8439 0.8140 0.8463 0.8311 0.8471
0.8718 3800 3.9104 5.0147 0.8386 0.7749 0.8432 0.8129 0.8459 0.8304 0.8471
0.8947 3900 3.9054 4.9966 0.8379 0.7733 0.8424 0.8125 0.8456 0.8296 0.8464
0.9176 4000 3.757 4.9892 0.8407 0.7763 0.8447 0.8156 0.8478 0.8326 0.8488
0.9406 4100 3.7729 4.9859 0.8400 0.7751 0.8436 0.8141 0.8470 0.8317 0.8478

Framework Versions

  • Python: 3.10.12
  • Sentence Transformers: 3.0.0
  • Transformers: 4.41.1
  • PyTorch: 2.3.0+cu121
  • Accelerate: 0.30.1
  • Datasets: 2.19.2
  • Tokenizers: 0.19.1

Citation

BibTeX

Sentence Transformers

@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}

MatryoshkaLoss

@misc{kusupati2024matryoshka,
    title={Matryoshka Representation Learning}, 
    author={Aditya Kusupati and Gantavya Bhatt and Aniket Rege and Matthew Wallingford and Aditya Sinha and Vivek Ramanujan and William Howard-Snyder and Kaifeng Chen and Sham Kakade and Prateek Jain and Ali Farhadi},
    year={2024},
    eprint={2205.13147},
    archivePrefix={arXiv},
    primaryClass={cs.LG}
}

MultipleNegativesRankingLoss

@misc{henderson2017efficient,
    title={Efficient Natural Language Response Suggestion for Smart Reply}, 
    author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
    year={2017},
    eprint={1705.00652},
    archivePrefix={arXiv},
    primaryClass={cs.CL}
}