dipteshkanojia's picture
Add new SentenceTransformer model.
ea94001 verified
|
raw
history blame
36.1 kB
metadata
base_model: FacebookAI/xlm-roberta-large
datasets:
  - sentence-transformers/stsb
language:
  - en
library_name: sentence-transformers
metrics:
  - pearson_cosine
  - spearman_cosine
  - pearson_manhattan
  - spearman_manhattan
  - pearson_euclidean
  - spearman_euclidean
  - pearson_dot
  - spearman_dot
  - pearson_max
  - spearman_max
pipeline_tag: sentence-similarity
tags:
  - sentence-transformers
  - sentence-similarity
  - feature-extraction
  - generated_from_trainer
  - dataset_size:5749
  - loss:MatryoshkaLoss
  - loss:CoSENTLoss
widget:
  - source_sentence: A chef is preparing some food.
    sentences:
      - Five birds stand on the snow.
      - A chef prepared a meal.
      - There is no 'still' that is not relative to some other object.
  - source_sentence: A woman is adding oil on fishes.
    sentences:
      - Large cruise ship floating on the water.
      - >-
        It refers to the maximum f-stop (which is defined as the ratio of focal
        length to effective aperture diameter).
      - The woman is cutting potatoes.
  - source_sentence: The player shoots the winning points.
    sentences:
      - Minimum wage laws hurt the least skilled, least productive the most.
      - The basketball player is about to score points for his team.
      - Three televisions, on on the floor, the other two on a box.
  - source_sentence: >-
      Stars form in star-formation regions, which itself develop from molecular
      clouds.
    sentences:
      - >-
        Although I believe Searle is mistaken, I don't think you have found the
        problem.
      - >-
        It may be possible for a solar system like ours to exist outside of a
        galaxy.
      - >-
        A blond-haired child performing on the trumpet in front of a house while
        his younger brother watches.
  - source_sentence: >-
      While Queen may refer to both Queen regent (sovereign) or Queen consort,
      the King has always been the sovereign.
    sentences:
      - At first, I thought this is a bit of a tricky question.
      - A man plays the guitar.
      - >-
        There is a very good reason not to refer to the Queen's spouse as "King"
        - because they aren't the King.
model-index:
  - name: SentenceTransformer based on FacebookAI/xlm-roberta-large
    results:
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts dev 768
          type: sts-dev-768
        metrics:
          - type: pearson_cosine
            value: .nan
            name: Pearson Cosine
          - type: spearman_cosine
            value: .nan
            name: Spearman Cosine
          - type: pearson_manhattan
            value: -0.038123417655342585
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: -0.030855987437062582
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: -0.0742298464837288
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: -0.016119009479880368
            name: Spearman Euclidean
          - type: pearson_dot
            value: -0.053239384921975864
            name: Pearson Dot
          - type: spearman_dot
            value: -0.03860610142560432
            name: Spearman Dot
          - type: pearson_max
            value: .nan
            name: Pearson Max
          - type: spearman_max
            value: .nan
            name: Spearman Max
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts dev 512
          type: sts-dev-512
        metrics:
          - type: pearson_cosine
            value: .nan
            name: Pearson Cosine
          - type: spearman_cosine
            value: .nan
            name: Spearman Cosine
          - type: pearson_manhattan
            value: -0.040766255073950965
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: -0.028106086435826655
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: -0.076050553000047
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: -0.014573222092867504
            name: Spearman Euclidean
          - type: pearson_dot
            value: -0.06110575151055097
            name: Pearson Dot
          - type: spearman_dot
            value: -0.04818501881621991
            name: Spearman Dot
          - type: pearson_max
            value: .nan
            name: Pearson Max
          - type: spearman_max
            value: .nan
            name: Spearman Max
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts dev 256
          type: sts-dev-256
        metrics:
          - type: pearson_cosine
            value: .nan
            name: Pearson Cosine
          - type: spearman_cosine
            value: .nan
            name: Spearman Cosine
          - type: pearson_manhattan
            value: -0.044210895435818166
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: -0.03253407490039325
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: -0.0529355152933442
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: -0.0338167301189937
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.0887169006335579
            name: Pearson Dot
          - type: spearman_dot
            value: 0.06886250477710897
            name: Spearman Dot
          - type: pearson_max
            value: .nan
            name: Pearson Max
          - type: spearman_max
            value: .nan
            name: Spearman Max
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts dev 128
          type: sts-dev-128
        metrics:
          - type: pearson_cosine
            value: .nan
            name: Pearson Cosine
          - type: spearman_cosine
            value: .nan
            name: Spearman Cosine
          - type: pearson_manhattan
            value: -0.05321620243744594
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: -0.026531903856252148
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: -0.06064347235216407
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: -0.0270947004666721
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.07199088437564892
            name: Pearson Dot
          - type: spearman_dot
            value: 0.05552894816506978
            name: Spearman Dot
          - type: pearson_max
            value: .nan
            name: Pearson Max
          - type: spearman_max
            value: .nan
            name: Spearman Max
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts dev 64
          type: sts-dev-64
        metrics:
          - type: pearson_cosine
            value: .nan
            name: Pearson Cosine
          - type: spearman_cosine
            value: .nan
            name: Spearman Cosine
          - type: pearson_manhattan
            value: -0.046922199302745354
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: -0.027530540631984835
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: -0.04930495975336398
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: -0.02287953412697089
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.05851507366090909
            name: Pearson Dot
          - type: spearman_dot
            value: 0.044913605667507114
            name: Spearman Dot
          - type: pearson_max
            value: .nan
            name: Pearson Max
          - type: spearman_max
            value: .nan
            name: Spearman Max
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts test 768
          type: sts-test-768
        metrics:
          - type: pearson_cosine
            value: .nan
            name: Pearson Cosine
          - type: spearman_cosine
            value: .nan
            name: Spearman Cosine
          - type: pearson_manhattan
            value: 0.0005203243269627229
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.007914891421418472
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: -0.008479099839233263
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.0002449834909380018
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.015253799995136243
            name: Pearson Dot
          - type: spearman_dot
            value: -0.002544651953260673
            name: Spearman Dot
          - type: pearson_max
            value: .nan
            name: Pearson Max
          - type: spearman_max
            value: .nan
            name: Spearman Max
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts test 512
          type: sts-test-512
        metrics:
          - type: pearson_cosine
            value: .nan
            name: Pearson Cosine
          - type: spearman_cosine
            value: .nan
            name: Spearman Cosine
          - type: pearson_manhattan
            value: -0.000985791968546407
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.009210170664121263
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: -0.010968197464829785
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.0006366521814203481
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.030903954394043587
            name: Pearson Dot
          - type: spearman_dot
            value: 0.0214169911509498
            name: Spearman Dot
          - type: pearson_max
            value: .nan
            name: Pearson Max
          - type: spearman_max
            value: .nan
            name: Spearman Max
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts test 256
          type: sts-test-256
        metrics:
          - type: pearson_cosine
            value: .nan
            name: Pearson Cosine
          - type: spearman_cosine
            value: .nan
            name: Spearman Cosine
          - type: pearson_manhattan
            value: -0.008347426706014351
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.008133437696668973
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: -0.01284332508912676
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.006207692348050752
            name: Spearman Euclidean
          - type: pearson_dot
            value: -0.10411841010392278
            name: Pearson Dot
          - type: spearman_dot
            value: -0.10441611480429308
            name: Spearman Dot
          - type: pearson_max
            value: .nan
            name: Pearson Max
          - type: spearman_max
            value: .nan
            name: Spearman Max
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts test 128
          type: sts-test-128
        metrics:
          - type: pearson_cosine
            value: .nan
            name: Pearson Cosine
          - type: spearman_cosine
            value: .nan
            name: Spearman Cosine
          - type: pearson_manhattan
            value: -0.007293947286825709
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.012461130559236479
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: -0.013785631605643068
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.008355374230034162
            name: Spearman Euclidean
          - type: pearson_dot
            value: -0.07790382803601184
            name: Pearson Dot
          - type: spearman_dot
            value: -0.08277939304968172
            name: Spearman Dot
          - type: pearson_max
            value: .nan
            name: Pearson Max
          - type: spearman_max
            value: .nan
            name: Spearman Max
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts test 64
          type: sts-test-64
        metrics:
          - type: pearson_cosine
            value: .nan
            name: Pearson Cosine
          - type: spearman_cosine
            value: .nan
            name: Spearman Cosine
          - type: pearson_manhattan
            value: -0.012731573411777072
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.003453137865023755
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: -0.013710254571378023
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.0028389826642085166
            name: Spearman Euclidean
          - type: pearson_dot
            value: -0.04900795414419644
            name: Pearson Dot
          - type: spearman_dot
            value: -0.05520642056907742
            name: Spearman Dot
          - type: pearson_max
            value: .nan
            name: Pearson Max
          - type: spearman_max
            value: .nan
            name: Spearman Max

SentenceTransformer based on FacebookAI/xlm-roberta-large

This is a sentence-transformers model finetuned from FacebookAI/xlm-roberta-large on the sentence-transformers/stsb dataset. It maps sentences & paragraphs to a 1024-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.

Model Details

Model Description

Model Sources

Full Model Architecture

SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: XLMRobertaModel 
  (1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
)

Usage

Direct Usage (Sentence Transformers)

First install the Sentence Transformers library:

pip install -U sentence-transformers

Then you can load this model and run inference.

from sentence_transformers import SentenceTransformer

# Download from the 🤗 Hub
model = SentenceTransformer("dipteshkanojia/xlm-roberta-large-sts-matryoshka")
# Run inference
sentences = [
    'While Queen may refer to both Queen regent (sovereign) or Queen consort, the King has always been the sovereign.',
    'There is a very good reason not to refer to the Queen\'s spouse as "King" - because they aren\'t the King.',
    'A man plays the guitar.',
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 1024]

# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]

Evaluation

Metrics

Semantic Similarity

  • Dataset: sts-dev-768

| Metric             | Value   |
|:-------------------|:--------|
| pearson_cosine     | nan     |
| spearman_cosine    | nan     |
| pearson_manhattan  | -0.0381 |
| spearman_manhattan | -0.0309 |
| pearson_euclidean  | -0.0742 |
| spearman_euclidean | -0.0161 |
| pearson_dot        | -0.0532 |
| spearman_dot       | -0.0386 |
| pearson_max        | nan     |
| spearman_max       | nan     |

Semantic Similarity

  • Dataset: sts-dev-512

| Metric             | Value   |
|:-------------------|:--------|
| pearson_cosine     | nan     |
| spearman_cosine    | nan     |
| pearson_manhattan  | -0.0408 |
| spearman_manhattan | -0.0281 |
| pearson_euclidean  | -0.0761 |
| spearman_euclidean | -0.0146 |
| pearson_dot        | -0.0611 |
| spearman_dot       | -0.0482 |
| pearson_max        | nan     |
| spearman_max       | nan     |

Semantic Similarity

  • Dataset: sts-dev-256

| Metric             | Value   |
|:-------------------|:--------|
| pearson_cosine     | nan     |
| spearman_cosine    | nan     |
| pearson_manhattan  | -0.0442 |
| spearman_manhattan | -0.0325 |
| pearson_euclidean  | -0.0529 |
| spearman_euclidean | -0.0338 |
| pearson_dot        | 0.0887  |
| spearman_dot       | 0.0689  |
| pearson_max        | nan     |
| spearman_max       | nan     |

Semantic Similarity

  • Dataset: sts-dev-128

| Metric             | Value   |
|:-------------------|:--------|
| pearson_cosine     | nan     |
| spearman_cosine    | nan     |
| pearson_manhattan  | -0.0532 |
| spearman_manhattan | -0.0265 |
| pearson_euclidean  | -0.0606 |
| spearman_euclidean | -0.0271 |
| pearson_dot        | 0.072   |
| spearman_dot       | 0.0555  |
| pearson_max        | nan     |
| spearman_max       | nan     |

Semantic Similarity

  • Dataset: sts-dev-64

| Metric             | Value   |
|:-------------------|:--------|
| pearson_cosine     | nan     |
| spearman_cosine    | nan     |
| pearson_manhattan  | -0.0469 |
| spearman_manhattan | -0.0275 |
| pearson_euclidean  | -0.0493 |
| spearman_euclidean | -0.0229 |
| pearson_dot        | 0.0585  |
| spearman_dot       | 0.0449  |
| pearson_max        | nan     |
| spearman_max       | nan     |

Semantic Similarity

  • Dataset: sts-test-768

| Metric             | Value   |
|:-------------------|:--------|
| pearson_cosine     | nan     |
| spearman_cosine    | nan     |
| pearson_manhattan  | 0.0005  |
| spearman_manhattan | 0.0079  |
| pearson_euclidean  | -0.0085 |
| spearman_euclidean | 0.0002  |
| pearson_dot        | 0.0153  |
| spearman_dot       | -0.0025 |
| pearson_max        | nan     |
| spearman_max       | nan     |

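Semantic Similarity

  • Dataset: sts-test-512

| Metric             | Value   |
|:-------------------|:--------|
| pearson_cosine     | nan     |
| spearman_cosine    | nan     |
| pearson_manhattan  | -0.001  |
| spearman_manhattan | 0.0092  |
| pearson_euclidean  | -0.011  |
| spearman_euclidean | 0.0006  |
| pearson_dot        | 0.0309  |
| spearman_dot       | 0.0214  |
| pearson_max        | nan     |
| spearman_max       | nan     |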

Semantic Similarity

  • Dataset: sts-test-256

| Metric             | Value   |
|:-------------------|:--------|
| pearson_cosine     | nan     |
| spearman_cosine    | nan     |
| pearson_manhattan  | -0.0083 |
| spearman_manhattan | 0.0081  |
| pearson_euclidean  | -0.0128 |
| spearman_euclidean | 0.0062  |
| pearson_dot        | -0.1041 |
| spearman_dot       | -0.1044 |
| pearson_max        | nan     |
| spearman_max       | nan     |

Semantic Similarity

  • Dataset: sts-test-128

| Metric             | Value   |
|:-------------------|:--------|
| pearson_cosine     | nan     |
| spearman_cosine    | nan     |
| pearson_manhattan  | -0.0073 |
| spearman_manhattan | 0.0125  |
| pearson_euclidean  | -0.0138 |
| spearman_euclidean | 0.0084  |
| pearson_dot        | -0.0779 |
| spearman_dot       | -0.0828 |
| pearson_max        | nan     |
| spearman_max       | nan     |

Semantic Similarity

  • Dataset: sts-test-64

| Metric             | Value   |
|:-------------------|:--------|
| pearson_cosine     | nan     |
| spearman_cosine    | nan     |
| pearson_manhattan  | -0.0127 |
| spearman_manhattan | 0.0035  |
| pearson_euclidean  | -0.0137 |
| spearman_euclidean | 0.0028  |
| pearson_dot        | -0.049  |
| spearman_dot       | -0.0552 |
| pearson_max        | nan     |
| spearman_max       | nan     |

Training Details

Training Dataset

sentence-transformers/stsb

  • Dataset: sentence-transformers/stsb at ab7a5ac
  • Size: 5,749 training samples
  • Columns: sentence1, sentence2, and score
  • Approximate statistics based on the first 1000 samples:
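    |         | sentence1                                         | sentence2                                         | score                          |
    |:--------|:--------------------------------------------------|:--------------------------------------------------|:-------------------------------|
    | type    | string                                            | string                                            | float                          |
    | details | min: 6 tokens, mean: 11.08 tokens, max: 30 tokens | min: 7 tokens, mean: 11.05 tokens, max: 30 tokens | min: 0.0, mean: 0.54, max: 1.0 |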
  • Samples:
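    | sentence1                                     | sentence2                                                | score |
    |:----------------------------------------------|:---------------------------------------------------------|:------|
    | A plane is taking off.                        | An air plane is taking off.                              | 1.0   |
    | A man is playing a large flute.               | A man is playing a flute.                                | 0.76  |
    | A man is spreading shreded cheese on a pizza. | A man is spreading shredded cheese on an uncooked pizza. | 0.76  |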
  • Loss: MatryoshkaLoss with these parameters:
    {
        "loss": "CoSENTLoss",
        "matryoshka_dims": [
            768,
            512,
            256,
            128,
            64
        ],
        "matryoshka_weights": [
            1,
            1,
            1,
            1,
            1
        ],
        "n_dims_per_step": -1
    }
    

Evaluation Dataset

sentence-transformers/stsb

  • Dataset: sentence-transformers/stsb at ab7a5ac
  • Size: 1,500 evaluation samples
  • Columns: sentence1, sentence2, and score
  • Approximate statistics based on the first 1000 samples:
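    |         | sentence1                                         | sentence2                                        | score                          |
    |:--------|:--------------------------------------------------|:-------------------------------------------------|:-------------------------------|
    | type    | string                                            | string                                           | float                          |
    | details | min: 5 tokens, mean: 16.55 tokens, max: 47 tokens | min: 7 tokens, mean: 16.5 tokens, max: 47 tokens | min: 0.0, mean: 0.47, max: 1.0 |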
  • Samples:
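    | sentence1                            | sentence2                                 | score |
    |:-------------------------------------|:------------------------------------------|:------|
    | A man with a hard hat is dancing.    | A man wearing a hard hat is dancing.      | 1.0   |
    | A young child is riding a horse.     | A child is riding a horse.                | 0.95  |
    | A man is feeding a mouse to a snake. | The man is feeding a mouse to the snake.  | 1.0   |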
  • Loss: MatryoshkaLoss with these parameters:
    {
        "loss": "CoSENTLoss",
        "matryoshka_dims": [
            768,
            512,
            256,
            128,
            64
        ],
        "matryoshka_weights": [
            1,
            1,
            1,
            1,
            1
        ],
        "n_dims_per_step": -1
    }
    

Training Hyperparameters

Non-Default Hyperparameters

  • eval_strategy: steps
  • per_device_train_batch_size: 6
  • per_device_eval_batch_size: 6
  • num_train_epochs: 8
  • warmup_ratio: 0.1
  • fp16: True

All Hyperparameters

Click to expand
  • overwrite_output_dir: False
  • do_predict: False
  • eval_strategy: steps
  • prediction_loss_only: True
  • per_device_train_batch_size: 6
  • per_device_eval_batch_size: 6
  • per_gpu_train_batch_size: None
  • per_gpu_eval_batch_size: None
  • gradient_accumulation_steps: 1
  • eval_accumulation_steps: None
  • torch_empty_cache_steps: None
  • learning_rate: 5e-05
  • weight_decay: 0.0
  • adam_beta1: 0.9
  • adam_beta2: 0.999
  • adam_epsilon: 1e-08
  • max_grad_norm: 1.0
  • num_train_epochs: 8
  • max_steps: -1
  • lr_scheduler_type: linear
  • lr_scheduler_kwargs: {}
  • warmup_ratio: 0.1
  • warmup_steps: 0
  • log_level: passive
  • log_level_replica: warning
  • log_on_each_node: True
  • logging_nan_inf_filter: True
  • save_safetensors: True
  • save_on_each_node: False
  • save_only_model: False
  • restore_callback_states_from_checkpoint: False
  • no_cuda: False
  • use_cpu: False
  • use_mps_device: False
  • seed: 42
  • data_seed: None
  • jit_mode_eval: False
  • use_ipex: False
  • bf16: False
  • fp16: True
  • fp16_opt_level: O1
  • half_precision_backend: auto
  • bf16_full_eval: False
  • fp16_full_eval: False
  • tf32: None
  • local_rank: 0
  • ddp_backend: None
  • tpu_num_cores: None
  • tpu_metrics_debug: False
  • debug: []
  • dataloader_drop_last: False
  • dataloader_num_workers: 0
  • dataloader_prefetch_factor: None
  • past_index: -1
  • disable_tqdm: False
  • remove_unused_columns: True
  • label_names: None
  • load_best_model_at_end: False
  • ignore_data_skip: False
  • fsdp: []
  • fsdp_min_num_params: 0
  • fsdp_config: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
  • fsdp_transformer_layer_cls_to_wrap: None
  • accelerator_config: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
  • deepspeed: None
  • label_smoothing_factor: 0.0
  • optim: adamw_torch
  • optim_args: None
  • adafactor: False
  • group_by_length: False
  • length_column_name: length
  • ddp_find_unused_parameters: None
  • ddp_bucket_cap_mb: None
  • ddp_broadcast_buffers: False
  • dataloader_pin_memory: True
  • dataloader_persistent_workers: False
  • skip_memory_metrics: True
  • use_legacy_prediction_loop: False
  • push_to_hub: False
  • resume_from_checkpoint: None
  • hub_model_id: None
  • hub_strategy: every_save
  • hub_private_repo: False
  • hub_always_push: False
  • gradient_checkpointing: False
  • gradient_checkpointing_kwargs: None
  • include_inputs_for_metrics: False
  • eval_do_concat_batches: True
  • fp16_backend: auto
  • push_to_hub_model_id: None
  • push_to_hub_organization: None
  • mp_parameters:
  • auto_find_batch_size: False
  • full_determinism: False
  • torchdynamo: None
  • ray_scope: last
  • ddp_timeout: 1800
  • torch_compile: False
  • torch_compile_backend: None
  • torch_compile_mode: None
  • dispatch_batches: None
  • split_batches: None
  • include_tokens_per_second: False
  • include_num_input_tokens_seen: False
  • neftune_noise_alpha: None
  • optim_target_modules: None
  • batch_eval_metrics: False
  • eval_on_start: False
  • eval_use_gather_object: False
  • batch_sampler: batch_sampler
  • multi_dataset_batch_sampler: proportional

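Training Logs

Note: in the log below the validation loss is 20.8565 at every evaluation step, and every cosine-based correlation reported in this card is nan. This pattern suggests the training run may have collapsed or become numerically unstable (possibly related to the fp16: True setting — not confirmed), so verify the checkpoint's embeddings before relying on it for similarity tasks.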

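| Epoch  | Step | Training Loss | loss    | sts-dev-128_spearman_cosine | sts-dev-256_spearman_cosine | sts-dev-512_spearman_cosine | sts-dev-64_spearman_cosine | sts-dev-768_spearman_cosine | sts-test-128_spearman_cosine | sts-test-256_spearman_cosine | sts-test-512_spearman_cosine | sts-test-64_spearman_cosine | sts-test-768_spearman_cosine |
|:-------|:-----|:--------------|:--------|:----------------------------|:----------------------------|:----------------------------|:---------------------------|:----------------------------|:-----------------------------|:-----------------------------|:-----------------------------|:----------------------------|:-----------------------------|
| 1.0417 | 500  | 21.1353       | 20.8565 | nan                         | nan                         | nan                         | nan                        | nan                         | -                            | -                            | -                            | -                           | -                            |
| 2.0833 | 1000 | 20.7941       | 20.8565 | nan                         | nan                         | nan                         | nan                        | nan                         | -                            | -                            | -                            | -                           | -                            |
| 3.125  | 1500 | 20.7823       | 20.8565 | nan                         | nan                         | nan                         | nan                        | nan                         | -                            | -                            | -                            | -                           | -                            |
| 4.1667 | 2000 | 20.781        | 20.8565 | nan                         | nan                         | nan                         | nan                        | nan                         | -                            | -                            | -                            | -                           | -                            |
| 5.2083 | 2500 | 20.7707       | 20.8565 | nan                         | nan                         | nan                         | nan                        | nan                         | -                            | -                            | -                            | -                           | -                            |
| 6.25   | 3000 | 20.7661       | 20.8565 | nan                         | nan                         | nan                         | nan                        | nan                         | -                            | -                            | -                            | -                           | -                            |
| 7.2917 | 3500 | 20.7719       | 20.8565 | nan                         | nan                         | nan                         | nan                        | nan                         | -                            | -                            | -                            | -                           | -                            |
| 8.0    | 3840 | -             | -       | -                           | -                           | -                           | -                          | -                           | nan                          | nan                          | nan                          | nan                         | nan                          |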

Framework Versions

  • Python: 3.9.19
  • Sentence Transformers: 3.1.0.dev0
  • Transformers: 4.44.2
  • PyTorch: 2.4.1+cu121
  • Accelerate: 0.34.2
  • Datasets: 2.21.0
  • Tokenizers: 0.19.1

Citation

BibTeX

Sentence Transformers

@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}

MatryoshkaLoss

@misc{kusupati2024matryoshka,
    title={Matryoshka Representation Learning},
    author={Aditya Kusupati and Gantavya Bhatt and Aniket Rege and Matthew Wallingford and Aditya Sinha and Vivek Ramanujan and William Howard-Snyder and Kaifeng Chen and Sham Kakade and Prateek Jain and Ali Farhadi},
    year={2024},
    eprint={2205.13147},
    archivePrefix={arXiv},
    primaryClass={cs.LG}
}

CoSENTLoss

@online{kexuefm-8847,
    title={CoSENT: A more efficient sentence vector scheme than Sentence-BERT},
    author={Su Jianlin},
    year={2022},
    month={Jan},
    url={https://kexue.fm/archives/8847},
}