embedding_finetune / README.md
llmvetter's picture
Add new SentenceTransformer model.
b39bdaf verified
metadata
tags:
  - sentence-transformers
  - sentence-similarity
  - feature-extraction
  - generated_from_trainer
  - dataset_size:3820
  - loss:MultipleNegativesRankingLoss
widget:
  - source_sentence: samsung ms23h3125ak/ms23h3125ak
    sentences:
      - Canon EOS M50 + 15-45mm IS STM
      - Bosch KIV32X23GB Integrated
      - Indesit DIF04B1 Integrated
      - Samsung MS23H3125AK Black
      - Samsung RB29FWRNDBC Black
      - Hisense RQ560N4WC1
      - Samsung UE32M5520
      - Nikon CoolPix A10
      - Hotpoint RPD10457JKK
      - HP Intel Xeon X5670 2.93GHz Socket 1366 3200MHz bus Upgrade Tray
      - Indesit DFG15B1S Silver
      - Samsung WW10M86DQOO
      - Bosch SMV46MX00G Integrated
      - LG 49SK8100PLA
      - Nikon CoolPix W300
      - AMD Ryzen 3 1300X 3.5GHz  Box
      - LG OLED65B8PLA
      - Samsung Galaxy J5 SM-J530
      - LG 65UK6500PLA
      - Siemens WM14T391GB
      - Apple iPhone SE 32GB
  - source_sentence: lg oled65c8pla
    sentences:
      - Beko LCSM1545W White
      - Bosch KAN90VI20G Stainless Steel
      - Canon PowerShot SX60 HS
      - Hotpoint WMAQF621P
      - Apple iPhone 7 Plus 32GB
      - Hotpoint FFU4DK Black
      - Fujifilm Finepix XP130
      - Bosch WAN24108GB
      - LG OLED65E8PLA
      - Intel Core i7-8700K 3.7GHz  Box
      - Fujifilm X-Pro2
      - LG OLED65C8PLA
      - Samsung UE55NU8000
      - LG 49LK5900PLA
      - Apple iPhone 8 64GB
      - Samsung UE65NU7100
      - AEG L6FBG942R
      - AMD Ryzen 7 1700 3GHz Box
      - Panasonic TX-49FX750B
      - Bosch WKD28351GB
      - Bosch GUD15A50GB Integrated
  - source_sentence: 15.748 cm 6.2 2960 x 1440 samoled octa core 2.3ghz quad 1.7gh
    sentences:
      - Apple iPhone SE 32GB
      - Apple iPhone X 64GB
      - LG 55SK9500PLA
      - Sony Cyber-shot DSC-WX500
      - Samsung Galaxy A5 SM-A520F
      - Apple iPhone 8 Plus 64GB
      - Indesit IWDD7123
      - Bosch SMS67MW01G White
      - Bosch KGV33XW30G White
      - Samsung WW80K5413UW
      - AMD Ryzen 3 1300X 3.5GHz  Box
      - Bosch WAW28750GB
      - Samsung Galaxy S8+ 64GB
      - Bosch KGN39VW35G White
      - Intel Core i7-7700K 4.2GHz  Box
      - Hotpoint RZAAV22P White
      - Samsung UE49NU8000
      - HP AMD Opteron 6276 2.3GHz Upgrade Tray
      - Praktica Luxmedia Z250
      - Hotpoint HFC2B19SV White
      - Hisense RB385N4EW1 White
  - source_sentence: boxed processor amd ryzen 3 1200 4 x 3.1 ghz quad
    sentences:
      - Bosch KGN36HI32 Stainless Steel
      - Bosch SMS24AW01G White
      - Hotpoint WDAL8640P
      - Doro 6050
      - Samsung QE55Q7FN
      - AMD Ryzen 3 1200 3.1GHz Box
      - Samsung UE55NU7500
      - Huawei Honor 10 128GB Dual SIM
      - Sony Xperia L1
      - Hotpoint FFU4DK Black
      - Hoover DXOC 68C3B
      - Sony Xperia XA1
      - Nikon D7200 + 18-105mm VR
      - HP Intel Xeon DP E5640 2.66GHz Socket 1366 1066MHz bus Upgrade Tray
      - Samsung UE49NU8000
      - Panasonic Lumix DMC-FT30
      - Hotpoint FDL 9640K UK
      - Apple iPhone 6S Plus 128GB
      - Nikon D5600 + AF-P 18-55mm VR
      - HP AMD Opteron 6238 2.6GHz Upgrade Tray
      - Apple iPhone SE 32GB
  - source_sentence: lg 49uk6300plb/49uk6300plb
    sentences:
      - Bosch KIR24V20GB Integrated
      - Bosch WAWH8660GB
      - Intel Core i5-7600K 3.80GHz  Box
      - Sony Bravia KD-65AF8
      - Samsung RL4362FBASL Stainless Steel
      - Bosch SMI50C15GB Silver
      - Apple iPhone XS Max 256GB
      - Fujifilm X-T100 + XC 15-45/f3.5-5.6 OIS PZ
      - Bosch KGN36VW35G White
      - Samsung WW70K5410UW
      - Samsung Galaxy J6
      - LG 49UK6300PLB
      - Doro Secure 580
      - Sony Xperia XZ1 Compact
      - Bosch SMV50C10GB Integrated
      - Bosch KGN34VB35G Black
      - Panasonic NN-E27JWMBPQ White
      - Samsung WW10M86DQOA/EU
      - LG 55SK9500PLA
      - Samsung QE65Q8DN
      - Canon EOS 80D
pipeline_tag: sentence-similarity
library_name: sentence-transformers
metrics:
  - cosine_accuracy@1
  - cosine_accuracy@3
  - cosine_accuracy@5
  - cosine_accuracy@10
  - cosine_precision@1
  - cosine_precision@3
  - cosine_precision@5
  - cosine_precision@10
  - cosine_recall@1
  - cosine_recall@3
  - cosine_recall@5
  - cosine_recall@10
  - cosine_ndcg@10
  - cosine_mrr@10
  - cosine_map@100
model-index:
  - name: SentenceTransformer
    results:
      - task:
          type: information-retrieval
          name: Information Retrieval
        dataset:
          name: Product Category Retrieval Test
          type: Product-Category-Retrieval-Test
        metrics:
          - type: cosine_accuracy@1
            value: 0.8085774058577406
            name: Cosine Accuracy@1
          - type: cosine_accuracy@3
            value: 0.9476987447698745
            name: Cosine Accuracy@3
          - type: cosine_accuracy@5
            value: 0.9644351464435147
            name: Cosine Accuracy@5
          - type: cosine_accuracy@10
            value: 0.9769874476987448
            name: Cosine Accuracy@10
          - type: cosine_precision@1
            value: 0.8085774058577406
            name: Cosine Precision@1
          - type: cosine_precision@3
            value: 0.3158995815899582
            name: Cosine Precision@3
          - type: cosine_precision@5
            value: 0.19288702928870294
            name: Cosine Precision@5
          - type: cosine_precision@10
            value: 0.09769874476987449
            name: Cosine Precision@10
          - type: cosine_recall@1
            value: 0.8085774058577406
            name: Cosine Recall@1
          - type: cosine_recall@3
            value: 0.9476987447698745
            name: Cosine Recall@3
          - type: cosine_recall@5
            value: 0.9644351464435147
            name: Cosine Recall@5
          - type: cosine_recall@10
            value: 0.9769874476987448
            name: Cosine Recall@10
          - type: cosine_ndcg@10
            value: 0.9041917131034228
            name: Cosine Ndcg@10
          - type: cosine_mrr@10
            value: 0.879607906621505
            name: Cosine Mrr@10
          - type: cosine_map@100
            value: 0.8805000617705705
            name: Cosine Map@100

SentenceTransformer

This is a sentence-transformers model trained with MultipleNegativesRankingLoss on 3,820 training samples. It maps sentences and paragraphs to a 512-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.

Model Details

Model Description

  • Model Type: Sentence Transformer
  • Maximum Sequence Length: 384 tokens
  • Output Dimensionality: 512 dimensions
  • Similarity Function: Cosine Similarity

Model Sources

Full Model Architecture

SentenceTransformer(
  (0): SentenceTransformer(
    (0): Transformer({'max_seq_length': 384, 'do_lower_case': False}) with Transformer model: MPNetModel 
    (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
    (2): Normalize()
  )
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Dense({'in_features': 768, 'out_features': 512, 'bias': True, 'activation_function': 'torch.nn.modules.activation.Tanh'})
)

Usage

Direct Usage (Sentence Transformers)

First install the Sentence Transformers library:

pip install -U sentence-transformers

Then you can load this model and run inference.

from sentence_transformers import SentenceTransformer

# Download from the 🤗 Hub
model = SentenceTransformer("llmvetter/embedding_finetune")
# Run inference
sentences = [
    'lg 49uk6300plb/49uk6300plb',
    'LG 49UK6300PLB',
    'Samsung Galaxy J6',
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 512]

# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]

Evaluation

Metrics

Information Retrieval

Metric Value
cosine_accuracy@1 0.8086
cosine_accuracy@3 0.9477
cosine_accuracy@5 0.9644
cosine_accuracy@10 0.9770
cosine_precision@1 0.8086
cosine_precision@3 0.3159
cosine_precision@5 0.1929
cosine_precision@10 0.0977
cosine_recall@1 0.8086
cosine_recall@3 0.9477
cosine_recall@5 0.9644
cosine_recall@10 0.9770
cosine_ndcg@10 0.9042
cosine_mrr@10 0.8796
cosine_map@100 0.8805

Training Details

Training Dataset

Unnamed Dataset

  • Size: 3,820 training samples
  • Columns: sentence_0, sentence_1, sentence_2, sentence_3, sentence_4, sentence_5, sentence_6, sentence_7, sentence_8, sentence_9, sentence_10, sentence_11, sentence_12, sentence_13, sentence_14, sentence_15, sentence_16, sentence_17, sentence_18, sentence_19, sentence_20, and sentence_21
  • Approximate statistics based on the first 1000 samples:
    sentence_0 sentence_1 sentence_2 sentence_3 sentence_4 sentence_5 sentence_6 sentence_7 sentence_8 sentence_9 sentence_10 sentence_11 sentence_12 sentence_13 sentence_14 sentence_15 sentence_16 sentence_17 sentence_18 sentence_19 sentence_20 sentence_21
    type string string string string string string string string string string string string string string string string string string string string string string
    details (per column, in the column order listed above)
    • sentence_0: min: 4 tokens, mean: 18.41 tokens, max: 47 tokens
    • sentence_1: min: 6 tokens, mean: 10.94 tokens, max: 30 tokens
    • sentence_2: min: 6 tokens, mean: 11.11 tokens, max: 30 tokens
    • sentence_3: min: 6 tokens, mean: 11.15 tokens, max: 30 tokens
    • sentence_4: min: 6 tokens, mean: 10.89 tokens, max: 30 tokens
    • sentence_5: min: 6 tokens, mean: 10.89 tokens, max: 30 tokens
    • sentence_6: min: 6 tokens, mean: 10.98 tokens, max: 30 tokens
    • sentence_7: min: 6 tokens, mean: 11.07 tokens, max: 30 tokens
    • sentence_8: min: 6 tokens, mean: 11.04 tokens, max: 30 tokens
    • sentence_9: min: 6 tokens, mean: 10.84 tokens, max: 30 tokens
    • sentence_10: min: 6 tokens, mean: 10.82 tokens, max: 30 tokens
    • sentence_11: min: 6 tokens, mean: 10.81 tokens, max: 30 tokens
    • sentence_12: min: 6 tokens, mean: 11.05 tokens, max: 30 tokens
    • sentence_13: min: 6 tokens, mean: 10.92 tokens, max: 30 tokens
    • sentence_14: min: 6 tokens, mean: 11.18 tokens, max: 30 tokens
    • sentence_15: min: 6 tokens, mean: 11.07 tokens, max: 30 tokens
    • sentence_16: min: 6 tokens, mean: 10.93 tokens, max: 30 tokens
    • sentence_17: min: 6 tokens, mean: 11.02 tokens, max: 30 tokens
    • sentence_18: min: 6 tokens, mean: 11.04 tokens, max: 30 tokens
    • sentence_19: min: 6 tokens, mean: 11.02 tokens, max: 30 tokens
    • sentence_20: min: 6 tokens, mean: 10.95 tokens, max: 30 tokens
    • sentence_21: min: 6 tokens, mean: 10.86 tokens, max: 30 tokens
  • Samples:
    sentence_0 sentence_1 sentence_2 sentence_3 sentence_4 sentence_5 sentence_6 sentence_7 sentence_8 sentence_9 sentence_10 sentence_11 sentence_12 sentence_13 sentence_14 sentence_15 sentence_16 sentence_17 sentence_18 sentence_19 sentence_20 sentence_21
    sony kd49xf8505bu 49 4k ultra hd tv Sony Bravia KD-49XF8505 Intel Core i7-8700K 3.7GHz Box Bosch WAN24100GB AMD FX-6300 3.5GHz Box Bosch WIW28500GB Bosch KGN36VL35G Stainless Steel Indesit XWDE751480XS CAT S41 Dual SIM Sony Xperia XA1 Ultra 32GB Samsung Galaxy J6 Samsung QE55Q7FN Bosch KGN39VW35G White Intel Core i5 7400 3.0GHz Box Neff C17UR02N0B Stainless Steel Samsung RR39M7340SA Silver Samsung RB41J7255SR Stainless Steel Hoover DXOC 68C3B Canon PowerShot SX730 HS Samsung RR39M7340BC Black Praktica Luxmedia WP240 HP Intel Xeon DP E5506 2.13GHz Socket 1366 800MHz bus Upgrade Tray
    doro 8040 4g sim free mobile phone black Doro 8040 Bosch HMT75M551 Stainless Steel Bosch SMI50C15GB Silver Samsung WW90K5413UX Panasonic Lumix DMC-TZ70 Sony KD-49XF7073 Nikon CoolPix W100 Samsung WD90J6A10AW Bosch CFA634GS1B Stainless Steel HP AMD Opteron 8425 HE 2.1GHz Socket F 4800MHz bus Upgrade Tray Canon EOS 800D + 18-55mm IS STM Samsung UE50NU7400 Apple iPhone 6S 128GB Samsung RS52N3313SA/EU Graphite Bosch WAW325H0GB Sony Bravia KD-55AF8 Sony Alpha 6500 Doro 5030 LG GSL761WBXV Black Bosch SMS67MW00G White AEG L6FBG942R
    fridgemaster muz4965 undercounter freezer white a rated Fridgemaster MUZ4965 White Samsung UE49NU7100 Nikon CoolPix A10 Samsung UE55NU7100 Samsung QE55Q7FN Bosch KGN49XL30G Stainless Steel Samsung UE49NU7500 LG 55UK6300PLB Hoover DXOC 68C3B Panasonic Lumix DMC-FZ2000 Panasonic Lumix DMC-TZ80 Bosch WKD28541GB Apple iPhone 6 32GB Sony Bravia KDL-32WE613 Lec TF50152W White Bosch KGV36VW32G White Bosch WAYH8790GB Samsung RS68N8240B1/EU Black Sony Xperia XZ1 HP Intel Xeon DP E5506 2.13GHz Socket 1366 800MHz bus Upgrade Tray Sharp R372WM White
  • Loss: MultipleNegativesRankingLoss with these parameters:
    {
        "scale": 20.0,
        "similarity_fct": "cos_sim"
    }
    

Training Hyperparameters

Non-Default Hyperparameters

  • per_device_train_batch_size: 32
  • per_device_eval_batch_size: 32
  • num_train_epochs: 8
  • multi_dataset_batch_sampler: round_robin

All Hyperparameters

Click to expand
  • overwrite_output_dir: False
  • do_predict: False
  • eval_strategy: no
  • prediction_loss_only: True
  • per_device_train_batch_size: 32
  • per_device_eval_batch_size: 32
  • per_gpu_train_batch_size: None
  • per_gpu_eval_batch_size: None
  • gradient_accumulation_steps: 1
  • eval_accumulation_steps: None
  • torch_empty_cache_steps: None
  • learning_rate: 5e-05
  • weight_decay: 0.0
  • adam_beta1: 0.9
  • adam_beta2: 0.999
  • adam_epsilon: 1e-08
  • max_grad_norm: 1
  • num_train_epochs: 8
  • max_steps: -1
  • lr_scheduler_type: linear
  • lr_scheduler_kwargs: {}
  • warmup_ratio: 0.0
  • warmup_steps: 0
  • log_level: passive
  • log_level_replica: warning
  • log_on_each_node: True
  • logging_nan_inf_filter: True
  • save_safetensors: True
  • save_on_each_node: False
  • save_only_model: False
  • restore_callback_states_from_checkpoint: False
  • no_cuda: False
  • use_cpu: False
  • use_mps_device: False
  • seed: 42
  • data_seed: None
  • jit_mode_eval: False
  • use_ipex: False
  • bf16: False
  • fp16: False
  • fp16_opt_level: O1
  • half_precision_backend: auto
  • bf16_full_eval: False
  • fp16_full_eval: False
  • tf32: None
  • local_rank: 0
  • ddp_backend: None
  • tpu_num_cores: None
  • tpu_metrics_debug: False
  • debug: []
  • dataloader_drop_last: False
  • dataloader_num_workers: 0
  • dataloader_prefetch_factor: None
  • past_index: -1
  • disable_tqdm: False
  • remove_unused_columns: True
  • label_names: None
  • load_best_model_at_end: False
  • ignore_data_skip: False
  • fsdp: []
  • fsdp_min_num_params: 0
  • fsdp_config: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
  • fsdp_transformer_layer_cls_to_wrap: None
  • accelerator_config: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
  • deepspeed: None
  • label_smoothing_factor: 0.0
  • optim: adamw_torch
  • optim_args: None
  • adafactor: False
  • group_by_length: False
  • length_column_name: length
  • ddp_find_unused_parameters: None
  • ddp_bucket_cap_mb: None
  • ddp_broadcast_buffers: False
  • dataloader_pin_memory: True
  • dataloader_persistent_workers: False
  • skip_memory_metrics: True
  • use_legacy_prediction_loop: False
  • push_to_hub: False
  • resume_from_checkpoint: None
  • hub_model_id: None
  • hub_strategy: every_save
  • hub_private_repo: None
  • hub_always_push: False
  • gradient_checkpointing: False
  • gradient_checkpointing_kwargs: None
  • include_inputs_for_metrics: False
  • include_for_metrics: []
  • eval_do_concat_batches: True
  • fp16_backend: auto
  • push_to_hub_model_id: None
  • push_to_hub_organization: None
  • mp_parameters: (empty string)
  • auto_find_batch_size: False
  • full_determinism: False
  • torchdynamo: None
  • ray_scope: last
  • ddp_timeout: 1800
  • torch_compile: False
  • torch_compile_backend: None
  • torch_compile_mode: None
  • dispatch_batches: None
  • split_batches: None
  • include_tokens_per_second: False
  • include_num_input_tokens_seen: False
  • neftune_noise_alpha: None
  • optim_target_modules: None
  • batch_eval_metrics: False
  • eval_on_start: False
  • use_liger_kernel: False
  • eval_use_gather_object: False
  • average_tokens_across_devices: False
  • prompts: None
  • batch_sampler: batch_sampler
  • multi_dataset_batch_sampler: round_robin

Training Logs

Epoch Step Training Loss Product-Category-Retrieval-Test_cosine_ndcg@10
1.0 120 - 0.7406
2.0 240 - 0.8437
3.0 360 - 0.8756
4.0 480 - 0.8875
4.1667 500 2.5302 -
5.0 600 - 0.8963
6.0 720 - 0.9015
7.0 840 - 0.9042

Framework Versions

  • Python: 3.11.10
  • Sentence Transformers: 3.3.1
  • Transformers: 4.47.1
  • PyTorch: 2.5.1+cu124
  • Accelerate: 1.2.1
  • Datasets: 3.2.0
  • Tokenizers: 0.21.0

Citation

BibTeX

Sentence Transformers

@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}

MultipleNegativesRankingLoss

@misc{henderson2017efficient,
    title={Efficient Natural Language Response Suggestion for Smart Reply},
    author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
    year={2017},
    eprint={1705.00652},
    archivePrefix={arXiv},
    primaryClass={cs.CL}
}