File size: 23,604 Bytes

d1c1ceb

---
language: []
library_name: sentence-transformers
tags:
- sentence-transformers
- sentence-similarity
- feature-extraction
- generated
base_model: sentence-transformers/stsb-distilbert-base
metrics:
- cosine_accuracy
- cosine_accuracy_threshold
- cosine_f1
- cosine_f1_threshold
- cosine_precision
- cosine_recall
- cosine_ap
- manhattan_accuracy
- manhattan_accuracy_threshold
- manhattan_f1
- manhattan_f1_threshold
- manhattan_precision
- manhattan_recall
- manhattan_ap
- euclidean_accuracy
- euclidean_accuracy_threshold
- euclidean_f1
- euclidean_f1_threshold
- euclidean_precision
- euclidean_recall
- euclidean_ap
- dot_accuracy
- dot_accuracy_threshold
- dot_f1
- dot_f1_threshold
- dot_precision
- dot_recall
- dot_ap
- max_accuracy
- max_accuracy_threshold
- max_f1
- max_f1_threshold
- max_precision
- max_recall
- max_ap
- average_precision
- f1
- precision
- recall
- threshold
- cosine_accuracy@1
- cosine_accuracy@3
- cosine_accuracy@5
- cosine_accuracy@10
- cosine_precision@1
- cosine_precision@3
- cosine_precision@5
- cosine_precision@10
- cosine_recall@1
- cosine_recall@3
- cosine_recall@5
- cosine_recall@10
- cosine_ndcg@10
- cosine_mrr@10
- cosine_map@100
- dot_accuracy@1
- dot_accuracy@3
- dot_accuracy@5
- dot_accuracy@10
- dot_precision@1
- dot_precision@3
- dot_precision@5
- dot_precision@10
- dot_recall@1
- dot_recall@3
- dot_recall@5
- dot_recall@10
- dot_ndcg@10
- dot_mrr@10
- dot_map@100
widget:
- source_sentence: How porn is made?
  sentences:
  - How is porn made?
  - How do you study before a test?
  - What is the best book for afcat?
- source_sentence: Is WW3 inevitable?
  sentences:
  - How close to WW3 are we?
  - Is it ok not to know everything?
  - How can I get good marks on my exam?
- source_sentence: How do stop smoking?
  sentences:
  - How did you quit/stop smoking?
  - How can I gain weight naturally?
  - What movie is the best movie of 2016?
- source_sentence: What is astrology?
  sentences:
  - What really is astrology?
  - How do I control blood pressure?
  - How should I reduce weight easily?
- source_sentence: What is SMS API?
  sentences:
  - What is an SMS API?
  - How will Sound travel in SPACE?
  - Do we live inside a black hole?
pipeline_tag: sentence-similarity
model-index:
- name: SentenceTransformer based on sentence-transformers/stsb-distilbert-base
  results:
  - task:
      type: binary-classification
      name: Binary Classification
    dataset:
      name: Unknown
      type: unknown
    metrics:
    - type: cosine_accuracy
      value: 0.770712179816613
      name: Cosine Accuracy
    - type: cosine_accuracy_threshold
      value: 0.8169694542884827
      name: Cosine Accuracy Threshold
    - type: cosine_f1
      value: 0.7086398522340053
      name: Cosine F1
    - type: cosine_f1_threshold
      value: 0.7420324087142944
      name: Cosine F1 Threshold
    - type: cosine_precision
      value: 0.6032968224704479
      name: Cosine Precision
    - type: cosine_recall
      value: 0.8585539007639479
      name: Cosine Recall
    - type: cosine_ap
      value: 0.7191176594498068
      name: Cosine Ap
    - type: manhattan_accuracy
      value: 0.7729301344296882
      name: Manhattan Accuracy
    - type: manhattan_accuracy_threshold
      value: 181.4663848876953
      name: Manhattan Accuracy Threshold
    - type: manhattan_f1
      value: 0.7082838527457715
      name: Manhattan F1
    - type: manhattan_f1_threshold
      value: 222.911865234375
      name: Manhattan F1 Threshold
    - type: manhattan_precision
      value: 0.6063303659742829
      name: Manhattan Precision
    - type: manhattan_recall
      value: 0.8514545875453353
      name: Manhattan Recall
    - type: manhattan_ap
      value: 0.7188011305084623
      name: Manhattan Ap
    - type: euclidean_accuracy
      value: 0.7736333883313948
      name: Euclidean Accuracy
    - type: euclidean_accuracy_threshold
      value: 8.356552124023438
      name: Euclidean Accuracy Threshold
    - type: euclidean_f1
      value: 0.7088200276731988
      name: Euclidean F1
    - type: euclidean_f1_threshold
      value: 10.092880249023438
      name: Euclidean F1 Threshold
    - type: euclidean_precision
      value: 0.6079037421348935
      name: Euclidean Precision
    - type: euclidean_recall
      value: 0.8499112585847673
      name: Euclidean Recall
    - type: euclidean_ap
      value: 0.719131590718056
      name: Euclidean Ap
    - type: dot_accuracy
      value: 0.7441508209136891
      name: Dot Accuracy
    - type: dot_accuracy_threshold
      value: 168.56625366210938
      name: Dot Accuracy Threshold
    - type: dot_f1
      value: 0.6831510249103777
      name: Dot F1
    - type: dot_f1_threshold
      value: 142.45849609375
      name: Dot F1 Threshold
    - type: dot_precision
      value: 0.5665209879052749
      name: Dot Precision
    - type: dot_recall
      value: 0.8602515626205726
      name: Dot Recall
    - type: dot_ap
      value: 0.6693622133717865
      name: Dot Ap
    - type: max_accuracy
      value: 0.7736333883313948
      name: Max Accuracy
    - type: max_accuracy_threshold
      value: 181.4663848876953
      name: Max Accuracy Threshold
    - type: max_f1
      value: 0.7088200276731988
      name: Max F1
    - type: max_f1_threshold
      value: 222.911865234375
      name: Max F1 Threshold
    - type: max_precision
      value: 0.6079037421348935
      name: Max Precision
    - type: max_recall
      value: 0.8602515626205726
      name: Max Recall
    - type: max_ap
      value: 0.719131590718056
      name: Max Ap
  - task:
      type: paraphrase-mining
      name: Paraphrase Mining
    dataset:
      name: dev
      type: dev
    metrics:
    - type: average_precision
      value: 0.47803306271270435
      name: Average Precision
    - type: f1
      value: 0.5119182746878547
      name: F1
    - type: precision
      value: 0.4683281412253375
      name: Precision
    - type: recall
      value: 0.5644555694618273
      name: Recall
    - type: threshold
      value: 0.8193174600601196
      name: Threshold
  - task:
      type: information-retrieval
      name: Information Retrieval
    dataset:
      name: Unknown
      type: unknown
    metrics:
    - type: cosine_accuracy@1
      value: 0.9654
      name: Cosine Accuracy@1
    - type: cosine_accuracy@3
      value: 0.9904
      name: Cosine Accuracy@3
    - type: cosine_accuracy@5
      value: 0.9948
      name: Cosine Accuracy@5
    - type: cosine_accuracy@10
      value: 0.9974
      name: Cosine Accuracy@10
    - type: cosine_precision@1
      value: 0.9654
      name: Cosine Precision@1
    - type: cosine_precision@3
      value: 0.43553333333333333
      name: Cosine Precision@3
    - type: cosine_precision@5
      value: 0.28064
      name: Cosine Precision@5
    - type: cosine_precision@10
      value: 0.14934
      name: Cosine Precision@10
    - type: cosine_recall@1
      value: 0.8251379240296788
      name: Cosine Recall@1
    - type: cosine_recall@3
      value: 0.9549051140803786
      name: Cosine Recall@3
    - type: cosine_recall@5
      value: 0.9757885342898082
      name: Cosine Recall@5
    - type: cosine_recall@10
      value: 0.9898260744103871
      name: Cosine Recall@10
    - type: cosine_ndcg@10
      value: 0.9786162291363164
      name: Cosine Ndcg@10
    - type: cosine_mrr@10
      value: 0.9785615873015873
      name: Cosine Mrr@10
    - type: cosine_map@100
      value: 0.9713888565523412
      name: Cosine Map@100
    - type: dot_accuracy@1
      value: 0.9512
      name: Dot Accuracy@1
    - type: dot_accuracy@3
      value: 0.985
      name: Dot Accuracy@3
    - type: dot_accuracy@5
      value: 0.9914
      name: Dot Accuracy@5
    - type: dot_accuracy@10
      value: 0.9964
      name: Dot Accuracy@10
    - type: dot_precision@1
      value: 0.9512
      name: Dot Precision@1
    - type: dot_precision@3
      value: 0.4303333333333333
      name: Dot Precision@3
    - type: dot_precision@5
      value: 0.2788
      name: Dot Precision@5
    - type: dot_precision@10
      value: 0.14896
      name: Dot Precision@10
    - type: dot_recall@1
      value: 0.8119095906963455
      name: Dot Recall@1
    - type: dot_recall@3
      value: 0.9459636855089498
      name: Dot Recall@3
    - type: dot_recall@5
      value: 0.9708092557905298
      name: Dot Recall@5
    - type: dot_recall@10
      value: 0.9883617291912786
      name: Dot Recall@10
    - type: dot_ndcg@10
      value: 0.9702609044345125
      name: Dot Ndcg@10
    - type: dot_mrr@10
      value: 0.9693138888888887
      name: Dot Mrr@10
    - type: dot_map@100
      value: 0.9599586870108953
      name: Dot Map@100
---

# SentenceTransformer based on sentence-transformers/stsb-distilbert-base

This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [sentence-transformers/stsb-distilbert-base](https://huggingface.co/sentence-transformers/stsb-distilbert-base). It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.

## Model Details

### Model Description
- **Model Type:** Sentence Transformer
- **Base model:** [sentence-transformers/stsb-distilbert-base](https://huggingface.co/sentence-transformers/stsb-distilbert-base)
- **Maximum Sequence Length:** 128 tokens
- **Output Dimensionality:** 768 dimensions
<!-- - **Training Dataset:** Unknown -->
<!-- - **Language:** Unknown -->
<!-- - **License:** Unknown -->

### Model Sources

- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)

### Full Model Architecture

```
SentenceTransformer(
  (0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: DistilBertModel 
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
)
```

## Usage

### Direct Usage (Sentence Transformers)

First install the Sentence Transformers library:

```bash
pip install -U sentence-transformers
```

Then you can load this model and run inference.
```python
from sentence_transformers import SentenceTransformer

# Download from the 🤗 Hub
model = SentenceTransformer("tomaarsen/stsb-distilbert-base-quora-duplicate-questions")
# Run inference
sentences = [
    "What is a fetish?",
    "What's a fetish?",
    "Is it good to read sex stories?",
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# (3, 768)
```

<!--
### Direct Usage (Transformers)

<details><summary>Click to see the direct usage in Transformers</summary>

</details>
-->

<!--
### Downstream Usage (Sentence Transformers)

You can finetune this model on your own dataset.

<details><summary>Click to expand</summary>

</details>
-->

<!--
### Out-of-Scope Use

*List how the model may foreseeably be misused and address what users ought not to do with the model.*
-->

## Evaluation

### Metrics

#### Binary Classification

* Evaluated with [<code>BinaryClassificationEvaluator</code>](https://sbert.net/docs/package_reference/evaluation.html#sentence_transformers.evaluation.BinaryClassificationEvaluator)

| Metric                       | Value      |
|:-----------------------------|:-----------|
| **cosine_accuracy**          | **0.7707** |
| cosine_accuracy_threshold    | 0.817      |
| cosine_f1                    | 0.7086     |
| cosine_f1_threshold          | 0.742      |
| cosine_precision             | 0.6033     |
| cosine_recall                | 0.8586     |
| cosine_ap                    | 0.7191     |
| manhattan_accuracy           | 0.7729     |
| manhattan_accuracy_threshold | 181.4664   |
| manhattan_f1                 | 0.7083     |
| manhattan_f1_threshold       | 222.9119   |
| manhattan_precision          | 0.6063     |
| manhattan_recall             | 0.8515     |
| manhattan_ap                 | 0.7188     |
| euclidean_accuracy           | 0.7736     |
| euclidean_accuracy_threshold | 8.3566     |
| euclidean_f1                 | 0.7088     |
| euclidean_f1_threshold       | 10.0929    |
| euclidean_precision          | 0.6079     |
| euclidean_recall             | 0.8499     |
| euclidean_ap                 | 0.7191     |
| dot_accuracy                 | 0.7442     |
| dot_accuracy_threshold       | 168.5663   |
| dot_f1                       | 0.6832     |
| dot_f1_threshold             | 142.4585   |
| dot_precision                | 0.5665     |
| dot_recall                   | 0.8603     |
| dot_ap                       | 0.6694     |
| max_accuracy                 | 0.7736     |
| max_accuracy_threshold       | 181.4664   |
| max_f1                       | 0.7088     |
| max_f1_threshold             | 222.9119   |
| max_precision                | 0.6079     |
| max_recall                   | 0.8603     |
| max_ap                       | 0.7191     |

#### Paraphrase Mining
* Dataset: `dev`
* Evaluated with [<code>ParaphraseMiningEvaluator</code>](https://sbert.net/docs/package_reference/evaluation.html#sentence_transformers.evaluation.ParaphraseMiningEvaluator)

| Metric                | Value     |
|:----------------------|:----------|
| **average_precision** | **0.478** |
| f1                    | 0.5119    |
| precision             | 0.4683    |
| recall                | 0.5645    |
| threshold             | 0.8193    |

#### Information Retrieval

* Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)

| Metric              | Value      |
|:--------------------|:-----------|
| cosine_accuracy@1   | 0.9654     |
| cosine_accuracy@3   | 0.9904     |
| cosine_accuracy@5   | 0.9948     |
| cosine_accuracy@10  | 0.9974     |
| cosine_precision@1  | 0.9654     |
| cosine_precision@3  | 0.4355     |
| cosine_precision@5  | 0.2806     |
| cosine_precision@10 | 0.1493     |
| cosine_recall@1     | 0.8251     |
| cosine_recall@3     | 0.9549     |
| cosine_recall@5     | 0.9758     |
| cosine_recall@10    | 0.9898     |
| cosine_ndcg@10      | 0.9786     |
| cosine_mrr@10       | 0.9786     |
| **cosine_map@100**  | **0.9714** |
| dot_accuracy@1      | 0.9512     |
| dot_accuracy@3      | 0.985      |
| dot_accuracy@5      | 0.9914     |
| dot_accuracy@10     | 0.9964     |
| dot_precision@1     | 0.9512     |
| dot_precision@3     | 0.4303     |
| dot_precision@5     | 0.2788     |
| dot_precision@10    | 0.149      |
| dot_recall@1        | 0.8119     |
| dot_recall@3        | 0.946      |
| dot_recall@5        | 0.9708     |
| dot_recall@10       | 0.9884     |
| dot_ndcg@10         | 0.9703     |
| dot_mrr@10          | 0.9693     |
| dot_map@100         | 0.96       |

<!--
## Bias, Risks and Limitations

*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
-->

<!--
### Recommendations

*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
-->

## Training Details

### Training Dataset

#### Unnamed Dataset


* Size: 207,326 training samples
* Columns: <code>sentence_0</code>, <code>sentence_1</code>, and <code>label</code>
* Approximate statistics based on the first 1000 samples:
  |         | sentence_0                                                                        | sentence_1                                                                        | label                         |
  |:--------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:------------------------------|
  | type    | string                                                                            | string                                                                            | int                           |
  | details | <ul><li>min: 6 tokens</li><li>mean: 13.75 tokens</li><li>max: 42 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 13.74 tokens</li><li>max: 44 tokens</li></ul> | <ul><li>1: ~100.00%</li></ul> |
* Samples:
  | sentence_0                                                                                            | sentence_1                                                                                                           | label          |
  |:------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------|:---------------|
  | <code>How do I improve writing skill by myself?</code>                                                | <code>How can I improve writing skills?</code>                                                                       | <code>1</code> |
  | <code>Is it best to switch to Node.js from PHP?</code>                                                | <code>Should I switch to Node.js or continue using PHP?</code>                                                       | <code>1</code> |
  | <code>What do Hillary Clinton's supporters say when confronted with all her lies and scandals?</code> | <code>What do Clinton supporters say when confronted with her scandals such as the emails and 'Clinton Cash'?</code> | <code>1</code> |
* Loss: [<code>sentence_transformers.losses.MultipleNegativesRankingLoss.MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/losses.html#multiplenegativesrankingloss) with these parameters:
  ```json
  {
      "scale": 20.0,
      "similarity_fct": "cos_sim"
  }
  ```

### Training Hyperparameters
#### Non-Default Hyperparameters

- per_device_train_batch_size: 64
- per_device_eval_batch_size: 64
- num_train_epochs: 1
- round_robin_sampler: True

#### All Hyperparameters
<details><summary>Click to expand</summary>

- overwrite_output_dir: False
- do_predict: False
- prediction_loss_only: False
- per_device_train_batch_size: 64
- per_device_eval_batch_size: 64
- per_gpu_train_batch_size: None
- per_gpu_eval_batch_size: None
- gradient_accumulation_steps: 1
- eval_accumulation_steps: None
- learning_rate: 5e-05
- weight_decay: 0.0
- adam_beta1: 0.9
- adam_beta2: 0.999
- adam_epsilon: 1e-08
- max_grad_norm: 1
- num_train_epochs: 1
- max_steps: -1
- lr_scheduler_type: linear
- lr_scheduler_kwargs: {}
- warmup_ratio: 0.0
- warmup_steps: 0
- log_level: passive
- log_level_replica: warning
- log_on_each_node: True
- logging_nan_inf_filter: True
- save_safetensors: True
- save_on_each_node: False
- save_only_model: False
- no_cuda: False
- use_cpu: False
- use_mps_device: False
- seed: 42
- data_seed: None
- jit_mode_eval: False
- use_ipex: False
- bf16: False
- fp16: False
- fp16_opt_level: O1
- half_precision_backend: auto
- bf16_full_eval: False
- fp16_full_eval: False
- tf32: None
- local_rank: 0
- ddp_backend: None
- tpu_num_cores: None
- tpu_metrics_debug: False
- debug: []
- dataloader_drop_last: False
- dataloader_num_workers: 0
- dataloader_prefetch_factor: None
- past_index: -1
- disable_tqdm: False
- remove_unused_columns: True
- label_names: None
- load_best_model_at_end: False
- ignore_data_skip: False
- fsdp: []
- fsdp_min_num_params: 0
- fsdp_config: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
- fsdp_transformer_layer_cls_to_wrap: None
- accelerator_config: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}
- deepspeed: None
- label_smoothing_factor: 0.0
- optim: adamw_torch
- optim_args: None
- adafactor: False
- group_by_length: False
- length_column_name: length
- ddp_find_unused_parameters: None
- ddp_bucket_cap_mb: None
- ddp_broadcast_buffers: None
- dataloader_pin_memory: True
- dataloader_persistent_workers: False
- skip_memory_metrics: True
- use_legacy_prediction_loop: False
- push_to_hub: False
- resume_from_checkpoint: None
- hub_model_id: None
- hub_strategy: every_save
- hub_private_repo: False
- hub_always_push: False
- gradient_checkpointing: False
- gradient_checkpointing_kwargs: None
- include_inputs_for_metrics: False
- fp16_backend: auto
- push_to_hub_model_id: None
- push_to_hub_organization: None
- mp_parameters: 
- auto_find_batch_size: False
- full_determinism: False
- torchdynamo: None
- ray_scope: last
- ddp_timeout: 1800
- torch_compile: False
- torch_compile_backend: None
- torch_compile_mode: None
- dispatch_batches: None
- split_batches: None
- include_tokens_per_second: False
- include_num_input_tokens_seen: False
- neftune_noise_alpha: None
- optim_target_modules: None
- round_robin_sampler: True

</details>

### Training Logs
| Epoch  | Step | Training Loss | cosine_accuracy | cosine_map@100 | dev_average_precision |
|:------:|:----:|:-------------:|:---------------:|:--------------:|:---------------------:|
| 0      | 0    | -             | 0.7661          | 0.9371         | 0.4137                |
| 0.1543 | 500  | 0.1055        | 0.7632          | 0.9620         | 0.4731                |
| 0.3086 | 1000 | 0.0677        | 0.7608          | 0.9675         | 0.4732                |
| 0.4630 | 1500 | 0.0612        | 0.7663          | 0.9710         | 0.4856                |
| 0.6173 | 2000 | 0.0584        | 0.7719          | 0.9693         | 0.4925                |
| 0.7716 | 2500 | 0.0506        | 0.7714          | 0.9709         | 0.4808                |
| 0.9259 | 3000 | 0.0488        | 0.7708          | 0.9713         | 0.4784                |
| 1.0    | 3240 | -             | 0.7707          | 0.9714         | 0.4780                |


### Framework Versions
- Python: 3.11.6
- Sentence Transformers: 2.7.0.dev0
- Transformers: 4.39.3
- PyTorch: 2.1.0+cu121
- Accelerate: 0.26.1
- Datasets: 2.18.0
- Tokenizers: 0.15.2

## Citation

### BibTeX

#### Sentence Transformers
```bibtex
@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}
```

#### MultipleNegativesRankingLoss
```bibtex
@misc{henderson2017efficient,
    title={Efficient Natural Language Response Suggestion for Smart Reply}, 
    author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
    year={2017},
    eprint={1705.00652},
    archivePrefix={arXiv},
    primaryClass={cs.CL}
}
```

<!--
## Glossary

*Clearly define terms in order to be accessible across audiences.*
-->

<!--
## Model Card Authors

*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
-->

<!--
## Model Card Contact

*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
-->