Add new SentenceTransformer model
Browse files
- 1_Pooling/config.json +10 -0
- README.md +465 -0
- config.json +31 -0
- config_sentence_transformers.json +10 -0
- model.safetensors +3 -0
- modules.json +20 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +37 -0
- tokenizer.json +0 -0
- tokenizer_config.json +65 -0
- vocab.txt +0 -0
1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"word_embedding_dimension": 384,
|
3 |
+
"pooling_mode_cls_token": true,
|
4 |
+
"pooling_mode_mean_tokens": false,
|
5 |
+
"pooling_mode_max_tokens": false,
|
6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
7 |
+
"pooling_mode_weightedmean_tokens": false,
|
8 |
+
"pooling_mode_lasttoken": false,
|
9 |
+
"include_prompt": true
|
10 |
+
}
|
README.md
ADDED
@@ -0,0 +1,465 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
language:
|
3 |
+
- en
|
4 |
+
license: mit
|
5 |
+
tags:
|
6 |
+
- sentence-transformers
|
7 |
+
- sentence-similarity
|
8 |
+
- feature-extraction
|
9 |
+
- generated_from_trainer
|
10 |
+
- dataset_size:225245
|
11 |
+
- loss:MultipleNegativesRankingLoss
|
12 |
+
base_model: BAAI/bge-small-en-v1.5
|
13 |
+
widget:
|
14 |
+
- source_sentence: Infections affecting the acromioclavicular joint can present with
|
15 |
+
symptoms such as shoulder pain, swelling, and potential fever. Patients may experience
|
16 |
+
systemic signs of infection, including chills and malaise, alongside localized
|
17 |
+
symptoms related to joint movement.
|
18 |
+
sentences:
|
19 |
+
- Gastric stromal tumors with uncertain behavior can present with symptoms such
|
20 |
+
as abdominal pain, nausea, and changes in appetite. Patients may experience psychological
|
21 |
+
distress due to concerns about malignancy and the need for further evaluation.
|
22 |
+
The presence of these tumors can lead to complications depending on their size
|
23 |
+
and location.
|
24 |
+
- 'Disease Name : Complication, umbrella device, vascular, embolism'
|
25 |
+
- 'Disease Name : Infection, infected, infective, acromioclavicular'
|
26 |
+
- source_sentence: 'cyanosis: a bluish discoloration of the skin; fatigue: extreme
|
27 |
+
tiredness; shortness of breath: difficulty in breathing; headache: pain in the
|
28 |
+
head; confusion: difficulty in thinking clearly.'
|
29 |
+
sentences:
|
30 |
+
- 'Disease Name : Anomaly, anomalous, spine, spinal NEC, column NEC, kyphosis'
|
31 |
+
- 'Disease Name : Methemoglobinemia, Hb M disease'
|
32 |
+
- Superficial insect bites on the finger can lead to localized swelling, redness,
|
33 |
+
and itching. Patients may experience discomfort and potential allergic reactions,
|
34 |
+
requiring symptomatic treatment.
|
35 |
+
- source_sentence: 'Disease Name : Excess, excessive, excessively, crying, in infant'
|
36 |
+
sentences:
|
37 |
+
- Excessive crying in infants can manifest as prolonged periods of distress, often
|
38 |
+
accompanied by signs of discomfort such as clenching fists, arching of the back,
|
39 |
+
and difficulty in soothing. The infant may exhibit a high-pitched cry and may
|
40 |
+
seem inconsolable, leading to parental concern and anxiety about the child's well-being.
|
41 |
+
- 'Disease Name : Syphilis, syphilitic, meningitis'
|
42 |
+
- 'Disease Name : Prolapse, prolapsed, ileostomy bud'
|
43 |
+
- source_sentence: Cysts affecting the lacrimal passages or sac can lead to symptoms
|
44 |
+
such as tearing, swelling, and discomfort in the inner corner of the eye. Patients
|
45 |
+
may experience recurrent infections, redness, and a sensation of fullness, which
|
46 |
+
can significantly impact tear drainage and overall eye health.
|
47 |
+
sentences:
|
48 |
+
- A condition characterized by the fusion of a single suture can lead to various
|
49 |
+
physical deformities and developmental challenges. Symptoms may include cognitive
|
50 |
+
impairment and various physical challenges that can impact daily life.
|
51 |
+
- 'Disease Name : Cyst, Lacrimal, Passages or Sac'
|
52 |
+
- This condition presents with swelling and pain in the area of the placenta, often
|
53 |
+
accompanied by tenderness and discomfort. Patients may experience complications
|
54 |
+
during pregnancy, including bleeding and changes in fetal movement. In some cases,
|
55 |
+
there may be associated risks to both the mother and the fetus.
|
56 |
+
- source_sentence: 'Disease Name : Tuberculosis, tubercular, tuberculous, brain'
|
57 |
+
sentences:
|
58 |
+
- 'hoarseness: abnormal voice quality due to vocal cord dysfunction; difficulty
|
59 |
+
breathing: shortness of breath or stridor from airway compression; loss of voice:
|
60 |
+
inability to speak due to vocal cord paralysis; throat discomfort: pain or irritation
|
61 |
+
in the throat area.'
|
62 |
+
- Symptoms include cough, wheezing, and difficulty breathing, often exacerbated
|
63 |
+
by exposure to specific environmental triggers such as air conditioning systems.
|
64 |
+
Patients may also experience fever and malaise, with a history of allergic reactions
|
65 |
+
or asthma. The condition can lead to significant respiratory distress if not managed
|
66 |
+
appropriately.
|
67 |
+
- Symptoms often include headaches, confusion, and potential neurological deficits,
|
68 |
+
along with systemic signs such as fever and fatigue. Patients may also experience
|
69 |
+
seizures or altered consciousness, indicating a serious underlying condition.
|
70 |
+
pipeline_tag: sentence-similarity
|
71 |
+
library_name: sentence-transformers
|
72 |
+
---
|
73 |
+
|
74 |
+
# DisEmbed-v1 (fine-tuned from BAAI/bge-small-en-v1.5)
|
75 |
+
|
76 |
+
This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [BAAI/bge-small-en-v1.5](https://huggingface.co/BAAI/bge-small-en-v1.5). It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
|
77 |
+
|
78 |
+
## Model Details
|
79 |
+
|
80 |
+
### Model Description
|
81 |
+
- **Model Type:** Sentence Transformer
|
82 |
+
- **Base model:** [BAAI/bge-small-en-v1.5](https://huggingface.co/BAAI/bge-small-en-v1.5) <!-- at revision 5c38ec7c405ec4b44b94cc5a9bb96e735b38267a -->
|
83 |
+
- **Maximum Sequence Length:** 512 tokens
|
84 |
+
- **Output Dimensionality:** 384 dimensions
|
85 |
+
- **Similarity Function:** Cosine Similarity
|
86 |
+
<!-- - **Training Dataset:** Unknown -->
|
87 |
+
- **Language:** en
|
88 |
+
- **License:** mit
|
89 |
+
|
90 |
+
### Model Sources
|
91 |
+
|
92 |
+
- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
|
93 |
+
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
|
94 |
+
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
|
95 |
+
|
96 |
+
### Full Model Architecture
|
97 |
+
|
98 |
+
```
|
99 |
+
SentenceTransformer(
|
100 |
+
(0): Transformer({'max_seq_length': 512, 'do_lower_case': True}) with Transformer model: BertModel
|
101 |
+
(1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
|
102 |
+
(2): Normalize()
|
103 |
+
)
|
104 |
+
```
|
105 |
+
|
106 |
+
## Usage
|
107 |
+
|
108 |
+
### Direct Usage (Sentence Transformers)
|
109 |
+
|
110 |
+
First install the Sentence Transformers library:
|
111 |
+
|
112 |
+
```bash
|
113 |
+
pip install -U sentence-transformers
|
114 |
+
```
|
115 |
+
|
116 |
+
Then you can load this model and run inference.
|
117 |
+
```python
|
118 |
+
from sentence_transformers import SentenceTransformer
|
119 |
+
|
120 |
+
# Download from the 🤗 Hub
|
121 |
+
model = SentenceTransformer("SalmanFaroz/DisEmbed-v1")
|
122 |
+
# Run inference
|
123 |
+
sentences = [
|
124 |
+
'Disease Name : Tuberculosis, tubercular, tuberculous, brain',
|
125 |
+
'Symptoms often include headaches, confusion, and potential neurological deficits, along with systemic signs such as fever and fatigue. Patients may also experience seizures or altered consciousness, indicating a serious underlying condition.',
|
126 |
+
'Symptoms include cough, wheezing, and difficulty breathing, often exacerbated by exposure to specific environmental triggers such as air conditioning systems. Patients may also experience fever and malaise, with a history of allergic reactions or asthma. The condition can lead to significant respiratory distress if not managed appropriately.',
|
127 |
+
]
|
128 |
+
embeddings = model.encode(sentences)
|
129 |
+
print(embeddings.shape)
|
130 |
+
# (3, 384)
|
131 |
+
|
132 |
+
# Get the similarity scores for the embeddings
|
133 |
+
similarities = model.similarity(embeddings, embeddings)
|
134 |
+
print(similarities.shape)
|
135 |
+
# torch.Size([3, 3])
|
136 |
+
```
|
137 |
+
|
138 |
+
<!--
|
139 |
+
### Direct Usage (Transformers)
|
140 |
+
|
141 |
+
<details><summary>Click to see the direct usage in Transformers</summary>
|
142 |
+
|
143 |
+
</details>
|
144 |
+
-->
|
145 |
+
|
146 |
+
<!--
|
147 |
+
### Downstream Usage (Sentence Transformers)
|
148 |
+
|
149 |
+
You can finetune this model on your own dataset.
|
150 |
+
|
151 |
+
<details><summary>Click to expand</summary>
|
152 |
+
|
153 |
+
</details>
|
154 |
+
-->
|
155 |
+
|
156 |
+
<!--
|
157 |
+
### Out-of-Scope Use
|
158 |
+
|
159 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
160 |
+
-->
|
161 |
+
|
162 |
+
<!--
|
163 |
+
## Bias, Risks and Limitations
|
164 |
+
|
165 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
166 |
+
-->
|
167 |
+
|
168 |
+
<!--
|
169 |
+
### Recommendations
|
170 |
+
|
171 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
172 |
+
-->
|
173 |
+
|
174 |
+
## Training Details
|
175 |
+
|
176 |
+
### Training Dataset
|
177 |
+
|
178 |
+
#### Unnamed Dataset
|
179 |
+
|
180 |
+
|
181 |
+
* Size: 225,245 training samples
|
182 |
+
* Columns: <code>0</code> and <code>1</code>
|
183 |
+
* Approximate statistics based on the first 1000 samples:
|
184 |
+
| | 0 | 1 |
|
185 |
+
|:--------|:-----------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|
|
186 |
+
| type | string | string |
|
187 |
+
| details | <ul><li>min: 6 tokens</li><li>mean: 35.19 tokens</li><li>max: 347 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 34.92 tokens</li><li>max: 281 tokens</li></ul> |
|
188 |
+
* Samples:
|
189 |
+
| 0 | 1 |
|
190 |
+
|:--------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
191 |
+
| <code>Disease Name : Lymphadenitis, due to, diphtheria</code> | <code>This condition involves lymphadenitis due to diphtheria infection, leading to symptoms such as swelling, tenderness, and potential pain in the lymph nodes. Patients may experience systemic symptoms like fever and malaise, indicating an underlying issue that requires attention. Complications can arise if the condition is not managed properly.</code> |
|
192 |
+
| <code>nephropathy: kidney damage or disease; proteinuria: presence of excess protein in urine; edema: swelling due to fluid retention; ...</code> | <code>Disease Name : Nephropathy, phosphate-losing</code> |
|
193 |
+
| <code>Disease Name : Cyst, renal</code> | <code>Renal cysts can lead to symptoms such as flank pain, hematuria, and potential urinary obstruction. If these cysts become infected, they may present with fever, chills, and significant discomfort in the abdominal or back regions.</code> |
|
194 |
+
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
|
195 |
+
```json
|
196 |
+
{
|
197 |
+
"scale": 20.0,
|
198 |
+
"similarity_fct": "cos_sim"
|
199 |
+
}
|
200 |
+
```
|
201 |
+
|
202 |
+
### Training Hyperparameters
|
203 |
+
#### Non-Default Hyperparameters
|
204 |
+
|
205 |
+
- `per_device_train_batch_size`: 122
|
206 |
+
- `per_device_eval_batch_size`: 122
|
207 |
+
- `learning_rate`: 2e-05
|
208 |
+
- `num_train_epochs`: 1
|
209 |
+
- `warmup_ratio`: 0.1
|
210 |
+
- `fp16`: True
|
211 |
+
- `batch_sampler`: no_duplicates
|
212 |
+
|
213 |
+
#### All Hyperparameters
|
214 |
+
<details><summary>Click to expand</summary>
|
215 |
+
|
216 |
+
- `overwrite_output_dir`: False
|
217 |
+
- `do_predict`: False
|
218 |
+
- `eval_strategy`: no
|
219 |
+
- `prediction_loss_only`: True
|
220 |
+
- `per_device_train_batch_size`: 122
|
221 |
+
- `per_device_eval_batch_size`: 122
|
222 |
+
- `per_gpu_train_batch_size`: None
|
223 |
+
- `per_gpu_eval_batch_size`: None
|
224 |
+
- `gradient_accumulation_steps`: 1
|
225 |
+
- `eval_accumulation_steps`: None
|
226 |
+
- `torch_empty_cache_steps`: None
|
227 |
+
- `learning_rate`: 2e-05
|
228 |
+
- `weight_decay`: 0.0
|
229 |
+
- `adam_beta1`: 0.9
|
230 |
+
- `adam_beta2`: 0.999
|
231 |
+
- `adam_epsilon`: 1e-08
|
232 |
+
- `max_grad_norm`: 1.0
|
233 |
+
- `num_train_epochs`: 1
|
234 |
+
- `max_steps`: -1
|
235 |
+
- `lr_scheduler_type`: linear
|
236 |
+
- `lr_scheduler_kwargs`: {}
|
237 |
+
- `warmup_ratio`: 0.1
|
238 |
+
- `warmup_steps`: 0
|
239 |
+
- `log_level`: passive
|
240 |
+
- `log_level_replica`: warning
|
241 |
+
- `log_on_each_node`: True
|
242 |
+
- `logging_nan_inf_filter`: True
|
243 |
+
- `save_safetensors`: True
|
244 |
+
- `save_on_each_node`: False
|
245 |
+
- `save_only_model`: False
|
246 |
+
- `restore_callback_states_from_checkpoint`: False
|
247 |
+
- `no_cuda`: False
|
248 |
+
- `use_cpu`: False
|
249 |
+
- `use_mps_device`: False
|
250 |
+
- `seed`: 42
|
251 |
+
- `data_seed`: None
|
252 |
+
- `jit_mode_eval`: False
|
253 |
+
- `use_ipex`: False
|
254 |
+
- `bf16`: False
|
255 |
+
- `fp16`: True
|
256 |
+
- `fp16_opt_level`: O1
|
257 |
+
- `half_precision_backend`: auto
|
258 |
+
- `bf16_full_eval`: False
|
259 |
+
- `fp16_full_eval`: False
|
260 |
+
- `tf32`: None
|
261 |
+
- `local_rank`: 0
|
262 |
+
- `ddp_backend`: None
|
263 |
+
- `tpu_num_cores`: None
|
264 |
+
- `tpu_metrics_debug`: False
|
265 |
+
- `debug`: []
|
266 |
+
- `dataloader_drop_last`: False
|
267 |
+
- `dataloader_num_workers`: 0
|
268 |
+
- `dataloader_prefetch_factor`: None
|
269 |
+
- `past_index`: -1
|
270 |
+
- `disable_tqdm`: False
|
271 |
+
- `remove_unused_columns`: True
|
272 |
+
- `label_names`: None
|
273 |
+
- `load_best_model_at_end`: False
|
274 |
+
- `ignore_data_skip`: False
|
275 |
+
- `fsdp`: []
|
276 |
+
- `fsdp_min_num_params`: 0
|
277 |
+
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
278 |
+
- `fsdp_transformer_layer_cls_to_wrap`: None
|
279 |
+
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
280 |
+
- `deepspeed`: None
|
281 |
+
- `label_smoothing_factor`: 0.0
|
282 |
+
- `optim`: adamw_torch
|
283 |
+
- `optim_args`: None
|
284 |
+
- `adafactor`: False
|
285 |
+
- `group_by_length`: False
|
286 |
+
- `length_column_name`: length
|
287 |
+
- `ddp_find_unused_parameters`: None
|
288 |
+
- `ddp_bucket_cap_mb`: None
|
289 |
+
- `ddp_broadcast_buffers`: False
|
290 |
+
- `dataloader_pin_memory`: True
|
291 |
+
- `dataloader_persistent_workers`: False
|
292 |
+
- `skip_memory_metrics`: True
|
293 |
+
- `use_legacy_prediction_loop`: False
|
294 |
+
- `push_to_hub`: False
|
295 |
+
- `resume_from_checkpoint`: None
|
296 |
+
- `hub_model_id`: None
|
297 |
+
- `hub_strategy`: every_save
|
298 |
+
- `hub_private_repo`: None
|
299 |
+
- `hub_always_push`: False
|
300 |
+
- `gradient_checkpointing`: False
|
301 |
+
- `gradient_checkpointing_kwargs`: None
|
302 |
+
- `include_inputs_for_metrics`: False
|
303 |
+
- `include_for_metrics`: []
|
304 |
+
- `eval_do_concat_batches`: True
|
305 |
+
- `fp16_backend`: auto
|
306 |
+
- `push_to_hub_model_id`: None
|
307 |
+
- `push_to_hub_organization`: None
|
308 |
+
- `mp_parameters`:
|
309 |
+
- `auto_find_batch_size`: False
|
310 |
+
- `full_determinism`: False
|
311 |
+
- `torchdynamo`: None
|
312 |
+
- `ray_scope`: last
|
313 |
+
- `ddp_timeout`: 1800
|
314 |
+
- `torch_compile`: False
|
315 |
+
- `torch_compile_backend`: None
|
316 |
+
- `torch_compile_mode`: None
|
317 |
+
- `dispatch_batches`: None
|
318 |
+
- `split_batches`: None
|
319 |
+
- `include_tokens_per_second`: False
|
320 |
+
- `include_num_input_tokens_seen`: False
|
321 |
+
- `neftune_noise_alpha`: None
|
322 |
+
- `optim_target_modules`: None
|
323 |
+
- `batch_eval_metrics`: False
|
324 |
+
- `eval_on_start`: False
|
325 |
+
- `use_liger_kernel`: False
|
326 |
+
- `eval_use_gather_object`: False
|
327 |
+
- `average_tokens_across_devices`: False
|
328 |
+
- `prompts`: None
|
329 |
+
- `batch_sampler`: no_duplicates
|
330 |
+
- `multi_dataset_batch_sampler`: proportional
|
331 |
+
|
332 |
+
</details>
|
333 |
+
|
334 |
+
### Training Logs
|
335 |
+
| Epoch | Step | Training Loss |
|
336 |
+
|:------:|:----:|:-------------:|
|
337 |
+
| 0.0541 | 100 | 2.5621 |
|
338 |
+
| 0.1083 | 200 | 1.3308 |
|
339 |
+
| 0.1624 | 300 | 1.1403 |
|
340 |
+
| 0.2166 | 400 | 1.0506 |
|
341 |
+
| 0.2707 | 500 | 1.0135 |
|
342 |
+
| 0.3249 | 600 | 0.9443 |
|
343 |
+
| 0.3790 | 700 | 0.9412 |
|
344 |
+
| 0.4331 | 800 | 0.9095 |
|
345 |
+
| 0.4873 | 900 | 0.8945 |
|
346 |
+
| 0.5414 | 1000 | 0.8533 |
|
347 |
+
| 0.5956 | 1100 | 0.8601 |
|
348 |
+
| 0.6497 | 1200 | 0.8425 |
|
349 |
+
| 0.7038 | 1300 | 0.2919 |
|
350 |
+
| 0.7580 | 1400 | 0.0249 |
|
351 |
+
| 0.8121 | 1500 | 0.0231 |
|
352 |
+
| 0.8663 | 1600 | 0.0182 |
|
353 |
+
| 0.9204 | 1700 | 0.0206 |
|
354 |
+
| 0.9746 | 1800 | 0.0206 |
|
355 |
+
| 0.0541 | 100 | 0.8606 |
|
356 |
+
| 0.1083 | 200 | 0.7361 |
|
357 |
+
| 0.1624 | 300 | 0.6648 |
|
358 |
+
| 0.2166 | 400 | 0.6506 |
|
359 |
+
| 0.2707 | 500 | 0.6502 |
|
360 |
+
| 0.3249 | 600 | 0.6249 |
|
361 |
+
| 0.3790 | 700 | 0.6473 |
|
362 |
+
| 0.4331 | 800 | 0.6391 |
|
363 |
+
| 0.4873 | 900 | 0.6474 |
|
364 |
+
| 0.5414 | 1000 | 0.6316 |
|
365 |
+
| 0.5956 | 1100 | 0.6543 |
|
366 |
+
| 0.6497 | 1200 | 0.6493 |
|
367 |
+
| 0.7038 | 1300 | 0.2173 |
|
368 |
+
| 0.7580 | 1400 | 0.0135 |
|
369 |
+
| 0.8121 | 1500 | 0.0149 |
|
370 |
+
| 0.8663 | 1600 | 0.0128 |
|
371 |
+
| 0.9204 | 1700 | 0.0158 |
|
372 |
+
| 0.9746 | 1800 | 0.0169 |
|
373 |
+
| 0.0541 | 100 | 0.6698 |
|
374 |
+
| 0.1083 | 200 | 0.5107 |
|
375 |
+
| 0.1624 | 300 | 0.4378 |
|
376 |
+
| 0.2166 | 400 | 0.4408 |
|
377 |
+
| 0.2707 | 500 | 0.4452 |
|
378 |
+
| 0.3249 | 600 | 0.4391 |
|
379 |
+
| 0.3790 | 700 | 0.4672 |
|
380 |
+
| 0.4331 | 800 | 0.4712 |
|
381 |
+
| 0.4873 | 900 | 0.489 |
|
382 |
+
| 0.5414 | 1000 | 0.4878 |
|
383 |
+
| 0.5956 | 1100 | 0.5196 |
|
384 |
+
| 0.6497 | 1200 | 0.5245 |
|
385 |
+
| 0.7038 | 1300 | 0.1768 |
|
386 |
+
| 0.7580 | 1400 | 0.0091 |
|
387 |
+
| 0.8121 | 1500 | 0.0107 |
|
388 |
+
| 0.8663 | 1600 | 0.0099 |
|
389 |
+
| 0.9204 | 1700 | 0.0127 |
|
390 |
+
| 0.9746 | 1800 | 0.0147 |
|
391 |
+
| 0.0541 | 100 | 0.5605 |
|
392 |
+
| 0.1083 | 200 | 0.3476 |
|
393 |
+
| 0.1624 | 300 | 0.2772 |
|
394 |
+
| 0.2166 | 400 | 0.2862 |
|
395 |
+
| 0.2707 | 500 | 0.2937 |
|
396 |
+
| 0.3249 | 600 | 0.2983 |
|
397 |
+
| 0.3790 | 700 | 0.3293 |
|
398 |
+
| 0.4331 | 800 | 0.3421 |
|
399 |
+
| 0.4873 | 900 | 0.3634 |
|
400 |
+
| 0.5414 | 1000 | 0.3732 |
|
401 |
+
| 0.5956 | 1100 | 0.4125 |
|
402 |
+
| 0.6497 | 1200 | 0.4266 |
|
403 |
+
| 0.7038 | 1300 | 0.1474 |
|
404 |
+
| 0.7580 | 1400 | 0.007 |
|
405 |
+
| 0.8121 | 1500 | 0.0081 |
|
406 |
+
| 0.8663 | 1600 | 0.0079 |
|
407 |
+
| 0.9204 | 1700 | 0.0104 |
|
408 |
+
| 0.9746 | 1800 | 0.0132 |
|
409 |
+
|
410 |
+
|
411 |
+
### Framework Versions
|
412 |
+
- Python: 3.10.12
|
413 |
+
- Sentence Transformers: 3.3.1
|
414 |
+
- Transformers: 4.47.0
|
415 |
+
- PyTorch: 2.1.0+cu118
|
416 |
+
- Accelerate: 1.2.1
|
417 |
+
- Datasets: 3.2.0
|
418 |
+
- Tokenizers: 0.21.0
|
419 |
+
|
420 |
+
## Citation
|
421 |
+
|
422 |
+
### BibTeX
|
423 |
+
|
424 |
+
#### Sentence Transformers
|
425 |
+
```bibtex
|
426 |
+
@inproceedings{reimers-2019-sentence-bert,
|
427 |
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
428 |
+
author = "Reimers, Nils and Gurevych, Iryna",
|
429 |
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
430 |
+
month = "11",
|
431 |
+
year = "2019",
|
432 |
+
publisher = "Association for Computational Linguistics",
|
433 |
+
url = "https://arxiv.org/abs/1908.10084",
|
434 |
+
}
|
435 |
+
```
|
436 |
+
|
437 |
+
#### MultipleNegativesRankingLoss
|
438 |
+
```bibtex
|
439 |
+
@misc{henderson2017efficient,
|
440 |
+
title={Efficient Natural Language Response Suggestion for Smart Reply},
|
441 |
+
author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
|
442 |
+
year={2017},
|
443 |
+
eprint={1705.00652},
|
444 |
+
archivePrefix={arXiv},
|
445 |
+
primaryClass={cs.CL}
|
446 |
+
}
|
447 |
+
```
|
448 |
+
|
449 |
+
<!--
|
450 |
+
## Glossary
|
451 |
+
|
452 |
+
*Clearly define terms in order to be accessible across audiences.*
|
453 |
+
-->
|
454 |
+
|
455 |
+
<!--
|
456 |
+
## Model Card Authors
|
457 |
+
|
458 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
459 |
+
-->
|
460 |
+
|
461 |
+
<!--
|
462 |
+
## Model Card Contact
|
463 |
+
|
464 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
465 |
+
-->
|
config.json
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "SalmanFaroz/DisEmbed-v1",
|
3 |
+
"architectures": [
|
4 |
+
"BertModel"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"classifier_dropout": null,
|
8 |
+
"hidden_act": "gelu",
|
9 |
+
"hidden_dropout_prob": 0.1,
|
10 |
+
"hidden_size": 384,
|
11 |
+
"id2label": {
|
12 |
+
"0": "LABEL_0"
|
13 |
+
},
|
14 |
+
"initializer_range": 0.02,
|
15 |
+
"intermediate_size": 1536,
|
16 |
+
"label2id": {
|
17 |
+
"LABEL_0": 0
|
18 |
+
},
|
19 |
+
"layer_norm_eps": 1e-12,
|
20 |
+
"max_position_embeddings": 512,
|
21 |
+
"model_type": "bert",
|
22 |
+
"num_attention_heads": 12,
|
23 |
+
"num_hidden_layers": 12,
|
24 |
+
"pad_token_id": 0,
|
25 |
+
"position_embedding_type": "absolute",
|
26 |
+
"torch_dtype": "float32",
|
27 |
+
"transformers_version": "4.46.3",
|
28 |
+
"type_vocab_size": 2,
|
29 |
+
"use_cache": true,
|
30 |
+
"vocab_size": 30522
|
31 |
+
}
|
config_sentence_transformers.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"__version__": {
|
3 |
+
"sentence_transformers": "3.3.1",
|
4 |
+
"transformers": "4.46.3",
|
5 |
+
"pytorch": "2.5.1+cu124"
|
6 |
+
},
|
7 |
+
"prompts": {},
|
8 |
+
"default_prompt_name": null,
|
9 |
+
"similarity_fn_name": "cosine"
|
10 |
+
}
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:23cee5731f4992a1a26049fe68cb44b9581f0b64bc76c962b1e38db27795057d
|
3 |
+
size 133462128
|
modules.json
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"idx": 0,
|
4 |
+
"name": "0",
|
5 |
+
"path": "",
|
6 |
+
"type": "sentence_transformers.models.Transformer"
|
7 |
+
},
|
8 |
+
{
|
9 |
+
"idx": 1,
|
10 |
+
"name": "1",
|
11 |
+
"path": "1_Pooling",
|
12 |
+
"type": "sentence_transformers.models.Pooling"
|
13 |
+
},
|
14 |
+
{
|
15 |
+
"idx": 2,
|
16 |
+
"name": "2",
|
17 |
+
"path": "2_Normalize",
|
18 |
+
"type": "sentence_transformers.models.Normalize"
|
19 |
+
}
|
20 |
+
]
|
sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"max_seq_length": 512,
|
3 |
+
"do_lower_case": true
|
4 |
+
}
|
special_tokens_map.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": {
|
3 |
+
"content": "[CLS]",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"mask_token": {
|
10 |
+
"content": "[MASK]",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": {
|
17 |
+
"content": "[PAD]",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": false,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
},
|
23 |
+
"sep_token": {
|
24 |
+
"content": "[SEP]",
|
25 |
+
"lstrip": false,
|
26 |
+
"normalized": false,
|
27 |
+
"rstrip": false,
|
28 |
+
"single_word": false
|
29 |
+
},
|
30 |
+
"unk_token": {
|
31 |
+
"content": "[UNK]",
|
32 |
+
"lstrip": false,
|
33 |
+
"normalized": false,
|
34 |
+
"rstrip": false,
|
35 |
+
"single_word": false
|
36 |
+
}
|
37 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "[PAD]",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"100": {
|
12 |
+
"content": "[UNK]",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"101": {
|
20 |
+
"content": "[CLS]",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"102": {
|
28 |
+
"content": "[SEP]",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"103": {
|
36 |
+
"content": "[MASK]",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"clean_up_tokenization_spaces": true,
|
45 |
+
"cls_token": "[CLS]",
|
46 |
+
"do_basic_tokenize": true,
|
47 |
+
"do_lower_case": true,
|
48 |
+
"extra_special_tokens": {},
|
49 |
+
"mask_token": "[MASK]",
|
50 |
+
"max_length": 512,
|
51 |
+
"model_max_length": 512,
|
52 |
+
"never_split": null,
|
53 |
+
"pad_to_multiple_of": null,
|
54 |
+
"pad_token": "[PAD]",
|
55 |
+
"pad_token_type_id": 0,
|
56 |
+
"padding_side": "right",
|
57 |
+
"sep_token": "[SEP]",
|
58 |
+
"stride": 0,
|
59 |
+
"strip_accents": null,
|
60 |
+
"tokenize_chinese_chars": true,
|
61 |
+
"tokenizer_class": "BertTokenizer",
|
62 |
+
"truncation_side": "right",
|
63 |
+
"truncation_strategy": "longest_first",
|
64 |
+
"unk_token": "[UNK]"
|
65 |
+
}
|
vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|