Add new SentenceTransformer model.
- 1_Pooling/config.json +10 -0
- README.md +467 -0
- added_tokens.json +5 -0
- config.json +33 -0
- config_sentence_transformers.json +12 -0
- merges.txt +0 -0
- model.safetensors +3 -0
- modules.json +20 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +20 -0
- tokenizer.json +0 -0
- tokenizer_config.json +57 -0
- vocab.json +0 -0
1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
{
    "word_embedding_dimension": 1536,
    "pooling_mode_cls_token": false,
    "pooling_mode_mean_tokens": false,
    "pooling_mode_max_tokens": false,
    "pooling_mode_mean_sqrt_len_tokens": false,
    "pooling_mode_weightedmean_tokens": false,
    "pooling_mode_lasttoken": true,
    "include_prompt": true
}
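For reference, `pooling_mode_lasttoken: true` means the sentence embedding is the hidden state of the final non-padding token rather than a mean over all tokens. A minimal sketch of that operation, assuming right-padded batches (the function and variable names here are illustrative, not from this repo):

```python
import torch

def last_token_pool(token_embeddings: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
    # token_embeddings: [batch, seq_len, 1536]; attention_mask: [batch, seq_len]
    # With right padding (cf. "padding_side": "right" in tokenizer_config.json),
    # the last real token of each row sits at index sum(mask) - 1.
    last_idx = attention_mask.sum(dim=1) - 1
    batch_idx = torch.arange(token_embeddings.size(0), device=token_embeddings.device)
    return token_embeddings[batch_idx, last_idx]  # [batch, 1536]
```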
README.md
ADDED
@@ -0,0 +1,467 @@
---
base_model: Qwen/Qwen2-1.5B-instruct
datasets: []
language: []
library_name: sentence-transformers
pipeline_tag: sentence-similarity
tags:
- sentence-transformers
- sentence-similarity
- feature-extraction
- generated_from_trainer
- dataset_size:103114
- loss:CoSENTLoss
---

# SentenceTransformer based on Qwen/Qwen2-1.5B-instruct

This is a [sentence-transformers](https://www.SBERT.net) model finetuned from Qwen/Qwen2-1.5B-instruct. It maps sentences & paragraphs to a 1536-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.

## Model Details

### Model Description
- **Model Type:** Sentence Transformer
- **Base model:** Qwen/Qwen2-1.5B-instruct
- **Maximum Sequence Length:** 32768 tokens
- **Output Dimensionality:** 1536 dimensions
- **Similarity Function:** Cosine Similarity
<!-- - **Training Dataset:** Unknown -->
<!-- - **Language:** Unknown -->
<!-- - **License:** Unknown -->

### Model Sources

- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)

### Full Model Architecture

```
SentenceTransformer(
  (0): Transformer({'max_seq_length': 32768, 'do_lower_case': False}) with Transformer model: Qwen2Model
  (1): Pooling({'word_embedding_dimension': 1536, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': True, 'include_prompt': True})
  (2): Normalize()
)
```

## Usage

### Direct Usage (Sentence Transformers)

First install the Sentence Transformers library:

```bash
pip install -U sentence-transformers
```

Then you can load this model and run inference.
```python
from sentence_transformers import SentenceTransformer

# Download from the 🤗 Hub
model = SentenceTransformer("asbabiy/crm-mail-embedder-cosent")
# Run inference
sentences = [
    'Mail Queue: ratehawk-b2b\nMail From: aa3b09f5a33cf090e29667bf72936a77@travelclub.ae\nMail To: support@ratehawk.com\n\nMail Subject: Ticket Closed - URGENT : Reconfirmation & HCN for ATS160057 : 139201464/Check-in date - 12 Mar 2024\n\nMail Body:\n"""\nDear Support, Your ticket - URGENT : Reconfirmation & HCN for ATS160057 : 139201464/Check-in date - 12 Mar 2024 - has been closed. We hope that the ticket was resolved to your satisfaction. If you feel that the ticket should not be closed or if the ticket has not been resolved, please reply to this email. Sincerely, Travelclub Support Team https://blue7tech-help.freshdesk.com/helpdesk/tickets/63824\n"""',
    "Email category: 'TPP -- Auto template'. Email category description: 'This is an automated email from the supplier acknowledging receipt of a previous communication or providing a status update on a pending request without any specific update on the request. It solely includes a phrase indicating that the request has been acknowledged. Such emails may contain messages such as: information that the request has been taken or in process; that the ticket for the request has been created; that it is a holiday and the office hours have changed; that the company's working hours have been adjusted; that a number has been assigned to the request and updates will be provided once available; that the information has been received and transffered to the guest or hotel; or that they will contact us shortly. Also this can be message from any of our supplier stating that our account recently attempted to log in from New Browser. The purpose of this email is to let you know that your message has been received and is being handled.Email lacks personalized details specific to the recipient's situation or references to a unique order or request, which may indicate it is a generic automated response. Auto-emails are often rich with html formatting, tabular data and have a lot of tags or links.'",
    "Email category: 'TPP -- Additional request of arrival time'. Email category description: 'A request from the supplier asking for the client to provide the exact or approximate check-in/arrival time as this is requested by the hotel due to different reasons. For example, the hotel does not have 24 hour reception and for this reason is asking for the arrival time. Information about the check-in helps the hotel better prepare for the guest's arrival and plan the schedule of the hotel staff.'",
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 1536]

# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]
```

<!--
### Direct Usage (Transformers)

<details><summary>Click to see the direct usage in Transformers</summary>

</details>
-->

<!--
### Downstream Usage (Sentence Transformers)

You can finetune this model on your own dataset.

<details><summary>Click to expand</summary>

</details>
-->

<!--
### Out-of-Scope Use

*List how the model may foreseeably be misused and address what users ought not to do with the model.*
-->

<!--
## Bias, Risks and Limitations

*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
-->

<!--
### Recommendations

*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
-->

### Training Hyperparameters
#### Non-Default Hyperparameters

- `eval_strategy`: steps
- `per_device_train_batch_size`: 4
- `per_device_eval_batch_size`: 4
- `gradient_accumulation_steps`: 16
- `learning_rate`: 1e-05
- `num_train_epochs`: 1
- `warmup_ratio`: 0.1
- `bf16`: True
- `load_best_model_at_end`: True

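For context, these non-default values map directly onto the `SentenceTransformerTrainer` API from Sentence Transformers 3.x. Below is a hedged sketch of a comparable setup, not the exact training script: the dataset, its column names, and the output directory are placeholders, and the actual run also used the last-token pooling head recorded in `modules.json`.

```python
from datasets import Dataset
from sentence_transformers import (
    SentenceTransformer,
    SentenceTransformerTrainer,
    SentenceTransformerTrainingArguments,
)
from sentence_transformers.losses import CoSENTLoss

model = SentenceTransformer("Qwen/Qwen2-1.5B-instruct")  # base model named on this card

# Placeholder pairs: CoSENTLoss expects two texts plus a similarity score.
train_dataset = Dataset.from_dict({
    "sentence1": ["Mail Subject: ..."],
    "sentence2": ["Email category: ..."],
    "score": [1.0],
})

args = SentenceTransformerTrainingArguments(
    output_dir="crm-mail-embedder-cosent",  # placeholder
    eval_strategy="steps",
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=16,
    learning_rate=1e-5,
    num_train_epochs=1,
    warmup_ratio=0.1,
    bf16=True,
    load_best_model_at_end=True,
)

trainer = SentenceTransformerTrainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    eval_dataset=train_dataset,  # placeholder; a real run needs a held-out split
    loss=CoSENTLoss(model),
)
trainer.train()
```
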
#### All Hyperparameters
<details><summary>Click to expand</summary>

- `overwrite_output_dir`: False
- `do_predict`: False
- `eval_strategy`: steps
- `prediction_loss_only`: True
- `per_device_train_batch_size`: 4
- `per_device_eval_batch_size`: 4
- `per_gpu_train_batch_size`: None
- `per_gpu_eval_batch_size`: None
- `gradient_accumulation_steps`: 16
- `eval_accumulation_steps`: None
- `torch_empty_cache_steps`: None
- `learning_rate`: 1e-05
- `weight_decay`: 0.0
- `adam_beta1`: 0.9
- `adam_beta2`: 0.999
- `adam_epsilon`: 1e-08
- `max_grad_norm`: 1.0
- `num_train_epochs`: 1
- `max_steps`: -1
- `lr_scheduler_type`: linear
- `lr_scheduler_kwargs`: {}
- `warmup_ratio`: 0.1
- `warmup_steps`: 0
- `log_level`: passive
- `log_level_replica`: warning
- `log_on_each_node`: True
- `logging_nan_inf_filter`: True
- `save_safetensors`: True
- `save_on_each_node`: False
- `save_only_model`: False
- `restore_callback_states_from_checkpoint`: False
- `no_cuda`: False
- `use_cpu`: False
- `use_mps_device`: False
- `seed`: 42
- `data_seed`: None
- `jit_mode_eval`: False
- `use_ipex`: False
- `bf16`: True
- `fp16`: False
- `fp16_opt_level`: O1
- `half_precision_backend`: auto
- `bf16_full_eval`: False
- `fp16_full_eval`: False
- `tf32`: None
- `local_rank`: 0
- `ddp_backend`: None
- `tpu_num_cores`: None
- `tpu_metrics_debug`: False
- `debug`: []
- `dataloader_drop_last`: False
- `dataloader_num_workers`: 0
- `dataloader_prefetch_factor`: None
- `past_index`: -1
- `disable_tqdm`: False
- `remove_unused_columns`: True
- `label_names`: None
- `load_best_model_at_end`: True
- `ignore_data_skip`: False
- `fsdp`: []
- `fsdp_min_num_params`: 0
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
- `fsdp_transformer_layer_cls_to_wrap`: None
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
- `deepspeed`: None
- `label_smoothing_factor`: 0.0
- `optim`: adamw_torch
- `optim_args`: None
- `adafactor`: False
- `group_by_length`: False
- `length_column_name`: length
- `ddp_find_unused_parameters`: None
- `ddp_bucket_cap_mb`: None
- `ddp_broadcast_buffers`: False
- `dataloader_pin_memory`: True
- `dataloader_persistent_workers`: False
- `skip_memory_metrics`: True
- `use_legacy_prediction_loop`: False
- `push_to_hub`: False
- `resume_from_checkpoint`: None
- `hub_model_id`: None
- `hub_strategy`: every_save
- `hub_private_repo`: False
- `hub_always_push`: False
- `gradient_checkpointing`: False
- `gradient_checkpointing_kwargs`: None
- `include_inputs_for_metrics`: False
- `eval_do_concat_batches`: True
- `fp16_backend`: auto
- `push_to_hub_model_id`: None
- `push_to_hub_organization`: None
- `mp_parameters`: 
- `auto_find_batch_size`: False
- `full_determinism`: False
- `torchdynamo`: None
- `ray_scope`: last
- `ddp_timeout`: 1800
- `torch_compile`: False
- `torch_compile_backend`: None
- `torch_compile_mode`: None
- `dispatch_batches`: None
- `split_batches`: None
- `include_tokens_per_second`: False
- `include_num_input_tokens_seen`: False
- `neftune_noise_alpha`: None
- `optim_target_modules`: None
- `batch_eval_metrics`: False
- `eval_on_start`: False
- `eval_use_gather_object`: False
- `batch_sampler`: batch_sampler
- `multi_dataset_batch_sampler`: proportional

</details>

### Training Logs
<details><summary>Click to expand</summary>

| Epoch  | Step | Training Loss | Validation Loss |
|:------:|:----:|:-------------:|:---------------:|
| 0.0031 | 5    | 1.8139        | -               |
| 0.0062 | 10   | 1.699         | -               |
| 0.0093 | 15   | 1.6467        | -               |
| 0.0124 | 20   | 1.7853        | -               |
| 0.0155 | 25   | 1.7918        | -               |
| 0.0186 | 30   | 1.9042        | -               |
| 0.0217 | 35   | 1.7087        | -               |
| 0.0248 | 40   | 1.7143        | -               |
| 0.0279 | 45   | 1.7357        | -               |
| 0.0310 | 50   | 1.5956        | 1.6129          |
| 0.0341 | 55   | 1.7191        | -               |
| 0.0372 | 60   | 1.5434        | -               |
| 0.0403 | 65   | 1.6527        | -               |
| 0.0434 | 70   | 1.6267        | -               |
| 0.0465 | 75   | 1.5512        | -               |
| 0.0497 | 80   | 1.4611        | -               |
| 0.0528 | 85   | 1.49          | -               |
| 0.0559 | 90   | 1.4336        | -               |
| 0.0590 | 95   | 1.3646        | -               |
| 0.0621 | 100  | 1.5523        | 1.4122          |
| 0.0652 | 105  | 1.4359        | -               |
| 0.0683 | 110  | 1.4459        | -               |
| 0.0714 | 115  | 1.4872        | -               |
| 0.0745 | 120  | 1.3775        | -               |
| 0.0776 | 125  | 1.3807        | -               |
| 0.0807 | 130  | 1.3692        | -               |
| 0.0838 | 135  | 1.3156        | -               |
| 0.0869 | 140  | 1.328         | -               |
| 0.0900 | 145  | 1.5123        | -               |
| 0.0931 | 150  | 1.4037        | 1.3554          |
| 0.0962 | 155  | 1.4797        | -               |
| 0.0993 | 160  | 1.4434        | -               |
| 0.1024 | 165  | 1.3876        | -               |
| 0.1055 | 170  | 1.3611        | -               |
| 0.1086 | 175  | 1.3986        | -               |
| 0.1117 | 180  | 1.3135        | -               |
| 0.1148 | 185  | 1.3268        | -               |
| 0.1179 | 190  | 1.2853        | -               |
| 0.1210 | 195  | 1.3606        | -               |
| 0.1241 | 200  | 1.4254        | 1.3225          |
| 0.1272 | 205  | 1.3152        | -               |
| 0.1303 | 210  | 1.3482        | -               |
| 0.1334 | 215  | 1.347         | -               |
| 0.1365 | 220  | 1.3722        | -               |
| 0.1396 | 225  | 1.3877        | -               |
| 0.1428 | 230  | 1.3635        | -               |
| 0.1459 | 235  | 1.4738        | -               |
| 0.1490 | 240  | 1.4063        | -               |
| 0.1521 | 245  | 1.3481        | -               |
| 0.1552 | 250  | 1.3221        | 1.2848          |
| 0.1583 | 255  | 1.1117        | -               |
| 0.1614 | 260  | 1.33          | -               |
| 0.1645 | 265  | 1.3461        | -               |
| 0.1676 | 270  | 1.2067        | -               |
| 0.1707 | 275  | 1.3238        | -               |
| 0.1738 | 280  | 1.4214        | -               |
| 0.1769 | 285  | 1.3172        | -               |
| 0.1800 | 290  | 1.2829        | -               |
| 0.1831 | 295  | 1.3561        | -               |
| 0.1862 | 300  | 1.2153        | 1.2869          |
| 0.1893 | 305  | 1.3482        | -               |
| 0.1924 | 310  | 1.4491        | -               |
| 0.1955 | 315  | 1.296         | -               |
| 0.1986 | 320  | 1.5481        | -               |
| 0.2017 | 325  | 1.3483        | -               |
| 0.2048 | 330  | 1.2984        | -               |
| 0.2079 | 335  | 1.2619        | -               |
| 0.2110 | 340  | 1.2424        | -               |
| 0.2141 | 345  | 1.3138        | -               |
| 0.2172 | 350  | 1.4771        | 1.2831          |
| 0.2203 | 355  | 1.4589        | -               |
| 0.2234 | 360  | 1.2647        | -               |
| 0.2265 | 365  | 1.3268        | -               |
| 0.2296 | 370  | 1.2185        | -               |
| 0.2327 | 375  | 1.2264        | -               |
| 0.2359 | 380  | 1.4256        | -               |
| 0.2390 | 385  | 1.5409        | -               |
| 0.2421 | 390  | 1.3106        | -               |
| 0.2452 | 395  | 1.3129        | -               |
| 0.2483 | 400  | 1.4063        | 1.2688          |
| 0.2514 | 405  | 1.1013        | -               |
| 0.2545 | 410  | 1.3415        | -               |
| 0.2576 | 415  | 1.4586        | -               |
| 0.2607 | 420  | 1.2412        | -               |
| 0.2638 | 425  | 1.3019        | -               |
| 0.2669 | 430  | 1.2388        | -               |
| 0.2700 | 435  | 1.3902        | -               |
| 0.2731 | 440  | 1.3822        | -               |
| 0.2762 | 445  | 1.2138        | -               |
| 0.2793 | 450  | 1.4039        | 1.2490          |
| 0.2824 | 455  | 1.1758        | -               |
| 0.2855 | 460  | 1.306         | -               |
| 0.2886 | 465  | 1.4698        | -               |
| 0.2917 | 470  | 1.2116        | -               |
| 0.2948 | 475  | 1.2531        | -               |
| 0.2979 | 480  | 1.3357        | -               |
| 0.3010 | 485  | 1.1919        | -               |
| 0.3041 | 490  | 1.3818        | -               |
| 0.3072 | 495  | 1.2979        | -               |
| 0.3103 | 500  | 1.2832        | 1.2466          |
| 0.3134 | 505  | 1.1689        | -               |
| 0.3165 | 510  | 1.2198        | -               |
| 0.3196 | 515  | 1.2775        | -               |
| 0.3227 | 520  | 1.1344        | -               |
| 0.3258 | 525  | 1.4492        | -               |
| 0.3289 | 530  | 1.2328        | -               |
| 0.3321 | 535  | 1.3306        | -               |
| 0.3352 | 540  | 1.1076        | -               |
| 0.3383 | 545  | 1.285         | -               |
| 0.3414 | 550  | 1.2523        | 1.2435          |
| 0.3445 | 555  | 1.1712        | -               |
| 0.3476 | 560  | 1.4021        | -               |
| 0.3507 | 565  | 1.3476        | -               |
| 0.3538 | 570  | 1.1485        | -               |
| 0.3569 | 575  | 1.2621        | -               |
| 0.3600 | 580  | 1.2829        | -               |
| 0.3631 | 585  | 1.274         | -               |
| 0.3662 | 590  | 1.2649        | -               |
| 0.3693 | 595  | 1.2262        | -               |
| 0.3724 | 600  | 1.1743        | 1.2378          |
| 0.3755 | 605  | 1.1773        | -               |
| 0.3786 | 610  | 1.1977        | -               |
| 0.3817 | 615  | 1.3976        | -               |
| 0.3848 | 620  | 1.1817        | -               |
| 0.3879 | 625  | 1.1928        | -               |
| 0.3910 | 630  | 1.2338        | -               |
| 0.3941 | 635  | 1.1803        | -               |
| 0.3972 | 640  | 1.3811        | -               |
| 0.4003 | 645  | 1.3125        | -               |
| 0.4034 | 650  | 1.1878        | 1.2311          |
| 0.4065 | 655  | 1.4805        | -               |
| 0.4096 | 660  | 1.1262        | -               |
| 0.4127 | 665  | 1.1919        | -               |
| 0.4158 | 670  | 1.2076        | -               |
| 0.4189 | 675  | 1.2401        | -               |
| 0.4220 | 680  | 1.3019        | -               |
| 0.4252 | 685  | 1.3285        | -               |
| 0.4283 | 690  | 1.1257        | -               |
| 0.4314 | 695  | 1.2628        | -               |
| 0.4345 | 700  | 1.1846        | 1.2354          |
| 0.4376 | 705  | 1.0939        | -               |
| 0.4407 | 710  | 1.2502        | -               |
| 0.4438 | 715  | 1.3645        | -               |
| 0.4469 | 720  | 1.2408        | -               |
| 0.4500 | 725  | 1.3127        | -               |
| 0.4531 | 730  | 1.2795        | -               |
| 0.4562 | 735  | 1.3127        | -               |
| 0.4593 | 740  | 1.2164        | -               |
| 0.4624 | 745  | 1.2942        | -               |
| 0.4655 | 750  | 1.1968        | 1.2342          |
| 0.4686 | 755  | 1.2426        | -               |
| 0.4717 | 760  | 1.2269        | -               |
| 0.4748 | 765  | 1.3602        | -               |
| 0.4779 | 770  | 1.2335        | -               |
| 0.4810 | 775  | 1.3015        | -               |
| 0.4841 | 780  | 1.1144        | -               |
| 0.4872 | 785  | 1.3083        | -               |
| 0.4903 | 790  | 1.273         | -               |
| 0.4934 | 795  | 1.1784        | -               |
| 0.4965 | 800  | 1.204         | 1.2348          |

</details>

### Framework Versions
- Python: 3.10.12
- Sentence Transformers: 3.0.1
- Transformers: 4.44.0
- PyTorch: 2.2.0+cu121
- Accelerate: 0.33.0
- Datasets: 2.20.0
- Tokenizers: 0.19.1

## Citation

### BibTeX

#### Sentence Transformers
```bibtex
@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}
```

#### CoSENTLoss
```bibtex
@online{kexuefm-8847,
    title={CoSENT: A more efficient sentence vector scheme than Sentence-BERT},
    author={Su Jianlin},
    year={2022},
    month={Jan},
    url={https://kexue.fm/archives/8847},
}
```

<!--
## Glossary

*Clearly define terms in order to be accessible across audiences.*
-->

<!--
## Model Card Authors

*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
-->

<!--
## Model Card Contact

*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
-->
added_tokens.json
ADDED
@@ -0,0 +1,5 @@
{
    "<|endoftext|>": 151643,
    "<|im_end|>": 151645,
    "<|im_start|>": 151644
}
config.json
ADDED
@@ -0,0 +1,33 @@
{
    "_name_or_path": "asbabiy/crm-mail-embedder-v4-cosent",
    "architectures": [
        "Qwen2Model"
    ],
    "attention_dropout": 0.0,
    "auto_map": {
        "AutoModel": "Alibaba-NLP/gte-Qwen2-1.5B-instruct--modeling_qwen.Qwen2Model",
        "AutoModelForCausalLM": "Alibaba-NLP/gte-Qwen2-1.5B-instruct--modeling_qwen.Qwen2ForCausalLM",
        "AutoModelForSequenceClassification": "Alibaba-NLP/gte-Qwen2-1.5B-instruct--modeling_qwen.Qwen2ForSequenceClassification"
    },
    "bos_token_id": 151643,
    "eos_token_id": 151643,
    "hidden_act": "silu",
    "hidden_size": 1536,
    "initializer_range": 0.02,
    "intermediate_size": 8960,
    "max_position_embeddings": 131072,
    "max_window_layers": 21,
    "model_type": "qwen2",
    "num_attention_heads": 12,
    "num_hidden_layers": 28,
    "num_key_value_heads": 2,
    "rms_norm_eps": 1e-06,
    "rope_theta": 1000000.0,
    "sliding_window": null,
    "tie_word_embeddings": false,
    "torch_dtype": "bfloat16",
    "transformers_version": "4.44.0",
    "use_cache": true,
    "use_sliding_window": false,
    "vocab_size": 151646
}
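Note that `auto_map` points at custom modeling code hosted in `Alibaba-NLP/gte-Qwen2-1.5B-instruct`, so loading this checkpoint through plain `transformers` (rather than `sentence-transformers`) needs `trust_remote_code=True`. A minimal sketch, using the repo id from the README usage example:

```python
from transformers import AutoModel, AutoTokenizer

repo_id = "asbabiy/crm-mail-embedder-cosent"  # as in the README usage example
tokenizer = AutoTokenizer.from_pretrained(repo_id, trust_remote_code=True)
model = AutoModel.from_pretrained(repo_id, trust_remote_code=True)  # follows auto_map above
print(model.config.hidden_size)  # 1536, the embedding width used by the pooling layer
```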
config_sentence_transformers.json
ADDED
@@ -0,0 +1,12 @@
{
    "__version__": {
        "sentence_transformers": "3.0.1",
        "transformers": "4.44.0",
        "pytorch": "2.1.0+cu118"
    },
    "prompts": {
        "mail_reason": "Instruct: Given an email, retrieve relevant email categories that describe email contents.\nQuery: "
    },
    "default_prompt_name": "mail_reason",
    "similarity_fn_name": "cosine"
}
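Since `default_prompt_name` is set, `SentenceTransformer.encode` prepends the `mail_reason` instruction automatically; it can also be selected explicitly via `prompt_name`. A short sketch:

```python
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("asbabiy/crm-mail-embedder-cosent")

# The "mail_reason" prompt above is applied by default;
# passing prompt_name simply makes the choice explicit.
emb = model.encode(["Mail Subject: Ticket Closed ..."], prompt_name="mail_reason")
print(emb.shape)  # (1, 1536)
```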
merges.txt
ADDED
The diff for this file is too large to render.
model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:fc86c2bc06b257a1b8443b7cec87e28ecce3b5c3c744d680d0203faf57087523
size 3086574240
modules.json
ADDED
@@ -0,0 +1,20 @@
[
    {
        "idx": 0,
        "name": "0",
        "path": "",
        "type": "sentence_transformers.models.Transformer"
    },
    {
        "idx": 1,
        "name": "1",
        "path": "1_Pooling",
        "type": "sentence_transformers.models.Pooling"
    },
    {
        "idx": 2,
        "name": "2",
        "path": "2_Normalize",
        "type": "sentence_transformers.models.Normalize"
    }
]
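`modules.json` is what lets `SentenceTransformer` rebuild the three-stage pipeline from the README (Transformer → Pooling → Normalize). A roughly equivalent pipeline could be assembled by hand; this is a sketch of the mechanism, not how this repo was produced:

```python
from sentence_transformers import SentenceTransformer, models

transformer = models.Transformer("Qwen/Qwen2-1.5B-instruct", max_seq_length=32768)
pooling = models.Pooling(
    transformer.get_word_embedding_dimension(),  # 1536 for this base model
    pooling_mode="lasttoken",                    # matches 1_Pooling/config.json
)
normalize = models.Normalize()

model = SentenceTransformer(modules=[transformer, pooling, normalize])
```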
sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
{
    "max_seq_length": 32768,
    "do_lower_case": false
}
special_tokens_map.json
ADDED
@@ -0,0 +1,20 @@
{
    "additional_special_tokens": [
        "<|im_start|>",
        "<|im_end|>"
    ],
    "eos_token": {
        "content": "<|endoftext|>",
        "lstrip": false,
        "normalized": false,
        "rstrip": false,
        "single_word": false
    },
    "pad_token": {
        "content": "<|endoftext|>",
        "lstrip": false,
        "normalized": false,
        "rstrip": false,
        "single_word": false
    }
}
tokenizer.json
ADDED
The diff for this file is too large to render.
tokenizer_config.json
ADDED
@@ -0,0 +1,57 @@
{
    "add_eos_token": true,
    "add_prefix_space": false,
    "added_tokens_decoder": {
        "151643": {
            "content": "<|endoftext|>",
            "lstrip": false,
            "normalized": false,
            "rstrip": false,
            "single_word": false,
            "special": true
        },
        "151644": {
            "content": "<|im_start|>",
            "lstrip": false,
            "normalized": false,
            "rstrip": false,
            "single_word": false,
            "special": true
        },
        "151645": {
            "content": "<|im_end|>",
            "lstrip": false,
            "normalized": false,
            "rstrip": false,
            "single_word": false,
            "special": true
        }
    },
    "additional_special_tokens": [
        "<|im_start|>",
        "<|im_end|>"
    ],
    "auto_map": {
        "AutoTokenizer": [
            "Alibaba-NLP/gte-Qwen2-1.5B-instruct--tokenization_qwen.Qwen2Tokenizer",
            "Alibaba-NLP/gte-Qwen2-1.5B-instruct--tokenization_qwen.Qwen2TokenizerFast"
        ]
    },
    "bos_token": null,
    "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
    "clean_up_tokenization_spaces": false,
    "eos_token": "<|endoftext|>",
    "errors": "replace",
    "max_length": 32768,
    "model_max_length": 32768,
    "pad_to_multiple_of": null,
    "pad_token": "<|endoftext|>",
    "pad_token_type_id": 0,
    "padding_side": "right",
    "split_special_tokens": false,
    "stride": 0,
    "tokenizer_class": "Qwen2Tokenizer",
    "truncation_side": "right",
    "truncation_strategy": "longest_first",
    "unk_token": null
}
vocab.json
ADDED
The diff for this file is too large to render.