david-scilo committed
Commit 0b02645
Parent(s): d1e6f53

Add new SentenceTransformer model.

Files changed:
- 1_Pooling/config.json +10 -0
- README.md +379 -0
- config.json +25 -0
- config_sentence_transformers.json +12 -0
- model.safetensors +3 -0
- modules.json +20 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +37 -0
- tokenizer.json +0 -0
- tokenizer_config.json +64 -0
- vocab.txt +0 -0
1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
{
    "word_embedding_dimension": 384,
    "pooling_mode_cls_token": true,
    "pooling_mode_mean_tokens": false,
    "pooling_mode_max_tokens": false,
    "pooling_mode_mean_sqrt_len_tokens": false,
    "pooling_mode_weightedmean_tokens": false,
    "pooling_mode_lasttoken": false,
    "include_prompt": true
}
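This file configures the Pooling module declared later in modules.json: CLS-token pooling over 384-dimensional token embeddings. As a hedged sketch (the path assumes a local clone of this repo), the module can be loaded on its own to confirm the pooling mode:

```python
from sentence_transformers.models import Pooling

# Sketch: load the pooling module from the local 1_Pooling folder
# (adjust the path to wherever the repo is checked out).
pooling = Pooling.load("1_Pooling")
print(pooling.get_pooling_mode_str())  # expected: "cls"
```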
README.md
ADDED
@@ -0,0 +1,379 @@
---
library_name: sentence-transformers
pipeline_tag: sentence-similarity
tags:
- sentence-transformers
- sentence-similarity
- feature-extraction
- generated_from_trainer
- dataset_size:1999
- loss:MultipleNegativesRankingLoss
widget:
- source_sentence: Which area code corresponds to the city listed as Richmond in the
    context, and in which state is Richmond located?
  sentences:
  - ple, suppose an organization has two network interfaces. suppose both interfaces
    are assigned to a single bandwidth policy. the virtual interface for the organization
    will represent both network interfaces
  - field, sl1 automatically calculates the width of the text field l height. the
    height of the text field, including the label text, in pixels. if you do not supply
    a value in this
  - ) [1] => array ( [state] => va [city] => richmond [area_code] => 804 ) [2]
    => array ( [state] => dc
- source_sentence: What connection details should you set in the `connect()` function
    of your `main.go` file after running the command `go mod tidy`?
  sentences:
  - run go mod tidy go mod tidy set your connection details earlier you looked
    up your connection details. set them in main.go in the connect() function
  - 7. to leave an annotation for a status policy or rule, click the annotation icon
    ( ) next to the rule or tab. type your annotation text in the annotation window
    and click [save]. the annotation icon now displays as solid blue, while empty
    annotation icons contain a plus sign
  - the destination takes as its value the path where the le or directory is mounted
    in the container. can be speci ed as destination , dst , or target
- source_sentence: What is the total memory limit for the steprunner in the xlqueue,
    and how much more memory is allocated compared to the standard steprunner?
  sentences:
  - noise reduction is the percentage of alerts that did not become events in sl1.
    a mature, tuned sl1 system will have a high noise reduction percentage, as sl1
    is sharing only the events that matter to your environment
  - <link uri="/api/device/852" description="vmpea1" elemtype="href"/> <link uri="/api/device/853"
    description="cloudservicepeu2"
  - 'l 1x steprunner: 15 gb memory limit (xlqueue steprunner): 15 gb total limits/max
    expected memory usage'
- source_sentence: During the execution of a classic discovery session, how does the
    system determine the value to be used if the default value in a specific field
    is deleted and no other value is specified by the user?
  sentences:
  - '"sciencelogic, inc. | em7 admin portal" examples and reference 349 350 ] ] ] ] ] ] "sciencelogic,
    inc. | em7 database"'
  - isacknowledged = true message search for details about an event message message
    contains "problem"
  - 134 managing classic discovery sessions o during the execution of this discovery
    session, sl1 uses the value in this field first. if you delete the default values
    and do not specify another value in this field, sl1 uses the value in the global
    threshold settings page (system > settings > thresholds)
- source_sentence: Which statement is true regarding the configuration of SL1 systems
    to use Powerflow for syncing with third-party applications like ServiceNow or
    Cherwell?
  sentences:
  - l you can configure one or more sl1 systems to use powerflow to sync with a single
    instance of a third-party application like servicenow or cherwell. you cannot
    configure one sl1 system to use powerflow to sync with multiple instances of a
    third-party application like servicenow or cherwell. the relationship between
    sl1 and the third-party application can be either one-to-one or many-to-one, but
  - l apply_64gb_override and verify_64gb_override the following command is an example
    of a pfctl command to apply the 32 gb override
  - ( provisioning a customer (provision_customer.php) 229 [0] => array ( ['start_ip']
    => ['end_ip'] =>
---

# SentenceTransformer

This is a [sentence-transformers](https://www.SBERT.net) model. It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.

## Model Details

### Model Description
- **Model Type:** Sentence Transformer
<!-- - **Base model:** [Unknown](https://huggingface.co/unknown) -->
- **Maximum Sequence Length:** 512 tokens
- **Output Dimensionality:** 384 dimensions
- **Similarity Function:** Cosine Similarity
<!-- - **Training Dataset:** Unknown -->
<!-- - **Language:** Unknown -->
<!-- - **License:** Unknown -->

### Model Sources

- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)

### Full Model Architecture

```
SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: BertModel
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
)
```
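Because the pipeline ends in a `Normalize()` module, every embedding is unit-length, so dot product and cosine similarity coincide. A quick sanity check, as a sketch using the model id from this repo:

```python
import numpy as np
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("david-scilo/test_upload_10_17_2024")
emb = model.encode(["any sentence"])
# The trailing Normalize() module should make each embedding unit-length.
print(np.linalg.norm(emb, axis=1))  # expected: approximately [1.0]
```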

## Usage

### Direct Usage (Sentence Transformers)

First install the Sentence Transformers library:

```bash
pip install -U sentence-transformers
```

Then you can load this model and run inference.
```python
from sentence_transformers import SentenceTransformer

# Download from the 🤗 Hub
model = SentenceTransformer("david-scilo/test_upload_10_17_2024")
# Run inference
sentences = [
    'Which statement is true regarding the configuration of SL1 systems to use Powerflow for syncing with third-party applications like ServiceNow or Cherwell?',
    'l you can configure one or more sl1 systems to use powerflow to sync with a single instance of a third-party application like servicenow or cherwell. you cannot configure one sl1 system to use powerflow to sync with multiple instances of a third-party application like servicenow or cherwell. the relationship between sl1 and the third-party application can be either one-to-one or many-to-one, but',
    "( provisioning a customer (provision_customer.php) 229 [0] => array ( ['start_ip'] => ['end_ip'] =>",
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 384]

# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]
```

<!--
### Direct Usage (Transformers)

<details><summary>Click to see the direct usage in Transformers</summary>

</details>
-->

<!--
### Downstream Usage (Sentence Transformers)

You can finetune this model on your own dataset.

<details><summary>Click to expand</summary>

</details>
-->

<!--
### Out-of-Scope Use

*List how the model may foreseeably be misused and address what users ought not to do with the model.*
-->

<!--
## Bias, Risks and Limitations

*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
-->

<!--
### Recommendations

*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
-->

## Training Details

### Training Dataset

#### Unnamed Dataset

* Size: 1,999 training samples
* Columns: <code>sentence_0</code> and <code>sentence_1</code>
* Approximate statistics based on the first 1000 samples:
  |         | sentence_0 | sentence_1 |
  |:--------|:-----------|:-----------|
  | type    | string     | string     |
  | details | <ul><li>min: 13 tokens</li><li>mean: 34.55 tokens</li><li>max: 138 tokens</li></ul> | <ul><li>min: 11 tokens</li><li>mean: 52.41 tokens</li><li>max: 210 tokens</li></ul> |
* Samples:
  | sentence_0 | sentence_1 |
  |:-----------|:-----------|
  | <code>Which software ignores the authentication method field for users who are authenticated with single sign-on (SSO) in the context of EM7?</code> | <code>note: for users who are authenticated with single sign on (sso), em7 ignores the authentication method field. for details on configuring sl1 to use single sign on (sso) authentication, see the manual on using using single sign on</code> |
  | <code>Which command would you use to enable the ol8_appstream repository for package management on a Linux system using DNF?</code> | <code>sudo dnf install yum-utils sudo dnf config-manager --enable ol8_baseos_latest sudo dnf config-manager --enable ol8_appstream</code> |
  | <code>Which process should you consider using if you encounter environmental problems during the automated upgrade scripts for AWS environments?</code> | <code>the automated upgrade scripts will likely work for aws environments, but due to potential environmental differences between chosen amis, there might be other package updates or requirements. if you encounter environmental problems, you should consider using the back up, re-install, and restore process instead</code> |
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
  ```json
  {
      "scale": 20.0,
      "similarity_fct": "cos_sim"
  }
  ```
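For reference, a hedged sketch of how this loss would be constructed with the parameters above (the model id is taken from this repo; this is an illustration, not the original training script):

```python
from sentence_transformers import SentenceTransformer, losses, util

model = SentenceTransformer("david-scilo/test_upload_10_17_2024")
# MultipleNegativesRankingLoss with the listed parameters: scale=20.0, cosine similarity.
# Each (sentence_0, sentence_1) pair treats the other in-batch sentence_1 values as negatives.
loss = losses.MultipleNegativesRankingLoss(model, scale=20.0, similarity_fct=util.cos_sim)
```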

### Training Hyperparameters
#### Non-Default Hyperparameters

- `eval_strategy`: steps
- `per_device_train_batch_size`: 10
- `per_device_eval_batch_size`: 10
- `num_train_epochs`: 1
- `multi_dataset_batch_sampler`: round_robin

#### All Hyperparameters
<details><summary>Click to expand</summary>

- `overwrite_output_dir`: False
- `do_predict`: False
- `eval_strategy`: steps
- `prediction_loss_only`: True
- `per_device_train_batch_size`: 10
- `per_device_eval_batch_size`: 10
- `per_gpu_train_batch_size`: None
- `per_gpu_eval_batch_size`: None
- `gradient_accumulation_steps`: 1
- `eval_accumulation_steps`: None
- `torch_empty_cache_steps`: None
- `learning_rate`: 5e-05
- `weight_decay`: 0.0
- `adam_beta1`: 0.9
- `adam_beta2`: 0.999
- `adam_epsilon`: 1e-08
- `max_grad_norm`: 1
- `num_train_epochs`: 1
- `max_steps`: -1
- `lr_scheduler_type`: linear
- `lr_scheduler_kwargs`: {}
- `warmup_ratio`: 0.0
- `warmup_steps`: 0
- `log_level`: passive
- `log_level_replica`: warning
- `log_on_each_node`: True
- `logging_nan_inf_filter`: True
- `save_safetensors`: True
- `save_on_each_node`: False
- `save_only_model`: False
- `restore_callback_states_from_checkpoint`: False
- `no_cuda`: False
- `use_cpu`: False
- `use_mps_device`: False
- `seed`: 42
- `data_seed`: None
- `jit_mode_eval`: False
- `use_ipex`: False
- `bf16`: False
- `fp16`: False
- `fp16_opt_level`: O1
- `half_precision_backend`: auto
- `bf16_full_eval`: False
- `fp16_full_eval`: False
- `tf32`: None
- `local_rank`: 0
- `ddp_backend`: None
- `tpu_num_cores`: None
- `tpu_metrics_debug`: False
- `debug`: []
- `dataloader_drop_last`: False
- `dataloader_num_workers`: 0
- `dataloader_prefetch_factor`: None
- `past_index`: -1
- `disable_tqdm`: False
- `remove_unused_columns`: True
- `label_names`: None
- `load_best_model_at_end`: False
- `ignore_data_skip`: False
- `fsdp`: []
- `fsdp_min_num_params`: 0
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
- `fsdp_transformer_layer_cls_to_wrap`: None
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
- `deepspeed`: None
- `label_smoothing_factor`: 0.0
- `optim`: adamw_torch
- `optim_args`: None
- `adafactor`: False
- `group_by_length`: False
- `length_column_name`: length
- `ddp_find_unused_parameters`: None
- `ddp_bucket_cap_mb`: None
- `ddp_broadcast_buffers`: False
- `dataloader_pin_memory`: True
- `dataloader_persistent_workers`: False
- `skip_memory_metrics`: True
- `use_legacy_prediction_loop`: False
- `push_to_hub`: False
- `resume_from_checkpoint`: None
- `hub_model_id`: None
- `hub_strategy`: every_save
- `hub_private_repo`: False
- `hub_always_push`: False
- `gradient_checkpointing`: False
- `gradient_checkpointing_kwargs`: None
- `include_inputs_for_metrics`: False
- `eval_do_concat_batches`: True
- `fp16_backend`: auto
- `push_to_hub_model_id`: None
- `push_to_hub_organization`: None
- `mp_parameters`:
- `auto_find_batch_size`: False
- `full_determinism`: False
- `torchdynamo`: None
- `ray_scope`: last
- `ddp_timeout`: 1800
- `torch_compile`: False
- `torch_compile_backend`: None
- `torch_compile_mode`: None
- `dispatch_batches`: None
- `split_batches`: None
- `include_tokens_per_second`: False
- `include_num_input_tokens_seen`: False
- `neftune_noise_alpha`: None
- `optim_target_modules`: None
- `batch_eval_metrics`: False
- `eval_on_start`: False
- `eval_use_gather_object`: False
- `batch_sampler`: batch_sampler
- `multi_dataset_batch_sampler`: round_robin

</details>
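Putting the non-default values together, a training run with sentence-transformers 3.x would look roughly like the sketch below. The dataset contents, output path, and eval split are placeholders; the real 1,999-pair dataset is not published with this repo.

```python
from datasets import Dataset
from sentence_transformers import (
    SentenceTransformer,
    SentenceTransformerTrainer,
    SentenceTransformerTrainingArguments,
    losses,
)
from sentence_transformers.training_args import MultiDatasetBatchSamplers

# Placeholder pairs standing in for the unpublished sentence_0/sentence_1 dataset.
train_dataset = Dataset.from_dict({
    "sentence_0": ["example question A", "example question B"],
    "sentence_1": ["example passage A", "example passage B"],
})

model = SentenceTransformer("david-scilo/test_upload_10_17_2024")
args = SentenceTransformerTrainingArguments(
    output_dir="output",  # placeholder path
    num_train_epochs=1,
    per_device_train_batch_size=10,
    per_device_eval_batch_size=10,
    eval_strategy="steps",
    multi_dataset_batch_sampler=MultiDatasetBatchSamplers.ROUND_ROBIN,
)
trainer = SentenceTransformerTrainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    eval_dataset=train_dataset,  # placeholder; a real run would hold out an eval split
    loss=losses.MultipleNegativesRankingLoss(model, scale=20.0),
)
trainer.train()
```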

### Framework Versions
- Python: 3.11.9
- Sentence Transformers: 3.1.1
- Transformers: 4.43.3
- PyTorch: 2.3.1+cu121
- Accelerate: 0.30.1
- Datasets: 2.19.1
- Tokenizers: 0.19.1

## Citation

### BibTeX

#### Sentence Transformers
```bibtex
@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}
```

#### MultipleNegativesRankingLoss
```bibtex
@misc{henderson2017efficient,
    title={Efficient Natural Language Response Suggestion for Smart Reply},
    author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
    year={2017},
    eprint={1705.00652},
    archivePrefix={arXiv},
    primaryClass={cs.CL}
}
```

<!--
## Glossary

*Clearly define terms in order to be accessible across audiences.*
-->

<!--
## Model Card Authors

*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
-->

<!--
## Model Card Contact

*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
-->
config.json
ADDED
@@ -0,0 +1,25 @@
{
  "_name_or_path": "david-scilo/test_upload_10_17_2024",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 384,
  "initializer_range": 0.02,
  "intermediate_size": 1536,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.43.3",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}
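These values describe a 12-layer, 12-head BERT encoder with a 384-dimensional hidden size, which at float32 works out to roughly 33M parameters, consistent with the ~133 MB safetensors file below. A quick way to confirm the geometry programmatically, as a sketch:

```python
from transformers import AutoConfig

cfg = AutoConfig.from_pretrained("david-scilo/test_upload_10_17_2024")
print(cfg.model_type, cfg.num_hidden_layers, cfg.hidden_size)
# expected: bert 12 384
```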
config_sentence_transformers.json
ADDED
@@ -0,0 +1,12 @@
{
  "__version__": {
    "sentence_transformers": "3.1.1",
    "transformers": "4.43.3",
    "pytorch": "2.3.1+cu121"
  },
  "prompts": {
    "query": "Represent this sentence for searching relevant passages: "
  },
  "default_prompt_name": null,
  "similarity_fn_name": null
}
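The `prompts` entry defines a named `query` prompt that Sentence Transformers prepends at encode time when requested; since `default_prompt_name` is null, nothing is prepended by default. A hedged usage sketch (the query and passage strings are illustrative):

```python
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("david-scilo/test_upload_10_17_2024")
# Queries get the "Represent this sentence for searching relevant passages: " prefix.
query_emb = model.encode(["total memory limit for the xlqueue steprunner"], prompt_name="query")
# Passages are encoded without a prompt (default_prompt_name is null).
doc_emb = model.encode(["the xlqueue steprunner has a 15 gb total memory limit"])
print(model.similarity(query_emb, doc_emb))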
model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:dd7b1ed735c6b306ad6a6bb01bd93b8535bcdabe1c153935410b6cfa1ae25a41
size 133462128
modules.json
ADDED
@@ -0,0 +1,20 @@
[
  {
    "idx": 0,
    "name": "0",
    "path": "",
    "type": "sentence_transformers.models.Transformer"
  },
  {
    "idx": 1,
    "name": "1",
    "path": "1_Pooling",
    "type": "sentence_transformers.models.Pooling"
  },
  {
    "idx": 2,
    "name": "2",
    "path": "2_Normalize",
    "type": "sentence_transformers.models.Normalize"
  }
]
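modules.json is the wiring for the three-stage pipeline shown in the README architecture. An equivalent model could, as a sketch, be assembled by hand from the same module types (the model id is reused here as the transformer weights source):

```python
from sentence_transformers import SentenceTransformer, models

# Sketch: rebuild the Transformer -> Pooling -> Normalize pipeline manually.
transformer = models.Transformer("david-scilo/test_upload_10_17_2024", max_seq_length=512)
pooling = models.Pooling(transformer.get_word_embedding_dimension(), pooling_mode="cls")
normalize = models.Normalize()
model = SentenceTransformer(modules=[transformer, pooling, normalize])
```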
sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
{
  "max_seq_length": 512,
  "do_lower_case": false
}
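This caps inputs at 512 tokens; longer texts are truncated. The limit is exposed (and adjustable downward) on the loaded model, as a brief sketch:

```python
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("david-scilo/test_upload_10_17_2024")
print(model.max_seq_length)  # 512
model.max_seq_length = 256   # e.g., truncate earlier to speed up encoding
```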
special_tokens_map.json
ADDED
@@ -0,0 +1,37 @@
{
  "cls_token": {
    "content": "[CLS]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "mask_token": {
    "content": "[MASK]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "[PAD]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "sep_token": {
    "content": "[SEP]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "unk_token": {
    "content": "[UNK]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
}
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
tokenizer_config.json
ADDED
@@ -0,0 +1,64 @@
{
  "added_tokens_decoder": {
    "0": {
      "content": "[PAD]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "100": {
      "content": "[UNK]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "101": {
      "content": "[CLS]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "102": {
      "content": "[SEP]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "103": {
      "content": "[MASK]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "clean_up_tokenization_spaces": true,
  "cls_token": "[CLS]",
  "do_basic_tokenize": true,
  "do_lower_case": true,
  "mask_token": "[MASK]",
  "max_length": 512,
  "model_max_length": 512,
  "never_split": null,
  "pad_to_multiple_of": null,
  "pad_token": "[PAD]",
  "pad_token_type_id": 0,
  "padding_side": "right",
  "sep_token": "[SEP]",
  "stride": 0,
  "strip_accents": null,
  "tokenize_chinese_chars": true,
  "tokenizer_class": "BertTokenizer",
  "truncation_side": "right",
  "truncation_strategy": "longest_first",
  "unk_token": "[UNK]"
}
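Note that the tokenizer itself lowercases input (`do_lower_case: true`); the `do_lower_case: false` in sentence_bert_config.json only disables an extra lowercasing pass at the Sentence Transformers layer, so the effective behavior is lowercased text. A quick check, as a sketch:

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("david-scilo/test_upload_10_17_2024")
print(tok.tokenize("PowerFlow Sync"))
# expected: lowercased WordPiece pieces; the exact split depends on vocab.txt
```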
vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff