Initial Commit
Browse files
- 1_Pooling/config.json +10 -0
- README.md +390 -3
- config.json +26 -0
- config_sentence_transformers.json +10 -0
- model.safetensors +3 -0
- modules.json +20 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +37 -0
- tokenizer.json +0 -0
- tokenizer_config.json +64 -0
- training_args.bin +3 -0
- vocab.txt +0 -0
1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"word_embedding_dimension": 384,
|
3 |
+
"pooling_mode_cls_token": false,
|
4 |
+
"pooling_mode_mean_tokens": true,
|
5 |
+
"pooling_mode_max_tokens": false,
|
6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
7 |
+
"pooling_mode_weightedmean_tokens": false,
|
8 |
+
"pooling_mode_lasttoken": false,
|
9 |
+
"include_prompt": true
|
10 |
+
}
|
README.md
CHANGED
@@ -1,3 +1,390 @@
|
|
1 |
-
---
|
2 |
-
|
3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
<br>
|
5 |
<br>Monday: 426
|
6 |
<br>Tuesday: 2,150
|
7 |
<br>
|
8 |
<br>And the numbers are going to get bigger. https://t.co/fUeg2RL2dl</code> | <code>0.3</code> |
|
|
|
9 |
<br>
|
10 |
<br>Oil prices resume their march downward as Covid 19 continues to spread
|
11 |
<br>FO cracks very strong
|
12 |
<br>Gasoil cracks strengthen
|
13 |
<br>Light distillate and Kero cracks weaker https://t.co/3mB0p5BSZ5</code> | <code>E-cigarette users and tobacco smokers are more in danger from the new coronavirus than the average healthy person. Heres why. https://t.co/D1ynRUYFUP</code> | <code>0.3</code> |
|
|
|
14 |
<br>
|
15 |
<br>Prices of Hand Sanitizers are be</code> | <code>Minister Didiza pleads with the public not to hoard food stuffs durning #Covid_19 as panic buying may affect food prices. @DRDLR_online</code> | <code>0.3</code> |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
base_model: sentence-transformers/all-MiniLM-L6-v2
|
3 |
+
library_name: sentence-transformers
|
4 |
+
pipeline_tag: sentence-similarity
|
5 |
+
tags:
|
6 |
+
- sentence-transformers
|
7 |
+
- sentence-similarity
|
8 |
+
- feature-extraction
|
9 |
+
- generated_from_trainer
|
10 |
+
- dataset_size:13205
|
11 |
+
- loss:CosineSimilarityLoss
|
12 |
+
widget:
|
13 |
+
- source_sentence: COVID 19 Tips for safely online
|
14 |
+
sentences:
|
15 |
+
- Energy Minister Alexander Novak said on Thursday that may return to oil negotiations
|
16 |
+
with Saudi Arabia after talks collapsed last month which coupled with the spread
|
17 |
+
of the new dragged prices to their 18 year lows
|
18 |
+
- "Low-skilled workers according to the UK Government:\r\r\n\r\r\n- Paramedic\r\r\
|
19 |
+
\n- Nurse\r\r\n- Midwife\r\r\n- Social Worker\r\r\n- Carer\r\r\n- Supermarket\
|
20 |
+
\ worker\r\r\n- Bus Driver\r\r\n- Nursery teacher\r\r\n\r\r\nWhat a difference\
|
21 |
+
\ a month makes...\r\r\n\r\r\n#clapforNHS #ThankYouNHS #coronavirus \r\r\n\r\r\
|
22 |
+
\nhttps://t.co/Ne2BaByS6J"
|
23 |
+
- "Trump is pushing for higher oil and gas prices at a time when millions of jobless\
|
24 |
+
\ Americans are coping with utility bills. This hurts everyone. Pass it on. \r\
|
25 |
+
\r\n#Rutgers #PrincetonU #TCNJ #NJIT #StayHome #ThanksForDelivery #coronavirus\
|
26 |
+
\ #RutgersNewark #Maga2020 #Newark @CoryBooker https://t.co/Dxp4NWHopU"
|
27 |
+
- source_sentence: "As the number of Covid-19 cases continues to rise volunteer groups\
|
28 |
+
\ are getting organised to help people in isolation.\r\r\n\r\r\nOne Christchurch\
|
29 |
+
\ man got together with a group of friends to shop for those who can't go to the\
|
30 |
+
\ supermarket.\r\r\n\r\r\nhttps://t.co/31qJR5wFrR"
|
31 |
+
sentences:
|
32 |
+
- "TOILET PAPER MYSTERY FINALLY SOLVED!??\r\r\n#ToiletPaperApocalypse #toiletpaper\
|
33 |
+
\ #toiletpapercrisis\r\r\n#ToiletPaperPanic #groceries \r\r\n#CoronaVirusUpdates\
|
34 |
+
\ #coronavirus #COVID19 \r\r\n\r\r\nWhat Everyone\x92s Getting Wrong About the\
|
35 |
+
\ Toilet Paper Shortage by @WillOremus in @MRKR https://t.co/0WvYybajgd"
|
36 |
+
- "They've probably caught the #coronavirus via their panic buying & crowding\
|
37 |
+
\ together with other people!\r\r\nhttps://t.co/PqAsDM7nMr\r\r\n#Food"
|
38 |
+
- '@piersmorgan 20,000 armed services at the ready .......if the doubters have a
|
39 |
+
problem believing COVID 19 just take a trip to your local shop ,supermarket there
|
40 |
+
is literally nothing on the shelves ...my day off yesterday from the NHS ,I went
|
41 |
+
for my basic s'
|
42 |
+
- source_sentence: "@IvankaTrump @USDA I'd like to know how much stock you & Jared\
|
43 |
+
\ dumped while Daddy was telling the country #Coronavirus was a Dem hoax? \r\r\
|
44 |
+
\n\r\r\nBy the way, YOU don't have to tell US, the average, everyday American,\
|
45 |
+
\ to be thankful for our food supply chai"
|
46 |
+
sentences:
|
47 |
+
- "What #CONVID19 safety measures r being taken by online shopping companies &\
|
48 |
+
\ their courier partners @amazonIN @Flipkart etc?\r\r\n I fear that shopping packages\
|
49 |
+
\ which travel vast distances through flights/trains & r handled by many along\
|
50 |
+
\ d way can b potential #coronavirus carriers??"
|
51 |
+
- 'Demand at food bank on the rise in Kelowna due to COVID-19 #Kelowna https://t.co/4YWr6BkbBV
|
52 |
+
https://t.co/2Bho8KBry8'
|
53 |
+
- "The line to go grocery shopping in LA. It\x92s the first day of the official\
|
54 |
+
\ lockdown of #California. There is only a limited amount of people inside the\
|
55 |
+
\ store at a time. People stocking up on supplies before the weekend. #coronavirus\
|
56 |
+
\ @featurestory https://t.co/5NHtHhcUdq"
|
57 |
+
- source_sentence: "Get the #facts on #coronavirusau\r\r\n#covid19australia #coronavirus\
|
58 |
+
\ #covid19\r\r\n\r\r\nSurveys show less than half of #Australia is #panickbuying\
|
59 |
+
\ and other important #statistics for #business\r\r\n https://t.co/dDaiM5KcqY"
|
60 |
+
sentences:
|
61 |
+
- 'Again for those at the back: sympathy for people panic buying and hoarding essential
|
62 |
+
medicines and food items is not a progressive position #auspol #COVID2019AU #COVID2019
|
63 |
+
#coronavirus'
|
64 |
+
- "Back in early March, in my hometown of Volgograd (Stalingrad), everything was\
|
65 |
+
\ calm. No panic. And so, people rushed to buy sugar, buckwheat, canned food,\
|
66 |
+
\ toilet paper ...\r\r\n\r\r\n#COVID?19 #COVID19 #coronavirus"
|
67 |
+
- "@DrDenaGrayson @mrplannings Look at all these UK MP's putting in 48 hour shifts,\
|
68 |
+
\ facing empty supermarket shelves and risking their own lives to help others...\r\
|
69 |
+
\r\n\r\r\nOh Wait !!!\r\r\n\r\r\n#SaveOurNurses #PPE #NHSheroes \r\r\n\r\r\n#CoronaVirusUpdate\
|
70 |
+
\ #CoronaCrisis"
|
71 |
+
- source_sentence: in the doing nicely on the back of as people queue to get their
|
72 |
+
Firmly in buy on our system See chart Key above the cloud In Buy below In Sell
|
73 |
+
HD
|
74 |
+
sentences:
|
75 |
+
- I JUST GOT THIS FROM MY VET We recommend that you take preliminary precautions
|
76 |
+
and stock up on your pet s food medications and pet related items that you know
|
77 |
+
that you will use to avoid problems if more quarantine measures are implemented
|
78 |
+
within the community
|
79 |
+
- "In times of uncertainty, it is imperative to keep consumers up-to-date with your\
|
80 |
+
\ company\x92s weekly updates. If a consumer can\x92t find a place to know if\
|
81 |
+
\ you\x92re open or closed, they will find a company who has this in place! \r\
|
82 |
+
\r\n\r\r\n#WebsiteTip #CoronaVirus #Business https://t.co/Dj1KV9Pu2B"
|
83 |
+
- 'Rapid delivery food order made (since no slots elsewhere for weeks). All seemed
|
84 |
+
fine until email listing what was out of stock. They are about to deliver...one
|
85 |
+
bottle of orange juice! #coronavirus #panicbuying #whatashitshow'
|
86 |
+
---
|
87 |
+
|
88 |
+
# SentenceTransformer based on sentence-transformers/all-MiniLM-L6-v2
|
89 |
+
|
90 |
+
This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2). It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
|
91 |
+
|
92 |
+
## Model Details
|
93 |
+
|
94 |
+
### Model Description
|
95 |
+
- **Model Type:** Sentence Transformer
|
96 |
+
- **Base model:** [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) <!-- at revision fa97f6e7cb1a59073dff9e6b13e2715cf7475ac9 -->
|
97 |
+
- **Maximum Sequence Length:** 256 tokens
|
98 |
+
- **Output Dimensionality:** 384 dimensions
|
99 |
+
- **Similarity Function:** Cosine Similarity
|
100 |
+
<!-- - **Training Dataset:** Unknown -->
|
101 |
+
<!-- - **Language:** Unknown -->
|
102 |
+
<!-- - **License:** Unknown -->
|
103 |
+
|
104 |
+
### Model Sources
|
105 |
+
|
106 |
+
- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
|
107 |
+
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
|
108 |
+
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
|
109 |
+
|
110 |
+
### Full Model Architecture
|
111 |
+
|
112 |
+
```
|
113 |
+
SentenceTransformer(
|
114 |
+
(0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel
|
115 |
+
(1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
|
116 |
+
(2): Normalize()
|
117 |
+
)
|
118 |
+
```
|
119 |
+
|
120 |
+
## Usage
|
121 |
+
|
122 |
+
### Direct Usage (Sentence Transformers)
|
123 |
+
|
124 |
+
First install the Sentence Transformers library:
|
125 |
+
|
126 |
+
```bash
|
127 |
+
pip install -U sentence-transformers
|
128 |
+
```
|
129 |
+
|
130 |
+
Then you can load this model and run inference.
|
131 |
+
```python
|
132 |
+
from sentence_transformers import SentenceTransformer
|
133 |
+
|
134 |
+
# Download from the 🤗 Hub
|
135 |
+
model = SentenceTransformer("sentence_transformers_model_id")  # placeholder: replace with this model's Hugging Face Hub repo ID
|
136 |
+
# Run inference
|
137 |
+
sentences = [
|
138 |
+
'in the doing nicely on the back of as people queue to get their Firmly in buy on our system See chart Key above the cloud In Buy below In Sell HD',
|
139 |
+
'In times of uncertainty, it is imperative to keep consumers up-to-date with your company\x92s weekly updates. If a consumer can\x92t find a place to know if you\x92re open or closed, they will find a company who has this in place! \r\r\n\r\r\n#WebsiteTip #CoronaVirus #Business https://t.co/Dj1KV9Pu2B',
|
140 |
+
'I JUST GOT THIS FROM MY VET We recommend that you take preliminary precautions and stock up on your pet s food medications and pet related items that you know that you will use to avoid problems if more quarantine measures are implemented within the community',
|
141 |
+
]
|
142 |
+
embeddings = model.encode(sentences)
|
143 |
+
print(embeddings.shape)
|
144 |
+
# [3, 384]
|
145 |
+
|
146 |
+
# Get the similarity scores for the embeddings
|
147 |
+
similarities = model.similarity(embeddings, embeddings)
|
148 |
+
print(similarities.shape)
|
149 |
+
# [3, 3]
|
150 |
+
```
|
151 |
+
|
152 |
+
<!--
|
153 |
+
### Direct Usage (Transformers)
|
154 |
+
|
155 |
+
<details><summary>Click to see the direct usage in Transformers</summary>
|
156 |
+
|
157 |
+
</details>
|
158 |
+
-->
|
159 |
+
|
160 |
+
<!--
|
161 |
+
### Downstream Usage (Sentence Transformers)
|
162 |
+
|
163 |
+
You can finetune this model on your own dataset.
|
164 |
+
|
165 |
+
<details><summary>Click to expand</summary>
|
166 |
+
|
167 |
+
</details>
|
168 |
+
-->
|
169 |
+
|
170 |
+
<!--
|
171 |
+
### Out-of-Scope Use
|
172 |
+
|
173 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
174 |
+
-->
|
175 |
+
|
176 |
+
<!--
|
177 |
+
## Bias, Risks and Limitations
|
178 |
+
|
179 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
180 |
+
-->
|
181 |
+
|
182 |
+
<!--
|
183 |
+
### Recommendations
|
184 |
+
|
185 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
186 |
+
-->
|
187 |
+
|
188 |
+
## Training Details
|
189 |
+
|
190 |
+
### Training Dataset
|
191 |
+
|
192 |
+
#### Unnamed Dataset
|
193 |
+
|
194 |
+
|
195 |
+
* Size: 13,205 training samples
|
196 |
+
* Columns: <code>positive</code>, <code>negative</code>, and <code>label</code>
|
197 |
+
* Approximate statistics based on the first 1000 samples:
|
198 |
+
| | positive | negative | label |
|
199 |
+
|:--------|:-----------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|:---------------------------------------------------------------|
|
200 |
+
| type | string | string | float |
|
201 |
+
| details | <ul><li>min: 9 tokens</li><li>mean: 55.28 tokens</li><li>max: 113 tokens</li></ul> | <ul><li>min: 7 tokens</li><li>mean: 54.08 tokens</li><li>max: 127 tokens</li></ul> | <ul><li>min: 0.0</li><li>mean: 0.18</li><li>max: 0.3</li></ul> |
|
202 |
+
* Samples:
|
203 |
+
| positive | negative | label |
|
204 |
+
|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-----------------|
|
205 |
+
| <code>Food redistribution organisations across England will benefit from 3 25 million of government to help them cut food waste and redistribute up to 14 000 tonnes of stock during the outbreak</code> | <code>Unemployment claims made online in Virginia this week:
|
206 |
<br>
|
207 |
<br>Monday: 426
|
208 |
<br>Tuesday: 2,150
|
209 |
<br>
|
210 |
<br>And the numbers are going to get bigger. https://t.co/fUeg2RL2dl</code> | <code>0.3</code> |
|
211 |
+
| <code>In today's Oil Price Digest
|
212 |
<br>
|
213 |
<br>Oil prices resume their march downward as Covid 19 continues to spread
|
214 |
<br>FO cracks very strong
|
215 |
<br>Gasoil cracks strengthen
|
216 |
<br>Light distillate and Kero cracks weaker https://t.co/3mB0p5BSZ5</code> | <code>E-cigarette users and tobacco smokers are more in danger from the new coronavirus than the average healthy person. Heres why. https://t.co/D1ynRUYFUP</code> | <code>0.3</code> |
|
217 |
+
| <code>@DrJoeAbah DEAR @LASG @NCDCgov @jidesanwoolu @Omojuwa @aproko_doctor @segalink Alot of clubs and hotels are open with over 50+ crowded ignorant Nigerians. They are not taking the Covid-19 pandemic situation seriously.
|
218 |
<br>
|
219 |
<br>Prices of Hand Sanitizers are be</code> | <code>Minister Didiza pleads with the public not to hoard food stuffs durning #Covid_19 as panic buying may affect food prices. @DRDLR_online</code> | <code>0.3</code> |
|
220 |
+
* Loss: [<code>CosineSimilarityLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cosinesimilarityloss) with these parameters:
|
221 |
+
```json
|
222 |
+
{
|
223 |
+
"loss_fct": "torch.nn.modules.loss.MSELoss"
|
224 |
+
}
|
225 |
+
```
|
226 |
+
|
227 |
+
### Training Hyperparameters
|
228 |
+
|
229 |
+
#### All Hyperparameters
|
230 |
+
<details><summary>Click to expand</summary>
|
231 |
+
|
232 |
+
- `overwrite_output_dir`: False
|
233 |
+
- `do_predict`: False
|
234 |
+
- `eval_strategy`: no
|
235 |
+
- `prediction_loss_only`: True
|
236 |
+
- `per_device_train_batch_size`: 8
|
237 |
+
- `per_device_eval_batch_size`: 8
|
238 |
+
- `per_gpu_train_batch_size`: None
|
239 |
+
- `per_gpu_eval_batch_size`: None
|
240 |
+
- `gradient_accumulation_steps`: 1
|
241 |
+
- `eval_accumulation_steps`: None
|
242 |
+
- `torch_empty_cache_steps`: None
|
243 |
+
- `learning_rate`: 5e-05
|
244 |
+
- `weight_decay`: 0.0
|
245 |
+
- `adam_beta1`: 0.9
|
246 |
+
- `adam_beta2`: 0.999
|
247 |
+
- `adam_epsilon`: 1e-08
|
248 |
+
- `max_grad_norm`: 1.0
|
249 |
+
- `num_train_epochs`: 3.0
|
250 |
+
- `max_steps`: -1
|
251 |
+
- `lr_scheduler_type`: linear
|
252 |
+
- `lr_scheduler_kwargs`: {}
|
253 |
+
- `warmup_ratio`: 0.0
|
254 |
+
- `warmup_steps`: 0
|
255 |
+
- `log_level`: passive
|
256 |
+
- `log_level_replica`: warning
|
257 |
+
- `log_on_each_node`: True
|
258 |
+
- `logging_nan_inf_filter`: True
|
259 |
+
- `save_safetensors`: True
|
260 |
+
- `save_on_each_node`: False
|
261 |
+
- `save_only_model`: False
|
262 |
+
- `restore_callback_states_from_checkpoint`: False
|
263 |
+
- `no_cuda`: False
|
264 |
+
- `use_cpu`: False
|
265 |
+
- `use_mps_device`: False
|
266 |
+
- `seed`: 42
|
267 |
+
- `data_seed`: None
|
268 |
+
- `jit_mode_eval`: False
|
269 |
+
- `use_ipex`: False
|
270 |
+
- `bf16`: False
|
271 |
+
- `fp16`: False
|
272 |
+
- `fp16_opt_level`: O1
|
273 |
+
- `half_precision_backend`: auto
|
274 |
+
- `bf16_full_eval`: False
|
275 |
+
- `fp16_full_eval`: False
|
276 |
+
- `tf32`: None
|
277 |
+
- `local_rank`: 0
|
278 |
+
- `ddp_backend`: None
|
279 |
+
- `tpu_num_cores`: None
|
280 |
+
- `tpu_metrics_debug`: False
|
281 |
+
- `debug`: []
|
282 |
+
- `dataloader_drop_last`: False
|
283 |
+
- `dataloader_num_workers`: 0
|
284 |
+
- `dataloader_prefetch_factor`: None
|
285 |
+
- `past_index`: -1
|
286 |
+
- `disable_tqdm`: False
|
287 |
+
- `remove_unused_columns`: True
|
288 |
+
- `label_names`: None
|
289 |
+
- `load_best_model_at_end`: False
|
290 |
+
- `ignore_data_skip`: False
|
291 |
+
- `fsdp`: []
|
292 |
+
- `fsdp_min_num_params`: 0
|
293 |
+
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
294 |
+
- `fsdp_transformer_layer_cls_to_wrap`: None
|
295 |
+
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
296 |
+
- `deepspeed`: None
|
297 |
+
- `label_smoothing_factor`: 0.0
|
298 |
+
- `optim`: adamw_torch
|
299 |
+
- `optim_args`: None
|
300 |
+
- `adafactor`: False
|
301 |
+
- `group_by_length`: False
|
302 |
+
- `length_column_name`: length
|
303 |
+
- `ddp_find_unused_parameters`: None
|
304 |
+
- `ddp_bucket_cap_mb`: None
|
305 |
+
- `ddp_broadcast_buffers`: False
|
306 |
+
- `dataloader_pin_memory`: True
|
307 |
+
- `dataloader_persistent_workers`: False
|
308 |
+
- `skip_memory_metrics`: True
|
309 |
+
- `use_legacy_prediction_loop`: False
|
310 |
+
- `push_to_hub`: False
|
311 |
+
- `resume_from_checkpoint`: None
|
312 |
+
- `hub_model_id`: None
|
313 |
+
- `hub_strategy`: every_save
|
314 |
+
- `hub_private_repo`: False
|
315 |
+
- `hub_always_push`: False
|
316 |
+
- `gradient_checkpointing`: False
|
317 |
+
- `gradient_checkpointing_kwargs`: None
|
318 |
+
- `include_inputs_for_metrics`: False
|
319 |
+
- `eval_do_concat_batches`: True
|
320 |
+
- `fp16_backend`: auto
|
321 |
+
- `push_to_hub_model_id`: None
|
322 |
+
- `push_to_hub_organization`: None
|
323 |
+
- `mp_parameters`:
|
324 |
+
- `auto_find_batch_size`: False
|
325 |
+
- `full_determinism`: False
|
326 |
+
- `torchdynamo`: None
|
327 |
+
- `ray_scope`: last
|
328 |
+
- `ddp_timeout`: 1800
|
329 |
+
- `torch_compile`: False
|
330 |
+
- `torch_compile_backend`: None
|
331 |
+
- `torch_compile_mode`: None
|
332 |
+
- `dispatch_batches`: None
|
333 |
+
- `split_batches`: None
|
334 |
+
- `include_tokens_per_second`: False
|
335 |
+
- `include_num_input_tokens_seen`: False
|
336 |
+
- `neftune_noise_alpha`: None
|
337 |
+
- `optim_target_modules`: None
|
338 |
+
- `batch_eval_metrics`: False
|
339 |
+
- `eval_on_start`: False
|
340 |
+
- `eval_use_gather_object`: False
|
341 |
+
- `batch_sampler`: batch_sampler
|
342 |
+
- `multi_dataset_batch_sampler`: proportional
|
343 |
+
|
344 |
+
</details>
|
345 |
+
|
346 |
+
### Training Logs
|
347 |
+
| Epoch | Step | Training Loss |
|
348 |
+
|:------:|:----:|:-------------:|
|
349 |
+
| 0.3028 | 500 | 0.0851 |
|
350 |
+
| 0.6057 | 1000 | 0.0368 |
|
351 |
+
| 0.9085 | 1500 | 0.0286 |
|
352 |
+
| 1.2114 | 2000 | 0.023 |
|
353 |
+
| 1.5142 | 2500 | 0.0189 |
|
354 |
+
| 1.8171 | 3000 | 0.0174 |
|
355 |
+
| 2.1199 | 3500 | 0.0159 |
|
356 |
+
| 2.4228 | 4000 | 0.0142 |
|
357 |
+
| 2.7256 | 4500 | 0.013 |
|
358 |
+
|
359 |
+
|
360 |
+
### Framework Versions
|
361 |
+
- Python: 3.10.12
|
362 |
+
- Sentence Transformers: 3.2.1
|
363 |
+
- Transformers: 4.44.2
|
364 |
+
- PyTorch: 2.5.0+cu121
|
365 |
+
- Accelerate: 0.34.2
|
366 |
+
- Datasets: 3.1.0
|
367 |
+
- Tokenizers: 0.19.1
|
368 |
+
|
369 |
+
## Citation
|
370 |
+
|
371 |
+
### BibTeX
|
372 |
+
|
373 |
+
#### Sentence Transformers
|
374 |
+
```bibtex
|
375 |
+
@inproceedings{reimers-2019-sentence-bert,
|
376 |
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
377 |
+
author = "Reimers, Nils and Gurevych, Iryna",
|
378 |
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
379 |
+
month = "11",
|
380 |
+
year = "2019",
|
381 |
+
publisher = "Association for Computational Linguistics",
|
382 |
+
url = "https://arxiv.org/abs/1908.10084",
|
383 |
+
}
|
384 |
+
```
|
385 |
+
|
386 |
+
<!--
|
387 |
+
## Glossary
|
388 |
+
|
389 |
+
*Clearly define terms in order to be accessible across audiences.*
|
390 |
+
-->
|
391 |
+
|
392 |
+
<!--
|
393 |
+
## Model Card Authors
|
394 |
+
|
395 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
396 |
+
-->
|
397 |
+
|
398 |
+
<!--
|
399 |
+
## Model Card Contact
|
400 |
+
|
401 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
402 |
+
-->
|
config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "sentence-transformers/all-MiniLM-L6-v2",
|
3 |
+
"architectures": [
|
4 |
+
"BertModel"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"classifier_dropout": null,
|
8 |
+
"gradient_checkpointing": false,
|
9 |
+
"hidden_act": "gelu",
|
10 |
+
"hidden_dropout_prob": 0.1,
|
11 |
+
"hidden_size": 384,
|
12 |
+
"initializer_range": 0.02,
|
13 |
+
"intermediate_size": 1536,
|
14 |
+
"layer_norm_eps": 1e-12,
|
15 |
+
"max_position_embeddings": 512,
|
16 |
+
"model_type": "bert",
|
17 |
+
"num_attention_heads": 12,
|
18 |
+
"num_hidden_layers": 6,
|
19 |
+
"pad_token_id": 0,
|
20 |
+
"position_embedding_type": "absolute",
|
21 |
+
"torch_dtype": "float32",
|
22 |
+
"transformers_version": "4.44.2",
|
23 |
+
"type_vocab_size": 2,
|
24 |
+
"use_cache": true,
|
25 |
+
"vocab_size": 30522
|
26 |
+
}
|
config_sentence_transformers.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"__version__": {
|
3 |
+
"sentence_transformers": "3.2.1",
|
4 |
+
"transformers": "4.44.2",
|
5 |
+
"pytorch": "2.5.0+cu121"
|
6 |
+
},
|
7 |
+
"prompts": {},
|
8 |
+
"default_prompt_name": null,
|
9 |
+
"similarity_fn_name": null
|
10 |
+
}
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:edd0dfda2ba2f67ead4a085f201a40878aa3e35fd7dcd1e5af20ada981773ee5
|
3 |
+
size 90864192
|
modules.json
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"idx": 0,
|
4 |
+
"name": "0",
|
5 |
+
"path": "",
|
6 |
+
"type": "sentence_transformers.models.Transformer"
|
7 |
+
},
|
8 |
+
{
|
9 |
+
"idx": 1,
|
10 |
+
"name": "1",
|
11 |
+
"path": "1_Pooling",
|
12 |
+
"type": "sentence_transformers.models.Pooling"
|
13 |
+
},
|
14 |
+
{
|
15 |
+
"idx": 2,
|
16 |
+
"name": "2",
|
17 |
+
"path": "2_Normalize",
|
18 |
+
"type": "sentence_transformers.models.Normalize"
|
19 |
+
}
|
20 |
+
]
|
sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"max_seq_length": 256,
|
3 |
+
"do_lower_case": false
|
4 |
+
}
|
special_tokens_map.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": {
|
3 |
+
"content": "[CLS]",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"mask_token": {
|
10 |
+
"content": "[MASK]",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": {
|
17 |
+
"content": "[PAD]",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": false,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
},
|
23 |
+
"sep_token": {
|
24 |
+
"content": "[SEP]",
|
25 |
+
"lstrip": false,
|
26 |
+
"normalized": false,
|
27 |
+
"rstrip": false,
|
28 |
+
"single_word": false
|
29 |
+
},
|
30 |
+
"unk_token": {
|
31 |
+
"content": "[UNK]",
|
32 |
+
"lstrip": false,
|
33 |
+
"normalized": false,
|
34 |
+
"rstrip": false,
|
35 |
+
"single_word": false
|
36 |
+
}
|
37 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "[PAD]",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"100": {
|
12 |
+
"content": "[UNK]",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"101": {
|
20 |
+
"content": "[CLS]",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"102": {
|
28 |
+
"content": "[SEP]",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"103": {
|
36 |
+
"content": "[MASK]",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"clean_up_tokenization_spaces": true,
|
45 |
+
"cls_token": "[CLS]",
|
46 |
+
"do_basic_tokenize": true,
|
47 |
+
"do_lower_case": true,
|
48 |
+
"mask_token": "[MASK]",
|
49 |
+
"max_length": 128,
|
50 |
+
"model_max_length": 256,
|
51 |
+
"never_split": null,
|
52 |
+
"pad_to_multiple_of": null,
|
53 |
+
"pad_token": "[PAD]",
|
54 |
+
"pad_token_type_id": 0,
|
55 |
+
"padding_side": "right",
|
56 |
+
"sep_token": "[SEP]",
|
57 |
+
"stride": 0,
|
58 |
+
"strip_accents": null,
|
59 |
+
"tokenize_chinese_chars": true,
|
60 |
+
"tokenizer_class": "BertTokenizer",
|
61 |
+
"truncation_side": "right",
|
62 |
+
"truncation_strategy": "longest_first",
|
63 |
+
"unk_token": "[UNK]"
|
64 |
+
}
|
training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fafd941a6c38737fd9d93f842ba2cae5128d3395f03ce915b3a46088bd7a3da1
|
3 |
+
size 5432
|
vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|