Add SetFit model
Browse files
- 1_Pooling/config.json +1 -1
- README.md +25 -34
- config.json +10 -20
- config_sentence_transformers.json +3 -3
- model.safetensors +2 -2
- model_head.pkl +2 -2
- sentence_bert_config.json +1 -1
- tokenizer.json +2 -2
- tokenizer_config.json +8 -1
- vocab.txt +0 -6
1_Pooling/config.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"word_embedding_dimension":
|
3 |
"pooling_mode_cls_token": false,
|
4 |
"pooling_mode_mean_tokens": true,
|
5 |
"pooling_mode_max_tokens": false,
|
|
|
1 |
{
|
2 |
+
"word_embedding_dimension": 384,
|
3 |
"pooling_mode_cls_token": false,
|
4 |
"pooling_mode_mean_tokens": true,
|
5 |
"pooling_mode_max_tokens": false,
|
README.md
CHANGED
@@ -24,9 +24,9 @@ widget:
|
|
24 |
ground-based reference data.
|
25 |
pipeline_tag: text-classification
|
26 |
inference: true
|
27 |
-
base_model:
|
28 |
model-index:
|
29 |
-
- name: SetFit with
|
30 |
results:
|
31 |
- task:
|
32 |
type: text-classification
|
@@ -37,13 +37,13 @@ model-index:
|
|
37 |
split: test
|
38 |
metrics:
|
39 |
- type: accuracy
|
40 |
-
value: 0.
|
41 |
name: Accuracy
|
42 |
---
|
43 |
|
44 |
-
# SetFit with
|
45 |
|
46 |
-
This is a [SetFit](https://github.com/huggingface/setfit) model that can be used for Text Classification. This SetFit model uses [
|
47 |
|
48 |
The model has been trained using an efficient few-shot learning technique that involves:
|
49 |
|
@@ -54,9 +54,9 @@ The model has been trained using an efficient few-shot learning technique that i
|
|
54 |
|
55 |
### Model Description
|
56 |
- **Model Type:** SetFit
|
57 |
-
- **Sentence Transformer body:** [
|
58 |
- **Classification head:** a [LogisticRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) instance
|
59 |
-
- **Maximum Sequence Length:**
|
60 |
- **Number of Classes:** 13 classes
|
61 |
<!-- - **Training Dataset:** [Unknown](https://huggingface.co/datasets/unknown) -->
|
62 |
<!-- - **Language:** Unknown -->
|
@@ -90,7 +90,7 @@ The model has been trained using an efficient few-shot learning technique that i
|
|
90 |
### Metrics
|
91 |
| Label | Accuracy |
|
92 |
|:--------|:---------|
|
93 |
-
| **all** | 0.
|
94 |
|
95 |
## Uses
|
96 |
|
@@ -181,32 +181,23 @@ preds = model("This paper focuses on mining association rules between sets of it
|
|
181 |
### Training Results
|
182 |
| Epoch | Step | Training Loss | Validation Loss |
|
183 |
|:------:|:----:|:-------------:|:---------------:|
|
184 |
-
| 0.
|
185 |
-
| 0.
|
186 |
-
| 0.
|
187 |
-
| 0.
|
188 |
-
| 0.
|
189 |
-
| 0.
|
190 |
-
| 0.
|
191 |
-
| 0.
|
192 |
-
| 0.
|
193 |
-
| 0.
|
194 |
-
| 0.
|
195 |
-
| 0.
|
196 |
-
| 0.
|
197 |
-
| 0.
|
198 |
-
| 0.
|
199 |
-
| 0.
|
200 |
-
| 0.
|
201 |
-
| 0.4920 | 400 | 0.0745 | - |
|
202 |
-
| 0.5535 | 450 | 0.0807 | - |
|
203 |
-
| 0.6150 | 500 | 0.0736 | - |
|
204 |
-
| 0.6765 | 550 | 0.0571 | - |
|
205 |
-
| 0.7380 | 600 | 0.0649 | - |
|
206 |
-
| 0.7995 | 650 | 0.0672 | - |
|
207 |
-
| 0.8610 | 700 | 0.0586 | - |
|
208 |
-
| 0.9225 | 750 | 0.0624 | - |
|
209 |
-
| 0.9840 | 800 | 0.0614 | - |
|
210 |
|
211 |
### Framework Versions
|
212 |
- Python: 3.10.12
|
|
|
24 |
ground-based reference data.
|
25 |
pipeline_tag: text-classification
|
26 |
inference: true
|
27 |
+
base_model: sentence-transformers/paraphrase-MiniLM-L3-v2
|
28 |
model-index:
|
29 |
+
- name: SetFit with sentence-transformers/paraphrase-MiniLM-L3-v2
|
30 |
results:
|
31 |
- task:
|
32 |
type: text-classification
|
|
|
37 |
split: test
|
38 |
metrics:
|
39 |
- type: accuracy
|
40 |
+
value: 0.7407692307692307
|
41 |
name: Accuracy
|
42 |
---
|
43 |
|
44 |
+
# SetFit with sentence-transformers/paraphrase-MiniLM-L3-v2
|
45 |
|
46 |
+
This is a [SetFit](https://github.com/huggingface/setfit) model that can be used for Text Classification. This SetFit model uses [sentence-transformers/paraphrase-MiniLM-L3-v2](https://huggingface.co/sentence-transformers/paraphrase-MiniLM-L3-v2) as the Sentence Transformer embedding model. A [LogisticRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) instance is used for classification.
|
47 |
|
48 |
The model has been trained using an efficient few-shot learning technique that involves:
|
49 |
|
|
|
54 |
|
55 |
### Model Description
|
56 |
- **Model Type:** SetFit
|
57 |
+
- **Sentence Transformer body:** [sentence-transformers/paraphrase-MiniLM-L3-v2](https://huggingface.co/sentence-transformers/paraphrase-MiniLM-L3-v2)
|
58 |
- **Classification head:** a [LogisticRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) instance
|
59 |
+
- **Maximum Sequence Length:** 128 tokens
|
60 |
- **Number of Classes:** 13 classes
|
61 |
<!-- - **Training Dataset:** [Unknown](https://huggingface.co/datasets/unknown) -->
|
62 |
<!-- - **Language:** Unknown -->
|
|
|
90 |
### Metrics
|
91 |
| Label | Accuracy |
|
92 |
|:--------|:---------|
|
93 |
+
| **all** | 0.7408 |
|
94 |
|
95 |
## Uses
|
96 |
|
|
|
181 |
### Training Results
|
182 |
| Epoch | Step | Training Loss | Validation Loss |
|
183 |
|:------:|:----:|:-------------:|:---------------:|
|
184 |
+
| 0.0012 | 1 | 0.4201 | - |
|
185 |
+
| 0.0615 | 50 | 0.2562 | - |
|
186 |
+
| 0.1230 | 100 | 0.2334 | - |
|
187 |
+
| 0.1845 | 150 | 0.1974 | - |
|
188 |
+
| 0.2460 | 200 | 0.195 | - |
|
189 |
+
| 0.3075 | 250 | 0.1768 | - |
|
190 |
+
| 0.3690 | 300 | 0.146 | - |
|
191 |
+
| 0.4305 | 350 | 0.1541 | - |
|
192 |
+
| 0.4920 | 400 | 0.1647 | - |
|
193 |
+
| 0.5535 | 450 | 0.154 | - |
|
194 |
+
| 0.6150 | 500 | 0.1568 | - |
|
195 |
+
| 0.6765 | 550 | 0.1494 | - |
|
196 |
+
| 0.7380 | 600 | 0.1554 | - |
|
197 |
+
| 0.7995 | 650 | 0.1456 | - |
|
198 |
+
| 0.8610 | 700 | 0.1527 | - |
|
199 |
+
| 0.9225 | 750 | 0.1488 | - |
|
200 |
+
| 0.9840 | 800 | 0.1312 | - |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
201 |
|
202 |
### Framework Versions
|
203 |
- Python: 3.10.12
|
config.json
CHANGED
@@ -1,36 +1,26 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "/root/.cache/torch/sentence_transformers/
|
3 |
"architectures": [
|
4 |
-
"
|
5 |
],
|
6 |
-
"attention_probs_dropout_prob": 0.
|
7 |
-
"attn_implementation": null,
|
8 |
-
"auto_map": {
|
9 |
-
"AutoConfig": "configuration_bert.JinaBertConfig",
|
10 |
-
"AutoModel": "modeling_bert.JinaBertModel",
|
11 |
-
"AutoModelForMaskedLM": "jinaai/jina-bert-implementation--modeling_bert.JinaBertForMaskedLM",
|
12 |
-
"AutoModelForSequenceClassification": "jinaai/jina-bert-implementation--modeling_bert.JinaBertForSequenceClassification"
|
13 |
-
},
|
14 |
"classifier_dropout": null,
|
15 |
-
"emb_pooler": "mean",
|
16 |
-
"feed_forward_type": "geglu",
|
17 |
"gradient_checkpointing": false,
|
18 |
"hidden_act": "gelu",
|
19 |
"hidden_dropout_prob": 0.1,
|
20 |
-
"hidden_size":
|
21 |
"initializer_range": 0.02,
|
22 |
-
"intermediate_size":
|
23 |
"layer_norm_eps": 1e-12,
|
24 |
-
"max_position_embeddings":
|
25 |
-
"model_max_length": 8192,
|
26 |
"model_type": "bert",
|
27 |
-
"num_attention_heads":
|
28 |
-
"num_hidden_layers":
|
29 |
"pad_token_id": 0,
|
30 |
-
"position_embedding_type": "
|
31 |
"torch_dtype": "float32",
|
32 |
"transformers_version": "4.36.2",
|
33 |
"type_vocab_size": 2,
|
34 |
"use_cache": true,
|
35 |
-
"vocab_size":
|
36 |
}
|
|
|
1 |
{
|
2 |
+
"_name_or_path": "/root/.cache/torch/sentence_transformers/sentence-transformers_paraphrase-MiniLM-L3-v2/",
|
3 |
"architectures": [
|
4 |
+
"BertModel"
|
5 |
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
"classifier_dropout": null,
|
|
|
|
|
8 |
"gradient_checkpointing": false,
|
9 |
"hidden_act": "gelu",
|
10 |
"hidden_dropout_prob": 0.1,
|
11 |
+
"hidden_size": 384,
|
12 |
"initializer_range": 0.02,
|
13 |
+
"intermediate_size": 1536,
|
14 |
"layer_norm_eps": 1e-12,
|
15 |
+
"max_position_embeddings": 512,
|
|
|
16 |
"model_type": "bert",
|
17 |
+
"num_attention_heads": 12,
|
18 |
+
"num_hidden_layers": 3,
|
19 |
"pad_token_id": 0,
|
20 |
+
"position_embedding_type": "absolute",
|
21 |
"torch_dtype": "float32",
|
22 |
"transformers_version": "4.36.2",
|
23 |
"type_vocab_size": 2,
|
24 |
"use_cache": true,
|
25 |
+
"vocab_size": 30522
|
26 |
}
|
config_sentence_transformers.json
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
{
|
2 |
"__version__": {
|
3 |
-
"sentence_transformers": "2.
|
4 |
-
"transformers": "4.
|
5 |
-
"pytorch": "
|
6 |
}
|
7 |
}
|
|
|
1 |
{
|
2 |
"__version__": {
|
3 |
+
"sentence_transformers": "2.0.0",
|
4 |
+
"transformers": "4.7.0",
|
5 |
+
"pytorch": "1.9.0+cu102"
|
6 |
}
|
7 |
}
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:782421e8a8f86650f5c4c24184bb8cde66eb095e4f2bce737ad3508d1c844bd8
|
3 |
+
size 69565312
|
model_head.pkl
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6790c7fffe6c2ab476607806d7b8ab06f8b147b2dce5a6a6eba84ea624ba05b8
|
3 |
+
size 41647
|
sentence_bert_config.json
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
{
|
2 |
-
"max_seq_length":
|
3 |
"do_lower_case": false
|
4 |
}
|
|
|
1 |
{
|
2 |
+
"max_seq_length": 128,
|
3 |
"do_lower_case": false
|
4 |
}
|
tokenizer.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2fc687b11de0bc1b3d8348f92e3b49ef1089a621506c7661fbf3248fcd54947e
|
3 |
+
size 711649
|
tokenizer_config.json
CHANGED
@@ -46,12 +46,19 @@
|
|
46 |
"do_basic_tokenize": true,
|
47 |
"do_lower_case": true,
|
48 |
"mask_token": "[MASK]",
|
49 |
-
"
|
|
|
50 |
"never_split": null,
|
|
|
51 |
"pad_token": "[PAD]",
|
|
|
|
|
52 |
"sep_token": "[SEP]",
|
|
|
53 |
"strip_accents": null,
|
54 |
"tokenize_chinese_chars": true,
|
55 |
"tokenizer_class": "BertTokenizer",
|
|
|
|
|
56 |
"unk_token": "[UNK]"
|
57 |
}
|
|
|
46 |
"do_basic_tokenize": true,
|
47 |
"do_lower_case": true,
|
48 |
"mask_token": "[MASK]",
|
49 |
+
"max_length": 128,
|
50 |
+
"model_max_length": 512,
|
51 |
"never_split": null,
|
52 |
+
"pad_to_multiple_of": null,
|
53 |
"pad_token": "[PAD]",
|
54 |
+
"pad_token_type_id": 0,
|
55 |
+
"padding_side": "right",
|
56 |
"sep_token": "[SEP]",
|
57 |
+
"stride": 0,
|
58 |
"strip_accents": null,
|
59 |
"tokenize_chinese_chars": true,
|
60 |
"tokenizer_class": "BertTokenizer",
|
61 |
+
"truncation_side": "right",
|
62 |
+
"truncation_strategy": "longest_first",
|
63 |
"unk_token": "[UNK]"
|
64 |
}
|
vocab.txt
CHANGED
@@ -30520,9 +30520,3 @@ necessitated
|
|
30520 |
##:
|
30521 |
##?
|
30522 |
##~
|
30523 |
-
bowang
|
30524 |
-
georgiosmastrapas
|
30525 |
-
jackminong
|
30526 |
-
jonathangeuter
|
30527 |
-
louismilliken
|
30528 |
-
michaelguenther
|
|
|
30520 |
##:
|
30521 |
##?
|
30522 |
##~
|
|
|
|
|
|
|
|
|
|
|
|