luis-espinosa committed on
Commit
9d59717
1 Parent(s): 6a493fa

Upload trained SetFit model

Browse files
1_Pooling/config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "word_embedding_dimension": 768,
3
  "pooling_mode_cls_token": false,
4
  "pooling_mode_mean_tokens": true,
5
  "pooling_mode_max_tokens": false,
 
1
  {
2
+ "word_embedding_dimension": 1024,
3
  "pooling_mode_cls_token": false,
4
  "pooling_mode_mean_tokens": true,
5
  "pooling_mode_max_tokens": false,
2_Dense/config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"in_features": 1024, "out_features": 1024, "bias": true, "activation_function": "torch.nn.modules.linear.Identity"}
2_Dense/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fa4dd117b012a263c595513383de9c79cd0f08ad5eb4cb909e91a9c8bd3f56b
3
+ size 4198560
README.md CHANGED
@@ -22,9 +22,9 @@ metrics:
22
  pipeline_tag: text-classification
23
  library_name: setfit
24
  inference: true
25
- base_model: infgrad/stella-base-en-v2
26
  model-index:
27
- - name: SetFit with infgrad/stella-base-en-v2
28
  results:
29
  - task:
30
  type: text-classification
@@ -39,9 +39,9 @@ model-index:
39
  name: Accuracy
40
  ---
41
 
42
- # SetFit with infgrad/stella-base-en-v2
43
 
44
- This is a [SetFit](https://github.com/huggingface/setfit) model that can be used for Text Classification. This SetFit model uses [infgrad/stella-base-en-v2](https://huggingface.co/infgrad/stella-base-en-v2) as the Sentence Transformer embedding model. A [LogisticRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) instance is used for classification.
45
 
46
  The model has been trained using an efficient few-shot learning technique that involves:
47
 
@@ -52,7 +52,7 @@ The model has been trained using an efficient few-shot learning technique that i
52
 
53
  ### Model Description
54
  - **Model Type:** SetFit
55
- - **Sentence Transformer body:** [infgrad/stella-base-en-v2](https://huggingface.co/infgrad/stella-base-en-v2)
56
  - **Classification head:** a [LogisticRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) instance
57
  - **Maximum Sequence Length:** 512 tokens
58
  - **Number of Classes:** 2 classes
@@ -158,17 +158,17 @@ preds = model("The tech giant announced today the appointment of a new CTO to le
158
  ### Training Results
159
  | Epoch | Step | Training Loss | Validation Loss |
160
  |:------:|:----:|:-------------:|:---------------:|
161
- | 0.0059 | 1 | 0.1849 | - |
162
- | 0.2941 | 50 | 0.1668 | - |
163
- | 0.5882 | 100 | 0.0221 | - |
164
- | 0.8824 | 150 | 0.0008 | - |
165
- | 1.1765 | 200 | 0.0004 | - |
166
- | 1.4706 | 250 | 0.0003 | - |
167
- | 1.7647 | 300 | 0.0003 | - |
168
- | 2.0588 | 350 | 0.0002 | - |
169
- | 2.3529 | 400 | 0.0002 | - |
170
- | 2.6471 | 450 | 0.0002 | - |
171
- | 2.9412 | 500 | 0.0002 | - |
172
 
173
  ### Framework Versions
174
  - Python: 3.10.12
 
22
  pipeline_tag: text-classification
23
  library_name: setfit
24
  inference: true
25
+ base_model: dunzhang/stella_en_400M_v5
26
  model-index:
27
+ - name: SetFit with dunzhang/stella_en_400M_v5
28
  results:
29
  - task:
30
  type: text-classification
 
39
  name: Accuracy
40
  ---
41
 
42
+ # SetFit with dunzhang/stella_en_400M_v5
43
 
44
+ This is a [SetFit](https://github.com/huggingface/setfit) model that can be used for Text Classification. This SetFit model uses [dunzhang/stella_en_400M_v5](https://huggingface.co/dunzhang/stella_en_400M_v5) as the Sentence Transformer embedding model. A [LogisticRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) instance is used for classification.
45
 
46
  The model has been trained using an efficient few-shot learning technique that involves:
47
 
 
52
 
53
  ### Model Description
54
  - **Model Type:** SetFit
55
+ - **Sentence Transformer body:** [dunzhang/stella_en_400M_v5](https://huggingface.co/dunzhang/stella_en_400M_v5)
56
  - **Classification head:** a [LogisticRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) instance
57
  - **Maximum Sequence Length:** 512 tokens
58
  - **Number of Classes:** 2 classes
 
158
  ### Training Results
159
  | Epoch | Step | Training Loss | Validation Loss |
160
  |:------:|:----:|:-------------:|:---------------:|
161
+ | 0.0059 | 1 | 0.2381 | - |
162
+ | 0.2941 | 50 | 0.1096 | - |
163
+ | 0.5882 | 100 | 0.0006 | - |
164
+ | 0.8824 | 150 | 0.0003 | - |
165
+ | 1.1765 | 200 | 0.0001 | - |
166
+ | 1.4706 | 250 | 0.0 | - |
167
+ | 1.7647 | 300 | 0.0 | - |
168
+ | 2.0588 | 350 | 0.0 | - |
169
+ | 2.3529 | 400 | 0.0 | - |
170
+ | 2.6471 | 450 | 0.0 | - |
171
+ | 2.9412 | 500 | 0.0 | - |
172
 
173
  ### Framework Versions
174
  - Python: 3.10.12
config.json CHANGED
@@ -1,32 +1,39 @@
1
  {
2
- "_name_or_path": "infgrad/stella-base-en-v2",
3
  "architectures": [
4
- "BertModel"
5
  ],
6
- "attention_probs_dropout_prob": 0.1,
 
 
 
 
7
  "classifier_dropout": null,
8
- "gradient_checkpointing": false,
9
  "hidden_act": "gelu",
10
  "hidden_dropout_prob": 0.1,
11
- "hidden_size": 768,
12
- "id2label": {
13
- "0": "LABEL_0"
14
- },
15
  "initializer_range": 0.02,
16
- "intermediate_size": 3072,
17
- "label2id": {
18
- "LABEL_0": 0
19
- },
20
  "layer_norm_eps": 1e-12,
21
- "max_position_embeddings": 512,
22
- "model_type": "bert",
23
- "num_attention_heads": 12,
24
- "num_hidden_layers": 12,
 
 
 
 
25
  "pad_token_id": 0,
26
- "position_embedding_type": "absolute",
 
 
 
 
 
27
  "torch_dtype": "float32",
28
  "transformers_version": "4.42.2",
29
  "type_vocab_size": 2,
30
- "use_cache": true,
31
- "vocab_size": 30522
 
32
  }
 
1
  {
2
+ "_name_or_path": "dunzhang/stella_en_400M_v5",
3
  "architectures": [
4
+ "NewModel"
5
  ],
6
+ "attention_probs_dropout_prob": 0.0,
7
+ "auto_map": {
8
+ "AutoConfig": "dunzhang/stella_en_400M_v5--configuration.NewConfig",
9
+ "AutoModel": "dunzhang/stella_en_400M_v5--modeling.NewModel"
10
+ },
11
  "classifier_dropout": null,
 
12
  "hidden_act": "gelu",
13
  "hidden_dropout_prob": 0.1,
14
+ "hidden_size": 1024,
 
 
 
15
  "initializer_range": 0.02,
16
+ "intermediate_size": 4096,
 
 
 
17
  "layer_norm_eps": 1e-12,
18
+ "layer_norm_type": "layer_norm",
19
+ "logn_attention_clip1": false,
20
+ "logn_attention_scale": false,
21
+ "max_position_embeddings": 8192,
22
+ "model_type": "new",
23
+ "num_attention_heads": 16,
24
+ "num_hidden_layers": 24,
25
+ "pack_qkv": true,
26
  "pad_token_id": 0,
27
+ "position_embedding_type": "rope",
28
+ "rope_scaling": {
29
+ "factor": 2.0,
30
+ "type": "ntk"
31
+ },
32
+ "rope_theta": 160000,
33
  "torch_dtype": "float32",
34
  "transformers_version": "4.42.2",
35
  "type_vocab_size": 2,
36
+ "unpad_inputs": true,
37
+ "use_memory_efficient_attention": true,
38
+ "vocab_size": 30528
39
  }
config_sentence_transformers.json CHANGED
@@ -4,7 +4,10 @@
4
  "transformers": "4.42.2",
5
  "pytorch": "2.5.1+cu124"
6
  },
7
- "prompts": {},
 
 
 
8
  "default_prompt_name": null,
9
  "similarity_fn_name": "cosine"
10
  }
 
4
  "transformers": "4.42.2",
5
  "pytorch": "2.5.1+cu124"
6
  },
7
+ "prompts": {
8
+ "s2p_query": "Instruct: Given a web search query, retrieve relevant passages that answer the query.\nQuery: ",
9
+ "s2s_query": "Instruct: Retrieve semantically similar text.\nQuery: "
10
+ },
11
  "default_prompt_name": null,
12
  "similarity_fn_name": "cosine"
13
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ae59e4ff2eea5837ef4cd520e5611a45475aac965c350b62cab8c0f668ae29dd
3
- size 437951328
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ffd07aa46d5cdd19d4057da342f2f7bcc48171ebc30d7cccc5a3cd4ac7fbbe2
3
+ size 1736585680
model_head.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:042cb4f5fbe2e418f3a5e56a4ee00c0db5e12c60c1ed2c70c6812320b5e44a9e
3
- size 6991
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a77607f61850ddd7171a17f3caefa7776f7a3f7b232aaf3cb3e42ba71d8d493a
3
+ size 9039
modules.json CHANGED
@@ -10,5 +10,11 @@
10
  "name": "1",
11
  "path": "1_Pooling",
12
  "type": "sentence_transformers.models.Pooling"
 
 
 
 
 
 
13
  }
14
  ]
 
10
  "name": "1",
11
  "path": "1_Pooling",
12
  "type": "sentence_transformers.models.Pooling"
13
+ },
14
+ {
15
+ "idx": 2,
16
+ "name": "2",
17
+ "path": "2_Dense",
18
+ "type": "sentence_transformers.models.Dense"
19
  }
20
  ]
tokenizer_config.json CHANGED
@@ -43,15 +43,20 @@
43
  },
44
  "clean_up_tokenization_spaces": true,
45
  "cls_token": "[CLS]",
46
- "do_basic_tokenize": true,
47
  "do_lower_case": true,
48
  "mask_token": "[MASK]",
 
49
  "model_max_length": 512,
50
- "never_split": null,
51
  "pad_token": "[PAD]",
 
 
52
  "sep_token": "[SEP]",
 
53
  "strip_accents": null,
54
  "tokenize_chinese_chars": true,
55
  "tokenizer_class": "BertTokenizer",
 
 
56
  "unk_token": "[UNK]"
57
  }
 
43
  },
44
  "clean_up_tokenization_spaces": true,
45
  "cls_token": "[CLS]",
 
46
  "do_lower_case": true,
47
  "mask_token": "[MASK]",
48
+ "max_length": 8000,
49
  "model_max_length": 512,
50
+ "pad_to_multiple_of": null,
51
  "pad_token": "[PAD]",
52
+ "pad_token_type_id": 0,
53
+ "padding_side": "right",
54
  "sep_token": "[SEP]",
55
+ "stride": 0,
56
  "strip_accents": null,
57
  "tokenize_chinese_chars": true,
58
  "tokenizer_class": "BertTokenizer",
59
+ "truncation_side": "right",
60
+ "truncation_strategy": "longest_first",
61
  "unk_token": "[UNK]"
62
  }