Add SetFit model

Browse files

Files changed (5) hide show

README.md +35 -46
config.json +1 -1
model.safetensors +1 -1
model_head.pkl +1 -1
tokenizer_config.json +7 -0

README.md CHANGED Viewed

@@ -11,14 +11,11 @@ metrics:
 - recall
 - f1
 widget:
-- text: Maintenance to the cambridge.org website is scheduled for 14 March at 12am
-    – 8am GMT.
-- text: Quarterly Earnings
-- text: 'So set sail for Long John Silver''s and discover why wa''re America''s most
-    popular sealood vestments antannro fi '
-- text: "\n                                                        OPEC oil price\
-    \ annually 1960-2024\n                                                    "
-- text: 'RUSSELL WILSON OF THE SEATTLE SEAHAWKS — DURING SUPER BOWL XLVIII '
 pipeline_tag: text-classification
 inference: true
 base_model: sentence-transformers/paraphrase-mpnet-base-v2
@@ -34,16 +31,16 @@ model-index:
       split: test
     metrics:
     - type: accuracy
-      value: 0.8083333333333333
       name: Accuracy
     - type: precision
-      value: 0.7894736842105263
       name: Precision
     - type: recall
-      value: 0.8035714285714286
       name: Recall
     - type: f1
-      value: 0.7964601769911505
       name: F1
 ---
@@ -75,17 +72,17 @@ The model has been trained using an efficient few-shot learning technique that i
 - **Blogpost:** [SetFit: Efficient Few-Shot Learning Without Prompts](https://huggingface.co/blog/setfit)
 ### Model Labels
-| Label | Examples                                                                                                                                                                                                         |
-|:------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| False | <ul><li>'Learn more about this provider'</li><li>'Verletzte und Festnahmen'</li><li>'Bulgaria'</li></ul>                                                                                                         |
-| True  | <ul><li>'Free Quotes on Doors '</li><li>'Pakistan Cricket Board, Gaddafi Stadium, Ferozepur Road, Lahore, Pakistan. E-Mail: careers@pcb.com.pk '</li><li>"‘here's a new predator in the urban jungle "</li></ul> |
 ## Evaluation
 ### Metrics
 | Label   | Accuracy | Precision | Recall | F1     |
 |:--------|:---------|:----------|:-------|:-------|
-| **all** | 0.8083   | 0.7895    | 0.8036 | 0.7965 |
 ## Uses
@@ -105,7 +102,7 @@ from setfit import SetFitModel
 # Download from the 🤗 Hub
 model = SetFitModel.from_pretrained("setfit_model_id")
 # Run inference
-preds = model("Quarterly Earnings")
 ```
 <!--
@@ -137,12 +134,12 @@ preds = model("Quarterly Earnings")
 ### Training Set Metrics
 | Training set | Min | Median | Max |
 |:-------------|:----|:-------|:----|
-| Word count   | 1   | 8.2229 | 242 |
 | Label | Training Sample Count |
 |:------|:----------------------|
-| False | 236                   |
-| True  | 244                   |
 ### Training Hyperparameters
 - batch_size: (16, 2)
@@ -166,31 +163,23 @@ preds = model("Quarterly Earnings")
 ### Training Results
 | Epoch  | Step | Training Loss | Validation Loss |
 |:------:|:----:|:-------------:|:---------------:|
-| 0.0008 | 1    | 0.3892        | -               |
-| 0.0417 | 50   | 0.2262        | -               |
-| 0.0833 | 100  | 0.2138        | -               |
-| 0.125  | 150  | 0.1058        | -               |
-| 0.1667 | 200  | 0.1327        | -               |
-| 0.2083 | 250  | 0.098         | -               |
-| 0.25   | 300  | 0.0719        | -               |
-| 0.2917 | 350  | 0.0634        | -               |
-| 0.3333 | 400  | 0.0021        | -               |
-| 0.375  | 450  | 0.0084        | -               |
-| 0.4167 | 500  | 0.0799        | -               |
-| 0.4583 | 550  | 0.0822        | -               |
-| 0.5    | 600  | 0.0775        | -               |
-| 0.5417 | 650  | 0.0114        | -               |
-| 0.5833 | 700  | 0.0013        | -               |
-| 0.625  | 750  | 0.0121        | -               |
-| 0.6667 | 800  | 0.1034        | -               |
-| 0.7083 | 850  | 0.0539        | -               |
-| 0.75   | 900  | 0.0076        | -               |
-| 0.7917 | 950  | 0.0114        | -               |
-| 0.8333 | 1000 | 0.0223        | -               |
-| 0.875  | 1050 | 0.0208        | -               |
-| 0.9167 | 1100 | 0.0246        | -               |
-| 0.9583 | 1150 | 0.0098        | -               |
-| 1.0    | 1200 | 0.003         | -               |
 ### Framework Versions
 - Python: 3.11.0

 - recall
 - f1
 widget:
+- text: 'Some women are more alive than others. '
+- text: ': Session'
+- text: '. Manage your cookie preferences:'
+- text: Download for Mac
+- text: HTI Haiti
 pipeline_tag: text-classification
 inference: true
 base_model: sentence-transformers/paraphrase-mpnet-base-v2
       split: test
     metrics:
     - type: accuracy
+      value: 0.8625
       name: Accuracy
     - type: precision
+      value: 0.825
       name: Precision
     - type: recall
+      value: 0.8918918918918919
       name: Recall
     - type: f1
+      value: 0.8571428571428571
       name: F1
 ---
 - **Blogpost:** [SetFit: Efficient Few-Shot Learning Without Prompts](https://huggingface.co/blog/setfit)
 ### Model Labels
+| Label | Examples                                                                                                           |
+|:------|:-------------------------------------------------------------------------------------------------------------------|
+| True  | <ul><li>'715-462-3626 Open Daily @ 7am '</li><li>': HTTP'</li><li>'Zmywarka modutowa. Pasuje wszedzie. '</li></ul> |
+| False | <ul><li>'(retencja w dniach:  180)'</li><li>'Bosnia and Herzegovina'</li><li>'Arruda dos Vinhos'</li></ul>         |
 ## Evaluation
 ### Metrics
 | Label   | Accuracy | Precision | Recall | F1     |
 |:--------|:---------|:----------|:-------|:-------|
+| **all** | 0.8625   | 0.825     | 0.8919 | 0.8571 |
 ## Uses
 # Download from the 🤗 Hub
 model = SetFitModel.from_pretrained("setfit_model_id")
 # Run inference
+preds = model(": Session")
 ```
 <!--
 ### Training Set Metrics
 | Training set | Min | Median | Max |
 |:-------------|:----|:-------|:----|
+| Word count   | 1   | 8.5094 | 146 |
 | Label | Training Sample Count |
 |:------|:----------------------|
+| False | 157                   |
+| True  | 163                   |
 ### Training Hyperparameters
 - batch_size: (16, 2)
 ### Training Results
 | Epoch  | Step | Training Loss | Validation Loss |
 |:------:|:----:|:-------------:|:---------------:|
+| 0.0013 | 1    | 0.2507        | -               |
+| 0.0625 | 50   | 0.0961        | -               |
+| 0.125  | 100  | 0.2456        | -               |
+| 0.1875 | 150  | 0.0709        | -               |
+| 0.25   | 200  | 0.0213        | -               |
+| 0.3125 | 250  | 0.0193        | -               |
+| 0.375  | 300  | 0.0827        | -               |
+| 0.4375 | 350  | 0.015         | -               |
+| 0.5    | 400  | 0.0039        | -               |
+| 0.5625 | 450  | 0.0087        | -               |
+| 0.625  | 500  | 0.0064        | -               |
+| 0.6875 | 550  | 0.001         | -               |
+| 0.75   | 600  | 0.0236        | -               |
+| 0.8125 | 650  | 0.0553        | -               |
+| 0.875  | 700  | 0.0661        | -               |
+| 0.9375 | 750  | 0.0006        | -               |
+| 1.0    | 800  | 0.0604        | -               |
 ### Framework Versions
 - Python: 3.11.0

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "sentence-transformers/paraphrase-mpnet-base-v2",
   "architectures": [
     "MPNetModel"
   ],

 {
+  "_name_or_path": ".\\checkpoints\\step_1000",
   "architectures": [
     "MPNetModel"
   ],

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1dce3490c1781a93d8a6bc3b433e33b2f24c7458d050821ab4c70090abe529ca
 size 437967672

 version https://git-lfs.github.com/spec/v1
+oid sha256:e6363a984a4b306ef4b288961f84095ff24e2ac6e546cd8c549f8f83ddb38ec9
 size 437967672

model_head.pkl CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:43732b1b3359749d4d2294079fbe7f24336e077549284d45e08b3b566f444501
 size 6991

 version https://git-lfs.github.com/spec/v1
+oid sha256:34e64675dfa144355bc242399101ab996ca9d3d135b7410196ad0f700f551382
 size 6991

tokenizer_config.json CHANGED Viewed

@@ -48,12 +48,19 @@
   "do_lower_case": true,
   "eos_token": "</s>",
   "mask_token": "<mask>",
   "model_max_length": 512,
   "never_split": null,
   "pad_token": "<pad>",
   "sep_token": "</s>",
   "strip_accents": null,
   "tokenize_chinese_chars": true,
   "tokenizer_class": "MPNetTokenizer",
   "unk_token": "[UNK]"
 }

   "do_lower_case": true,
   "eos_token": "</s>",
   "mask_token": "<mask>",
+  "max_length": 512,
   "model_max_length": 512,
   "never_split": null,
+  "pad_to_multiple_of": null,
   "pad_token": "<pad>",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
   "sep_token": "</s>",
+  "stride": 0,
   "strip_accents": null,
   "tokenize_chinese_chars": true,
   "tokenizer_class": "MPNetTokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
   "unk_token": "[UNK]"
 }