1-13-am
/

deberta-pii-finetuned

@@ -18,10 +18,10 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [microsoft/deberta-v3-base](https://huggingface.co/microsoft/deberta-v3-base) on an unspecified dataset (the dataset name was not recorded when this card was generated).
 It achieves the following results on the evaluation set:
-- Loss: 0.0015
-- F Beta: 0.8072
-- Precision: 0.9817
-- Recall: 0.8015
 ## Model description
@@ -44,23 +44,24 @@ The following hyperparameters were used during training:
 - train_batch_size: 8
 - eval_batch_size: 16
 - seed: 42
-- gradient_accumulation_steps: 3
-- total_train_batch_size: 24
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_ratio: 0.05
-- num_epochs: 1
 - mixed_precision_training: Native AMP
 ### Training results
 | Training Loss | Epoch | Step | Validation Loss | F Beta | Precision | Recall |
 |:-------------:|:-----:|:----:|:---------------:|:------:|:---------:|:------:|
-| 0.0169        | 0.19  | 150  | 0.0026          | 0.8985 | 0.9836    | 0.8954 |
-| 0.0036        | 0.37  | 300  | 0.0019          | 0.8693 | 0.9778    | 0.8655 |
-| 0.0036        | 0.56  | 450  | 0.0017          | 0.8120 | 0.9741    | 0.8066 |
-| 0.0053        | 0.74  | 600  | 0.0016          | 0.7912 | 0.9796    | 0.7851 |
-| 0.0059        | 0.93  | 750  | 0.0015          | 0.8072 | 0.9817    | 0.8015 |
 ### Framework versions

 This model is a fine-tuned version of [microsoft/deberta-v3-base](https://huggingface.co/microsoft/deberta-v3-base) on an unspecified dataset (the dataset name was not recorded when this card was generated).
 It achieves the following results on the evaluation set:
+- Loss: 0.0065
+- F Beta: 0.9611
+- Precision: 0.9932
+- Recall: 0.9598
 ## Model description
 - train_batch_size: 8
 - eval_batch_size: 16
 - seed: 42
+- gradient_accumulation_steps: 4
+- total_train_batch_size: 32
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_ratio: 0.05
+- num_epochs: 3
 - mixed_precision_training: Native AMP
 ### Training results
 | Training Loss | Epoch | Step | Validation Loss | F Beta | Precision | Recall |
 |:-------------:|:-----:|:----:|:---------------:|:------:|:---------:|:------:|
+| 0.0291        | 0.46  | 300  | 0.0104          | 0.9756 | 0.9854    | 0.9752 |
+| 0.0062        | 0.93  | 600  | 0.0041          | 0.9830 | 0.9901    | 0.9827 |
+| 0.0044        | 1.39  | 900  | 0.0057          | 0.9713 | 0.9895    | 0.9706 |
+| 0.0258        | 1.85  | 1200 | 0.0040          | 0.9799 | 0.9920    | 0.9794 |
+| 0.0135        | 2.32  | 1500 | 0.0050          | 0.9845 | 0.9943    | 0.9841 |
+| 0.0023        | 2.78  | 1800 | 0.0065          | 0.9611 | 0.9932    | 0.9598 |
 ### Framework versions

config.json CHANGED Viewed

@@ -1,43 +1,43 @@
 {
   "_name_or_path": "microsoft/deberta-v3-base",
   "architectures": [
-    "DebertaForTokenClassification"
   ],
   "attention_probs_dropout_prob": 0.1,
   "hidden_act": "gelu",
   "hidden_dropout_prob": 0.1,
   "hidden_size": 768,
   "id2label": {
-    "0": "O",
-    "1": "B-NAME_STUDENT",
-    "2": "B-URL_PERSONAL",
-    "3": "B-ID_NUM",
-    "4": "B-STREET_ADDRESS",
-    "5": "B-PHONE_NUM",
-    "6": "B-EMAIL",
-    "7": "B-USERNAME",
-    "8": "I-NAME_STUDENT",
-    "9": "I-URL_PERSONAL",
-    "10": "I-ID_NUM",
-    "11": "I-STREET_ADDRESS",
-    "12": "I-PHONE_NUM"
   },
   "initializer_range": 0.02,
   "intermediate_size": 3072,
   "label2id": {
-    "B-EMAIL": 6,
-    "B-ID_NUM": 3,
-    "B-NAME_STUDENT": 1,
-    "B-PHONE_NUM": 5,
-    "B-STREET_ADDRESS": 4,
-    "B-URL_PERSONAL": 2,
-    "B-USERNAME": 7,
-    "I-ID_NUM": 10,
-    "I-NAME_STUDENT": 8,
-    "I-PHONE_NUM": 12,
-    "I-STREET_ADDRESS": 11,
-    "I-URL_PERSONAL": 9,
-    "O": 0
   },
   "layer_norm_eps": 1e-07,
   "max_position_embeddings": 512,

 {
   "_name_or_path": "microsoft/deberta-v3-base",
   "architectures": [
+    "DebertaV2ForTokenClassification"
   ],
   "attention_probs_dropout_prob": 0.1,
   "hidden_act": "gelu",
   "hidden_dropout_prob": 0.1,
   "hidden_size": 768,
   "id2label": {
+    "0": "B-NAME_STUDENT",
+    "1": "B-URL_PERSONAL",
+    "2": "B-ID_NUM",
+    "3": "B-STREET_ADDRESS",
+    "4": "B-PHONE_NUM",
+    "5": "B-EMAIL",
+    "6": "B-USERNAME",
+    "7": "I-NAME_STUDENT",
+    "8": "I-URL_PERSONAL",
+    "9": "I-ID_NUM",
+    "10": "I-STREET_ADDRESS",
+    "11": "I-PHONE_NUM",
+    "12": "O"
   },
   "initializer_range": 0.02,
   "intermediate_size": 3072,
   "label2id": {
+    "B-EMAIL": 5,
+    "B-ID_NUM": 2,
+    "B-NAME_STUDENT": 0,
+    "B-PHONE_NUM": 4,
+    "B-STREET_ADDRESS": 3,
+    "B-URL_PERSONAL": 1,
+    "B-USERNAME": 6,
+    "I-ID_NUM": 9,
+    "I-NAME_STUDENT": 7,
+    "I-PHONE_NUM": 11,
+    "I-STREET_ADDRESS": 10,
+    "I-URL_PERSONAL": 8,
+    "O": 12
   },
   "layer_norm_eps": 1e-07,
   "max_position_embeddings": 512,

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:34447759db9417cbc162db7781412e40bf51daf43bd819a83aab067bbc1e5b18
-size 737753016

 version https://git-lfs.github.com/spec/v1
+oid sha256:7d9fef5246efc1d9faa5b336b2beba637f1f9305c8838a7d776c68fd34de4dbe
+size 735390572

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aa66f07f19802685670b87f42045c629cb3b2ec946120717686b93d620b135a4
 size 4219

 version https://git-lfs.github.com/spec/v1
+oid sha256:123496ed5e2b5a7009aed827cfd8eab76419269d2e734e57f08a9e641702879b
 size 4219