eysharaazia committed
Commit
23526e5
1 Parent(s): df86df9

cyber_deberta

Files changed (7)
  1. README.md +18 -18
  2. config.json +16 -19
  3. model.safetensors +2 -2
  4. special_tokens_map.json +5 -49
  5. tokenizer.json +2 -2
  6. tokenizer_config.json +11 -18
  7. vocab.txt +0 -0
README.md CHANGED
@@ -1,6 +1,6 @@
 ---
-license: mit
-base_model: MoritzLaurer/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7
+license: apache-2.0
+base_model: google-bert/bert-base-multilingual-cased
 tags:
 - generated_from_trainer
 metrics:
@@ -18,13 +18,13 @@ should probably proofread and complete it, then remove this comment. -->
 
 # cyber_deberta
 
-This model is a fine-tuned version of [MoritzLaurer/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7](https://huggingface.co/MoritzLaurer/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7) on the None dataset.
+This model is a fine-tuned version of [google-bert/bert-base-multilingual-cased](https://huggingface.co/google-bert/bert-base-multilingual-cased) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.4424
-- Accuracy: 0.8383
-- F1: 0.8222
-- Precision: 0.8189
-- Recall: 0.8260
+- Loss: 0.4669
+- Accuracy: 0.8315
+- F1: 0.8135
+- Precision: 0.8121
+- Recall: 0.8150
 
 ## Model description
 
@@ -59,16 +59,16 @@ The following hyperparameters were used during training:
 
 | Training Loss | Epoch | Step | Validation Loss | Accuracy | F1 | Precision | Recall |
 |:-------------:|:-----:|:----:|:---------------:|:--------:|:------:|:---------:|:------:|
-| 0.5757 | 1.0 | 105 | 0.5760 | 0.6948 | 0.5676 | 0.6774 | 0.5811 |
-| 0.4861 | 2.0 | 210 | 0.4837 | 0.7663 | 0.7328 | 0.7410 | 0.7272 |
-| 0.4106 | 3.0 | 315 | 0.4200 | 0.8033 | 0.7800 | 0.7817 | 0.7785 |
-| 0.3777 | 4.0 | 420 | 0.3928 | 0.8200 | 0.7953 | 0.8033 | 0.7893 |
-| 0.2995 | 5.0 | 525 | 0.3835 | 0.8331 | 0.8187 | 0.8129 | 0.8272 |
-| 0.3012 | 6.0 | 630 | 0.3786 | 0.8404 | 0.8222 | 0.8227 | 0.8217 |
-| 0.26 | 7.0 | 735 | 0.3827 | 0.8399 | 0.8265 | 0.8202 | 0.8361 |
-| 0.2388 | 8.0 | 840 | 0.4340 | 0.8346 | 0.8139 | 0.8180 | 0.8104 |
-| 0.2092 | 9.0 | 945 | 0.4377 | 0.8388 | 0.8237 | 0.8192 | 0.8294 |
-| 0.1957 | 10.0 | 1050 | 0.4424 | 0.8383 | 0.8222 | 0.8189 | 0.8260 |
+| 0.5788 | 1.0 | 105 | 0.5623 | 0.6755 | 0.4813 | 0.6766 | 0.5352 |
+| 0.478 | 2.0 | 210 | 0.4430 | 0.7746 | 0.7444 | 0.7501 | 0.7401 |
+| 0.4087 | 3.0 | 315 | 0.3948 | 0.8096 | 0.7835 | 0.7911 | 0.7777 |
+| 0.4004 | 4.0 | 420 | 0.3868 | 0.8080 | 0.7917 | 0.7864 | 0.7998 |
+| 0.3216 | 5.0 | 525 | 0.4005 | 0.8106 | 0.7928 | 0.7888 | 0.7980 |
+| 0.3144 | 6.0 | 630 | 0.3878 | 0.8299 | 0.8062 | 0.8153 | 0.7994 |
+| 0.2598 | 7.0 | 735 | 0.4040 | 0.8258 | 0.8084 | 0.8053 | 0.8121 |
+| 0.2234 | 8.0 | 840 | 0.4280 | 0.8284 | 0.8108 | 0.8083 | 0.8137 |
+| 0.2088 | 9.0 | 945 | 0.4580 | 0.8320 | 0.8154 | 0.8121 | 0.8194 |
+| 0.1775 | 10.0 | 1050 | 0.4669 | 0.8315 | 0.8135 | 0.8121 | 0.8150 |
 
 
 ### Framework versions
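Neither version of the card includes a usage snippet, so a minimal inference sketch follows. The repository id `eysharaazia/cyber_deberta` is inferred from the committer and model name and is not confirmed by this diff; the `No`/`Yes` labels are the ones visible in config.json below.

```python
# Minimal inference sketch for the retrained checkpoint.
# "eysharaazia/cyber_deberta" is a hypothetical repo id; substitute the real path.
from transformers import pipeline

classifier = pipeline("text-classification", model="eysharaazia/cyber_deberta")

# The head maps to the labels declared in config.json: "No" (0) and "Yes" (1).
print(classifier("Example input text"))
# -> [{'label': 'Yes' or 'No', 'score': <float>}]
```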
config.json CHANGED
@@ -1,9 +1,11 @@
 {
-  "_name_or_path": "MoritzLaurer/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7",
+  "_name_or_path": "google-bert/bert-base-multilingual-cased",
   "architectures": [
-    "DebertaV2ForSequenceClassification"
+    "BertForSequenceClassification"
   ],
   "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "directionality": "bidi",
   "hidden_act": "gelu",
   "hidden_dropout_prob": 0.1,
   "hidden_size": 768,
@@ -17,27 +19,22 @@
     "No": 0,
     "Yes": 1
   },
-  "layer_norm_eps": 1e-07,
+  "layer_norm_eps": 1e-12,
   "max_position_embeddings": 512,
-  "max_relative_positions": -1,
-  "model_type": "deberta-v2",
-  "norm_rel_ebd": "layer_norm",
+  "model_type": "bert",
   "num_attention_heads": 12,
   "num_hidden_layers": 12,
   "pad_token_id": 0,
-  "pooler_dropout": 0,
-  "pooler_hidden_act": "gelu",
-  "pooler_hidden_size": 768,
-  "pos_att_type": [
-    "p2c",
-    "c2p"
-  ],
-  "position_biased_input": false,
-  "position_buckets": 256,
-  "relative_attention": true,
-  "share_att_key": true,
+  "pooler_fc_size": 768,
+  "pooler_num_attention_heads": 12,
+  "pooler_num_fc_layers": 3,
+  "pooler_size_per_head": 128,
+  "pooler_type": "first_token_transform",
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
   "torch_dtype": "float32",
   "transformers_version": "4.41.2",
-  "type_vocab_size": 0,
-  "vocab_size": 251000
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 119547
 }
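Although the repository is still named cyber_deberta, the config now describes a plain BERT classifier. A quick sanity-check sketch against the updated config, using the same hypothetical repo id as above:

```python
# Verify the new config matches what the diff shows (hypothetical repo id).
from transformers import AutoConfig

cfg = AutoConfig.from_pretrained("eysharaazia/cyber_deberta")
assert cfg.model_type == "bert"
assert cfg.architectures == ["BertForSequenceClassification"]
assert cfg.vocab_size == 119547  # mBERT's WordPiece vocab, down from 251000
assert cfg.num_hidden_layers == 12 and cfg.hidden_size == 768
```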
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7fcc562bd25e738338e588ab90790dc9f408b69a300aeea8d858e2970f5636ae
-size 1115268200
+oid sha256:0ada620f62ce7f4490ed74ce016091e1891cc24343111572f6f612c88a3f4f19
+size 711443456
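The checkpoint shrinks by 403,824,744 bytes, which in float32 is almost exactly the difference in the word-embedding tables. A back-of-the-envelope check, assuming hidden size 768 and the vocab sizes from the two config.json versions above:

```python
# The size drop is dominated by the smaller embedding table.
old_vocab, new_vocab = 251_000, 119_547  # from the two config.json versions
hidden, bytes_per_param = 768, 4         # hidden_size, float32

embedding_delta = (old_vocab - new_vocab) * hidden * bytes_per_param
print(embedding_delta)                   # 403,823,616 bytes
print(1_115_268_200 - 711_443_456)       # 403,824,744 bytes (actual LFS delta)
```

The roughly 1.1 KB remainder suggests the two architectures' other parameter and metadata differences nearly cancel here.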
special_tokens_map.json CHANGED
@@ -1,51 +1,7 @@
 {
-  "bos_token": {
-    "content": "[CLS]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "cls_token": {
-    "content": "[CLS]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "eos_token": {
-    "content": "[SEP]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "mask_token": {
-    "content": "[MASK]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "pad_token": {
-    "content": "[PAD]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "sep_token": {
-    "content": "[SEP]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "unk_token": {
-    "content": "[UNK]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  }
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
 }
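The move from `AddedToken`-style dicts to plain strings (and the dropped `bos_token`/`eos_token`, which `BertTokenizer` does not use) should be behavior-preserving on load; the plain-string form simply leaves the `AddedToken` flags at their defaults. A quick check, again with the hypothetical repo id:

```python
# Plain-string entries deserialize to the same special tokens as the old dict form.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("eysharaazia/cyber_deberta")
print(tok.special_tokens_map)
# -> {'unk_token': '[UNK]', 'sep_token': '[SEP]', 'pad_token': '[PAD]',
#     'cls_token': '[CLS]', 'mask_token': '[MASK]'}
```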
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ac9bd80d04d21b4df95917bdf7c750cbea28acc30a4462c76ca2b9c86d371863
-size 16316225
+oid sha256:bf1b59b7b11c95f194f51708d918eea378e09d05f84c0e1656dc5180e8117088
+size 2919362
tokenizer_config.json CHANGED
@@ -8,31 +8,31 @@
       "single_word": false,
       "special": true
     },
-    "1": {
-      "content": "[CLS]",
+    "100": {
+      "content": "[UNK]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
-    "2": {
-      "content": "[SEP]",
+    "101": {
+      "content": "[CLS]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
-    "3": {
-      "content": "[UNK]",
+    "102": {
+      "content": "[SEP]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
-    "250101": {
+    "103": {
      "content": "[MASK]",
      "lstrip": false,
      "normalized": false,
@@ -41,22 +41,15 @@
       "special": true
     }
   },
-  "bos_token": "[CLS]",
   "clean_up_tokenization_spaces": true,
   "cls_token": "[CLS]",
   "do_lower_case": false,
-  "eos_token": "[SEP]",
   "mask_token": "[MASK]",
-  "max_length": 512,
   "model_max_length": 512,
   "pad_token": "[PAD]",
   "sep_token": "[SEP]",
-  "sp_model_kwargs": {},
-  "split_by_punct": false,
-  "stride": 0,
-  "tokenizer_class": "DebertaV2Tokenizer",
-  "truncation_side": "right",
-  "truncation_strategy": "longest_first",
-  "unk_token": "[UNK]",
-  "vocab_type": "spm"
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
 }
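The special tokens move from the old SentencePiece slots of the DeBERTa tokenizer (ids 1, 2, 3, and 250101 for `[MASK]`) to mBERT's canonical WordPiece ids 100 through 103, which a loaded tokenizer should confirm:

```python
# Verify the new special-token ids (hypothetical repo id as above).
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("eysharaazia/cyber_deberta")
print(tok.convert_tokens_to_ids(["[UNK]", "[CLS]", "[SEP]", "[MASK]"]))
# expected: [100, 101, 102, 103]
```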
vocab.txt ADDED
The diff for this file is too large to render. See raw diff