sentiment_deberta

Browse files

Files changed (8) hide show

README.md +23 -15
config.json +16 -19
model.safetensors +2 -2
special_tokens_map.json +5 -49
tokenizer.json +2 -2
tokenizer_config.json +11 -18
training_args.bin +2 -2
vocab.txt +0 -0

README.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
-license: mit
-base_model: MoritzLaurer/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7
 tags:
 - generated_from_trainer
 metrics:
@@ -18,13 +18,13 @@ should probably proofread and complete it, then remove this comment. -->
 # sentiment_deberta
-This model is a fine-tuned version of [MoritzLaurer/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7](https://huggingface.co/MoritzLaurer/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.6252
-- Accuracy: 0.7418
-- F1: 0.6848
-- Precision: 0.6668
-- Recall: 0.7317
 ## Model description
@@ -47,20 +47,28 @@ The following hyperparameters were used during training:
 - train_batch_size: 64
 - eval_batch_size: 64
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
-- lr_scheduler_type: linear
-- num_epochs: 5
 - mixed_precision_training: Native AMP
 ### Training results
 | Training Loss | Epoch | Step | Validation Loss | Accuracy | F1     | Precision | Recall |
 |:-------------:|:-----:|:----:|:---------------:|:--------:|:------:|:---------:|:------:|
-| 0.7168        | 1.0   | 94   | 0.8071          | 0.6421   | 0.6022 | 0.6012    | 0.6733 |
-| 0.6442        | 2.0   | 188  | 0.6195          | 0.7428   | 0.6789 | 0.6617    | 0.7176 |
-| 0.5657        | 3.0   | 282  | 0.7655          | 0.6615   | 0.6319 | 0.6301    | 0.7172 |
-| 0.5001        | 4.0   | 376  | 0.6058          | 0.7465   | 0.6896 | 0.6717    | 0.7352 |
-| 0.5145        | 5.0   | 470  | 0.6252          | 0.7418   | 0.6848 | 0.6668    | 0.7317 |
 ### Framework versions

 ---
+license: apache-2.0
+base_model: google-bert/bert-base-multilingual-cased
 tags:
 - generated_from_trainer
 metrics:
 # sentiment_deberta
+This model is a fine-tuned version of [google-bert/bert-base-multilingual-cased](https://huggingface.co/google-bert/bert-base-multilingual-cased) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.7123
+- Accuracy: 0.6938
+- F1: 0.6401
+- Precision: 0.6262
+- Recall: 0.6854
 ## Model description
 - train_batch_size: 64
 - eval_batch_size: 64
 - seed: 42
+- gradient_accumulation_steps: 2
+- total_train_batch_size: 128
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_steps: 500
+- num_epochs: 10
 - mixed_precision_training: Native AMP
 ### Training results
 | Training Loss | Epoch | Step | Validation Loss | Accuracy | F1     | Precision | Recall |
 |:-------------:|:-----:|:----:|:---------------:|:--------:|:------:|:---------:|:------:|
+| 1.087         | 1.0   | 47   | 1.1008          | 0.2551   | 0.3042 | 0.4734    | 0.4956 |
+| 0.9933        | 2.0   | 94   | 0.9692          | 0.5545   | 0.5098 | 0.5126    | 0.5496 |
+| 0.8709        | 3.0   | 141  | 0.9352          | 0.5003   | 0.5003 | 0.5301    | 0.5804 |
+| 0.8444        | 4.0   | 188  | 0.8729          | 0.5874   | 0.5602 | 0.5671    | 0.6204 |
+| 0.7833        | 5.0   | 235  | 0.9394          | 0.4778   | 0.4980 | 0.5643    | 0.6353 |
+| 0.7003        | 6.0   | 282  | 0.7279          | 0.6834   | 0.6306 | 0.6150    | 0.6828 |
+| 0.6383        | 7.0   | 329  | 0.7808          | 0.6390   | 0.6123 | 0.6073    | 0.7007 |
+| 0.5996        | 8.0   | 376  | 0.7379          | 0.6802   | 0.6367 | 0.6231    | 0.6993 |
+| 0.5514        | 9.0   | 423  | 0.7846          | 0.6745   | 0.6204 | 0.6015    | 0.6901 |
+| 0.4837        | 10.0  | 470  | 0.7123          | 0.6938   | 0.6401 | 0.6262    | 0.6854 |
 ### Framework versions

config.json CHANGED Viewed

@@ -1,9 +1,11 @@
 {
-  "_name_or_path": "MoritzLaurer/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7",
   "architectures": [
-    "DebertaV2ForSequenceClassification"
   ],
   "attention_probs_dropout_prob": 0.1,
   "hidden_act": "gelu",
   "hidden_dropout_prob": 0.1,
   "hidden_size": 768,
@@ -19,27 +21,22 @@
     "NEUTRAL": 1,
     "POSITIVE": 2
   },
-  "layer_norm_eps": 1e-07,
   "max_position_embeddings": 512,
-  "max_relative_positions": -1,
-  "model_type": "deberta-v2",
-  "norm_rel_ebd": "layer_norm",
   "num_attention_heads": 12,
   "num_hidden_layers": 12,
   "pad_token_id": 0,
-  "pooler_dropout": 0,
-  "pooler_hidden_act": "gelu",
-  "pooler_hidden_size": 768,
-  "pos_att_type": [
-    "p2c",
-    "c2p"
-  ],
-  "position_biased_input": false,
-  "position_buckets": 256,
-  "relative_attention": true,
-  "share_att_key": true,
   "torch_dtype": "float32",
   "transformers_version": "4.41.2",
-  "type_vocab_size": 0,
-  "vocab_size": 251000
 }

 {
+  "_name_or_path": "google-bert/bert-base-multilingual-cased",
   "architectures": [
+    "BertForSequenceClassification"
   ],
   "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "directionality": "bidi",
   "hidden_act": "gelu",
   "hidden_dropout_prob": 0.1,
   "hidden_size": 768,
     "NEUTRAL": 1,
     "POSITIVE": 2
   },
+  "layer_norm_eps": 1e-12,
   "max_position_embeddings": 512,
+  "model_type": "bert",
   "num_attention_heads": 12,
   "num_hidden_layers": 12,
   "pad_token_id": 0,
+  "pooler_fc_size": 768,
+  "pooler_num_attention_heads": 12,
+  "pooler_num_fc_layers": 3,
+  "pooler_size_per_head": 128,
+  "pooler_type": "first_token_transform",
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
   "torch_dtype": "float32",
   "transformers_version": "4.41.2",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 119547
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ebd4dcbd3cd844a07267d750b94ee13ab85843274318442b88a3acf0495c7c5b
-size 1115271284

 version https://git-lfs.github.com/spec/v1
+oid sha256:e391cc2742fcb1e3c2817cc7fe8c73686914afc3ba0df47f05fb6129409c0649
+size 711446532

special_tokens_map.json CHANGED Viewed

@@ -1,51 +1,7 @@
 {
-  "bos_token": {
-    "content": "[CLS]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "cls_token": {
-    "content": "[CLS]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "eos_token": {
-    "content": "[SEP]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "mask_token": {
-    "content": "[MASK]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "pad_token": {
-    "content": "[PAD]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "sep_token": {
-    "content": "[SEP]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "unk_token": {
-    "content": "[UNK]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  }
 }

 {
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
 }

tokenizer.json CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ac9bd80d04d21b4df95917bdf7c750cbea28acc30a4462c76ca2b9c86d371863
-size 16316225

 version https://git-lfs.github.com/spec/v1
+oid sha256:bf1b59b7b11c95f194f51708d918eea378e09d05f84c0e1656dc5180e8117088
+size 2919362

tokenizer_config.json CHANGED Viewed

@@ -8,31 +8,31 @@
       "single_word": false,
       "special": true
     },
-    "1": {
-      "content": "[CLS]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
-    "2": {
-      "content": "[SEP]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
-    "3": {
-      "content": "[UNK]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
-    "250101": {
       "content": "[MASK]",
       "lstrip": false,
       "normalized": false,
@@ -41,22 +41,15 @@
       "special": true
     }
   },
-  "bos_token": "[CLS]",
   "clean_up_tokenization_spaces": true,
   "cls_token": "[CLS]",
   "do_lower_case": false,
-  "eos_token": "[SEP]",
   "mask_token": "[MASK]",
-  "max_length": 512,
   "model_max_length": 512,
   "pad_token": "[PAD]",
   "sep_token": "[SEP]",
-  "sp_model_kwargs": {},
-  "split_by_punct": false,
-  "stride": 0,
-  "tokenizer_class": "DebertaV2Tokenizer",
-  "truncation_side": "right",
-  "truncation_strategy": "longest_first",
-  "unk_token": "[UNK]",
-  "vocab_type": "spm"
 }

       "single_word": false,
       "special": true
     },
+    "100": {
+      "content": "[UNK]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
+    "101": {
+      "content": "[CLS]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
+    "102": {
+      "content": "[SEP]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
+    "103": {
       "content": "[MASK]",
       "lstrip": false,
       "normalized": false,
       "special": true
     }
   },
   "clean_up_tokenization_spaces": true,
   "cls_token": "[CLS]",
   "do_lower_case": false,
   "mask_token": "[MASK]",
   "model_max_length": 512,
   "pad_token": "[PAD]",
   "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:18c5ceef72839a46609d9466fc3866ccfdde6c8fc80e4b8efa5ced51cbd630a6
-size 5112

 version https://git-lfs.github.com/spec/v1
+oid sha256:464da6ad6afc0fcad8c68a2ca16a6073f0d6e6bdb18fa948527aa68b83f97e71
+size 5048

vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff