Training in progress, epoch 0

Browse files

Files changed (5) hide show

README.md +24 -23
config.json +2 -6
model.safetensors +2 -2
special_tokens_map.json +35 -5
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -21,12 +21,12 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [classla/bcms-bertic](https://huggingface.co/classla/bcms-bertic) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.6168
-- Accuracy: 0.7829
-- Auc Score: 0.8669
-- F1: 0.8083
-- Precision: 0.7949
-- Recall: 0.8221
 ## Model description
@@ -57,23 +57,24 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch  | Step | Validation Loss | Accuracy | Auc Score | F1     | Precision | Recall |
 |:-------------:|:------:|:----:|:---------------:|:--------:|:---------:|:------:|:---------:|:------:|
-| 0.6346        | 0.1736 | 100  | 0.5729          | 0.7036   | 0.8184    | 0.7729 | 0.6738    | 0.9061 |
-| 0.5153        | 0.3472 | 200  | 0.5228          | 0.7522   | 0.8334    | 0.7893 | 0.7490    | 0.8343 |
-| 0.524         | 0.5208 | 300  | 0.6139          | 0.7239   | 0.8494    | 0.7918 | 0.6821    | 0.9436 |
-| 0.5021        | 0.6944 | 400  | 0.4913          | 0.7675   | 0.8467    | 0.7946 | 0.7818    | 0.8077 |
-| 0.4612        | 0.8681 | 500  | 0.5028          | 0.7601   | 0.8612    | 0.8038 | 0.7378    | 0.8829 |
-| 0.4288        | 1.0417 | 600  | 0.4997          | 0.7804   | 0.8603    | 0.8080 | 0.7872    | 0.8298 |
-| 0.3585        | 1.2153 | 700  | 0.5485          | 0.7700   | 0.8600    | 0.8038 | 0.7652    | 0.8464 |
-| 0.4089        | 1.3889 | 800  | 0.4786          | 0.7860   | 0.8626    | 0.8077 | 0.8077    | 0.8077 |
-| 0.3785        | 1.5625 | 900  | 0.4961          | 0.7749   | 0.8630    | 0.7832 | 0.8442    | 0.7304 |
-| 0.3513        | 1.7361 | 1000 | 0.5348          | 0.7780   | 0.8647    | 0.8064 | 0.7833    | 0.8309 |
-| 0.3674        | 1.9097 | 1100 | 0.4934          | 0.7823   | 0.8674    | 0.8007 | 0.8163    | 0.7856 |
-| 0.2826        | 2.0833 | 1200 | 0.5916          | 0.7792   | 0.8680    | 0.8106 | 0.7758    | 0.8486 |
-| 0.2445        | 2.2569 | 1300 | 0.6143          | 0.7706   | 0.8648    | 0.8058 | 0.7618    | 0.8552 |
-| 0.247         | 2.4306 | 1400 | 0.5840          | 0.7811   | 0.8630    | 0.8069 | 0.7923    | 0.8221 |
-| 0.2443        | 2.6042 | 1500 | 0.5923          | 0.7872   | 0.8676    | 0.8054 | 0.8202    | 0.7912 |
-| 0.2512        | 2.7778 | 1600 | 0.5986          | 0.7860   | 0.8674    | 0.8032 | 0.8227    | 0.7845 |
-| 0.2119        | 2.9514 | 1700 | 0.6168          | 0.7829   | 0.8669    | 0.8083 | 0.7949    | 0.8221 |
 ### Framework versions

 This model is a fine-tuned version of [classla/bcms-bertic](https://huggingface.co/classla/bcms-bertic) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.5390
+- Accuracy: 0.7669
+- Auc Score: 0.8334
+- F1: 0.6965
+- Precision: 0.6768
+- Recall: 0.7174
 ## Model description
 | Training Loss | Epoch  | Step | Validation Loss | Accuracy | Auc Score | F1     | Precision | Recall |
 |:-------------:|:------:|:----:|:---------------:|:--------:|:---------:|:------:|:---------:|:------:|
+| 0.643         | 0.1616 | 100  | 0.6137          | 0.6403   | 0.7142    | 0.1180 | 0.6885    | 0.0645 |
+| 0.6015        | 0.3231 | 200  | 0.5704          | 0.6861   | 0.7557    | 0.6257 | 0.5633    | 0.7035 |
+| 0.5784        | 0.4847 | 300  | 0.5420          | 0.7153   | 0.7807    | 0.5667 | 0.6552    | 0.4992 |
+| 0.5545        | 0.6462 | 400  | 0.5177          | 0.7394   | 0.8052    | 0.5991 | 0.7025    | 0.5223 |
+| 0.5328        | 0.8078 | 500  | 0.5342          | 0.7194   | 0.8100    | 0.6729 | 0.5950    | 0.7742 |
+| 0.5524        | 0.9693 | 600  | 0.5678          | 0.6976   | 0.8124    | 0.6796 | 0.5617    | 0.8602 |
+| 0.4563        | 1.1309 | 700  | 0.5280          | 0.7423   | 0.8101    | 0.6596 | 0.6498    | 0.6697 |
+| 0.4425        | 1.2924 | 800  | 0.5082          | 0.7549   | 0.8202    | 0.6480 | 0.6973    | 0.6052 |
+| 0.4436        | 1.4540 | 900  | 0.5129          | 0.7537   | 0.8250    | 0.6356 | 0.7089    | 0.5760 |
+| 0.42          | 1.6155 | 1000 | 0.5073          | 0.7635   | 0.8242    | 0.6656 | 0.7038    | 0.6313 |
+| 0.426         | 1.7771 | 1100 | 0.5008          | 0.7635   | 0.8276    | 0.6816 | 0.6842    | 0.6790 |
+| 0.4425        | 1.9386 | 1200 | 0.5007          | 0.7646   | 0.8337    | 0.6940 | 0.6734    | 0.7158 |
+| 0.3873        | 2.1002 | 1300 | 0.5566          | 0.7646   | 0.8377    | 0.7028 | 0.6639    | 0.7465 |
+| 0.306         | 2.2617 | 1400 | 0.5551          | 0.7600   | 0.8343    | 0.6939 | 0.6616    | 0.7296 |
+| 0.3297        | 2.4233 | 1500 | 0.5707          | 0.7726   | 0.8309    | 0.6700 | 0.7301    | 0.6190 |
+| 0.3335        | 2.5848 | 1600 | 0.5412          | 0.7686   | 0.8338    | 0.6878 | 0.6921    | 0.6836 |
+| 0.3149        | 2.7464 | 1700 | 0.5438          | 0.7692   | 0.8338    | 0.6864 | 0.6956    | 0.6774 |
+| 0.3577        | 2.9079 | 1800 | 0.5390          | 0.7669   | 0.8334    | 0.6965 | 0.6768    | 0.7174 |
 ### Framework versions

config.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "architectures": [
-    "ElectraForSequenceClassification"
   ],
   "attention_probs_dropout_prob": 0.1,
   "classifier_dropout": null,
@@ -12,16 +12,12 @@
   "intermediate_size": 3072,
   "layer_norm_eps": 1e-12,
   "max_position_embeddings": 512,
-  "model_type": "electra",
   "num_attention_heads": 12,
   "num_hidden_layers": 12,
   "pad_token_id": 0,
   "position_embedding_type": "absolute",
   "problem_type": "single_label_classification",
-  "summary_activation": "gelu",
-  "summary_last_dropout": 0.1,
-  "summary_type": "first",
-  "summary_use_proj": true,
   "torch_dtype": "float32",
   "transformers_version": "4.52.4",
   "type_vocab_size": 2,

 {
   "architectures": [
+    "BertForSequenceClassification"
   ],
   "attention_probs_dropout_prob": 0.1,
   "classifier_dropout": null,
   "intermediate_size": 3072,
   "layer_norm_eps": 1e-12,
   "max_position_embeddings": 512,
+  "model_type": "bert",
   "num_attention_heads": 12,
   "num_hidden_layers": 12,
   "pad_token_id": 0,
   "position_embedding_type": "absolute",
   "problem_type": "single_label_classification",
   "torch_dtype": "float32",
   "transformers_version": "4.52.4",
   "type_vocab_size": 2,

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:89fd178b278ec5576ce071464e05e1667d667d3107ff6d08d3f0a3af757a949c
-size 442499672

 version https://git-lfs.github.com/spec/v1
+oid sha256:3dedf46e7b2d8eae3eee4faf49893f6a32e9f9cf038e0502179deaedec53e531
+size 442499064

special_tokens_map.json CHANGED Viewed

@@ -1,7 +1,37 @@
 {
-  "cls_token": "[CLS]",
-  "mask_token": "[MASK]",
-  "pad_token": "[PAD]",
-  "sep_token": "[SEP]",
-  "unk_token": "[UNK]"
 }

 {
+  "cls_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c04be30d77da3124f89838d95e5c5f73fe8812c3eb70775b5f59d6c5ccc1d7e9
 size 5304

 version https://git-lfs.github.com/spec/v1
+oid sha256:7dad0e36d84adbb572d1c02d802bad9a42408ce4be2b9c4ba29fae3b3465657d
 size 5304