End of training

Browse files

Files changed (7) hide show

README.md +42 -42
config.json +9 -16
model.safetensors +2 -2
runs/Mar26_15-18-23_a22b7578fb0a/events.out.tfevents.1711466309.a22b7578fb0a.310.6 +2 -2
runs/Mar26_17-55-36_a22b7578fb0a/events.out.tfevents.1711475748.a22b7578fb0a.310.7 +3 -0
runs/Mar26_17-55-36_a22b7578fb0a/events.out.tfevents.1711483712.a22b7578fb0a.310.8 +3 -0
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -20,11 +20,11 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [distilbert-base-cased](https://huggingface.co/distilbert-base-cased) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.9158
-- Precision: 0.8211
-- Recall: 0.8179
-- F1: 0.8168
-- Accuracy: 0.8179
 ## Model description
@@ -55,43 +55,43 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch | Step | Validation Loss | Precision | Recall | F1     | Accuracy |
 |:-------------:|:-----:|:----:|:---------------:|:---------:|:------:|:------:|:--------:|
-| 1.9425        | 0.14  | 30   | 1.8775          | 0.1595    | 0.1793 | 0.1149 | 0.1793   |
-| 1.7643        | 0.27  | 60   | 1.4748          | 0.5705    | 0.5098 | 0.4480 | 0.5098   |
-| 1.3512        | 0.41  | 90   | 1.2528          | 0.6393    | 0.5546 | 0.5091 | 0.5546   |
-| 1.1957        | 0.54  | 120  | 1.1123          | 0.6994    | 0.6359 | 0.6163 | 0.6359   |
-| 1.1178        | 0.68  | 150  | 0.9101          | 0.7262    | 0.6807 | 0.6648 | 0.6807   |
-| 0.9729        | 0.81  | 180  | 0.7698          | 0.7443    | 0.6975 | 0.6791 | 0.6975   |
-| 0.7257        | 0.95  | 210  | 0.6921          | 0.7836    | 0.7647 | 0.7613 | 0.7647   |
-| 0.5082        | 1.08  | 240  | 0.6223          | 0.7940    | 0.7899 | 0.7893 | 0.7899   |
-| 0.479         | 1.22  | 270  | 0.6887          | 0.8128    | 0.7871 | 0.7862 | 0.7871   |
-| 0.411         | 1.35  | 300  | 0.5868          | 0.8064    | 0.7899 | 0.7867 | 0.7899   |
-| 0.3041        | 1.49  | 330  | 0.6567          | 0.7997    | 0.7899 | 0.7897 | 0.7899   |
-| 0.4224        | 1.62  | 360  | 0.6459          | 0.8254    | 0.8151 | 0.8148 | 0.8151   |
-| 0.3496        | 1.76  | 390  | 0.7095          | 0.7862    | 0.7731 | 0.7659 | 0.7731   |
-| 0.2739        | 1.89  | 420  | 0.6407          | 0.8345    | 0.8291 | 0.8279 | 0.8291   |
-| 0.3914        | 2.03  | 450  | 0.6103          | 0.8372    | 0.8319 | 0.8310 | 0.8319   |
-| 0.21          | 2.16  | 480  | 0.7185          | 0.8394    | 0.8291 | 0.8276 | 0.8291   |
-| 0.1107        | 2.3   | 510  | 0.7908          | 0.8227    | 0.8151 | 0.8139 | 0.8151   |
-| 0.1499        | 2.43  | 540  | 0.8070          | 0.8305    | 0.8179 | 0.8179 | 0.8179   |
-| 0.2544        | 2.57  | 570  | 0.7113          | 0.8475    | 0.8375 | 0.8372 | 0.8375   |
-| 0.1411        | 2.7   | 600  | 0.7436          | 0.8360    | 0.8291 | 0.8287 | 0.8291   |
-| 0.1955        | 2.84  | 630  | 0.7029          | 0.8336    | 0.8319 | 0.8314 | 0.8319   |
-| 0.1027        | 2.97  | 660  | 0.8696          | 0.8184    | 0.8067 | 0.8053 | 0.8067   |
-| 0.0888        | 3.11  | 690  | 0.7953          | 0.8442    | 0.8347 | 0.8349 | 0.8347   |
-| 0.0836        | 3.24  | 720  | 0.8188          | 0.8328    | 0.8291 | 0.8284 | 0.8291   |
-| 0.0351        | 3.38  | 750  | 0.8557          | 0.8436    | 0.8375 | 0.8368 | 0.8375   |
-| 0.0237        | 3.51  | 780  | 0.8777          | 0.8404    | 0.8375 | 0.8369 | 0.8375   |
-| 0.0712        | 3.65  | 810  | 0.8978          | 0.8260    | 0.8207 | 0.8200 | 0.8207   |
-| 0.0535        | 3.78  | 840  | 0.9443          | 0.8236    | 0.8179 | 0.8180 | 0.8179   |
-| 0.0735        | 3.92  | 870  | 0.8792          | 0.8341    | 0.8291 | 0.8295 | 0.8291   |
-| 0.0525        | 4.05  | 900  | 0.8573          | 0.8295    | 0.8263 | 0.8263 | 0.8263   |
-| 0.0254        | 4.19  | 930  | 0.9061          | 0.8313    | 0.8263 | 0.8265 | 0.8263   |
-| 0.0076        | 4.32  | 960  | 0.8811          | 0.8214    | 0.8179 | 0.8173 | 0.8179   |
-| 0.0177        | 4.46  | 990  | 0.8747          | 0.8240    | 0.8207 | 0.8198 | 0.8207   |
-| 0.0067        | 4.59  | 1020 | 0.8855          | 0.8240    | 0.8207 | 0.8193 | 0.8207   |
-| 0.003         | 4.73  | 1050 | 0.8960          | 0.8235    | 0.8207 | 0.8196 | 0.8207   |
-| 0.0071        | 4.86  | 1080 | 0.9137          | 0.8177    | 0.8151 | 0.8136 | 0.8151   |
-| 0.0414        | 5.0   | 1110 | 0.9158          | 0.8211    | 0.8179 | 0.8168 | 0.8179   |
 ### Framework versions

 This model is a fine-tuned version of [distilbert-base-cased](https://huggingface.co/distilbert-base-cased) on the None dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.9689
+- Precision: 0.8231
+- Recall: 0.8151
+- F1: 0.8155
+- Accuracy: 0.8151
 ## Model description
 | Training Loss | Epoch | Step | Validation Loss | Precision | Recall | F1     | Accuracy |
 |:-------------:|:-----:|:----:|:---------------:|:---------:|:------:|:------:|:--------:|
+| 1.9367        | 0.14  | 30   | 1.8290          | 0.1621    | 0.2605 | 0.1694 | 0.2605   |
+| 1.8256        | 0.27  | 60   | 1.7350          | 0.1476    | 0.2745 | 0.1786 | 0.2745   |
+| 1.6271        | 0.41  | 90   | 1.5305          | 0.4700    | 0.4594 | 0.3823 | 0.4594   |
+| 1.3898        | 0.54  | 120  | 1.3535          | 0.5555    | 0.4790 | 0.4664 | 0.4790   |
+| 1.2341        | 0.68  | 150  | 1.0183          | 0.7309    | 0.7087 | 0.7041 | 0.7087   |
+| 1.0666        | 0.81  | 180  | 0.9651          | 0.7280    | 0.6583 | 0.6284 | 0.6583   |
+| 0.8155        | 0.95  | 210  | 0.8172          | 0.7889    | 0.7647 | 0.7620 | 0.7647   |
+| 0.6679        | 1.08  | 240  | 0.6941          | 0.7973    | 0.7843 | 0.7818 | 0.7843   |
+| 0.52          | 1.22  | 270  | 0.6729          | 0.8009    | 0.7927 | 0.7921 | 0.7927   |
+| 0.4683        | 1.35  | 300  | 0.7385          | 0.8072    | 0.7955 | 0.7962 | 0.7955   |
+| 0.3937        | 1.49  | 330  | 0.6951          | 0.8094    | 0.7983 | 0.7994 | 0.7983   |
+| 0.4883        | 1.62  | 360  | 0.6793          | 0.8099    | 0.8039 | 0.8034 | 0.8039   |
+| 0.4532        | 1.76  | 390  | 0.6710          | 0.8108    | 0.8067 | 0.8046 | 0.8067   |
+| 0.3099        | 1.89  | 420  | 0.6839          | 0.8136    | 0.8067 | 0.8055 | 0.8067   |
+| 0.3798        | 2.03  | 450  | 0.8117          | 0.8197    | 0.8095 | 0.8099 | 0.8095   |
+| 0.2304        | 2.16  | 480  | 0.7814          | 0.8299    | 0.8263 | 0.8251 | 0.8263   |
+| 0.1489        | 2.3   | 510  | 0.8918          | 0.8082    | 0.7955 | 0.7943 | 0.7955   |
+| 0.1525        | 2.43  | 540  | 0.9288          | 0.8161    | 0.8039 | 0.8048 | 0.8039   |
+| 0.2774        | 2.57  | 570  | 0.8478          | 0.8347    | 0.8291 | 0.8278 | 0.8291   |
+| 0.2452        | 2.7   | 600  | 0.8499          | 0.8342    | 0.8291 | 0.8296 | 0.8291   |
+| 0.1811        | 2.84  | 630  | 0.8531          | 0.8381    | 0.8347 | 0.8340 | 0.8347   |
+| 0.1509        | 2.97  | 660  | 0.9766          | 0.8150    | 0.7955 | 0.7967 | 0.7955   |
+| 0.1073        | 3.11  | 690  | 0.8532          | 0.8269    | 0.8179 | 0.8179 | 0.8179   |
+| 0.1273        | 3.24  | 720  | 0.9157          | 0.8315    | 0.8235 | 0.8247 | 0.8235   |
+| 0.0614        | 3.38  | 750  | 0.9050          | 0.8364    | 0.8291 | 0.8303 | 0.8291   |
+| 0.0876        | 3.51  | 780  | 0.9221          | 0.8421    | 0.8347 | 0.8352 | 0.8347   |
+| 0.0574        | 3.65  | 810  | 0.9416          | 0.8351    | 0.8263 | 0.8273 | 0.8263   |
+| 0.0783        | 3.78  | 840  | 0.9414          | 0.8377    | 0.8291 | 0.8302 | 0.8291   |
+| 0.0357        | 3.92  | 870  | 0.9270          | 0.8312    | 0.8207 | 0.8219 | 0.8207   |
+| 0.0589        | 4.05  | 900  | 0.9254          | 0.8379    | 0.8263 | 0.8275 | 0.8263   |
+| 0.0297        | 4.19  | 930  | 0.9402          | 0.8189    | 0.8095 | 0.8104 | 0.8095   |
+| 0.0324        | 4.32  | 960  | 0.9545          | 0.8303    | 0.8235 | 0.8241 | 0.8235   |
+| 0.0405        | 4.46  | 990  | 0.9574          | 0.8273    | 0.8207 | 0.8209 | 0.8207   |
+| 0.0039        | 4.59  | 1020 | 0.9553          | 0.8309    | 0.8235 | 0.8240 | 0.8235   |
+| 0.0052        | 4.73  | 1050 | 0.9650          | 0.8335    | 0.8263 | 0.8267 | 0.8263   |
+| 0.0473        | 4.86  | 1080 | 0.9678          | 0.8259    | 0.8179 | 0.8186 | 0.8179   |
+| 0.0417        | 5.0   | 1110 | 0.9689          | 0.8231    | 0.8151 | 0.8155 | 0.8151   |
 ### Framework versions

config.json CHANGED Viewed

@@ -1,19 +1,13 @@
 {
-  "_name_or_path": "siebert/sentiment-roberta-large-english",
   "activation": "gelu",
   "architectures": [
     "DistilBertForSequenceClassification"
   ],
   "attention_dropout": 0.1,
-  "attention_probs_dropout_prob": 0.1,
-  "bos_token_id": 0,
-  "dim": 1024,
   "dropout": 0.1,
-  "eos_token_id": 2,
-  "gradient_checkpointing": false,
-  "hidden_act": "gelu",
   "hidden_dim": 3072,
-  "hidden_dropout_prob": 0.1,
   "id2label": {
     "0": "anger",
     "1": "fear",
@@ -24,7 +18,6 @@
     "6": "surprise"
   },
   "initializer_range": 0.02,
-  "intermediate_size": 4096,
   "label2id": {
     "LABEL_0": 0,
     "LABEL_1": 1,
@@ -34,18 +27,18 @@
     "LABEL_5": 5,
     "LABEL_6": 6
   },
-  "layer_norm_eps": 1e-05,
-  "max_position_embeddings": 514,
   "model_type": "distilbert",
-  "n_heads": 16,
-  "n_layers": 24,
-  "pad_token_id": 1,
   "problem_type": "single_label_classification",
   "qa_dropout": 0.1,
   "seq_classif_dropout": 0.2,
   "sinusoidal_pos_embds": false,
   "torch_dtype": "float32",
   "transformers_version": "4.39.1",
-  "type_vocab_size": 1,
-  "vocab_size": 50265
 }

 {
+  "_name_or_path": "distilbert-base-cased",
   "activation": "gelu",
   "architectures": [
     "DistilBertForSequenceClassification"
   ],
   "attention_dropout": 0.1,
+  "dim": 768,
   "dropout": 0.1,
   "hidden_dim": 3072,
   "id2label": {
     "0": "anger",
     "1": "fear",
     "6": "surprise"
   },
   "initializer_range": 0.02,
   "label2id": {
     "LABEL_0": 0,
     "LABEL_1": 1,
     "LABEL_5": 5,
     "LABEL_6": 6
   },
+  "max_position_embeddings": 512,
   "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "output_past": true,
+  "pad_token_id": 0,
   "problem_type": "single_label_classification",
   "qa_dropout": 0.1,
   "seq_classif_dropout": 0.2,
   "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
   "torch_dtype": "float32",
   "transformers_version": "4.39.1",
+  "vocab_size": 28996
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:212c8fa16036f0a0350b0f9772f80ae76367af9c3702232ef56e6bd900c730c9
-size 1220087268

 version https://git-lfs.github.com/spec/v1
+oid sha256:cd36eb358587d3efcb9c731a3781676d5487868f5e6480538ef6c029568f06c7
+size 263160068

runs/Mar26_15-18-23_a22b7578fb0a/events.out.tfevents.1711466309.a22b7578fb0a.310.6 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c249bbbfd41706781378032a5b91cd7316cc9659aba4e267cb6a8a6801e9325e
-size 6031

 version https://git-lfs.github.com/spec/v1
+oid sha256:998fe2c9f0c29844b878b98c0c74f48ce960ecded2f678160b7e40155eadd8b6
+size 6385

runs/Mar26_17-55-36_a22b7578fb0a/events.out.tfevents.1711475748.a22b7578fb0a.310.7 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:47950cf6c5a58ba1f2963554bba53bb46c2271f544e70a2bfa5b45bf9fa899d0
+size 30349

runs/Mar26_17-55-36_a22b7578fb0a/events.out.tfevents.1711483712.a22b7578fb0a.310.8 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ea1d971676c0f0b97dd9c74db5322b0abc50ed62be4c599b8bc3b6d633369d2f
+size 560

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:680c33bde336be706af7fa68126b99b73579daa9bfa85c5554cdeefca07a765a
 size 4920

 version https://git-lfs.github.com/spec/v1
+oid sha256:8328692412b66985bc84f2473c4b57629a858923170411bc1ac199db3ba916e1
 size 4920