SimoneJLaudani
committed on
Commit
•
a24c1c8
1
Parent(s):
7ce7ce2
End of training
Browse files
- README.md +42 -42
- config.json +9 -16
- model.safetensors +2 -2
- runs/Mar26_15-18-23_a22b7578fb0a/events.out.tfevents.1711466309.a22b7578fb0a.310.6 +2 -2
- runs/Mar26_17-55-36_a22b7578fb0a/events.out.tfevents.1711475748.a22b7578fb0a.310.7 +3 -0
- runs/Mar26_17-55-36_a22b7578fb0a/events.out.tfevents.1711483712.a22b7578fb0a.310.8 +3 -0
- training_args.bin +1 -1
README.md
CHANGED
@@ -20,11 +20,11 @@ should probably proofread and complete it, then remove this comment. -->
|
|
20 |
|
21 |
This model is a fine-tuned version of [distilbert-base-cased](https://huggingface.co/distilbert-base-cased) on the None dataset.
|
22 |
It achieves the following results on the evaluation set:
|
23 |
-
- Loss: 0.
|
24 |
-
- Precision: 0.
|
25 |
-
- Recall: 0.
|
26 |
-
- F1: 0.
|
27 |
-
- Accuracy: 0.
|
28 |
|
29 |
## Model description
|
30 |
|
@@ -55,43 +55,43 @@ The following hyperparameters were used during training:
|
|
55 |
|
56 |
| Training Loss | Epoch | Step | Validation Loss | Precision | Recall | F1 | Accuracy |
|
57 |
|:-------------:|:-----:|:----:|:---------------:|:---------:|:------:|:------:|:--------:|
|
58 |
-
| 1.
|
59 |
-
| 1.
|
60 |
-
| 1.
|
61 |
-
| 1.
|
62 |
-
| 1.
|
63 |
-
|
|
64 |
-
| 0.
|
65 |
-
| 0.
|
66 |
-
| 0.
|
67 |
-
| 0.
|
68 |
-
| 0.
|
69 |
-
| 0.
|
70 |
-
| 0.
|
71 |
-
| 0.
|
72 |
-
| 0.
|
73 |
-
| 0.
|
74 |
-
| 0.
|
75 |
-
| 0.
|
76 |
-
| 0.
|
77 |
-
| 0.
|
78 |
-
| 0.
|
79 |
-
| 0.
|
80 |
-
| 0.
|
81 |
-
| 0.
|
82 |
-
| 0.
|
83 |
-
| 0.
|
84 |
-
| 0.
|
85 |
-
| 0.
|
86 |
-
| 0.
|
87 |
-
| 0.
|
88 |
-
| 0.
|
89 |
-
| 0.
|
90 |
-
| 0.
|
91 |
-
| 0.
|
92 |
-
| 0.
|
93 |
-
| 0.
|
94 |
-
| 0.
|
95 |
|
96 |
|
97 |
### Framework versions
|
|
|
20 |
|
21 |
This model is a fine-tuned version of [distilbert-base-cased](https://huggingface.co/distilbert-base-cased) on the None dataset.
|
22 |
It achieves the following results on the evaluation set:
|
23 |
+
- Loss: 0.9689
|
24 |
+
- Precision: 0.8231
|
25 |
+
- Recall: 0.8151
|
26 |
+
- F1: 0.8155
|
27 |
+
- Accuracy: 0.8151
|
28 |
|
29 |
## Model description
|
30 |
|
|
|
55 |
|
56 |
| Training Loss | Epoch | Step | Validation Loss | Precision | Recall | F1 | Accuracy |
|
57 |
|:-------------:|:-----:|:----:|:---------------:|:---------:|:------:|:------:|:--------:|
|
58 |
+
| 1.9367 | 0.14 | 30 | 1.8290 | 0.1621 | 0.2605 | 0.1694 | 0.2605 |
|
59 |
+
| 1.8256 | 0.27 | 60 | 1.7350 | 0.1476 | 0.2745 | 0.1786 | 0.2745 |
|
60 |
+
| 1.6271 | 0.41 | 90 | 1.5305 | 0.4700 | 0.4594 | 0.3823 | 0.4594 |
|
61 |
+
| 1.3898 | 0.54 | 120 | 1.3535 | 0.5555 | 0.4790 | 0.4664 | 0.4790 |
|
62 |
+
| 1.2341 | 0.68 | 150 | 1.0183 | 0.7309 | 0.7087 | 0.7041 | 0.7087 |
|
63 |
+
| 1.0666 | 0.81 | 180 | 0.9651 | 0.7280 | 0.6583 | 0.6284 | 0.6583 |
|
64 |
+
| 0.8155 | 0.95 | 210 | 0.8172 | 0.7889 | 0.7647 | 0.7620 | 0.7647 |
|
65 |
+
| 0.6679 | 1.08 | 240 | 0.6941 | 0.7973 | 0.7843 | 0.7818 | 0.7843 |
|
66 |
+
| 0.52 | 1.22 | 270 | 0.6729 | 0.8009 | 0.7927 | 0.7921 | 0.7927 |
|
67 |
+
| 0.4683 | 1.35 | 300 | 0.7385 | 0.8072 | 0.7955 | 0.7962 | 0.7955 |
|
68 |
+
| 0.3937 | 1.49 | 330 | 0.6951 | 0.8094 | 0.7983 | 0.7994 | 0.7983 |
|
69 |
+
| 0.4883 | 1.62 | 360 | 0.6793 | 0.8099 | 0.8039 | 0.8034 | 0.8039 |
|
70 |
+
| 0.4532 | 1.76 | 390 | 0.6710 | 0.8108 | 0.8067 | 0.8046 | 0.8067 |
|
71 |
+
| 0.3099 | 1.89 | 420 | 0.6839 | 0.8136 | 0.8067 | 0.8055 | 0.8067 |
|
72 |
+
| 0.3798 | 2.03 | 450 | 0.8117 | 0.8197 | 0.8095 | 0.8099 | 0.8095 |
|
73 |
+
| 0.2304 | 2.16 | 480 | 0.7814 | 0.8299 | 0.8263 | 0.8251 | 0.8263 |
|
74 |
+
| 0.1489 | 2.3 | 510 | 0.8918 | 0.8082 | 0.7955 | 0.7943 | 0.7955 |
|
75 |
+
| 0.1525 | 2.43 | 540 | 0.9288 | 0.8161 | 0.8039 | 0.8048 | 0.8039 |
|
76 |
+
| 0.2774 | 2.57 | 570 | 0.8478 | 0.8347 | 0.8291 | 0.8278 | 0.8291 |
|
77 |
+
| 0.2452 | 2.7 | 600 | 0.8499 | 0.8342 | 0.8291 | 0.8296 | 0.8291 |
|
78 |
+
| 0.1811 | 2.84 | 630 | 0.8531 | 0.8381 | 0.8347 | 0.8340 | 0.8347 |
|
79 |
+
| 0.1509 | 2.97 | 660 | 0.9766 | 0.8150 | 0.7955 | 0.7967 | 0.7955 |
|
80 |
+
| 0.1073 | 3.11 | 690 | 0.8532 | 0.8269 | 0.8179 | 0.8179 | 0.8179 |
|
81 |
+
| 0.1273 | 3.24 | 720 | 0.9157 | 0.8315 | 0.8235 | 0.8247 | 0.8235 |
|
82 |
+
| 0.0614 | 3.38 | 750 | 0.9050 | 0.8364 | 0.8291 | 0.8303 | 0.8291 |
|
83 |
+
| 0.0876 | 3.51 | 780 | 0.9221 | 0.8421 | 0.8347 | 0.8352 | 0.8347 |
|
84 |
+
| 0.0574 | 3.65 | 810 | 0.9416 | 0.8351 | 0.8263 | 0.8273 | 0.8263 |
|
85 |
+
| 0.0783 | 3.78 | 840 | 0.9414 | 0.8377 | 0.8291 | 0.8302 | 0.8291 |
|
86 |
+
| 0.0357 | 3.92 | 870 | 0.9270 | 0.8312 | 0.8207 | 0.8219 | 0.8207 |
|
87 |
+
| 0.0589 | 4.05 | 900 | 0.9254 | 0.8379 | 0.8263 | 0.8275 | 0.8263 |
|
88 |
+
| 0.0297 | 4.19 | 930 | 0.9402 | 0.8189 | 0.8095 | 0.8104 | 0.8095 |
|
89 |
+
| 0.0324 | 4.32 | 960 | 0.9545 | 0.8303 | 0.8235 | 0.8241 | 0.8235 |
|
90 |
+
| 0.0405 | 4.46 | 990 | 0.9574 | 0.8273 | 0.8207 | 0.8209 | 0.8207 |
|
91 |
+
| 0.0039 | 4.59 | 1020 | 0.9553 | 0.8309 | 0.8235 | 0.8240 | 0.8235 |
|
92 |
+
| 0.0052 | 4.73 | 1050 | 0.9650 | 0.8335 | 0.8263 | 0.8267 | 0.8263 |
|
93 |
+
| 0.0473 | 4.86 | 1080 | 0.9678 | 0.8259 | 0.8179 | 0.8186 | 0.8179 |
|
94 |
+
| 0.0417 | 5.0 | 1110 | 0.9689 | 0.8231 | 0.8151 | 0.8155 | 0.8151 |
|
95 |
|
96 |
|
97 |
### Framework versions
|
config.json
CHANGED
@@ -1,19 +1,13 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "
|
3 |
"activation": "gelu",
|
4 |
"architectures": [
|
5 |
"DistilBertForSequenceClassification"
|
6 |
],
|
7 |
"attention_dropout": 0.1,
|
8 |
-
"
|
9 |
-
"bos_token_id": 0,
|
10 |
-
"dim": 1024,
|
11 |
"dropout": 0.1,
|
12 |
-
"eos_token_id": 2,
|
13 |
-
"gradient_checkpointing": false,
|
14 |
-
"hidden_act": "gelu",
|
15 |
"hidden_dim": 3072,
|
16 |
-
"hidden_dropout_prob": 0.1,
|
17 |
"id2label": {
|
18 |
"0": "anger",
|
19 |
"1": "fear",
|
@@ -24,7 +18,6 @@
|
|
24 |
"6": "surprise"
|
25 |
},
|
26 |
"initializer_range": 0.02,
|
27 |
-
"intermediate_size": 4096,
|
28 |
"label2id": {
|
29 |
"LABEL_0": 0,
|
30 |
"LABEL_1": 1,
|
@@ -34,18 +27,18 @@
|
|
34 |
"LABEL_5": 5,
|
35 |
"LABEL_6": 6
|
36 |
},
|
37 |
-
"
|
38 |
-
"max_position_embeddings": 514,
|
39 |
"model_type": "distilbert",
|
40 |
-
"n_heads":
|
41 |
-
"n_layers":
|
42 |
-
"
|
|
|
43 |
"problem_type": "single_label_classification",
|
44 |
"qa_dropout": 0.1,
|
45 |
"seq_classif_dropout": 0.2,
|
46 |
"sinusoidal_pos_embds": false,
|
|
|
47 |
"torch_dtype": "float32",
|
48 |
"transformers_version": "4.39.1",
|
49 |
-
"
|
50 |
-
"vocab_size": 50265
|
51 |
}
|
|
|
1 |
{
|
2 |
+
"_name_or_path": "distilbert-base-cased",
|
3 |
"activation": "gelu",
|
4 |
"architectures": [
|
5 |
"DistilBertForSequenceClassification"
|
6 |
],
|
7 |
"attention_dropout": 0.1,
|
8 |
+
"dim": 768,
|
|
|
|
|
9 |
"dropout": 0.1,
|
|
|
|
|
|
|
10 |
"hidden_dim": 3072,
|
|
|
11 |
"id2label": {
|
12 |
"0": "anger",
|
13 |
"1": "fear",
|
|
|
18 |
"6": "surprise"
|
19 |
},
|
20 |
"initializer_range": 0.02,
|
|
|
21 |
"label2id": {
|
22 |
"LABEL_0": 0,
|
23 |
"LABEL_1": 1,
|
|
|
27 |
"LABEL_5": 5,
|
28 |
"LABEL_6": 6
|
29 |
},
|
30 |
+
"max_position_embeddings": 512,
|
|
|
31 |
"model_type": "distilbert",
|
32 |
+
"n_heads": 12,
|
33 |
+
"n_layers": 6,
|
34 |
+
"output_past": true,
|
35 |
+
"pad_token_id": 0,
|
36 |
"problem_type": "single_label_classification",
|
37 |
"qa_dropout": 0.1,
|
38 |
"seq_classif_dropout": 0.2,
|
39 |
"sinusoidal_pos_embds": false,
|
40 |
+
"tie_weights_": true,
|
41 |
"torch_dtype": "float32",
|
42 |
"transformers_version": "4.39.1",
|
43 |
+
"vocab_size": 28996
|
|
|
44 |
}
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cd36eb358587d3efcb9c731a3781676d5487868f5e6480538ef6c029568f06c7
|
3 |
+
size 263160068
|
runs/Mar26_15-18-23_a22b7578fb0a/events.out.tfevents.1711466309.a22b7578fb0a.310.6
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:998fe2c9f0c29844b878b98c0c74f48ce960ecded2f678160b7e40155eadd8b6
|
3 |
+
size 6385
|
runs/Mar26_17-55-36_a22b7578fb0a/events.out.tfevents.1711475748.a22b7578fb0a.310.7
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:47950cf6c5a58ba1f2963554bba53bb46c2271f544e70a2bfa5b45bf9fa899d0
|
3 |
+
size 30349
|
runs/Mar26_17-55-36_a22b7578fb0a/events.out.tfevents.1711483712.a22b7578fb0a.310.8
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ea1d971676c0f0b97dd9c74db5322b0abc50ed62be4c599b8bc3b6d633369d2f
|
3 |
+
size 560
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8328692412b66985bc84f2473c4b57629a858923170411bc1ac199db3ba916e1
|
3 |
size 4920
|