SimoneJLaudani committed on
Commit
2e01ae9
1 Parent(s): e64b055

End of training

Browse files
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
  license: apache-2.0
3
- base_model: bert-base-uncased
4
  tags:
5
  - generated_from_trainer
6
  metrics:
@@ -18,13 +18,13 @@ should probably proofread and complete it, then remove this comment. -->
18
 
19
  # trainer6
20
 
21
- This model is a fine-tuned version of [bert-base-uncased](https://huggingface.co/bert-base-uncased) on the None dataset.
22
  It achieves the following results on the evaluation set:
23
- - Loss: 1.5569
24
- - Precision: 0.5963
25
- - Recall: 0.5952
26
- - F1: 0.5950
27
- - Accuracy: 0.5952
28
 
29
  ## Model description
30
 
@@ -43,21 +43,36 @@ More information needed
43
  ### Training hyperparameters
44
 
45
  The following hyperparameters were used during training:
46
- - learning_rate: 2e-05
47
- - train_batch_size: 16
48
- - eval_batch_size: 16
49
  - seed: 42
50
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
51
  - lr_scheduler_type: linear
52
- - num_epochs: 2
53
 
54
  ### Training results
55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
 
58
  ### Framework versions
59
 
60
- - Transformers 4.38.2
61
  - Pytorch 2.2.1+cu121
62
- - Datasets 2.18.0
63
- - Tokenizers 0.15.2
 
1
  ---
2
  license: apache-2.0
3
+ base_model: distilbert-base-uncased
4
  tags:
5
  - generated_from_trainer
6
  metrics:
 
18
 
19
  # trainer6
20
 
21
+ This model is a fine-tuned version of [distilbert-base-uncased](https://huggingface.co/distilbert-base-uncased) on an unknown dataset.
22
  It achieves the following results on the evaluation set:
23
+ - Loss: 0.8990
24
+ - Precision: 0.8531
25
+ - Recall: 0.8519
26
+ - F1: 0.8521
27
+ - Accuracy: 0.8519
28
 
29
  ## Model description
30
 
 
43
  ### Training hyperparameters
44
 
45
  The following hyperparameters were used during training:
46
+ - learning_rate: 5e-05
47
+ - train_batch_size: 64
48
+ - eval_batch_size: 64
49
  - seed: 42
50
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
51
  - lr_scheduler_type: linear
52
+ - num_epochs: 30
53
 
54
  ### Training results
55
 
56
+ | Training Loss | Epoch | Step | Validation Loss | Precision | Recall | F1 | Accuracy |
57
+ |:-------------:|:-------:|:----:|:---------------:|:---------:|:------:|:------:|:--------:|
58
+ | 1.0015 | 2.2222 | 100 | 0.5893 | 0.8219 | 0.8183 | 0.8179 | 0.8183 |
59
+ | 0.1719 | 4.4444 | 200 | 0.6665 | 0.8264 | 0.8201 | 0.8209 | 0.8201 |
60
+ | 0.0505 | 6.6667 | 300 | 0.7009 | 0.8474 | 0.8430 | 0.8439 | 0.8430 |
61
+ | 0.0182 | 8.8889 | 400 | 0.7909 | 0.8481 | 0.8413 | 0.8425 | 0.8413 |
62
+ | 0.0124 | 11.1111 | 500 | 0.8725 | 0.8401 | 0.8377 | 0.8377 | 0.8377 |
63
+ | 0.0075 | 13.3333 | 600 | 0.8820 | 0.8469 | 0.8430 | 0.8426 | 0.8430 |
64
+ | 0.0081 | 15.5556 | 700 | 0.8637 | 0.8396 | 0.8360 | 0.8362 | 0.8360 |
65
+ | 0.0051 | 17.7778 | 800 | 0.8981 | 0.8471 | 0.8448 | 0.8447 | 0.8448 |
66
+ | 0.0047 | 20.0 | 900 | 0.8829 | 0.8549 | 0.8536 | 0.8539 | 0.8536 |
67
+ | 0.0027 | 22.2222 | 1000 | 0.8943 | 0.8522 | 0.8501 | 0.8504 | 0.8501 |
68
+ | 0.0026 | 24.4444 | 1100 | 0.8910 | 0.8531 | 0.8519 | 0.8521 | 0.8519 |
69
+ | 0.0027 | 26.6667 | 1200 | 0.8933 | 0.8532 | 0.8519 | 0.8520 | 0.8519 |
70
+ | 0.0028 | 28.8889 | 1300 | 0.8990 | 0.8531 | 0.8519 | 0.8521 | 0.8519 |
71
 
72
 
73
  ### Framework versions
74
 
75
+ - Transformers 4.40.0
76
  - Pytorch 2.2.1+cu121
77
+ - Datasets 2.19.0
78
+ - Tokenizers 0.19.1
config.json CHANGED
@@ -1,14 +1,13 @@
1
  {
2
- "_name_or_path": "bert-base-uncased",
 
3
  "architectures": [
4
- "BertForSequenceClassification"
5
  ],
6
- "attention_probs_dropout_prob": 0.1,
7
- "classifier_dropout": null,
8
- "gradient_checkpointing": false,
9
- "hidden_act": "gelu",
10
- "hidden_dropout_prob": 0.1,
11
- "hidden_size": 768,
12
  "id2label": {
13
  "0": "anger",
14
  "1": "fear",
@@ -19,7 +18,6 @@
19
  "6": "surprise"
20
  },
21
  "initializer_range": 0.02,
22
- "intermediate_size": 3072,
23
  "label2id": {
24
  "LABEL_0": 0,
25
  "LABEL_1": 1,
@@ -29,17 +27,17 @@
29
  "LABEL_5": 5,
30
  "LABEL_6": 6
31
  },
32
- "layer_norm_eps": 1e-12,
33
  "max_position_embeddings": 512,
34
- "model_type": "bert",
35
- "num_attention_heads": 12,
36
- "num_hidden_layers": 12,
37
  "pad_token_id": 0,
38
- "position_embedding_type": "absolute",
39
  "problem_type": "single_label_classification",
 
 
 
 
40
  "torch_dtype": "float32",
41
- "transformers_version": "4.38.2",
42
- "type_vocab_size": 2,
43
- "use_cache": true,
44
  "vocab_size": 30522
45
  }
 
1
  {
2
+ "_name_or_path": "distilbert-base-uncased",
3
+ "activation": "gelu",
4
  "architectures": [
5
+ "DistilBertForSequenceClassification"
6
  ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
 
 
11
  "id2label": {
12
  "0": "anger",
13
  "1": "fear",
 
18
  "6": "surprise"
19
  },
20
  "initializer_range": 0.02,
 
21
  "label2id": {
22
  "LABEL_0": 0,
23
  "LABEL_1": 1,
 
27
  "LABEL_5": 5,
28
  "LABEL_6": 6
29
  },
 
30
  "max_position_embeddings": 512,
31
+ "model_type": "distilbert",
32
+ "n_heads": 12,
33
+ "n_layers": 6,
34
  "pad_token_id": 0,
 
35
  "problem_type": "single_label_classification",
36
+ "qa_dropout": 0.1,
37
+ "seq_classif_dropout": 0.2,
38
+ "sinusoidal_pos_embds": false,
39
+ "tie_weights_": true,
40
  "torch_dtype": "float32",
41
+ "transformers_version": "4.40.0",
 
 
42
  "vocab_size": 30522
43
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5581ff57a6a2951e9092a7453ceb9fbb0b6171e2588a789c7a90b4e271c7adcd
3
- size 437974028
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2a372ee7f96d4b09a0986f8203d36bd942893a1490a4b43f6c11146317b9b95
3
+ size 267847948
runs/Apr21_10-40-37_7c6a6620afc9/events.out.tfevents.1713696045.7c6a6620afc9.602.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3325fd606e2c2de653bcee75a97432b1654f75ee9ab6236148e6a41acb5a1e2b
3
+ size 5508
runs/Apr21_10-43-19_7c6a6620afc9/events.out.tfevents.1713696203.7c6a6620afc9.602.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2cc47f1fbd2b1f8c3bef1a5dc873fa19eb61a449be9455fc4bafe02e11311d4f
3
+ size 14790
runs/Apr21_11-03-28_7c6a6620afc9/events.out.tfevents.1713697412.7c6a6620afc9.602.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25ee4d97e8ff8f7d78c8d4ddf95b46f73fe1eaeec8309579441f4290fdf4d8fc
3
+ size 14056
tokenizer_config.json CHANGED
@@ -43,13 +43,15 @@
43
  },
44
  "clean_up_tokenization_spaces": true,
45
  "cls_token": "[CLS]",
 
46
  "do_lower_case": true,
47
  "mask_token": "[MASK]",
48
- "model_max_length": 512,
 
49
  "pad_token": "[PAD]",
50
  "sep_token": "[SEP]",
51
  "strip_accents": null,
52
  "tokenize_chinese_chars": true,
53
- "tokenizer_class": "BertTokenizer",
54
  "unk_token": "[UNK]"
55
  }
 
43
  },
44
  "clean_up_tokenization_spaces": true,
45
  "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
  "do_lower_case": true,
48
  "mask_token": "[MASK]",
49
+ "model_max_length": 1000000000000000019884624838656,
50
+ "never_split": null,
51
  "pad_token": "[PAD]",
52
  "sep_token": "[SEP]",
53
  "strip_accents": null,
54
  "tokenize_chinese_chars": true,
55
+ "tokenizer_class": "DistilBertTokenizer",
56
  "unk_token": "[UNK]"
57
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:028d6e8ef0d9b78668ad615028f40eccfddc498a05435a4de230b299097e251b
3
- size 4920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1bd6ec0c8864e9602f2156c5092a9b0839dcbb1f8341753f228a5da77fdab42
3
+ size 4984