helling100 committed on
Commit
4d4edc0
1 Parent(s): b5ce512

Upload TFDistilBertForSequenceClassification

Browse files
Files changed (3) hide show
  1. README.md +23 -35
  2. config.json +17 -23
  3. tf_model.h5 +2 -2
README.md CHANGED
@@ -12,19 +12,17 @@ probably proofread and complete it, then remove this comment. -->
12
 
13
  # Regression_bert_1
14
 
15
- This model is a fine-tuned version of [bert-base-cased](https://huggingface.co/bert-base-cased) on an unknown dataset.
16
  It achieves the following results on the evaluation set:
17
- - Train Loss: 0.2391
18
- - Train Mae: 0.3195
19
- - Train Mse: 0.1560
20
- - Train R2-score: 0.6527
21
- - Train Accuracy: 0.7000
22
- - Validation Loss: 0.2004
23
- - Validation Mae: 0.3928
24
- - Validation Mse: 0.1992
25
- - Validation R2-score: 0.7864
26
- - Validation Accuracy: 0.5045
27
- - Epoch: 19
28
 
29
  ## Model description
30
 
@@ -43,33 +41,23 @@ More information needed
43
  ### Training hyperparameters
44
 
45
  The following hyperparameters were used during training:
46
- - optimizer: {'name': 'Adam', 'weight_decay': None, 'clipnorm': None, 'global_clipnorm': None, 'clipvalue': None, 'use_ema': False, 'ema_momentum': 0.99, 'ema_overwrite_frequency': None, 'jit_compile': True, 'is_legacy_optimizer': False, 'learning_rate': 1e-06, 'beta_1': 0.9, 'beta_2': 0.999, 'epsilon': 1e-07, 'amsgrad': False}
47
  - training_precision: float32
48
 
49
  ### Training results
50
 
51
- | Train Loss | Train Mae | Train Mse | Train R2-score | Train Accuracy | Validation Loss | Validation Mae | Validation Mse | Validation R2-score | Validation Accuracy | Epoch |
52
- |:----------:|:---------:|:---------:|:--------------:|:--------------:|:---------------:|:--------------:|:--------------:|:-------------------:|:-------------------:|:-----:|
53
- | 0.3059 | 0.3004 | 0.1391 | 0.2698 | 0.7615 | 0.1679 | 0.3612 | 0.1664 | 0.8226 | 0.7297 | 0 |
54
- | 0.3078 | 0.3128 | 0.1536 | 0.5256 | 0.7333 | 0.1964 | 0.3883 | 0.1953 | 0.7905 | 0.5225 | 1 |
55
- | 0.2835 | 0.3117 | 0.1453 | 0.6731 | 0.7436 | 0.1552 | 0.3466 | 0.1536 | 0.8344 | 0.9009 | 2 |
56
- | 0.2894 | 0.2934 | 0.1319 | 0.5769 | 0.7872 | 0.1648 | 0.3593 | 0.1633 | 0.8246 | 0.7387 | 3 |
57
- | 0.2968 | 0.3093 | 0.1457 | 0.6067 | 0.7590 | 0.1735 | 0.3612 | 0.1721 | 0.8169 | 0.7658 | 4 |
58
- | 0.2352 | 0.3072 | 0.1469 | 0.4849 | 0.7385 | 0.1841 | 0.3698 | 0.1828 | 0.8045 | 0.6757 | 5 |
59
- | 0.2289 | 0.3087 | 0.1453 | 0.5043 | 0.7513 | 0.1505 | 0.3435 | 0.1488 | 0.8395 | 0.9369 | 6 |
60
- | 0.3171 | 0.2925 | 0.1307 | 0.6744 | 0.7949 | 0.1885 | 0.3742 | 0.1873 | 0.7988 | 0.5315 | 7 |
61
- | 0.3035 | 0.2992 | 0.1367 | 0.5324 | 0.7538 | 0.1655 | 0.3536 | 0.1640 | 0.8240 | 0.8559 | 8 |
62
- | 0.2230 | 0.3118 | 0.1506 | 0.6671 | 0.7154 | 0.1699 | 0.3573 | 0.1685 | 0.8197 | 0.7838 | 9 |
63
- | 0.2696 | 0.3023 | 0.1446 | 0.3957 | 0.7308 | 0.1795 | 0.3697 | 0.1781 | 0.8092 | 0.7207 | 10 |
64
- | 0.3948 | 0.3093 | 0.1457 | 0.6762 | 0.7385 | 0.1685 | 0.3593 | 0.1670 | 0.8189 | 0.7207 | 11 |
65
- | 0.3111 | 0.3081 | 0.1456 | 0.6799 | 0.7205 | 0.2205 | 0.4121 | 0.2195 | 0.7623 | 0.4955 | 12 |
66
- | 0.3261 | 0.3255 | 0.1638 | 0.4975 | 0.6974 | 0.1668 | 0.3542 | 0.1653 | 0.8229 | 0.9279 | 13 |
67
- | 0.2362 | 0.2918 | 0.1294 | 0.6853 | 0.8026 | 0.1577 | 0.3525 | 0.1561 | 0.8317 | 0.7387 | 14 |
68
- | 0.3563 | 0.3067 | 0.1461 | 0.5645 | 0.7256 | 0.2071 | 0.3959 | 0.2059 | 0.7812 | 0.3694 | 15 |
69
- | 0.2664 | 0.3261 | 0.1659 | 0.5289 | 0.6641 | 0.1852 | 0.3706 | 0.1839 | 0.8049 | 0.6937 | 16 |
70
- | 0.2194 | 0.2982 | 0.1387 | 0.6153 | 0.7513 | 0.1685 | 0.3569 | 0.1671 | 0.8220 | 0.7928 | 17 |
71
- | 0.3482 | 0.3021 | 0.1380 | 0.5936 | 0.7590 | 0.2050 | 0.3943 | 0.2039 | 0.7821 | 0.4775 | 18 |
72
- | 0.2391 | 0.3195 | 0.1560 | 0.6527 | 0.7000 | 0.2004 | 0.3928 | 0.1992 | 0.7864 | 0.5045 | 19 |
73
 
74
 
75
  ### Framework versions
 
12
 
13
  # Regression_bert_1
14
 
15
+ This model is a fine-tuned version of [distilbert-base-uncased](https://huggingface.co/distilbert-base-uncased) on an unknown dataset.
16
  It achieves the following results on the evaluation set:
17
+ - Train Loss: 0.1782
18
+ - Train Mae: 0.2820
19
+ - Train Mse: 0.1314
20
+ - Train R2-score: 0.7570
21
+ - Validation Loss: 0.1590
22
+ - Validation Mae: 0.3493
23
+ - Validation Mse: 0.1575
24
+ - Validation R2-score: 0.8711
25
+ - Epoch: 9
 
 
26
 
27
  ## Model description
28
 
 
41
  ### Training hyperparameters
42
 
43
  The following hyperparameters were used during training:
44
+ - optimizer: {'name': 'Adam', 'weight_decay': None, 'clipnorm': None, 'global_clipnorm': None, 'clipvalue': None, 'use_ema': False, 'ema_momentum': 0.99, 'ema_overwrite_frequency': None, 'jit_compile': True, 'is_legacy_optimizer': False, 'learning_rate': 1e-04, 'beta_1': 0.9, 'beta_2': 0.999, 'epsilon': 1e-07, 'amsgrad': False}
45
  - training_precision: float32
46
 
47
  ### Training results
48
 
49
+ | Train Loss | Train Mae | Train Mse | Train R2-score | Validation Loss | Validation Mae | Validation Mse | Validation R2-score | Epoch |
50
+ |:----------:|:---------:|:---------:|:--------------:|:---------------:|:--------------:|:--------------:|:-------------------:|:-----:|
51
+ | 0.2900 | 0.2663 | 0.1055 | 0.4630 | 0.1897 | 0.3738 | 0.1885 | 0.8539 | 0 |
52
+ | 0.1922 | 0.3084 | 0.1536 | 0.4341 | 0.2302 | 0.4254 | 0.2293 | 0.8277 | 1 |
53
+ | 0.3804 | 0.3059 | 0.1401 | 0.2020 | 0.1218 | 0.3206 | 0.1197 | 0.8852 | 2 |
54
+ | 0.2828 | 0.3079 | 0.1426 | 0.7285 | 0.1612 | 0.3507 | 0.1597 | 0.8700 | 3 |
55
+ | 0.1689 | 0.2803 | 0.1282 | 0.7133 | 0.2451 | 0.4425 | 0.2443 | 0.8173 | 4 |
56
+ | 0.1746 | 0.2955 | 0.1469 | 0.7545 | 0.2007 | 0.3887 | 0.1995 | 0.8472 | 5 |
57
+ | 0.1674 | 0.2840 | 0.1372 | -4.4884 | 0.1849 | 0.3671 | 0.1836 | 0.8569 | 6 |
58
+ | 0.1691 | 0.2853 | 0.1351 | 0.7348 | 0.1985 | 0.3857 | 0.1973 | 0.8486 | 7 |
59
+ | 0.1615 | 0.2909 | 0.1414 | 0.7511 | 0.1867 | 0.3696 | 0.1854 | 0.8558 | 8 |
60
+ | 0.1782 | 0.2820 | 0.1314 | 0.7570 | 0.1590 | 0.3493 | 0.1575 | 0.8711 | 9 |
 
 
 
 
 
 
 
 
 
 
61
 
62
 
63
  ### Framework versions
config.json CHANGED
@@ -1,36 +1,30 @@
1
  {
2
- "_name_or_path": "bert-base-cased",
 
3
  "architectures": [
4
- "BertForSequenceClassification"
5
  ],
6
- "attention_probs_dropout_prob": 0.1,
7
- "classifier_dropout": null,
8
- "gradient_checkpointing": false,
9
- "hidden_act": "gelu",
10
- "hidden_dropout_prob": 0.1,
11
- "hidden_size": 768,
12
  "id2label": {
13
- "0": "LABEL_0",
14
- "1": "LABEL_1",
15
- "2": "LABEL_2"
16
  },
17
  "initializer_range": 0.02,
18
- "intermediate_size": 3072,
19
  "label2id": {
20
- "LABEL_0": 0,
21
- "LABEL_1": 1,
22
- "LABEL_2": 2
23
  },
24
- "layer_norm_eps": 1e-12,
25
  "max_position_embeddings": 512,
26
- "model_type": "bert",
27
- "num_attention_heads": 12,
28
- "num_hidden_layers": 12,
29
  "pad_token_id": 0,
30
- "position_embedding_type": "absolute",
31
  "problem_type": "regression",
 
 
 
 
32
  "transformers_version": "4.27.3",
33
- "type_vocab_size": 2,
34
- "use_cache": true,
35
- "vocab_size": 28996
36
  }
 
1
  {
2
+ "_name_or_path": "distilbert-base-uncased",
3
+ "activation": "gelu",
4
  "architectures": [
5
+ "DistilBertForSequenceClassification"
6
  ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
 
 
11
  "id2label": {
12
+ "0": "LABEL_0"
 
 
13
  },
14
  "initializer_range": 0.02,
 
15
  "label2id": {
16
+ "LABEL_0": 0
 
 
17
  },
 
18
  "max_position_embeddings": 512,
19
+ "model_type": "distilbert",
20
+ "n_heads": 12,
21
+ "n_layers": 6,
22
  "pad_token_id": 0,
 
23
  "problem_type": "regression",
24
+ "qa_dropout": 0.1,
25
+ "seq_classif_dropout": 0.2,
26
+ "sinusoidal_pos_embds": false,
27
+ "tie_weights_": true,
28
  "transformers_version": "4.27.3",
29
+ "vocab_size": 30522
 
 
30
  }
tf_model.h5 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7287079d6577636ca6b9e0deafa5babf515088f3d9f04789de7fef4669d5ddd6
3
- size 433538332
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c42021b5e61d82f7f1155d50dfece44689de66383f245e9231ecb971e59d1c8e
3
+ size 267952072