peymansyh committed on
Commit
e68a5e3
1 Parent(s): b3abef1

End of training

Browse files
Files changed (4) hide show
  1. README.md +19 -23
  2. config.json +5 -5
  3. pytorch_model.bin +1 -1
  4. training_args.bin +2 -2
README.md CHANGED
@@ -22,7 +22,7 @@ model-index:
22
  metrics:
23
  - name: Accuracy
24
  type: accuracy
25
- value: 0.82
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -32,8 +32,8 @@ should probably proofread and complete it, then remove this comment. -->
32
 
33
  This model is a fine-tuned version of [ntu-spml/distilhubert](https://huggingface.co/ntu-spml/distilhubert) on the GTZAN dataset.
34
  It achieves the following results on the evaluation set:
35
- - Loss: 0.6177
36
- - Accuracy: 0.82
37
 
38
  ## Model description
39
 
@@ -52,37 +52,33 @@ More information needed
52
  ### Training hyperparameters
53
 
54
  The following hyperparameters were used during training:
55
- - learning_rate: 5e-05
56
  - train_batch_size: 4
57
  - eval_batch_size: 4
58
  - seed: 42
59
- - gradient_accumulation_steps: 4
60
- - total_train_batch_size: 16
61
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
62
  - lr_scheduler_type: linear
63
  - lr_scheduler_warmup_ratio: 0.1
64
- - num_epochs: 16
65
 
66
  ### Training results
67
 
68
  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
69
  |:-------------:|:-----:|:----:|:---------------:|:--------:|
70
- | 2.2044 | 1.0 | 56 | 2.1322 | 0.51 |
71
- | 1.6674 | 1.99 | 112 | 1.5763 | 0.64 |
72
- | 1.3307 | 2.99 | 168 | 1.1971 | 0.71 |
73
- | 0.9855 | 4.0 | 225 | 1.0534 | 0.71 |
74
- | 0.7781 | 5.0 | 281 | 0.8894 | 0.76 |
75
- | 0.6672 | 5.99 | 337 | 0.8119 | 0.77 |
76
- | 0.5625 | 6.99 | 393 | 0.7451 | 0.82 |
77
- | 0.3619 | 8.0 | 450 | 0.6832 | 0.78 |
78
- | 0.4671 | 9.0 | 506 | 0.6875 | 0.79 |
79
- | 0.226 | 9.99 | 562 | 0.5677 | 0.88 |
80
- | 0.2049 | 10.99 | 618 | 0.6321 | 0.81 |
81
- | 0.1404 | 12.0 | 675 | 0.5729 | 0.82 |
82
- | 0.1456 | 13.0 | 731 | 0.6103 | 0.83 |
83
- | 0.0831 | 13.99 | 787 | 0.5913 | 0.82 |
84
- | 0.0683 | 14.99 | 843 | 0.6315 | 0.82 |
85
- | 0.0863 | 15.93 | 896 | 0.6177 | 0.82 |
86
 
87
 
88
  ### Framework versions
 
22
  metrics:
23
  - name: Accuracy
24
  type: accuracy
25
+ value: 0.87
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
32
 
33
  This model is a fine-tuned version of [ntu-spml/distilhubert](https://huggingface.co/ntu-spml/distilhubert) on the GTZAN dataset.
34
  It achieves the following results on the evaluation set:
35
+ - Loss: 0.6139
36
+ - Accuracy: 0.87
37
 
38
  ## Model description
39
 
 
52
  ### Training hyperparameters
53
 
54
  The following hyperparameters were used during training:
55
+ - learning_rate: 8e-05
56
  - train_batch_size: 4
57
  - eval_batch_size: 4
58
  - seed: 42
59
+ - gradient_accumulation_steps: 2
60
+ - total_train_batch_size: 8
61
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
62
  - lr_scheduler_type: linear
63
  - lr_scheduler_warmup_ratio: 0.1
64
+ - num_epochs: 12
65
 
66
  ### Training results
67
 
68
  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
69
  |:-------------:|:-----:|:----:|:---------------:|:--------:|
70
+ | 2.0172 | 1.0 | 112 | 1.8314 | 0.37 |
71
+ | 1.5433 | 2.0 | 225 | 1.2575 | 0.5 |
72
+ | 1.1517 | 3.0 | 337 | 0.9577 | 0.7 |
73
+ | 0.904 | 4.0 | 450 | 0.7582 | 0.77 |
74
+ | 0.4788 | 5.0 | 562 | 0.7504 | 0.79 |
75
+ | 0.3843 | 6.0 | 675 | 0.6265 | 0.79 |
76
+ | 0.3683 | 7.0 | 787 | 0.6683 | 0.8 |
77
+ | 0.2278 | 8.0 | 900 | 0.8167 | 0.77 |
78
+ | 0.4534 | 9.0 | 1012 | 0.6023 | 0.83 |
79
+ | 0.2357 | 10.0 | 1125 | 0.6185 | 0.83 |
80
+ | 0.3674 | 11.0 | 1237 | 0.6079 | 0.86 |
81
+ | 0.148 | 11.95 | 1344 | 0.6139 | 0.87 |
 
 
 
 
82
 
83
 
84
  ### Framework versions
config.json CHANGED
@@ -1,11 +1,11 @@
1
  {
2
  "_name_or_path": "ntu-spml/distilhubert",
3
- "activation_dropout": 0.1,
4
  "apply_spec_augment": false,
5
  "architectures": [
6
  "HubertForSequenceClassification"
7
  ],
8
- "attention_dropout": 0.1,
9
  "bos_token_id": 1,
10
  "classifier_proj_size": 256,
11
  "conv_bias": false,
@@ -44,9 +44,9 @@
44
  "feat_extract_norm": "group",
45
  "feat_proj_dropout": 0.0,
46
  "feat_proj_layer_norm": false,
47
- "final_dropout": 0.0,
48
  "hidden_act": "gelu",
49
- "hidden_dropout": 0.7,
50
  "hidden_size": 768,
51
  "id2label": {
52
  "0": "blues",
@@ -75,7 +75,7 @@
75
  "rock": "9"
76
  },
77
  "layer_norm_eps": 1e-05,
78
- "layerdrop": 0.0,
79
  "mask_feature_length": 10,
80
  "mask_feature_min_masks": 0,
81
  "mask_feature_prob": 0.0,
 
1
  {
2
  "_name_or_path": "ntu-spml/distilhubert",
3
+ "activation_dropout": 0.7,
4
  "apply_spec_augment": false,
5
  "architectures": [
6
  "HubertForSequenceClassification"
7
  ],
8
+ "attention_dropout": 0.5,
9
  "bos_token_id": 1,
10
  "classifier_proj_size": 256,
11
  "conv_bias": false,
 
44
  "feat_extract_norm": "group",
45
  "feat_proj_dropout": 0.0,
46
  "feat_proj_layer_norm": false,
47
+ "final_dropout": 0.5,
48
  "hidden_act": "gelu",
49
+ "hidden_dropout": 0.5,
50
  "hidden_size": 768,
51
  "id2label": {
52
  "0": "blues",
 
75
  "rock": "9"
76
  },
77
  "layer_norm_eps": 1e-05,
78
+ "layerdrop": 0.1,
79
  "mask_feature_length": 10,
80
  "mask_feature_min_masks": 0,
81
  "mask_feature_prob": 0.0,
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e1a49e3edff80063c930877ed9c933d8481e65e7817fe85a4015a982e4181ff5
3
  size 94783376
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:328f55a11052a43d166463f0b1abfdc54ccee0e5eaa749254a6972fcf8ae1860
3
  size 94783376
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a54180a6e56baaf78099d9dc607b4c00235a5b613ed6f39f36ce36e22f227cb2
3
- size 4027
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18b79cc5e186ee2dfc4553015096d16433b1e60bccdfb987027833fa8123d3f8
3
+ size 4091