peymansyh
/

distilhubert-finetuned-gtzan

@@ -22,7 +22,7 @@ model-index:
     metrics:
     - name: Accuracy
       type: accuracy
-      value: 0.82
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -32,8 +32,8 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [ntu-spml/distilhubert](https://huggingface.co/ntu-spml/distilhubert) on the GTZAN dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.6177
-- Accuracy: 0.82
 ## Model description
@@ -52,37 +52,33 @@ More information needed
 ### Training hyperparameters
 The following hyperparameters were used during training:
-- learning_rate: 5e-05
 - train_batch_size: 4
 - eval_batch_size: 4
 - seed: 42
-- gradient_accumulation_steps: 4
-- total_train_batch_size: 16
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
 - lr_scheduler_warmup_ratio: 0.1
-- num_epochs: 16
 ### Training results
 | Training Loss | Epoch | Step | Validation Loss | Accuracy |
 |:-------------:|:-----:|:----:|:---------------:|:--------:|
-| 2.2044        | 1.0   | 56   | 2.1322          | 0.51     |
-| 1.6674        | 1.99  | 112  | 1.5763          | 0.64     |
-| 1.3307        | 2.99  | 168  | 1.1971          | 0.71     |
-| 0.9855        | 4.0   | 225  | 1.0534          | 0.71     |
-| 0.7781        | 5.0   | 281  | 0.8894          | 0.76     |
-| 0.6672        | 5.99  | 337  | 0.8119          | 0.77     |
-| 0.5625        | 6.99  | 393  | 0.7451          | 0.82     |
-| 0.3619        | 8.0   | 450  | 0.6832          | 0.78     |
-| 0.4671        | 9.0   | 506  | 0.6875          | 0.79     |
-| 0.226         | 9.99  | 562  | 0.5677          | 0.88     |
-| 0.2049        | 10.99 | 618  | 0.6321          | 0.81     |
-| 0.1404        | 12.0  | 675  | 0.5729          | 0.82     |
-| 0.1456        | 13.0  | 731  | 0.6103          | 0.83     |
-| 0.0831        | 13.99 | 787  | 0.5913          | 0.82     |
-| 0.0683        | 14.99 | 843  | 0.6315          | 0.82     |
-| 0.0863        | 15.93 | 896  | 0.6177          | 0.82     |
 ### Framework versions

     metrics:
     - name: Accuracy
       type: accuracy
+      value: 0.87
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 This model is a fine-tuned version of [ntu-spml/distilhubert](https://huggingface.co/ntu-spml/distilhubert) on the GTZAN dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.6139
+- Accuracy: 0.87
 ## Model description
 ### Training hyperparameters
 The following hyperparameters were used during training:
+- learning_rate: 8e-05
 - train_batch_size: 4
 - eval_batch_size: 4
 - seed: 42
+- gradient_accumulation_steps: 2
+- total_train_batch_size: 8
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
 - lr_scheduler_warmup_ratio: 0.1
+- num_epochs: 12
 ### Training results
 | Training Loss | Epoch | Step | Validation Loss | Accuracy |
 |:-------------:|:-----:|:----:|:---------------:|:--------:|
+| 2.0172        | 1.0   | 112  | 1.8314          | 0.37     |
+| 1.5433        | 2.0   | 225  | 1.2575          | 0.5      |
+| 1.1517        | 3.0   | 337  | 0.9577          | 0.7      |
+| 0.904         | 4.0   | 450  | 0.7582          | 0.77     |
+| 0.4788        | 5.0   | 562  | 0.7504          | 0.79     |
+| 0.3843        | 6.0   | 675  | 0.6265          | 0.79     |
+| 0.3683        | 7.0   | 787  | 0.6683          | 0.8      |
+| 0.2278        | 8.0   | 900  | 0.8167          | 0.77     |
+| 0.4534        | 9.0   | 1012 | 0.6023          | 0.83     |
+| 0.2357        | 10.0  | 1125 | 0.6185          | 0.83     |
+| 0.3674        | 11.0  | 1237 | 0.6079          | 0.86     |
+| 0.148         | 11.95 | 1344 | 0.6139          | 0.87     |
 ### Framework versions

config.json CHANGED Viewed

@@ -1,11 +1,11 @@
 {
   "_name_or_path": "ntu-spml/distilhubert",
-  "activation_dropout": 0.1,
   "apply_spec_augment": false,
   "architectures": [
     "HubertForSequenceClassification"
   ],
-  "attention_dropout": 0.1,
   "bos_token_id": 1,
   "classifier_proj_size": 256,
   "conv_bias": false,
@@ -44,9 +44,9 @@
   "feat_extract_norm": "group",
   "feat_proj_dropout": 0.0,
   "feat_proj_layer_norm": false,
-  "final_dropout": 0.0,
   "hidden_act": "gelu",
-  "hidden_dropout": 0.7,
   "hidden_size": 768,
   "id2label": {
     "0": "blues",
@@ -75,7 +75,7 @@
     "rock": "9"
   },
   "layer_norm_eps": 1e-05,
-  "layerdrop": 0.0,
   "mask_feature_length": 10,
   "mask_feature_min_masks": 0,
   "mask_feature_prob": 0.0,

 {
   "_name_or_path": "ntu-spml/distilhubert",
+  "activation_dropout": 0.7,
   "apply_spec_augment": false,
   "architectures": [
     "HubertForSequenceClassification"
   ],
+  "attention_dropout": 0.5,
   "bos_token_id": 1,
   "classifier_proj_size": 256,
   "conv_bias": false,
   "feat_extract_norm": "group",
   "feat_proj_dropout": 0.0,
   "feat_proj_layer_norm": false,
+  "final_dropout": 0.5,
   "hidden_act": "gelu",
+  "hidden_dropout": 0.5,
   "hidden_size": 768,
   "id2label": {
     "0": "blues",
     "rock": "9"
   },
   "layer_norm_eps": 1e-05,
+  "layerdrop": 0.1,
   "mask_feature_length": 10,
   "mask_feature_min_masks": 0,
   "mask_feature_prob": 0.0,

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e1a49e3edff80063c930877ed9c933d8481e65e7817fe85a4015a982e4181ff5
 size 94783376

 version https://git-lfs.github.com/spec/v1
+oid sha256:328f55a11052a43d166463f0b1abfdc54ccee0e5eaa749254a6972fcf8ae1860
 size 94783376

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a54180a6e56baaf78099d9dc607b4c00235a5b613ed6f39f36ce36e22f227cb2
-size 4027

 version https://git-lfs.github.com/spec/v1
+oid sha256:18b79cc5e186ee2dfc4553015096d16433b1e60bccdfb987027833fa8123d3f8
+size 4091