floriangardin committed on
Commit
b93e3a0
1 Parent(s): 4099e02

musiclang/musiclang-v2-bpe

Browse files
Files changed (5) hide show
  1. README.md +18 -35
  2. config.json +5 -6
  3. generation_config.json +2 -2
  4. model.safetensors +2 -2
  5. training_args.bin +1 -1
README.md CHANGED
@@ -1,5 +1,4 @@
1
  ---
2
- base_model: musiclang/musiclang-v2-xl
3
  tags:
4
  - generated_from_trainer
5
  model-index:
@@ -12,9 +11,9 @@ should probably proofread and complete it, then remove this comment. -->
12
 
13
  # model
14
 
15
- This model is a fine-tuned version of [musiclang/musiclang-v2-xl](https://huggingface.co/musiclang/musiclang-v2-xl) on the None dataset.
16
  It achieves the following results on the evaluation set:
17
- - Loss: 0.2930
18
 
19
  ## Model description
20
 
@@ -34,8 +33,8 @@ More information needed
34
 
35
  The following hyperparameters were used during training:
36
  - learning_rate: 0.0001
37
- - train_batch_size: 24
38
- - eval_batch_size: 24
39
  - seed: 42
40
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
41
  - lr_scheduler_type: cosine_with_restarts
@@ -47,36 +46,20 @@ The following hyperparameters were used during training:
47
 
48
  | Training Loss | Epoch | Step | Validation Loss |
49
  |:-------------:|:-----:|:-----:|:---------------:|
50
- | 0.4098 | 0.03 | 2000 | 0.3828 |
51
- | 0.3901 | 0.07 | 4000 | 0.3684 |
52
- | 0.3737 | 0.1 | 6000 | 0.3569 |
53
- | 0.3652 | 0.13 | 8000 | 0.3489 |
54
- | 0.356 | 0.17 | 10000 | 0.3393 |
55
- | 0.35 | 0.2 | 12000 | 0.3342 |
56
- | 0.3443 | 0.23 | 14000 | 0.3282 |
57
- | 0.3371 | 0.26 | 16000 | 0.3235 |
58
- | 0.3361 | 0.3 | 18000 | 0.3201 |
59
- | 0.3301 | 0.33 | 20000 | 0.3160 |
60
- | 0.3253 | 0.36 | 22000 | 0.3131 |
61
- | 0.327 | 0.4 | 24000 | 0.3109 |
62
- | 0.3225 | 0.43 | 26000 | 0.3089 |
63
- | 0.3156 | 0.46 | 28000 | 0.3066 |
64
- | 0.3147 | 0.5 | 30000 | 0.3045 |
65
- | 0.3182 | 0.53 | 32000 | 0.3026 |
66
- | 0.3129 | 0.56 | 34000 | 0.3017 |
67
- | 0.3132 | 0.59 | 36000 | 0.3008 |
68
- | 0.3109 | 0.63 | 38000 | 0.2987 |
69
- | 0.3092 | 0.66 | 40000 | 0.2972 |
70
- | 0.3091 | 0.69 | 42000 | 0.2963 |
71
- | 0.3034 | 0.73 | 44000 | 0.2960 |
72
- | 0.3061 | 0.76 | 46000 | 0.2956 |
73
- | 0.3044 | 0.79 | 48000 | 0.2946 |
74
- | 0.3036 | 0.83 | 50000 | 0.2940 |
75
- | 0.3003 | 0.86 | 52000 | 0.2939 |
76
- | 0.303 | 0.89 | 54000 | 0.2934 |
77
- | 0.3007 | 0.93 | 56000 | 0.2932 |
78
- | 0.3009 | 0.96 | 58000 | 0.2930 |
79
- | 0.3 | 0.99 | 60000 | 0.2930 |
80
 
81
 
82
  ### Framework versions
 
1
  ---
 
2
  tags:
3
  - generated_from_trainer
4
  model-index:
 
11
 
12
  # model
13
 
14
+ This model is a fine-tuned version of [an unspecified base model](https://huggingface.co/) on the None dataset.
15
  It achieves the following results on the evaluation set:
16
+ - Loss: 0.9350
17
 
18
  ## Model description
19
 
 
33
 
34
  The following hyperparameters were used during training:
35
  - learning_rate: 0.0001
36
+ - train_batch_size: 16
37
+ - eval_batch_size: 16
38
  - seed: 42
39
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
40
  - lr_scheduler_type: cosine_with_restarts
 
46
 
47
  | Training Loss | Epoch | Step | Validation Loss |
48
  |:-------------:|:-----:|:-----:|:---------------:|
49
+ | 2.799 | 0.07 | 2000 | 2.6594 |
50
+ | 1.5819 | 0.13 | 4000 | 1.4878 |
51
+ | 1.3252 | 0.2 | 6000 | 1.2718 |
52
+ | 1.2293 | 0.27 | 8000 | 1.1748 |
53
+ | 1.141 | 0.34 | 10000 | 1.1004 |
54
+ | 1.093 | 0.4 | 12000 | 1.0582 |
55
+ | 1.0601 | 0.47 | 14000 | 1.0282 |
56
+ | 1.0285 | 0.54 | 16000 | 0.9957 |
57
+ | 1.002 | 0.61 | 18000 | 0.9794 |
58
+ | 0.9876 | 0.67 | 20000 | 0.9605 |
59
+ | 0.9903 | 0.74 | 22000 | 0.9489 |
60
+ | 0.9698 | 0.81 | 24000 | 0.9418 |
61
+ | 0.962 | 0.88 | 26000 | 0.9370 |
62
+ | 0.9598 | 0.94 | 28000 | 0.9350 |
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
 
65
  ### Framework versions
config.json CHANGED
@@ -1,22 +1,21 @@
1
  {
2
- "_name_or_path": "musiclang/musiclang-v2-xl",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPT2LMHeadModel"
6
  ],
7
  "attn_pdrop": 0.1,
8
- "bos_token_id": 5,
9
  "embd_pdrop": 0.1,
10
- "eos_token_id": 374,
11
  "initializer_range": 0.02,
12
  "layer_norm_epsilon": 1e-05,
13
  "model_type": "gpt2",
14
- "n_embd": 300,
15
  "n_head": 10,
16
  "n_inner": null,
17
  "n_layer": 10,
18
  "n_positions": 4096,
19
- "padding_token_id": 3,
20
  "reorder_and_upcast_attn": false,
21
  "resid_pdrop": 0.1,
22
  "scale_attn_by_inverse_layer_idx": false,
@@ -29,5 +28,5 @@
29
  "torch_dtype": "float32",
30
  "transformers_version": "4.37.2",
31
  "use_cache": true,
32
- "vocab_size": 374
33
  }
 
1
  {
 
2
  "activation_function": "gelu_new",
3
  "architectures": [
4
  "GPT2LMHeadModel"
5
  ],
6
  "attn_pdrop": 0.1,
7
+ "bos_token_id": 30000,
8
  "embd_pdrop": 0.1,
9
+ "eos_token_id": 30000,
10
  "initializer_range": 0.02,
11
  "layer_norm_epsilon": 1e-05,
12
  "model_type": "gpt2",
13
+ "n_embd": 600,
14
  "n_head": 10,
15
  "n_inner": null,
16
  "n_layer": 10,
17
  "n_positions": 4096,
18
+ "padding_token_id": 30000,
19
  "reorder_and_upcast_attn": false,
20
  "resid_pdrop": 0.1,
21
  "scale_attn_by_inverse_layer_idx": false,
 
28
  "torch_dtype": "float32",
29
  "transformers_version": "4.37.2",
30
  "use_cache": true,
31
+ "vocab_size": 30001
32
  }
generation_config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "_from_model_config": true,
3
- "bos_token_id": 5,
4
- "eos_token_id": 374,
5
  "transformers_version": "4.37.2"
6
  }
 
1
  {
2
  "_from_model_config": true,
3
+ "bos_token_id": 30000,
4
+ "eos_token_id": 30000,
5
  "transformers_version": "4.37.2"
6
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0edaec61be7b684821241f0871d5e8a7b48d865f5ad1aee2495c9dc9f54290a8
3
- size 48734696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22726d5d220ff7c2d8b4bda44044f456543da712c6f13abceb7ebfbdf291e29c
3
+ size 254962056
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ded85577c88c0610c6cc2a534d279f3cac2c97cc898df360f85bd6ae1a843d78
3
  size 4664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db2daf1ba377eb851dad0905d389222e965f048ff071e14d1039b7e459c05c67
3
  size 4664