lilyray commited on
Commit
15be979
1 Parent(s): 0647b45

distilbert-emotion-hyper

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. README.md +12 -10
  2. model.safetensors +1 -1
  3. run-0/checkpoint-1000/config.json +1 -1
  4. run-0/checkpoint-1000/model.safetensors +1 -1
  5. run-0/checkpoint-1000/optimizer.pt +1 -1
  6. run-0/checkpoint-1000/rng_state.pth +2 -2
  7. run-0/checkpoint-1000/scheduler.pt +1 -1
  8. run-0/checkpoint-1000/trainer_state.json +17 -17
  9. run-0/checkpoint-1000/training_args.bin +1 -1
  10. run-0/checkpoint-2000/config.json +41 -0
  11. run-0/checkpoint-2000/model.safetensors +3 -0
  12. run-0/checkpoint-2000/optimizer.pt +3 -0
  13. run-0/checkpoint-2000/rng_state.pth +3 -0
  14. run-0/checkpoint-2000/scheduler.pt +3 -0
  15. run-0/checkpoint-2000/special_tokens_map.json +7 -0
  16. run-0/checkpoint-2000/tokenizer_config.json +57 -0
  17. run-0/checkpoint-2000/trainer_state.json +72 -0
  18. run-0/checkpoint-2000/training_args.bin +3 -0
  19. run-0/checkpoint-2000/vocab.txt +0 -0
  20. run-0/checkpoint-3000/config.json +41 -0
  21. run-0/checkpoint-3000/model.safetensors +3 -0
  22. run-0/checkpoint-3000/optimizer.pt +3 -0
  23. run-0/checkpoint-3000/rng_state.pth +3 -0
  24. run-0/checkpoint-3000/scheduler.pt +3 -0
  25. run-0/checkpoint-3000/special_tokens_map.json +7 -0
  26. run-0/checkpoint-3000/tokenizer_config.json +57 -0
  27. run-0/checkpoint-3000/trainer_state.json +95 -0
  28. run-0/checkpoint-3000/training_args.bin +3 -0
  29. run-0/checkpoint-3000/vocab.txt +0 -0
  30. run-0/checkpoint-4000/config.json +41 -0
  31. run-0/checkpoint-4000/model.safetensors +3 -0
  32. run-0/checkpoint-4000/optimizer.pt +3 -0
  33. run-0/checkpoint-4000/rng_state.pth +3 -0
  34. run-0/checkpoint-4000/scheduler.pt +3 -0
  35. run-0/checkpoint-4000/special_tokens_map.json +7 -0
  36. run-0/checkpoint-4000/tokenizer_config.json +57 -0
  37. run-0/checkpoint-4000/trainer_state.json +118 -0
  38. run-0/checkpoint-4000/training_args.bin +3 -0
  39. run-0/checkpoint-4000/vocab.txt +0 -0
  40. run-0/checkpoint-5000/config.json +41 -0
  41. run-0/checkpoint-5000/model.safetensors +3 -0
  42. run-0/checkpoint-5000/optimizer.pt +3 -0
  43. run-0/checkpoint-5000/rng_state.pth +3 -0
  44. run-0/checkpoint-5000/scheduler.pt +3 -0
  45. run-0/checkpoint-5000/special_tokens_map.json +7 -0
  46. run-0/checkpoint-5000/tokenizer_config.json +57 -0
  47. run-0/checkpoint-5000/trainer_state.json +141 -0
  48. run-0/checkpoint-5000/training_args.bin +3 -0
  49. run-0/checkpoint-5000/vocab.txt +0 -0
  50. run-1/checkpoint-1000/config.json +41 -0
README.md CHANGED
@@ -22,7 +22,7 @@ model-index:
22
  metrics:
23
  - name: Accuracy
24
  type: accuracy
25
- value: 0.944
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -32,8 +32,8 @@ should probably proofread and complete it, then remove this comment. -->
32
 
33
  This model is a fine-tuned version of [distilbert-base-uncased](https://huggingface.co/distilbert-base-uncased) on the emotion dataset.
34
  It achieves the following results on the evaluation set:
35
- - Loss: 0.1774
36
- - Accuracy: 0.944
37
 
38
  ## Model description
39
 
@@ -52,21 +52,23 @@ More information needed
52
  ### Training hyperparameters
53
 
54
  The following hyperparameters were used during training:
55
- - learning_rate: 2e-05
56
- - train_batch_size: 8
57
  - eval_batch_size: 8
58
- - seed: 42
59
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
60
  - lr_scheduler_type: linear
61
- - num_epochs: 3
62
 
63
  ### Training results
64
 
65
  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
66
  |:-------------:|:-----:|:----:|:---------------:|:--------:|
67
- | 0.2605 | 1.0 | 2000 | 0.2376 | 0.93 |
68
- | 0.1559 | 2.0 | 4000 | 0.1909 | 0.936 |
69
- | 0.1022 | 3.0 | 6000 | 0.1774 | 0.944 |
 
 
70
 
71
 
72
  ### Framework versions
 
22
  metrics:
23
  - name: Accuracy
24
  type: accuracy
25
+ value: 0.921
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
32
 
33
  This model is a fine-tuned version of [distilbert-base-uncased](https://huggingface.co/distilbert-base-uncased) on the emotion dataset.
34
  It achieves the following results on the evaluation set:
35
+ - Loss: 0.2046
36
+ - Accuracy: 0.921
37
 
38
  ## Model description
39
 
 
52
  ### Training hyperparameters
53
 
54
  The following hyperparameters were used during training:
55
+ - learning_rate: 3.507837996446784e-06
56
+ - train_batch_size: 16
57
  - eval_batch_size: 8
58
+ - seed: 16
59
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
60
  - lr_scheduler_type: linear
61
+ - num_epochs: 5
62
 
63
  ### Training results
64
 
65
  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
66
  |:-------------:|:-----:|:----:|:---------------:|:--------:|
67
+ | 0.8349 | 1.0 | 1000 | 0.6184 | 0.7905 |
68
+ | 0.384 | 2.0 | 2000 | 0.3057 | 0.909 |
69
+ | 0.2544 | 3.0 | 3000 | 0.2316 | 0.926 |
70
+ | 0.2027 | 4.0 | 4000 | 0.2088 | 0.928 |
71
+ | 0.1757 | 5.0 | 5000 | 0.2030 | 0.9295 |
72
 
73
 
74
  ### Framework versions
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:92ae041864a7bce41a596cfef38707c5d3c31f362722cdd30190b65b86e33842
3
  size 267844872
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5d704cc9862ddf1d8d932f63fd2a04f445025136eada995440f6c63cbfceb69
3
  size 267844872
run-0/checkpoint-1000/config.json CHANGED
@@ -36,6 +36,6 @@
36
  "sinusoidal_pos_embds": false,
37
  "tie_weights_": true,
38
  "torch_dtype": "float32",
39
- "transformers_version": "4.38.1",
40
  "vocab_size": 30522
41
  }
 
36
  "sinusoidal_pos_embds": false,
37
  "tie_weights_": true,
38
  "torch_dtype": "float32",
39
+ "transformers_version": "4.38.2",
40
  "vocab_size": 30522
41
  }
run-0/checkpoint-1000/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b6f714d0b136162061a284a20cfdda6222efc2bf27857f5630f85ab7599d93f8
3
  size 267844872
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:747beece75f8af284340a68dea598af6266f80094e18a38e1f3f96cba906ffa3
3
  size 267844872
run-0/checkpoint-1000/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f0f32cfc0ad23d420d6d61f12b69ec49a371cc1e10726edbfdce43d3f6831f54
3
  size 535751866
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3d9524173dfc250a400cf3b08492b825804443aec9b18a05a31588844131d30
3
  size 535751866
run-0/checkpoint-1000/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8148aef41ba09d2856c03fa103eb3ba16dcc1ed9b2086510cb930a647a48c4ba
3
- size 14308
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af50b4fd0da5321ee737e722708435e6858a8884fd52e3e2ba2c6e8ee695d8ea
3
+ size 14244
run-0/checkpoint-1000/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d4f8103da0f4bfa98cebe625453e24cafb981ea2e82d0ea5928d01c6a1e0a51f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a7cd9a1052aaeb3908d09e7d68dd345ad3d9621cae3dcbe46c5a456a8e29f89
3
  size 1064
run-0/checkpoint-1000/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.1864173263311386,
3
  "best_model_checkpoint": "./results/run-0/checkpoint-1000",
4
  "epoch": 1.0,
5
  "eval_steps": 500,
@@ -10,40 +10,40 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.5,
13
- "grad_norm": 2.885716676712036,
14
- "learning_rate": 3.732667213318159e-05,
15
- "loss": 0.5811,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 1.0,
20
- "grad_norm": 45.26211166381836,
21
- "learning_rate": 2.986133770654527e-05,
22
- "loss": 0.2295,
23
  "step": 1000
24
  },
25
  {
26
  "epoch": 1.0,
27
- "eval_accuracy": 0.9345,
28
- "eval_loss": 0.1864173263311386,
29
- "eval_runtime": 33.637,
30
- "eval_samples_per_second": 59.458,
31
- "eval_steps_per_second": 3.716,
32
  "step": 1000
33
  }
34
  ],
35
  "logging_steps": 500,
36
- "max_steps": 3000,
37
  "num_input_tokens_seen": 0,
38
- "num_train_epochs": 3,
39
  "save_steps": 500,
40
  "total_flos": 2119629570048000.0,
41
  "train_batch_size": 16,
42
  "trial_name": null,
43
  "trial_params": {
44
- "learning_rate": 4.479200655981791e-05,
45
- "num_train_epochs": 3,
46
  "per_device_train_batch_size": 16,
47
- "seed": 13
48
  }
49
  }
 
1
  {
2
+ "best_metric": 0.6183840036392212,
3
  "best_model_checkpoint": "./results/run-0/checkpoint-1000",
4
  "epoch": 1.0,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.5,
13
+ "grad_norm": 4.100419521331787,
14
+ "learning_rate": 3.1570541968021056e-06,
15
+ "loss": 1.3549,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "grad_norm": 11.960673332214355,
21
+ "learning_rate": 2.8062703971574272e-06,
22
+ "loss": 0.8349,
23
  "step": 1000
24
  },
25
  {
26
  "epoch": 1.0,
27
+ "eval_accuracy": 0.7905,
28
+ "eval_loss": 0.6183840036392212,
29
+ "eval_runtime": 9.0459,
30
+ "eval_samples_per_second": 221.096,
31
+ "eval_steps_per_second": 27.637,
32
  "step": 1000
33
  }
34
  ],
35
  "logging_steps": 500,
36
+ "max_steps": 5000,
37
  "num_input_tokens_seen": 0,
38
+ "num_train_epochs": 5,
39
  "save_steps": 500,
40
  "total_flos": 2119629570048000.0,
41
  "train_batch_size": 16,
42
  "trial_name": null,
43
  "trial_params": {
44
+ "learning_rate": 3.507837996446784e-06,
45
+ "num_train_epochs": 5,
46
  "per_device_train_batch_size": 16,
47
+ "seed": 16
48
  }
49
  }
run-0/checkpoint-1000/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1bf14954d29b1ae5aea2c382af10f5eb08277fe38bdff48b601d514f3dd3c259
3
  size 4920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91f8f40dfcf0b2c79af71df18fa68d8aea743148ae225d30899abc38be627e74
3
  size 4920
run-0/checkpoint-2000/config.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "distilbert-base-uncased",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertForSequenceClassification"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "id2label": {
12
+ "0": "LABEL_0",
13
+ "1": "LABEL_1",
14
+ "2": "LABEL_2",
15
+ "3": "LABEL_3",
16
+ "4": "LABEL_4",
17
+ "5": "LABEL_5"
18
+ },
19
+ "initializer_range": 0.02,
20
+ "label2id": {
21
+ "LABEL_0": 0,
22
+ "LABEL_1": 1,
23
+ "LABEL_2": 2,
24
+ "LABEL_3": 3,
25
+ "LABEL_4": 4,
26
+ "LABEL_5": 5
27
+ },
28
+ "max_position_embeddings": 512,
29
+ "model_type": "distilbert",
30
+ "n_heads": 12,
31
+ "n_layers": 6,
32
+ "pad_token_id": 0,
33
+ "problem_type": "single_label_classification",
34
+ "qa_dropout": 0.1,
35
+ "seq_classif_dropout": 0.2,
36
+ "sinusoidal_pos_embds": false,
37
+ "tie_weights_": true,
38
+ "torch_dtype": "float32",
39
+ "transformers_version": "4.38.2",
40
+ "vocab_size": 30522
41
+ }
run-0/checkpoint-2000/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53a7d9aa3ca94fe6dd893ffdf9f7717e7304e06d21be4def3d022e9633d5409b
3
+ size 267844872
run-0/checkpoint-2000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4dd58e88842ee66f117e257dd68bfa45836e260640fe05797b79f28b5420073
3
+ size 535751866
run-0/checkpoint-2000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fb9afe5983bb41189c2d9b014c7655c37f0b479811b0d4d8c2502121dbbd3d5
3
+ size 14244
run-0/checkpoint-2000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4dce918e9a0d2e7e7af8db106a5648e11b498998cb964aafb1d95603b9176eb7
3
+ size 1064
run-0/checkpoint-2000/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
run-0/checkpoint-2000/tokenizer_config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "never_split": null,
51
+ "pad_token": "[PAD]",
52
+ "sep_token": "[SEP]",
53
+ "strip_accents": null,
54
+ "tokenize_chinese_chars": true,
55
+ "tokenizer_class": "DistilBertTokenizer",
56
+ "unk_token": "[UNK]"
57
+ }
run-0/checkpoint-2000/trainer_state.json ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.30574527382850647,
3
+ "best_model_checkpoint": "./results/run-0/checkpoint-2000",
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 2000,
7
+ "is_hyper_param_search": true,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.5,
13
+ "grad_norm": 4.100419521331787,
14
+ "learning_rate": 3.1570541968021056e-06,
15
+ "loss": 1.3549,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 1.0,
20
+ "grad_norm": 11.960673332214355,
21
+ "learning_rate": 2.8062703971574272e-06,
22
+ "loss": 0.8349,
23
+ "step": 1000
24
+ },
25
+ {
26
+ "epoch": 1.0,
27
+ "eval_accuracy": 0.7905,
28
+ "eval_loss": 0.6183840036392212,
29
+ "eval_runtime": 9.0459,
30
+ "eval_samples_per_second": 221.096,
31
+ "eval_steps_per_second": 27.637,
32
+ "step": 1000
33
+ },
34
+ {
35
+ "epoch": 1.5,
36
+ "grad_norm": 5.748434543609619,
37
+ "learning_rate": 2.4554865975127484e-06,
38
+ "loss": 0.5184,
39
+ "step": 1500
40
+ },
41
+ {
42
+ "epoch": 2.0,
43
+ "grad_norm": 22.25712776184082,
44
+ "learning_rate": 2.10470279786807e-06,
45
+ "loss": 0.384,
46
+ "step": 2000
47
+ },
48
+ {
49
+ "epoch": 2.0,
50
+ "eval_accuracy": 0.909,
51
+ "eval_loss": 0.30574527382850647,
52
+ "eval_runtime": 9.0588,
53
+ "eval_samples_per_second": 220.779,
54
+ "eval_steps_per_second": 27.597,
55
+ "step": 2000
56
+ }
57
+ ],
58
+ "logging_steps": 500,
59
+ "max_steps": 5000,
60
+ "num_input_tokens_seen": 0,
61
+ "num_train_epochs": 5,
62
+ "save_steps": 500,
63
+ "total_flos": 4239259140096000.0,
64
+ "train_batch_size": 16,
65
+ "trial_name": null,
66
+ "trial_params": {
67
+ "learning_rate": 3.507837996446784e-06,
68
+ "num_train_epochs": 5,
69
+ "per_device_train_batch_size": 16,
70
+ "seed": 16
71
+ }
72
+ }
run-0/checkpoint-2000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91f8f40dfcf0b2c79af71df18fa68d8aea743148ae225d30899abc38be627e74
3
+ size 4920
run-0/checkpoint-2000/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
run-0/checkpoint-3000/config.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "distilbert-base-uncased",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertForSequenceClassification"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "id2label": {
12
+ "0": "LABEL_0",
13
+ "1": "LABEL_1",
14
+ "2": "LABEL_2",
15
+ "3": "LABEL_3",
16
+ "4": "LABEL_4",
17
+ "5": "LABEL_5"
18
+ },
19
+ "initializer_range": 0.02,
20
+ "label2id": {
21
+ "LABEL_0": 0,
22
+ "LABEL_1": 1,
23
+ "LABEL_2": 2,
24
+ "LABEL_3": 3,
25
+ "LABEL_4": 4,
26
+ "LABEL_5": 5
27
+ },
28
+ "max_position_embeddings": 512,
29
+ "model_type": "distilbert",
30
+ "n_heads": 12,
31
+ "n_layers": 6,
32
+ "pad_token_id": 0,
33
+ "problem_type": "single_label_classification",
34
+ "qa_dropout": 0.1,
35
+ "seq_classif_dropout": 0.2,
36
+ "sinusoidal_pos_embds": false,
37
+ "tie_weights_": true,
38
+ "torch_dtype": "float32",
39
+ "transformers_version": "4.38.2",
40
+ "vocab_size": 30522
41
+ }
run-0/checkpoint-3000/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48b15a33652cc1fae58dd5ee8cd21a9e9d21c610dd089b9aac5885025b66c670
3
+ size 267844872
run-0/checkpoint-3000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd844e763d33d7d4713949e397c3f28ace287c699e05264ec2b04a093300a488
3
+ size 535751866
run-0/checkpoint-3000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e48587ce948f135b2307e71cc2ebb47fcf96a8b09e28fac4a40913827a1941c7
3
+ size 14244
run-0/checkpoint-3000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1eb477b2218e2219e9c3c790828b11022ce9659f22be28dbfbf8680de93d2aa
3
+ size 1064
run-0/checkpoint-3000/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
run-0/checkpoint-3000/tokenizer_config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "never_split": null,
51
+ "pad_token": "[PAD]",
52
+ "sep_token": "[SEP]",
53
+ "strip_accents": null,
54
+ "tokenize_chinese_chars": true,
55
+ "tokenizer_class": "DistilBertTokenizer",
56
+ "unk_token": "[UNK]"
57
+ }
run-0/checkpoint-3000/trainer_state.json ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.23158574104309082,
3
+ "best_model_checkpoint": "./results/run-0/checkpoint-3000",
4
+ "epoch": 3.0,
5
+ "eval_steps": 500,
6
+ "global_step": 3000,
7
+ "is_hyper_param_search": true,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.5,
13
+ "grad_norm": 4.100419521331787,
14
+ "learning_rate": 3.1570541968021056e-06,
15
+ "loss": 1.3549,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 1.0,
20
+ "grad_norm": 11.960673332214355,
21
+ "learning_rate": 2.8062703971574272e-06,
22
+ "loss": 0.8349,
23
+ "step": 1000
24
+ },
25
+ {
26
+ "epoch": 1.0,
27
+ "eval_accuracy": 0.7905,
28
+ "eval_loss": 0.6183840036392212,
29
+ "eval_runtime": 9.0459,
30
+ "eval_samples_per_second": 221.096,
31
+ "eval_steps_per_second": 27.637,
32
+ "step": 1000
33
+ },
34
+ {
35
+ "epoch": 1.5,
36
+ "grad_norm": 5.748434543609619,
37
+ "learning_rate": 2.4554865975127484e-06,
38
+ "loss": 0.5184,
39
+ "step": 1500
40
+ },
41
+ {
42
+ "epoch": 2.0,
43
+ "grad_norm": 22.25712776184082,
44
+ "learning_rate": 2.10470279786807e-06,
45
+ "loss": 0.384,
46
+ "step": 2000
47
+ },
48
+ {
49
+ "epoch": 2.0,
50
+ "eval_accuracy": 0.909,
51
+ "eval_loss": 0.30574527382850647,
52
+ "eval_runtime": 9.0588,
53
+ "eval_samples_per_second": 220.779,
54
+ "eval_steps_per_second": 27.597,
55
+ "step": 2000
56
+ },
57
+ {
58
+ "epoch": 2.5,
59
+ "grad_norm": 5.657580852508545,
60
+ "learning_rate": 1.753918998223392e-06,
61
+ "loss": 0.2753,
62
+ "step": 2500
63
+ },
64
+ {
65
+ "epoch": 3.0,
66
+ "grad_norm": 8.047184944152832,
67
+ "learning_rate": 1.4031351985787136e-06,
68
+ "loss": 0.2544,
69
+ "step": 3000
70
+ },
71
+ {
72
+ "epoch": 3.0,
73
+ "eval_accuracy": 0.926,
74
+ "eval_loss": 0.23158574104309082,
75
+ "eval_runtime": 9.085,
76
+ "eval_samples_per_second": 220.144,
77
+ "eval_steps_per_second": 27.518,
78
+ "step": 3000
79
+ }
80
+ ],
81
+ "logging_steps": 500,
82
+ "max_steps": 5000,
83
+ "num_input_tokens_seen": 0,
84
+ "num_train_epochs": 5,
85
+ "save_steps": 500,
86
+ "total_flos": 6358888710144000.0,
87
+ "train_batch_size": 16,
88
+ "trial_name": null,
89
+ "trial_params": {
90
+ "learning_rate": 3.507837996446784e-06,
91
+ "num_train_epochs": 5,
92
+ "per_device_train_batch_size": 16,
93
+ "seed": 16
94
+ }
95
+ }
run-0/checkpoint-3000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91f8f40dfcf0b2c79af71df18fa68d8aea743148ae225d30899abc38be627e74
3
+ size 4920
run-0/checkpoint-3000/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
run-0/checkpoint-4000/config.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "distilbert-base-uncased",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertForSequenceClassification"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "id2label": {
12
+ "0": "LABEL_0",
13
+ "1": "LABEL_1",
14
+ "2": "LABEL_2",
15
+ "3": "LABEL_3",
16
+ "4": "LABEL_4",
17
+ "5": "LABEL_5"
18
+ },
19
+ "initializer_range": 0.02,
20
+ "label2id": {
21
+ "LABEL_0": 0,
22
+ "LABEL_1": 1,
23
+ "LABEL_2": 2,
24
+ "LABEL_3": 3,
25
+ "LABEL_4": 4,
26
+ "LABEL_5": 5
27
+ },
28
+ "max_position_embeddings": 512,
29
+ "model_type": "distilbert",
30
+ "n_heads": 12,
31
+ "n_layers": 6,
32
+ "pad_token_id": 0,
33
+ "problem_type": "single_label_classification",
34
+ "qa_dropout": 0.1,
35
+ "seq_classif_dropout": 0.2,
36
+ "sinusoidal_pos_embds": false,
37
+ "tie_weights_": true,
38
+ "torch_dtype": "float32",
39
+ "transformers_version": "4.38.2",
40
+ "vocab_size": 30522
41
+ }
run-0/checkpoint-4000/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b521f6a9f4286c6b3d96b51d9404fb66778d34e231a0e2bc965c1d423e834bb
3
+ size 267844872
run-0/checkpoint-4000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b096cb1b39e534de82f466c7a34c699c36212691c0232fa002e4f7539b25ef0b
3
+ size 535751866
run-0/checkpoint-4000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0771581525ea94df74fc1ffc6d9a9a7e1f11965bb3edc1e62727de238162859
3
+ size 14244
run-0/checkpoint-4000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b8d19b6744e771b4a9530f4add0974eda3dd465a7159708dd098be29e2ce042
3
+ size 1064
run-0/checkpoint-4000/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
run-0/checkpoint-4000/tokenizer_config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "never_split": null,
51
+ "pad_token": "[PAD]",
52
+ "sep_token": "[SEP]",
53
+ "strip_accents": null,
54
+ "tokenize_chinese_chars": true,
55
+ "tokenizer_class": "DistilBertTokenizer",
56
+ "unk_token": "[UNK]"
57
+ }
run-0/checkpoint-4000/trainer_state.json ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.20877034962177277,
3
+ "best_model_checkpoint": "./results/run-0/checkpoint-4000",
4
+ "epoch": 4.0,
5
+ "eval_steps": 500,
6
+ "global_step": 4000,
7
+ "is_hyper_param_search": true,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.5,
13
+ "grad_norm": 4.100419521331787,
14
+ "learning_rate": 3.1570541968021056e-06,
15
+ "loss": 1.3549,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 1.0,
20
+ "grad_norm": 11.960673332214355,
21
+ "learning_rate": 2.8062703971574272e-06,
22
+ "loss": 0.8349,
23
+ "step": 1000
24
+ },
25
+ {
26
+ "epoch": 1.0,
27
+ "eval_accuracy": 0.7905,
28
+ "eval_loss": 0.6183840036392212,
29
+ "eval_runtime": 9.0459,
30
+ "eval_samples_per_second": 221.096,
31
+ "eval_steps_per_second": 27.637,
32
+ "step": 1000
33
+ },
34
+ {
35
+ "epoch": 1.5,
36
+ "grad_norm": 5.748434543609619,
37
+ "learning_rate": 2.4554865975127484e-06,
38
+ "loss": 0.5184,
39
+ "step": 1500
40
+ },
41
+ {
42
+ "epoch": 2.0,
43
+ "grad_norm": 22.25712776184082,
44
+ "learning_rate": 2.10470279786807e-06,
45
+ "loss": 0.384,
46
+ "step": 2000
47
+ },
48
+ {
49
+ "epoch": 2.0,
50
+ "eval_accuracy": 0.909,
51
+ "eval_loss": 0.30574527382850647,
52
+ "eval_runtime": 9.0588,
53
+ "eval_samples_per_second": 220.779,
54
+ "eval_steps_per_second": 27.597,
55
+ "step": 2000
56
+ },
57
+ {
58
+ "epoch": 2.5,
59
+ "grad_norm": 5.657580852508545,
60
+ "learning_rate": 1.753918998223392e-06,
61
+ "loss": 0.2753,
62
+ "step": 2500
63
+ },
64
+ {
65
+ "epoch": 3.0,
66
+ "grad_norm": 8.047184944152832,
67
+ "learning_rate": 1.4031351985787136e-06,
68
+ "loss": 0.2544,
69
+ "step": 3000
70
+ },
71
+ {
72
+ "epoch": 3.0,
73
+ "eval_accuracy": 0.926,
74
+ "eval_loss": 0.23158574104309082,
75
+ "eval_runtime": 9.085,
76
+ "eval_samples_per_second": 220.144,
77
+ "eval_steps_per_second": 27.518,
78
+ "step": 3000
79
+ },
80
+ {
81
+ "epoch": 3.5,
82
+ "grad_norm": 8.62791919708252,
83
+ "learning_rate": 1.052351398934035e-06,
84
+ "loss": 0.2115,
85
+ "step": 3500
86
+ },
87
+ {
88
+ "epoch": 4.0,
89
+ "grad_norm": 3.2406601905822754,
90
+ "learning_rate": 7.015675992893568e-07,
91
+ "loss": 0.2027,
92
+ "step": 4000
93
+ },
94
+ {
95
+ "epoch": 4.0,
96
+ "eval_accuracy": 0.928,
97
+ "eval_loss": 0.20877034962177277,
98
+ "eval_runtime": 9.0519,
99
+ "eval_samples_per_second": 220.949,
100
+ "eval_steps_per_second": 27.619,
101
+ "step": 4000
102
+ }
103
+ ],
104
+ "logging_steps": 500,
105
+ "max_steps": 5000,
106
+ "num_input_tokens_seen": 0,
107
+ "num_train_epochs": 5,
108
+ "save_steps": 500,
109
+ "total_flos": 8478518280192000.0,
110
+ "train_batch_size": 16,
111
+ "trial_name": null,
112
+ "trial_params": {
113
+ "learning_rate": 3.507837996446784e-06,
114
+ "num_train_epochs": 5,
115
+ "per_device_train_batch_size": 16,
116
+ "seed": 16
117
+ }
118
+ }
run-0/checkpoint-4000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91f8f40dfcf0b2c79af71df18fa68d8aea743148ae225d30899abc38be627e74
3
+ size 4920
run-0/checkpoint-4000/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
run-0/checkpoint-5000/config.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "distilbert-base-uncased",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertForSequenceClassification"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "id2label": {
12
+ "0": "LABEL_0",
13
+ "1": "LABEL_1",
14
+ "2": "LABEL_2",
15
+ "3": "LABEL_3",
16
+ "4": "LABEL_4",
17
+ "5": "LABEL_5"
18
+ },
19
+ "initializer_range": 0.02,
20
+ "label2id": {
21
+ "LABEL_0": 0,
22
+ "LABEL_1": 1,
23
+ "LABEL_2": 2,
24
+ "LABEL_3": 3,
25
+ "LABEL_4": 4,
26
+ "LABEL_5": 5
27
+ },
28
+ "max_position_embeddings": 512,
29
+ "model_type": "distilbert",
30
+ "n_heads": 12,
31
+ "n_layers": 6,
32
+ "pad_token_id": 0,
33
+ "problem_type": "single_label_classification",
34
+ "qa_dropout": 0.1,
35
+ "seq_classif_dropout": 0.2,
36
+ "sinusoidal_pos_embds": false,
37
+ "tie_weights_": true,
38
+ "torch_dtype": "float32",
39
+ "transformers_version": "4.38.2",
40
+ "vocab_size": 30522
41
+ }
run-0/checkpoint-5000/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5d704cc9862ddf1d8d932f63fd2a04f445025136eada995440f6c63cbfceb69
3
+ size 267844872
run-0/checkpoint-5000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:784b996526526a5cd8fc3a55c22a62714f8a0d67fff050c0fda322b798aedf39
3
+ size 535751866
run-0/checkpoint-5000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08ece59d05ca6ae22d9a98b2fa3a8a580231dea90e91aa2ae7692f6d0d637a0f
3
+ size 14244
run-0/checkpoint-5000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9d0139089d40e481dc003ac329dab0c85f60dcb6590bff943ed1972b51e69c4
3
+ size 1064
run-0/checkpoint-5000/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
run-0/checkpoint-5000/tokenizer_config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "never_split": null,
51
+ "pad_token": "[PAD]",
52
+ "sep_token": "[SEP]",
53
+ "strip_accents": null,
54
+ "tokenize_chinese_chars": true,
55
+ "tokenizer_class": "DistilBertTokenizer",
56
+ "unk_token": "[UNK]"
57
+ }
run-0/checkpoint-5000/trainer_state.json ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.20295538008213043,
3
+ "best_model_checkpoint": "./results/run-0/checkpoint-5000",
4
+ "epoch": 5.0,
5
+ "eval_steps": 500,
6
+ "global_step": 5000,
7
+ "is_hyper_param_search": true,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.5,
13
+ "grad_norm": 4.100419521331787,
14
+ "learning_rate": 3.1570541968021056e-06,
15
+ "loss": 1.3549,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 1.0,
20
+ "grad_norm": 11.960673332214355,
21
+ "learning_rate": 2.8062703971574272e-06,
22
+ "loss": 0.8349,
23
+ "step": 1000
24
+ },
25
+ {
26
+ "epoch": 1.0,
27
+ "eval_accuracy": 0.7905,
28
+ "eval_loss": 0.6183840036392212,
29
+ "eval_runtime": 9.0459,
30
+ "eval_samples_per_second": 221.096,
31
+ "eval_steps_per_second": 27.637,
32
+ "step": 1000
33
+ },
34
+ {
35
+ "epoch": 1.5,
36
+ "grad_norm": 5.748434543609619,
37
+ "learning_rate": 2.4554865975127484e-06,
38
+ "loss": 0.5184,
39
+ "step": 1500
40
+ },
41
+ {
42
+ "epoch": 2.0,
43
+ "grad_norm": 22.25712776184082,
44
+ "learning_rate": 2.10470279786807e-06,
45
+ "loss": 0.384,
46
+ "step": 2000
47
+ },
48
+ {
49
+ "epoch": 2.0,
50
+ "eval_accuracy": 0.909,
51
+ "eval_loss": 0.30574527382850647,
52
+ "eval_runtime": 9.0588,
53
+ "eval_samples_per_second": 220.779,
54
+ "eval_steps_per_second": 27.597,
55
+ "step": 2000
56
+ },
57
+ {
58
+ "epoch": 2.5,
59
+ "grad_norm": 5.657580852508545,
60
+ "learning_rate": 1.753918998223392e-06,
61
+ "loss": 0.2753,
62
+ "step": 2500
63
+ },
64
+ {
65
+ "epoch": 3.0,
66
+ "grad_norm": 8.047184944152832,
67
+ "learning_rate": 1.4031351985787136e-06,
68
+ "loss": 0.2544,
69
+ "step": 3000
70
+ },
71
+ {
72
+ "epoch": 3.0,
73
+ "eval_accuracy": 0.926,
74
+ "eval_loss": 0.23158574104309082,
75
+ "eval_runtime": 9.085,
76
+ "eval_samples_per_second": 220.144,
77
+ "eval_steps_per_second": 27.518,
78
+ "step": 3000
79
+ },
80
+ {
81
+ "epoch": 3.5,
82
+ "grad_norm": 8.62791919708252,
83
+ "learning_rate": 1.052351398934035e-06,
84
+ "loss": 0.2115,
85
+ "step": 3500
86
+ },
87
+ {
88
+ "epoch": 4.0,
89
+ "grad_norm": 3.2406601905822754,
90
+ "learning_rate": 7.015675992893568e-07,
91
+ "loss": 0.2027,
92
+ "step": 4000
93
+ },
94
+ {
95
+ "epoch": 4.0,
96
+ "eval_accuracy": 0.928,
97
+ "eval_loss": 0.20877034962177277,
98
+ "eval_runtime": 9.0519,
99
+ "eval_samples_per_second": 220.949,
100
+ "eval_steps_per_second": 27.619,
101
+ "step": 4000
102
+ },
103
+ {
104
+ "epoch": 4.5,
105
+ "grad_norm": 2.1353747844696045,
106
+ "learning_rate": 3.507837996446784e-07,
107
+ "loss": 0.1858,
108
+ "step": 4500
109
+ },
110
+ {
111
+ "epoch": 5.0,
112
+ "grad_norm": 15.034784317016602,
113
+ "learning_rate": 0.0,
114
+ "loss": 0.1757,
115
+ "step": 5000
116
+ },
117
+ {
118
+ "epoch": 5.0,
119
+ "eval_accuracy": 0.9295,
120
+ "eval_loss": 0.20295538008213043,
121
+ "eval_runtime": 9.0539,
122
+ "eval_samples_per_second": 220.9,
123
+ "eval_steps_per_second": 27.612,
124
+ "step": 5000
125
+ }
126
+ ],
127
+ "logging_steps": 500,
128
+ "max_steps": 5000,
129
+ "num_input_tokens_seen": 0,
130
+ "num_train_epochs": 5,
131
+ "save_steps": 500,
132
+ "total_flos": 1.059814785024e+16,
133
+ "train_batch_size": 16,
134
+ "trial_name": null,
135
+ "trial_params": {
136
+ "learning_rate": 3.507837996446784e-06,
137
+ "num_train_epochs": 5,
138
+ "per_device_train_batch_size": 16,
139
+ "seed": 16
140
+ }
141
+ }
run-0/checkpoint-5000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91f8f40dfcf0b2c79af71df18fa68d8aea743148ae225d30899abc38be627e74
3
+ size 4920
run-0/checkpoint-5000/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
run-1/checkpoint-1000/config.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "distilbert-base-uncased",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertForSequenceClassification"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "id2label": {
12
+ "0": "LABEL_0",
13
+ "1": "LABEL_1",
14
+ "2": "LABEL_2",
15
+ "3": "LABEL_3",
16
+ "4": "LABEL_4",
17
+ "5": "LABEL_5"
18
+ },
19
+ "initializer_range": 0.02,
20
+ "label2id": {
21
+ "LABEL_0": 0,
22
+ "LABEL_1": 1,
23
+ "LABEL_2": 2,
24
+ "LABEL_3": 3,
25
+ "LABEL_4": 4,
26
+ "LABEL_5": 5
27
+ },
28
+ "max_position_embeddings": 512,
29
+ "model_type": "distilbert",
30
+ "n_heads": 12,
31
+ "n_layers": 6,
32
+ "pad_token_id": 0,
33
+ "problem_type": "single_label_classification",
34
+ "qa_dropout": 0.1,
35
+ "seq_classif_dropout": 0.2,
36
+ "sinusoidal_pos_embds": false,
37
+ "tie_weights_": true,
38
+ "torch_dtype": "float32",
39
+ "transformers_version": "4.38.2",
40
+ "vocab_size": 30522
41
+ }