LiYuan199701 committed on
Commit
f26777c
1 Parent(s): abf4c09

Add model weights and configurations

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. README.md +58 -1
  2. checkpoint-13/config.json +37 -0
  3. checkpoint-13/optimizer.pt +3 -0
  4. checkpoint-13/pytorch_model.bin +3 -0
  5. checkpoint-13/rng_state.pth +3 -0
  6. checkpoint-13/scheduler.pt +3 -0
  7. checkpoint-13/special_tokens_map.json +1 -0
  8. checkpoint-13/tokenizer.json +0 -0
  9. checkpoint-13/tokenizer_config.json +1 -0
  10. checkpoint-13/trainer_state.json +25 -0
  11. checkpoint-13/training_args.bin +3 -0
  12. checkpoint-13/vocab.txt +0 -0
  13. checkpoint-26/config.json +37 -0
  14. checkpoint-26/optimizer.pt +3 -0
  15. checkpoint-26/pytorch_model.bin +3 -0
  16. checkpoint-26/rng_state.pth +3 -0
  17. checkpoint-26/scheduler.pt +3 -0
  18. checkpoint-26/special_tokens_map.json +1 -0
  19. checkpoint-26/tokenizer.json +0 -0
  20. checkpoint-26/tokenizer_config.json +1 -0
  21. checkpoint-26/trainer_state.json +34 -0
  22. checkpoint-26/training_args.bin +3 -0
  23. checkpoint-26/vocab.txt +0 -0
  24. checkpoint-35702/config.json +37 -0
  25. checkpoint-35702/optimizer.pt +3 -0
  26. checkpoint-35702/pytorch_model.bin +3 -0
  27. checkpoint-35702/rng_state.pth +3 -0
  28. checkpoint-35702/scheduler.pt +3 -0
  29. checkpoint-35702/special_tokens_map.json +1 -0
  30. checkpoint-35702/tokenizer.json +0 -0
  31. checkpoint-35702/tokenizer_config.json +1 -0
  32. checkpoint-35702/trainer_state.json +451 -0
  33. checkpoint-35702/training_args.bin +3 -0
  34. checkpoint-35702/vocab.txt +0 -0
  35. checkpoint-71404/config.json +37 -0
  36. checkpoint-71404/optimizer.pt +3 -0
  37. checkpoint-71404/pytorch_model.bin +3 -0
  38. checkpoint-71404/rng_state.pth +3 -0
  39. checkpoint-71404/scheduler.pt +3 -0
  40. checkpoint-71404/special_tokens_map.json +1 -0
  41. checkpoint-71404/tokenizer.json +0 -0
  42. checkpoint-71404/tokenizer_config.json +1 -0
  43. checkpoint-71404/trainer_state.json +886 -0
  44. checkpoint-71404/training_args.bin +3 -0
  45. checkpoint-71404/vocab.txt +0 -0
  46. config.json +37 -0
  47. pytorch_model.bin +3 -0
  48. runs/Apr27_04-29-22_a457e5c667c8/1651033831.5069559/events.out.tfevents.1651033831.a457e5c667c8.98.1 +3 -0
  49. runs/Apr27_04-29-22_a457e5c667c8/events.out.tfevents.1651033831.a457e5c667c8.98.0 +3 -0
  50. runs/Apr27_04-29-22_a457e5c667c8/events.out.tfevents.1651033873.a457e5c667c8.98.2 +3 -0
README.md CHANGED
@@ -1,3 +1,60 @@
1
  ---
2
- license: afl-3.0
 
 
 
 
 
 
 
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ license: apache-2.0
3
+ tags:
4
+ - generated_from_trainer
5
+ metrics:
6
+ - accuracy
7
+ model-index:
8
+ - name: distilbert-base-uncased-finetuned-mnli
9
+ results: []
10
  ---
11
+
12
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
13
+ should probably proofread and complete it, then remove this comment. -->
14
+
15
+ # distilbert-base-uncased-finetuned-mnli
16
+
17
+ This model is a fine-tuned version of [distilbert-base-uncased](https://huggingface.co/distilbert-base-uncased) on an unknown dataset.
18
+ It achieves the following results on the evaluation set:
19
+ - Loss: 0.8244
20
+ - Accuracy: 0.6617
21
+
22
+ ## Model description
23
+
24
+ More information needed
25
+
26
+ ## Intended uses & limitations
27
+
28
+ More information needed
29
+
30
+ ## Training and evaluation data
31
+
32
+ More information needed
33
+
34
+ ## Training procedure
35
+
36
+ ### Training hyperparameters
37
+
38
+ The following hyperparameters were used during training:
39
+ - learning_rate: 2e-05
40
+ - train_batch_size: 16
41
+ - eval_batch_size: 16
42
+ - seed: 42
43
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
44
+ - lr_scheduler_type: linear
45
+ - num_epochs: 2
46
+
47
+ ### Training results
48
+
49
+ | Training Loss | Epoch | Step | Validation Loss | Accuracy |
50
+ |:-------------:|:-----:|:-----:|:---------------:|:--------:|
51
+ | 0.8981 | 1.0 | 35702 | 0.8662 | 0.6371 |
52
+ | 0.7837 | 2.0 | 71404 | 0.8244 | 0.6617 |
53
+
54
+
55
+ ### Framework versions
56
+
57
+ - Transformers 4.18.0
58
+ - Pytorch 1.11.0+cu113
59
+ - Datasets 2.1.0
60
+ - Tokenizers 0.12.1
checkpoint-13/config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "distilbert-base-uncased",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertForSequenceClassification"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "id2label": {
12
+ "0": "LABEL_0",
13
+ "1": "LABEL_1",
14
+ "2": "LABEL_2",
15
+ "3": "LABEL_3"
16
+ },
17
+ "initializer_range": 0.02,
18
+ "label2id": {
19
+ "LABEL_0": 0,
20
+ "LABEL_1": 1,
21
+ "LABEL_2": 2,
22
+ "LABEL_3": 3
23
+ },
24
+ "max_position_embeddings": 512,
25
+ "model_type": "distilbert",
26
+ "n_heads": 12,
27
+ "n_layers": 6,
28
+ "pad_token_id": 0,
29
+ "problem_type": "single_label_classification",
30
+ "qa_dropout": 0.1,
31
+ "seq_classif_dropout": 0.2,
32
+ "sinusoidal_pos_embds": false,
33
+ "tie_weights_": true,
34
+ "torch_dtype": "float32",
35
+ "transformers_version": "4.18.0",
36
+ "vocab_size": 30522
37
+ }
checkpoint-13/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14ae301c531056906f9e80347151258358682c2853a86f031f89366d11d482dd
3
+ size 535712225
checkpoint-13/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1295509e3e54526b462734fedcaa630d6131909b21fdefa42eb653f1a66b4e15
3
+ size 267860465
checkpoint-13/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3df29cec05354a25dd347170384b4cda8a08e5f4f1885f2b0a7ab07c8ee95598
3
+ size 14503
checkpoint-13/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6e9d89a5bd86901cfea599a980713ac341e0c480f14a074ff38b818780f1dd0
3
+ size 623
checkpoint-13/special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
checkpoint-13/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-13/tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "distilbert-base-uncased", "tokenizer_class": "DistilBertTokenizer"}
checkpoint-13/trainer_state.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.43,
3
+ "best_model_checkpoint": "distilbert-base-uncased-finetuned-mnli/checkpoint-13",
4
+ "epoch": 1.0,
5
+ "global_step": 13,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 1.0,
12
+ "eval_accuracy": 0.43,
13
+ "eval_loss": 1.2553551197052002,
14
+ "eval_runtime": 0.5695,
15
+ "eval_samples_per_second": 175.596,
16
+ "eval_steps_per_second": 12.292,
17
+ "step": 13
18
+ }
19
+ ],
20
+ "max_steps": 26,
21
+ "num_train_epochs": 2,
22
+ "total_flos": 26494424678400.0,
23
+ "trial_name": null,
24
+ "trial_params": null
25
+ }
checkpoint-13/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ddcdf0fab9ae58016832dcbd510e3859fca80f08fc775daf1d3ddd7a9780a83
3
+ size 3119
checkpoint-13/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-26/config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "distilbert-base-uncased",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertForSequenceClassification"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "id2label": {
12
+ "0": "LABEL_0",
13
+ "1": "LABEL_1",
14
+ "2": "LABEL_2",
15
+ "3": "LABEL_3"
16
+ },
17
+ "initializer_range": 0.02,
18
+ "label2id": {
19
+ "LABEL_0": 0,
20
+ "LABEL_1": 1,
21
+ "LABEL_2": 2,
22
+ "LABEL_3": 3
23
+ },
24
+ "max_position_embeddings": 512,
25
+ "model_type": "distilbert",
26
+ "n_heads": 12,
27
+ "n_layers": 6,
28
+ "pad_token_id": 0,
29
+ "problem_type": "single_label_classification",
30
+ "qa_dropout": 0.1,
31
+ "seq_classif_dropout": 0.2,
32
+ "sinusoidal_pos_embds": false,
33
+ "tie_weights_": true,
34
+ "torch_dtype": "float32",
35
+ "transformers_version": "4.18.0",
36
+ "vocab_size": 30522
37
+ }
checkpoint-26/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b015994f612e4137abe25f5c36ce812e91418bac0d8cbfecd0a7306c8e729dfd
3
+ size 535712225
checkpoint-26/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5140ed6f9897893c5e8df5d9d329c7034fea1ea17be33e200bb30eb3776cbc32
3
+ size 267860465
checkpoint-26/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab7107f8f12911d83fd4acf1036eecac8568f6c2547fab3e465074eacb351559
3
+ size 14503
checkpoint-26/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f2b3f8597016bfcda3f90e9c28b7f920e0052487224afc4ffcfbd2e909a612c
3
+ size 623
checkpoint-26/special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
checkpoint-26/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-26/tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "distilbert-base-uncased", "tokenizer_class": "DistilBertTokenizer"}
checkpoint-26/trainer_state.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.43,
3
+ "best_model_checkpoint": "distilbert-base-uncased-finetuned-mnli/checkpoint-13",
4
+ "epoch": 2.0,
5
+ "global_step": 26,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 1.0,
12
+ "eval_accuracy": 0.43,
13
+ "eval_loss": 1.2553551197052002,
14
+ "eval_runtime": 0.5695,
15
+ "eval_samples_per_second": 175.596,
16
+ "eval_steps_per_second": 12.292,
17
+ "step": 13
18
+ },
19
+ {
20
+ "epoch": 2.0,
21
+ "eval_accuracy": 0.43,
22
+ "eval_loss": 1.217254877090454,
23
+ "eval_runtime": 0.58,
24
+ "eval_samples_per_second": 172.399,
25
+ "eval_steps_per_second": 12.068,
26
+ "step": 26
27
+ }
28
+ ],
29
+ "max_steps": 26,
30
+ "num_train_epochs": 2,
31
+ "total_flos": 52988849356800.0,
32
+ "trial_name": null,
33
+ "trial_params": null
34
+ }
checkpoint-26/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ddcdf0fab9ae58016832dcbd510e3859fca80f08fc775daf1d3ddd7a9780a83
3
+ size 3119
checkpoint-26/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-35702/config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "distilbert-base-uncased",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertForSequenceClassification"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "id2label": {
12
+ "0": "LABEL_0",
13
+ "1": "LABEL_1",
14
+ "2": "LABEL_2",
15
+ "3": "LABEL_3"
16
+ },
17
+ "initializer_range": 0.02,
18
+ "label2id": {
19
+ "LABEL_0": 0,
20
+ "LABEL_1": 1,
21
+ "LABEL_2": 2,
22
+ "LABEL_3": 3
23
+ },
24
+ "max_position_embeddings": 512,
25
+ "model_type": "distilbert",
26
+ "n_heads": 12,
27
+ "n_layers": 6,
28
+ "pad_token_id": 0,
29
+ "problem_type": "single_label_classification",
30
+ "qa_dropout": 0.1,
31
+ "seq_classif_dropout": 0.2,
32
+ "sinusoidal_pos_embds": false,
33
+ "tie_weights_": true,
34
+ "torch_dtype": "float32",
35
+ "transformers_version": "4.18.0",
36
+ "vocab_size": 30522
37
+ }
checkpoint-35702/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24ac78d9cfa13f6213586ba2d859e06585978324fb65bfdf9b7689d2a3418d18
3
+ size 535712353
checkpoint-35702/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2dc13f58747c6ddb30f99543ebcaa4357988245f7340388bc036c5d10c0f247f
3
+ size 267860465
checkpoint-35702/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12f91a6243a26d24770ff141db34c77d3f6eae341dd3368689fe09baa941aa85
3
+ size 14503
checkpoint-35702/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9cd5914861a4c4f6623e5081d63c24af822e623e6bfdf278636cf79281eabe56
3
+ size 623
checkpoint-35702/special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
checkpoint-35702/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-35702/tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "distilbert-base-uncased", "tokenizer_class": "DistilBertTokenizer"}
checkpoint-35702/trainer_state.json ADDED
@@ -0,0 +1,451 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.63705,
3
+ "best_model_checkpoint": "distilbert-base-uncased-finetuned-mnli/checkpoint-35702",
4
+ "epoch": 1.0,
5
+ "global_step": 35702,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.01,
12
+ "learning_rate": 1.9859951823427263e-05,
13
+ "loss": 1.1951,
14
+ "step": 500
15
+ },
16
+ {
17
+ "epoch": 0.03,
18
+ "learning_rate": 1.971990364685452e-05,
19
+ "loss": 1.1418,
20
+ "step": 1000
21
+ },
22
+ {
23
+ "epoch": 0.04,
24
+ "learning_rate": 1.9579855470281778e-05,
25
+ "loss": 1.099,
26
+ "step": 1500
27
+ },
28
+ {
29
+ "epoch": 0.06,
30
+ "learning_rate": 1.9439807293709036e-05,
31
+ "loss": 1.0961,
32
+ "step": 2000
33
+ },
34
+ {
35
+ "epoch": 0.07,
36
+ "learning_rate": 1.9299759117136297e-05,
37
+ "loss": 1.0836,
38
+ "step": 2500
39
+ },
40
+ {
41
+ "epoch": 0.08,
42
+ "learning_rate": 1.9159710940563555e-05,
43
+ "loss": 1.0721,
44
+ "step": 3000
45
+ },
46
+ {
47
+ "epoch": 0.1,
48
+ "learning_rate": 1.9019662763990812e-05,
49
+ "loss": 1.0654,
50
+ "step": 3500
51
+ },
52
+ {
53
+ "epoch": 0.11,
54
+ "learning_rate": 1.8879614587418074e-05,
55
+ "loss": 1.0439,
56
+ "step": 4000
57
+ },
58
+ {
59
+ "epoch": 0.13,
60
+ "learning_rate": 1.873956641084533e-05,
61
+ "loss": 1.0453,
62
+ "step": 4500
63
+ },
64
+ {
65
+ "epoch": 0.14,
66
+ "learning_rate": 1.8599518234272592e-05,
67
+ "loss": 1.0399,
68
+ "step": 5000
69
+ },
70
+ {
71
+ "epoch": 0.15,
72
+ "learning_rate": 1.845947005769985e-05,
73
+ "loss": 1.0295,
74
+ "step": 5500
75
+ },
76
+ {
77
+ "epoch": 0.17,
78
+ "learning_rate": 1.8319421881127108e-05,
79
+ "loss": 1.0417,
80
+ "step": 6000
81
+ },
82
+ {
83
+ "epoch": 0.18,
84
+ "learning_rate": 1.817937370455437e-05,
85
+ "loss": 1.0327,
86
+ "step": 6500
87
+ },
88
+ {
89
+ "epoch": 0.2,
90
+ "learning_rate": 1.8039325527981627e-05,
91
+ "loss": 1.0148,
92
+ "step": 7000
93
+ },
94
+ {
95
+ "epoch": 0.21,
96
+ "learning_rate": 1.7899277351408884e-05,
97
+ "loss": 1.0171,
98
+ "step": 7500
99
+ },
100
+ {
101
+ "epoch": 0.22,
102
+ "learning_rate": 1.7759229174836145e-05,
103
+ "loss": 1.0113,
104
+ "step": 8000
105
+ },
106
+ {
107
+ "epoch": 0.24,
108
+ "learning_rate": 1.7619180998263403e-05,
109
+ "loss": 1.0062,
110
+ "step": 8500
111
+ },
112
+ {
113
+ "epoch": 0.25,
114
+ "learning_rate": 1.7479132821690664e-05,
115
+ "loss": 1.0022,
116
+ "step": 9000
117
+ },
118
+ {
119
+ "epoch": 0.27,
120
+ "learning_rate": 1.7339084645117922e-05,
121
+ "loss": 0.9908,
122
+ "step": 9500
123
+ },
124
+ {
125
+ "epoch": 0.28,
126
+ "learning_rate": 1.719903646854518e-05,
127
+ "loss": 0.9919,
128
+ "step": 10000
129
+ },
130
+ {
131
+ "epoch": 0.29,
132
+ "learning_rate": 1.705898829197244e-05,
133
+ "loss": 0.9875,
134
+ "step": 10500
135
+ },
136
+ {
137
+ "epoch": 0.31,
138
+ "learning_rate": 1.69189401153997e-05,
139
+ "loss": 0.9908,
140
+ "step": 11000
141
+ },
142
+ {
143
+ "epoch": 0.32,
144
+ "learning_rate": 1.6778891938826956e-05,
145
+ "loss": 0.9807,
146
+ "step": 11500
147
+ },
148
+ {
149
+ "epoch": 0.34,
150
+ "learning_rate": 1.6638843762254217e-05,
151
+ "loss": 0.9622,
152
+ "step": 12000
153
+ },
154
+ {
155
+ "epoch": 0.35,
156
+ "learning_rate": 1.6498795585681475e-05,
157
+ "loss": 0.9698,
158
+ "step": 12500
159
+ },
160
+ {
161
+ "epoch": 0.36,
162
+ "learning_rate": 1.6358747409108736e-05,
163
+ "loss": 0.9611,
164
+ "step": 13000
165
+ },
166
+ {
167
+ "epoch": 0.38,
168
+ "learning_rate": 1.6218699232535994e-05,
169
+ "loss": 0.9844,
170
+ "step": 13500
171
+ },
172
+ {
173
+ "epoch": 0.39,
174
+ "learning_rate": 1.6078651055963252e-05,
175
+ "loss": 0.9717,
176
+ "step": 14000
177
+ },
178
+ {
179
+ "epoch": 0.41,
180
+ "learning_rate": 1.5938602879390513e-05,
181
+ "loss": 0.978,
182
+ "step": 14500
183
+ },
184
+ {
185
+ "epoch": 0.42,
186
+ "learning_rate": 1.579855470281777e-05,
187
+ "loss": 0.9684,
188
+ "step": 15000
189
+ },
190
+ {
191
+ "epoch": 0.43,
192
+ "learning_rate": 1.5658506526245028e-05,
193
+ "loss": 0.9518,
194
+ "step": 15500
195
+ },
196
+ {
197
+ "epoch": 0.45,
198
+ "learning_rate": 1.551845834967229e-05,
199
+ "loss": 0.9592,
200
+ "step": 16000
201
+ },
202
+ {
203
+ "epoch": 0.46,
204
+ "learning_rate": 1.5378410173099547e-05,
205
+ "loss": 0.9482,
206
+ "step": 16500
207
+ },
208
+ {
209
+ "epoch": 0.48,
210
+ "learning_rate": 1.5238361996526806e-05,
211
+ "loss": 0.9565,
212
+ "step": 17000
213
+ },
214
+ {
215
+ "epoch": 0.49,
216
+ "learning_rate": 1.5098313819954064e-05,
217
+ "loss": 0.956,
218
+ "step": 17500
219
+ },
220
+ {
221
+ "epoch": 0.5,
222
+ "learning_rate": 1.4958265643381324e-05,
223
+ "loss": 0.9432,
224
+ "step": 18000
225
+ },
226
+ {
227
+ "epoch": 0.52,
228
+ "learning_rate": 1.4818217466808585e-05,
229
+ "loss": 0.943,
230
+ "step": 18500
231
+ },
232
+ {
233
+ "epoch": 0.53,
234
+ "learning_rate": 1.4678169290235842e-05,
235
+ "loss": 0.9354,
236
+ "step": 19000
237
+ },
238
+ {
239
+ "epoch": 0.55,
240
+ "learning_rate": 1.4538121113663102e-05,
241
+ "loss": 0.958,
242
+ "step": 19500
243
+ },
244
+ {
245
+ "epoch": 0.56,
246
+ "learning_rate": 1.439807293709036e-05,
247
+ "loss": 0.9358,
248
+ "step": 20000
249
+ },
250
+ {
251
+ "epoch": 0.57,
252
+ "learning_rate": 1.4258024760517619e-05,
253
+ "loss": 0.9395,
254
+ "step": 20500
255
+ },
256
+ {
257
+ "epoch": 0.59,
258
+ "learning_rate": 1.4117976583944878e-05,
259
+ "loss": 0.9448,
260
+ "step": 21000
261
+ },
262
+ {
263
+ "epoch": 0.6,
264
+ "learning_rate": 1.3977928407372136e-05,
265
+ "loss": 0.9349,
266
+ "step": 21500
267
+ },
268
+ {
269
+ "epoch": 0.62,
270
+ "learning_rate": 1.3837880230799397e-05,
271
+ "loss": 0.9444,
272
+ "step": 22000
273
+ },
274
+ {
275
+ "epoch": 0.63,
276
+ "learning_rate": 1.3697832054226653e-05,
277
+ "loss": 0.929,
278
+ "step": 22500
279
+ },
280
+ {
281
+ "epoch": 0.64,
282
+ "learning_rate": 1.3557783877653914e-05,
283
+ "loss": 0.933,
284
+ "step": 23000
285
+ },
286
+ {
287
+ "epoch": 0.66,
288
+ "learning_rate": 1.3417735701081174e-05,
289
+ "loss": 0.9357,
290
+ "step": 23500
291
+ },
292
+ {
293
+ "epoch": 0.67,
294
+ "learning_rate": 1.3277687524508432e-05,
295
+ "loss": 0.9182,
296
+ "step": 24000
297
+ },
298
+ {
299
+ "epoch": 0.69,
300
+ "learning_rate": 1.3137639347935691e-05,
301
+ "loss": 0.9279,
302
+ "step": 24500
303
+ },
304
+ {
305
+ "epoch": 0.7,
306
+ "learning_rate": 1.299759117136295e-05,
307
+ "loss": 0.9245,
308
+ "step": 25000
309
+ },
310
+ {
311
+ "epoch": 0.71,
312
+ "learning_rate": 1.2857542994790208e-05,
313
+ "loss": 0.9205,
314
+ "step": 25500
315
+ },
316
+ {
317
+ "epoch": 0.73,
318
+ "learning_rate": 1.271749481821747e-05,
319
+ "loss": 0.9214,
320
+ "step": 26000
321
+ },
322
+ {
323
+ "epoch": 0.74,
324
+ "learning_rate": 1.2577446641644725e-05,
325
+ "loss": 0.9388,
326
+ "step": 26500
327
+ },
328
+ {
329
+ "epoch": 0.76,
330
+ "learning_rate": 1.2437398465071986e-05,
331
+ "loss": 0.9256,
332
+ "step": 27000
333
+ },
334
+ {
335
+ "epoch": 0.77,
336
+ "learning_rate": 1.2297350288499246e-05,
337
+ "loss": 0.9206,
338
+ "step": 27500
339
+ },
340
+ {
341
+ "epoch": 0.78,
342
+ "learning_rate": 1.2157302111926503e-05,
343
+ "loss": 0.9091,
344
+ "step": 28000
345
+ },
346
+ {
347
+ "epoch": 0.8,
348
+ "learning_rate": 1.2017253935353763e-05,
349
+ "loss": 0.9267,
350
+ "step": 28500
351
+ },
352
+ {
353
+ "epoch": 0.81,
354
+ "learning_rate": 1.187720575878102e-05,
355
+ "loss": 0.9103,
356
+ "step": 29000
357
+ },
358
+ {
359
+ "epoch": 0.83,
360
+ "learning_rate": 1.173715758220828e-05,
361
+ "loss": 0.9032,
362
+ "step": 29500
363
+ },
364
+ {
365
+ "epoch": 0.84,
366
+ "learning_rate": 1.1597109405635541e-05,
367
+ "loss": 0.9075,
368
+ "step": 30000
369
+ },
370
+ {
371
+ "epoch": 0.85,
372
+ "learning_rate": 1.1457061229062799e-05,
373
+ "loss": 0.9016,
374
+ "step": 30500
375
+ },
376
+ {
377
+ "epoch": 0.87,
378
+ "learning_rate": 1.1317013052490058e-05,
379
+ "loss": 0.9119,
380
+ "step": 31000
381
+ },
382
+ {
383
+ "epoch": 0.88,
384
+ "learning_rate": 1.1176964875917316e-05,
385
+ "loss": 0.9085,
386
+ "step": 31500
387
+ },
388
+ {
389
+ "epoch": 0.9,
390
+ "learning_rate": 1.1036916699344575e-05,
391
+ "loss": 0.894,
392
+ "step": 32000
393
+ },
394
+ {
395
+ "epoch": 0.91,
396
+ "learning_rate": 1.0896868522771835e-05,
397
+ "loss": 0.9156,
398
+ "step": 32500
399
+ },
400
+ {
401
+ "epoch": 0.92,
402
+ "learning_rate": 1.0756820346199093e-05,
403
+ "loss": 0.8944,
404
+ "step": 33000
405
+ },
406
+ {
407
+ "epoch": 0.94,
408
+ "learning_rate": 1.0616772169626352e-05,
409
+ "loss": 0.8824,
410
+ "step": 33500
411
+ },
412
+ {
413
+ "epoch": 0.95,
414
+ "learning_rate": 1.047672399305361e-05,
415
+ "loss": 0.9014,
416
+ "step": 34000
417
+ },
418
+ {
419
+ "epoch": 0.97,
420
+ "learning_rate": 1.033667581648087e-05,
421
+ "loss": 0.9022,
422
+ "step": 34500
423
+ },
424
+ {
425
+ "epoch": 0.98,
426
+ "learning_rate": 1.019662763990813e-05,
427
+ "loss": 0.8888,
428
+ "step": 35000
429
+ },
430
+ {
431
+ "epoch": 0.99,
432
+ "learning_rate": 1.0056579463335388e-05,
433
+ "loss": 0.8981,
434
+ "step": 35500
435
+ },
436
+ {
437
+ "epoch": 1.0,
438
+ "eval_accuracy": 0.63705,
439
+ "eval_loss": 0.8662445545196533,
440
+ "eval_runtime": 112.0196,
441
+ "eval_samples_per_second": 178.54,
442
+ "eval_steps_per_second": 11.159,
443
+ "step": 35702
444
+ }
445
+ ],
446
+ "max_steps": 71404,
447
+ "num_train_epochs": 2,
448
+ "total_flos": 7.567112374034842e+16,
449
+ "trial_name": null,
450
+ "trial_params": null
451
+ }
checkpoint-35702/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b69140ab6b24ac5d06b1ffa0cda477a62e57823010306ba7159c6f1c22522fe
3
+ size 3119
checkpoint-35702/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-71404/config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "distilbert-base-uncased",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertForSequenceClassification"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "id2label": {
12
+ "0": "LABEL_0",
13
+ "1": "LABEL_1",
14
+ "2": "LABEL_2",
15
+ "3": "LABEL_3"
16
+ },
17
+ "initializer_range": 0.02,
18
+ "label2id": {
19
+ "LABEL_0": 0,
20
+ "LABEL_1": 1,
21
+ "LABEL_2": 2,
22
+ "LABEL_3": 3
23
+ },
24
+ "max_position_embeddings": 512,
25
+ "model_type": "distilbert",
26
+ "n_heads": 12,
27
+ "n_layers": 6,
28
+ "pad_token_id": 0,
29
+ "problem_type": "single_label_classification",
30
+ "qa_dropout": 0.1,
31
+ "seq_classif_dropout": 0.2,
32
+ "sinusoidal_pos_embds": false,
33
+ "tie_weights_": true,
34
+ "torch_dtype": "float32",
35
+ "transformers_version": "4.18.0",
36
+ "vocab_size": 30522
37
+ }
checkpoint-71404/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:033c8d340e0be89ec59b924621244f788959be445ee546b3f92fdc1248e98005
3
+ size 535712545
checkpoint-71404/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18f997ca2e55d65912dbf0139e6c3853d4a06f592cb04c60a1766c6557f06a87
3
+ size 267860465
checkpoint-71404/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:590fa2ab684a0256a1eed6423ca4a8b197a92cc903c6613514418f5125d9c88d
3
+ size 14503
checkpoint-71404/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc9cf6973eb6dca3eeef731eda115482e40b297cd7f06999e95553ef4a2b1a4a
3
+ size 623
checkpoint-71404/special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
checkpoint-71404/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-71404/tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "distilbert-base-uncased", "tokenizer_class": "DistilBertTokenizer"}
checkpoint-71404/trainer_state.json ADDED
@@ -0,0 +1,886 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.6617,