akkky02 committed on
Commit
6ccfb8f
1 Parent(s): 143f0de

Upload folder using huggingface_hub

This view is limited to 50 files because the commit contains too many changes.
Files changed (50)
  1. google_t5/t5_base_amazon/README.md +87 -0
  2. google_t5/t5_base_amazon/all_results.json +23 -0
  3. google_t5/t5_base_amazon/checkpoint-700/config.json +113 -0
  4. google_t5/t5_base_amazon/checkpoint-700/model.safetensors +3 -0
  5. google_t5/t5_base_amazon/checkpoint-700/optimizer.pt +3 -0
  6. google_t5/t5_base_amazon/checkpoint-700/rng_state_0.pth +3 -0
  7. google_t5/t5_base_amazon/checkpoint-700/rng_state_1.pth +3 -0
  8. google_t5/t5_base_amazon/checkpoint-700/scheduler.pt +3 -0
  9. google_t5/t5_base_amazon/checkpoint-700/special_tokens_map.json +107 -0
  10. google_t5/t5_base_amazon/checkpoint-700/spiece.model +3 -0
  11. google_t5/t5_base_amazon/checkpoint-700/tokenizer.json +0 -0
  12. google_t5/t5_base_amazon/checkpoint-700/tokenizer_config.json +937 -0
  13. google_t5/t5_base_amazon/checkpoint-700/trainer_state.json +665 -0
  14. google_t5/t5_base_amazon/checkpoint-700/training_args.bin +3 -0
  15. google_t5/t5_base_amazon/config.json +113 -0
  16. google_t5/t5_base_amazon/eval_results.json +11 -0
  17. google_t5/t5_base_amazon/model.safetensors +3 -0
  18. google_t5/t5_base_amazon/run.log +4 -0
  19. google_t5/t5_base_amazon/special_tokens_map.json +107 -0
  20. google_t5/t5_base_amazon/spiece.model +3 -0
  21. google_t5/t5_base_amazon/test_results.json +10 -0
  22. google_t5/t5_base_amazon/tokenizer.json +0 -0
  23. google_t5/t5_base_amazon/tokenizer_config.json +937 -0
  24. google_t5/t5_base_amazon/train_results.json +8 -0
  25. google_t5/t5_base_amazon/trainer_state.json +1070 -0
  26. google_t5/t5_base_amazon/training_args.bin +3 -0
  27. google_t5/t5_base_ledgar/README.md +93 -0
  28. google_t5/t5_base_ledgar/all_results.json +23 -0
  29. google_t5/t5_base_ledgar/checkpoint-2800/config.json +267 -0
  30. google_t5/t5_base_ledgar/checkpoint-2800/model.safetensors +3 -0
  31. google_t5/t5_base_ledgar/checkpoint-2800/optimizer.pt +3 -0
  32. google_t5/t5_base_ledgar/checkpoint-2800/rng_state_0.pth +3 -0
  33. google_t5/t5_base_ledgar/checkpoint-2800/rng_state_1.pth +3 -0
  34. google_t5/t5_base_ledgar/checkpoint-2800/scheduler.pt +3 -0
  35. google_t5/t5_base_ledgar/checkpoint-2800/special_tokens_map.json +107 -0
  36. google_t5/t5_base_ledgar/checkpoint-2800/spiece.model +3 -0
  37. google_t5/t5_base_ledgar/checkpoint-2800/tokenizer.json +0 -0
  38. google_t5/t5_base_ledgar/checkpoint-2800/tokenizer_config.json +937 -0
  39. google_t5/t5_base_ledgar/checkpoint-2800/trainer_state.json +1113 -0
  40. google_t5/t5_base_ledgar/checkpoint-2800/training_args.bin +3 -0
  41. google_t5/t5_base_ledgar/config.json +267 -0
  42. google_t5/t5_base_ledgar/eval_results.json +11 -0
  43. google_t5/t5_base_ledgar/model.safetensors +3 -0
  44. google_t5/t5_base_ledgar/run.log +4 -0
  45. google_t5/t5_base_ledgar/special_tokens_map.json +107 -0
  46. google_t5/t5_base_ledgar/spiece.model +3 -0
  47. google_t5/t5_base_ledgar/test_results.json +10 -0
  48. google_t5/t5_base_ledgar/tokenizer.json +0 -0
  49. google_t5/t5_base_ledgar/tokenizer_config.json +937 -0
  50. google_t5/t5_base_ledgar/train_results.json +8 -0
google_t5/t5_base_amazon/README.md ADDED
@@ -0,0 +1,87 @@
+ ---
+ license: apache-2.0
+ base_model: google-t5/t5-base
+ tags:
+ - generated_from_trainer
+ metrics:
+ - accuracy
+ model-index:
+ - name: t5_base_amazon
+ results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # t5_base_amazon
+
+ This model is a fine-tuned version of [google-t5/t5-base](https://huggingface.co/google-t5/t5-base) on an unknown dataset.
+ It achieves the following results on the evaluation set:
+ - Loss: 0.5565
+ - Accuracy: 0.8399
+ - F1 Macro: 0.8113
+ - F1 Micro: 0.8399
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 0.0005
+ - train_batch_size: 16
+ - eval_batch_size: 16
+ - seed: 42
+ - distributed_type: multi-GPU
+ - num_devices: 2
+ - total_train_batch_size: 32
+ - total_eval_batch_size: 32
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: linear
+ - num_epochs: 3.0
+
+ ### Training results
+
+ | Training Loss | Epoch | Step | Validation Loss | Accuracy | F1 Macro | F1 Micro |
+ |:-------------:|:-----:|:----:|:---------------:|:--------:|:--------:|:--------:|
+ | 1.2275 | 0.13 | 50 | 1.0353 | 0.6950 | 0.6073 | 0.6950 |
+ | 0.8341 | 0.26 | 100 | 0.8838 | 0.7385 | 0.6814 | 0.7385 |
+ | 0.7773 | 0.39 | 150 | 0.7473 | 0.7833 | 0.7340 | 0.7833 |
+ | 0.7188 | 0.53 | 200 | 0.7024 | 0.7925 | 0.7433 | 0.7925 |
+ | 0.7483 | 0.66 | 250 | 0.7056 | 0.7872 | 0.7396 | 0.7872 |
+ | 0.6228 | 0.79 | 300 | 0.6338 | 0.8129 | 0.7636 | 0.8129 |
+ | 0.7089 | 0.92 | 350 | 0.6130 | 0.8208 | 0.7963 | 0.8208 |
+ | 0.5055 | 1.05 | 400 | 0.5939 | 0.8300 | 0.8075 | 0.8300 |
+ | 0.3942 | 1.18 | 450 | 0.6021 | 0.8241 | 0.7916 | 0.8241 |
+ | 0.4248 | 1.32 | 500 | 0.5956 | 0.8300 | 0.8060 | 0.8300 |
+ | 0.3595 | 1.45 | 550 | 0.6173 | 0.8175 | 0.7897 | 0.8175 |
+ | 0.5263 | 1.58 | 600 | 0.6170 | 0.8162 | 0.7908 | 0.8162 |
+ | 0.5153 | 1.71 | 650 | 0.6007 | 0.8327 | 0.8043 | 0.8327 |
+ | 0.4237 | 1.84 | 700 | 0.5565 | 0.8399 | 0.8113 | 0.8399 |
+ | 0.3852 | 1.97 | 750 | 0.5631 | 0.8439 | 0.8146 | 0.8439 |
+ | 0.1916 | 2.11 | 800 | 0.5848 | 0.8439 | 0.8132 | 0.8439 |
+ | 0.2108 | 2.24 | 850 | 0.6054 | 0.8432 | 0.8094 | 0.8432 |
+ | 0.1752 | 2.37 | 900 | 0.6142 | 0.8439 | 0.8131 | 0.8439 |
+ | 0.1502 | 2.5 | 950 | 0.6100 | 0.8452 | 0.8119 | 0.8452 |
+ | 0.2253 | 2.63 | 1000 | 0.6084 | 0.8439 | 0.8228 | 0.8439 |
+ | 0.2193 | 2.76 | 1050 | 0.6062 | 0.8485 | 0.8171 | 0.8485 |
+ | 0.2182 | 2.89 | 1100 | 0.5966 | 0.8498 | 0.8182 | 0.8498 |
+
+
+ ### Framework versions
+
+ - Transformers 4.39.0.dev0
+ - Pytorch 2.2.1+cu121
+ - Datasets 2.18.0
+ - Tokenizers 0.15.2
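The generated card above leaves the usage sections empty. As a rough sketch only (the local folder path and the example review text below are assumptions, not part of the uploaded files), the checkpoint can be loaded as an ordinary sequence classifier:

```python
# Minimal usage sketch for the fine-tuned classifier above.
# Assumes this commit's folder is available locally; swap in a Hub repo id if preferred.
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

model_dir = "google_t5/t5_base_amazon"  # assumed local path to the uploaded folder
tokenizer = AutoTokenizer.from_pretrained(model_dir)
model = AutoModelForSequenceClassification.from_pretrained(model_dir)  # resolves to T5ForSequenceClassification

text = "Great battery life and the screen is very sharp."  # made-up example input
inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
with torch.no_grad():
    logits = model(**inputs).logits
pred = logits.argmax(dim=-1).item()
print(model.config.id2label[pred])  # prints the string class label from config.json
```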
google_t5/t5_base_amazon/all_results.json ADDED
@@ -0,0 +1,23 @@
+ {
+ "epoch": 3.0,
+ "eval_accuracy": 0.8399209486166008,
+ "eval_f1_macro": 0.8112898753723374,
+ "eval_f1_micro": 0.8399209486166008,
+ "eval_loss": 0.5565423369407654,
+ "eval_runtime": 3.8107,
+ "eval_samples": 1518,
+ "eval_samples_per_second": 398.35,
+ "eval_steps_per_second": 12.596,
+ "test_accuracy": 0.857707509881423,
+ "test_f1_macro": 0.8293055942786999,
+ "test_f1_micro": 0.857707509881423,
+ "test_loss": 0.5306673645973206,
+ "test_runtime": 3.7633,
+ "test_samples_per_second": 403.37,
+ "test_steps_per_second": 12.755,
+ "train_loss": 0.5285837122222834,
+ "train_runtime": 447.0645,
+ "train_samples": 12144,
+ "train_samples_per_second": 81.492,
+ "train_steps_per_second": 2.55
+ }
google_t5/t5_base_amazon/checkpoint-700/config.json ADDED
@@ -0,0 +1,113 @@
+ {
+ "_name_or_path": "google-t5/t5-base",
+ "architectures": [
+ "T5ForSequenceClassification"
+ ],
+ "classifier_dropout": 0.0,
+ "d_ff": 3072,
+ "d_kv": 64,
+ "d_model": 768,
+ "decoder_start_token_id": 0,
+ "dense_act_fn": "relu",
+ "dropout_rate": 0.1,
+ "eos_token_id": 1,
+ "feed_forward_proj": "relu",
+ "finetuning_task": "text-classification",
+ "id2label": {
+ "0": "0",
+ "1": "1",
+ "2": "10",
+ "3": "11",
+ "4": "12",
+ "5": "13",
+ "6": "14",
+ "7": "15",
+ "8": "16",
+ "9": "17",
+ "10": "18",
+ "11": "19",
+ "12": "2",
+ "13": "20",
+ "14": "21",
+ "15": "22",
+ "16": "3",
+ "17": "4",
+ "18": "5",
+ "19": "6",
+ "20": "7",
+ "21": "8",
+ "22": "9"
+ },
+ "initializer_factor": 1.0,
+ "is_encoder_decoder": true,
+ "is_gated_act": false,
+ "label2id": {
+ "0": 0,
+ "1": 1,
+ "10": 2,
+ "11": 3,
+ "12": 4,
+ "13": 5,
+ "14": 6,
+ "15": 7,
+ "16": 8,
+ "17": 9,
+ "18": 10,
+ "19": 11,
+ "2": 12,
+ "20": 13,
+ "21": 14,
+ "22": 15,
+ "3": 16,
+ "4": 17,
+ "5": 18,
+ "6": 19,
+ "7": 20,
+ "8": 21,
+ "9": 22
+ },
+ "layer_norm_epsilon": 1e-06,
+ "model_type": "t5",
+ "n_positions": 512,
+ "num_decoder_layers": 12,
+ "num_heads": 12,
+ "num_layers": 12,
+ "output_past": true,
+ "pad_token_id": 0,
+ "problem_type": "single_label_classification",
+ "relative_attention_max_distance": 128,
+ "relative_attention_num_buckets": 32,
+ "task_specific_params": {
+ "summarization": {
+ "early_stopping": true,
+ "length_penalty": 2.0,
+ "max_length": 200,
+ "min_length": 30,
+ "no_repeat_ngram_size": 3,
+ "num_beams": 4,
+ "prefix": "summarize: "
+ },
+ "translation_en_to_de": {
+ "early_stopping": true,
+ "max_length": 300,
+ "num_beams": 4,
+ "prefix": "translate English to German: "
+ },
+ "translation_en_to_fr": {
+ "early_stopping": true,
+ "max_length": 300,
+ "num_beams": 4,
+ "prefix": "translate English to French: "
+ },
+ "translation_en_to_ro": {
+ "early_stopping": true,
+ "max_length": 300,
+ "num_beams": 4,
+ "prefix": "translate English to Romanian: "
+ }
+ },
+ "torch_dtype": "float32",
+ "transformers_version": "4.39.0.dev0",
+ "use_cache": true,
+ "vocab_size": 32128
+ }
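One detail worth noting in this config: the class names in id2label are the original string labels sorted lexicographically ("0", "1", "10", "11", ...), so the model's output index and the label name do not line up numerically. A small illustrative check (the dictionary excerpt is copied from the mapping above):

```python
# Illustrative only: how the id2label/label2id tables above behave.
id2label = {0: "0", 1: "1", 2: "10", 3: "11", 12: "2", 16: "3"}  # excerpt of the config mapping
label2id = {name: idx for idx, name in id2label.items()}

assert id2label[2] == "10"   # output index 2 means class "10", not class "2"
assert label2id["2"] == 12   # class "2" actually sits at output index 12
```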
google_t5/t5_base_amazon/checkpoint-700/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:793ee165edf28806fd5d823c7f8559e2764d24d6a360fd01fcaab3c3b51a43b7
+ size 894081308
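The three lines above are a Git LFS pointer rather than the weights themselves; the Hub stores the actual 894 MB safetensors file separately. A hedged sketch of fetching it with huggingface_hub (the repo id is a placeholder, not stated anywhere in this commit):

```python
# Sketch: resolve the LFS pointer above to the actual checkpoint file.
from huggingface_hub import hf_hub_download

local_path = hf_hub_download(
    repo_id="<user>/<repo>",  # placeholder: the repository this commit was pushed to
    filename="google_t5/t5_base_amazon/checkpoint-700/model.safetensors",
)
print(local_path)  # local cache path of the downloaded weights
```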
google_t5/t5_base_amazon/checkpoint-700/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b6826461ddc187854774503393d07d2bab54a9545971ea1a146982ab3d4c6e8d
+ size 1788314362
google_t5/t5_base_amazon/checkpoint-700/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8c07361a99a91c0c9b6371248704b1b3dca05b07d15619b60759b3d2ff74fd1f
+ size 14512
google_t5/t5_base_amazon/checkpoint-700/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1cc509d254e83e747331fff71c722f69fec583ca0a30405bbfed8debff379aef
+ size 14512
google_t5/t5_base_amazon/checkpoint-700/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bf00ee845c47f044a643cdd9c93d8563aaf47d1f4581854acf7abb83f7302f20
+ size 1064
google_t5/t5_base_amazon/checkpoint-700/special_tokens_map.json ADDED
@@ -0,0 +1,107 @@
+ {
+ "additional_special_tokens": [
+ "<extra_id_0>",
+ "<extra_id_1>",
+ "<extra_id_2>",
+ "<extra_id_3>",
+ "<extra_id_4>",
+ "<extra_id_5>",
+ "<extra_id_6>",
+ "<extra_id_7>",
+ "<extra_id_8>",
+ "<extra_id_9>",
+ "<extra_id_10>",
+ "<extra_id_11>",
+ "<extra_id_12>",
+ "<extra_id_13>",
+ "<extra_id_14>",
+ "<extra_id_15>",
+ "<extra_id_16>",
+ "<extra_id_17>",
+ "<extra_id_18>",
+ "<extra_id_19>",
+ "<extra_id_20>",
+ "<extra_id_21>",
+ "<extra_id_22>",
+ "<extra_id_23>",
+ "<extra_id_24>",
+ "<extra_id_25>",
+ "<extra_id_26>",
+ "<extra_id_27>",
+ "<extra_id_28>",
+ "<extra_id_29>",
+ "<extra_id_30>",
+ "<extra_id_31>",
+ "<extra_id_32>",
+ "<extra_id_33>",
+ "<extra_id_34>",
+ "<extra_id_35>",
+ "<extra_id_36>",
+ "<extra_id_37>",
+ "<extra_id_38>",
+ "<extra_id_39>",
+ "<extra_id_40>",
+ "<extra_id_41>",
+ "<extra_id_42>",
+ "<extra_id_43>",
+ "<extra_id_44>",
+ "<extra_id_45>",
+ "<extra_id_46>",
+ "<extra_id_47>",
+ "<extra_id_48>",
+ "<extra_id_49>",
+ "<extra_id_50>",
+ "<extra_id_51>",
+ "<extra_id_52>",
+ "<extra_id_53>",
+ "<extra_id_54>",
+ "<extra_id_55>",
+ "<extra_id_56>",
+ "<extra_id_57>",
+ "<extra_id_58>",
+ "<extra_id_59>",
+ "<extra_id_60>",
+ "<extra_id_61>",
+ "<extra_id_62>",
+ "<extra_id_63>",
+ "<extra_id_64>",
+ "<extra_id_65>",
+ "<extra_id_66>",
+ "<extra_id_67>",
+ "<extra_id_68>",
+ "<extra_id_69>",
+ "<extra_id_70>",
+ "<extra_id_71>",
+ "<extra_id_72>",
+ "<extra_id_73>",
+ "<extra_id_74>",
+ "<extra_id_75>",
+ "<extra_id_76>",
+ "<extra_id_77>",
+ "<extra_id_78>",
+ "<extra_id_79>",
+ "<extra_id_80>",
+ "<extra_id_81>",
+ "<extra_id_82>",
+ "<extra_id_83>",
+ "<extra_id_84>",
+ "<extra_id_85>",
+ "<extra_id_86>",
+ "<extra_id_87>",
+ "<extra_id_88>",
+ "<extra_id_89>",
+ "<extra_id_90>",
+ "<extra_id_91>",
+ "<extra_id_92>",
+ "<extra_id_93>",
+ "<extra_id_94>",
+ "<extra_id_95>",
+ "<extra_id_96>",
+ "<extra_id_97>",
+ "<extra_id_98>",
+ "<extra_id_99>"
+ ],
+ "eos_token": "</s>",
+ "pad_token": "<pad>",
+ "unk_token": "<unk>"
+ }
google_t5/t5_base_amazon/checkpoint-700/spiece.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
+ size 791656
google_t5/t5_base_amazon/checkpoint-700/tokenizer.json ADDED
The diff for this file is too large to render.
google_t5/t5_base_amazon/checkpoint-700/tokenizer_config.json ADDED
@@ -0,0 +1,937 @@
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<pad>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "</s>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "<unk>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "32000": {
28
+ "content": "<extra_id_99>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "32001": {
36
+ "content": "<extra_id_98>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "32002": {
44
+ "content": "<extra_id_97>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "32003": {
52
+ "content": "<extra_id_96>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "32004": {
60
+ "content": "<extra_id_95>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "32005": {
68
+ "content": "<extra_id_94>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "32006": {
76
+ "content": "<extra_id_93>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "32007": {
84
+ "content": "<extra_id_92>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "32008": {
92
+ "content": "<extra_id_91>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "32009": {
100
+ "content": "<extra_id_90>",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "32010": {
108
+ "content": "<extra_id_89>",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "32011": {
116
+ "content": "<extra_id_88>",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "32012": {
124
+ "content": "<extra_id_87>",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "32013": {
132
+ "content": "<extra_id_86>",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": true
138
+ },
139
+ "32014": {
140
+ "content": "<extra_id_85>",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": true
146
+ },
147
+ "32015": {
148
+ "content": "<extra_id_84>",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": true
154
+ },
155
+ "32016": {
156
+ "content": "<extra_id_83>",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "32017": {
164
+ "content": "<extra_id_82>",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": true
170
+ },
171
+ "32018": {
172
+ "content": "<extra_id_81>",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": true
178
+ },
179
+ "32019": {
180
+ "content": "<extra_id_80>",
181
+ "lstrip": false,
182
+ "normalized": false,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": true
186
+ },
187
+ "32020": {
188
+ "content": "<extra_id_79>",
189
+ "lstrip": false,
190
+ "normalized": false,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": true
194
+ },
195
+ "32021": {
196
+ "content": "<extra_id_78>",
197
+ "lstrip": false,
198
+ "normalized": false,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": true
202
+ },
203
+ "32022": {
204
+ "content": "<extra_id_77>",
205
+ "lstrip": false,
206
+ "normalized": false,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": true
210
+ },
211
+ "32023": {
212
+ "content": "<extra_id_76>",
213
+ "lstrip": false,
214
+ "normalized": false,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": true
218
+ },
219
+ "32024": {
220
+ "content": "<extra_id_75>",
221
+ "lstrip": false,
222
+ "normalized": false,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": true
226
+ },
227
+ "32025": {
228
+ "content": "<extra_id_74>",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": true
234
+ },
235
+ "32026": {
236
+ "content": "<extra_id_73>",
237
+ "lstrip": false,
238
+ "normalized": false,
239
+ "rstrip": false,
240
+ "single_word": false,
241
+ "special": true
242
+ },
243
+ "32027": {
244
+ "content": "<extra_id_72>",
245
+ "lstrip": false,
246
+ "normalized": false,
247
+ "rstrip": false,
248
+ "single_word": false,
249
+ "special": true
250
+ },
251
+ "32028": {
252
+ "content": "<extra_id_71>",
253
+ "lstrip": false,
254
+ "normalized": false,
255
+ "rstrip": false,
256
+ "single_word": false,
257
+ "special": true
258
+ },
259
+ "32029": {
260
+ "content": "<extra_id_70>",
261
+ "lstrip": false,
262
+ "normalized": false,
263
+ "rstrip": false,
264
+ "single_word": false,
265
+ "special": true
266
+ },
267
+ "32030": {
268
+ "content": "<extra_id_69>",
269
+ "lstrip": false,
270
+ "normalized": false,
271
+ "rstrip": false,
272
+ "single_word": false,
273
+ "special": true
274
+ },
275
+ "32031": {
276
+ "content": "<extra_id_68>",
277
+ "lstrip": false,
278
+ "normalized": false,
279
+ "rstrip": false,
280
+ "single_word": false,
281
+ "special": true
282
+ },
283
+ "32032": {
284
+ "content": "<extra_id_67>",
285
+ "lstrip": false,
286
+ "normalized": false,
287
+ "rstrip": false,
288
+ "single_word": false,
289
+ "special": true
290
+ },
291
+ "32033": {
292
+ "content": "<extra_id_66>",
293
+ "lstrip": false,
294
+ "normalized": false,
295
+ "rstrip": false,
296
+ "single_word": false,
297
+ "special": true
298
+ },
299
+ "32034": {
300
+ "content": "<extra_id_65>",
301
+ "lstrip": false,
302
+ "normalized": false,
303
+ "rstrip": false,
304
+ "single_word": false,
305
+ "special": true
306
+ },
307
+ "32035": {
308
+ "content": "<extra_id_64>",
309
+ "lstrip": false,
310
+ "normalized": false,
311
+ "rstrip": false,
312
+ "single_word": false,
313
+ "special": true
314
+ },
315
+ "32036": {
316
+ "content": "<extra_id_63>",
317
+ "lstrip": false,
318
+ "normalized": false,
319
+ "rstrip": false,
320
+ "single_word": false,
321
+ "special": true
322
+ },
323
+ "32037": {
324
+ "content": "<extra_id_62>",
325
+ "lstrip": false,
326
+ "normalized": false,
327
+ "rstrip": false,
328
+ "single_word": false,
329
+ "special": true
330
+ },
331
+ "32038": {
332
+ "content": "<extra_id_61>",
333
+ "lstrip": false,
334
+ "normalized": false,
335
+ "rstrip": false,
336
+ "single_word": false,
337
+ "special": true
338
+ },
339
+ "32039": {
340
+ "content": "<extra_id_60>",
341
+ "lstrip": false,
342
+ "normalized": false,
343
+ "rstrip": false,
344
+ "single_word": false,
345
+ "special": true
346
+ },
347
+ "32040": {
348
+ "content": "<extra_id_59>",
349
+ "lstrip": false,
350
+ "normalized": false,
351
+ "rstrip": false,
352
+ "single_word": false,
353
+ "special": true
354
+ },
355
+ "32041": {
356
+ "content": "<extra_id_58>",
357
+ "lstrip": false,
358
+ "normalized": false,
359
+ "rstrip": false,
360
+ "single_word": false,
361
+ "special": true
362
+ },
363
+ "32042": {
364
+ "content": "<extra_id_57>",
365
+ "lstrip": false,
366
+ "normalized": false,
367
+ "rstrip": false,
368
+ "single_word": false,
369
+ "special": true
370
+ },
371
+ "32043": {
372
+ "content": "<extra_id_56>",
373
+ "lstrip": false,
374
+ "normalized": false,
375
+ "rstrip": false,
376
+ "single_word": false,
377
+ "special": true
378
+ },
379
+ "32044": {
380
+ "content": "<extra_id_55>",
381
+ "lstrip": false,
382
+ "normalized": false,
383
+ "rstrip": false,
384
+ "single_word": false,
385
+ "special": true
386
+ },
387
+ "32045": {
388
+ "content": "<extra_id_54>",
389
+ "lstrip": false,
390
+ "normalized": false,
391
+ "rstrip": false,
392
+ "single_word": false,
393
+ "special": true
394
+ },
395
+ "32046": {
396
+ "content": "<extra_id_53>",
397
+ "lstrip": false,
398
+ "normalized": false,
399
+ "rstrip": false,
400
+ "single_word": false,
401
+ "special": true
402
+ },
403
+ "32047": {
404
+ "content": "<extra_id_52>",
405
+ "lstrip": false,
406
+ "normalized": false,
407
+ "rstrip": false,
408
+ "single_word": false,
409
+ "special": true
410
+ },
411
+ "32048": {
412
+ "content": "<extra_id_51>",
413
+ "lstrip": false,
414
+ "normalized": false,
415
+ "rstrip": false,
416
+ "single_word": false,
417
+ "special": true
418
+ },
419
+ "32049": {
420
+ "content": "<extra_id_50>",
421
+ "lstrip": false,
422
+ "normalized": false,
423
+ "rstrip": false,
424
+ "single_word": false,
425
+ "special": true
426
+ },
427
+ "32050": {
428
+ "content": "<extra_id_49>",
429
+ "lstrip": false,
430
+ "normalized": false,
431
+ "rstrip": false,
432
+ "single_word": false,
433
+ "special": true
434
+ },
435
+ "32051": {
436
+ "content": "<extra_id_48>",
437
+ "lstrip": false,
438
+ "normalized": false,
439
+ "rstrip": false,
440
+ "single_word": false,
441
+ "special": true
442
+ },
443
+ "32052": {
444
+ "content": "<extra_id_47>",
445
+ "lstrip": false,
446
+ "normalized": false,
447
+ "rstrip": false,
448
+ "single_word": false,
449
+ "special": true
450
+ },
451
+ "32053": {
452
+ "content": "<extra_id_46>",
453
+ "lstrip": false,
454
+ "normalized": false,
455
+ "rstrip": false,
456
+ "single_word": false,
457
+ "special": true
458
+ },
459
+ "32054": {
460
+ "content": "<extra_id_45>",
461
+ "lstrip": false,
462
+ "normalized": false,
463
+ "rstrip": false,
464
+ "single_word": false,
465
+ "special": true
466
+ },
467
+ "32055": {
468
+ "content": "<extra_id_44>",
469
+ "lstrip": false,
470
+ "normalized": false,
471
+ "rstrip": false,
472
+ "single_word": false,
473
+ "special": true
474
+ },
475
+ "32056": {
476
+ "content": "<extra_id_43>",
477
+ "lstrip": false,
478
+ "normalized": false,
479
+ "rstrip": false,
480
+ "single_word": false,
481
+ "special": true
482
+ },
483
+ "32057": {
484
+ "content": "<extra_id_42>",
485
+ "lstrip": false,
486
+ "normalized": false,
487
+ "rstrip": false,
488
+ "single_word": false,
489
+ "special": true
490
+ },
491
+ "32058": {
492
+ "content": "<extra_id_41>",
493
+ "lstrip": false,
494
+ "normalized": false,
495
+ "rstrip": false,
496
+ "single_word": false,
497
+ "special": true
498
+ },
499
+ "32059": {
500
+ "content": "<extra_id_40>",
501
+ "lstrip": false,
502
+ "normalized": false,
503
+ "rstrip": false,
504
+ "single_word": false,
505
+ "special": true
506
+ },
507
+ "32060": {
508
+ "content": "<extra_id_39>",
509
+ "lstrip": false,
510
+ "normalized": false,
511
+ "rstrip": false,
512
+ "single_word": false,
513
+ "special": true
514
+ },
515
+ "32061": {
516
+ "content": "<extra_id_38>",
517
+ "lstrip": false,
518
+ "normalized": false,
519
+ "rstrip": false,
520
+ "single_word": false,
521
+ "special": true
522
+ },
523
+ "32062": {
524
+ "content": "<extra_id_37>",
525
+ "lstrip": false,
526
+ "normalized": false,
527
+ "rstrip": false,
528
+ "single_word": false,
529
+ "special": true
530
+ },
531
+ "32063": {
532
+ "content": "<extra_id_36>",
533
+ "lstrip": false,
534
+ "normalized": false,
535
+ "rstrip": false,
536
+ "single_word": false,
537
+ "special": true
538
+ },
539
+ "32064": {
540
+ "content": "<extra_id_35>",
541
+ "lstrip": false,
542
+ "normalized": false,
543
+ "rstrip": false,
544
+ "single_word": false,
545
+ "special": true
546
+ },
547
+ "32065": {
548
+ "content": "<extra_id_34>",
549
+ "lstrip": false,
550
+ "normalized": false,
551
+ "rstrip": false,
552
+ "single_word": false,
553
+ "special": true
554
+ },
555
+ "32066": {
556
+ "content": "<extra_id_33>",
557
+ "lstrip": false,
558
+ "normalized": false,
559
+ "rstrip": false,
560
+ "single_word": false,
561
+ "special": true
562
+ },
563
+ "32067": {
564
+ "content": "<extra_id_32>",
565
+ "lstrip": false,
566
+ "normalized": false,
567
+ "rstrip": false,
568
+ "single_word": false,
569
+ "special": true
570
+ },
571
+ "32068": {
572
+ "content": "<extra_id_31>",
573
+ "lstrip": false,
574
+ "normalized": false,
575
+ "rstrip": false,
576
+ "single_word": false,
577
+ "special": true
578
+ },
579
+ "32069": {
580
+ "content": "<extra_id_30>",
581
+ "lstrip": false,
582
+ "normalized": false,
583
+ "rstrip": false,
584
+ "single_word": false,
585
+ "special": true
586
+ },
587
+ "32070": {
588
+ "content": "<extra_id_29>",
589
+ "lstrip": false,
590
+ "normalized": false,
591
+ "rstrip": false,
592
+ "single_word": false,
593
+ "special": true
594
+ },
595
+ "32071": {
596
+ "content": "<extra_id_28>",
597
+ "lstrip": false,
598
+ "normalized": false,
599
+ "rstrip": false,
600
+ "single_word": false,
601
+ "special": true
602
+ },
603
+ "32072": {
604
+ "content": "<extra_id_27>",
605
+ "lstrip": false,
606
+ "normalized": false,
607
+ "rstrip": false,
608
+ "single_word": false,
609
+ "special": true
610
+ },
611
+ "32073": {
612
+ "content": "<extra_id_26>",
613
+ "lstrip": false,
614
+ "normalized": false,
615
+ "rstrip": false,
616
+ "single_word": false,
617
+ "special": true
618
+ },
619
+ "32074": {
620
+ "content": "<extra_id_25>",
621
+ "lstrip": false,
622
+ "normalized": false,
623
+ "rstrip": false,
624
+ "single_word": false,
625
+ "special": true
626
+ },
627
+ "32075": {
628
+ "content": "<extra_id_24>",
629
+ "lstrip": false,
630
+ "normalized": false,
631
+ "rstrip": false,
632
+ "single_word": false,
633
+ "special": true
634
+ },
635
+ "32076": {
636
+ "content": "<extra_id_23>",
637
+ "lstrip": false,
638
+ "normalized": false,
639
+ "rstrip": false,
640
+ "single_word": false,
641
+ "special": true
642
+ },
643
+ "32077": {
644
+ "content": "<extra_id_22>",
645
+ "lstrip": false,
646
+ "normalized": false,
647
+ "rstrip": false,
648
+ "single_word": false,
649
+ "special": true
650
+ },
651
+ "32078": {
652
+ "content": "<extra_id_21>",
653
+ "lstrip": false,
654
+ "normalized": false,
655
+ "rstrip": false,
656
+ "single_word": false,
657
+ "special": true
658
+ },
659
+ "32079": {
660
+ "content": "<extra_id_20>",
661
+ "lstrip": false,
662
+ "normalized": false,
663
+ "rstrip": false,
664
+ "single_word": false,
665
+ "special": true
666
+ },
667
+ "32080": {
668
+ "content": "<extra_id_19>",
669
+ "lstrip": false,
670
+ "normalized": false,
671
+ "rstrip": false,
672
+ "single_word": false,
673
+ "special": true
674
+ },
675
+ "32081": {
676
+ "content": "<extra_id_18>",
677
+ "lstrip": false,
678
+ "normalized": false,
679
+ "rstrip": false,
680
+ "single_word": false,
681
+ "special": true
682
+ },
683
+ "32082": {
684
+ "content": "<extra_id_17>",
685
+ "lstrip": false,
686
+ "normalized": false,
687
+ "rstrip": false,
688
+ "single_word": false,
689
+ "special": true
690
+ },
691
+ "32083": {
692
+ "content": "<extra_id_16>",
693
+ "lstrip": false,
694
+ "normalized": false,
695
+ "rstrip": false,
696
+ "single_word": false,
697
+ "special": true
698
+ },
699
+ "32084": {
700
+ "content": "<extra_id_15>",
701
+ "lstrip": false,
702
+ "normalized": false,
703
+ "rstrip": false,
704
+ "single_word": false,
705
+ "special": true
706
+ },
707
+ "32085": {
708
+ "content": "<extra_id_14>",
709
+ "lstrip": false,
710
+ "normalized": false,
711
+ "rstrip": false,
712
+ "single_word": false,
713
+ "special": true
714
+ },
715
+ "32086": {
716
+ "content": "<extra_id_13>",
717
+ "lstrip": false,
718
+ "normalized": false,
719
+ "rstrip": false,
720
+ "single_word": false,
721
+ "special": true
722
+ },
723
+ "32087": {
724
+ "content": "<extra_id_12>",
725
+ "lstrip": false,
726
+ "normalized": false,
727
+ "rstrip": false,
728
+ "single_word": false,
729
+ "special": true
730
+ },
731
+ "32088": {
732
+ "content": "<extra_id_11>",
733
+ "lstrip": false,
734
+ "normalized": false,
735
+ "rstrip": false,
736
+ "single_word": false,
737
+ "special": true
738
+ },
739
+ "32089": {
740
+ "content": "<extra_id_10>",
741
+ "lstrip": false,
742
+ "normalized": false,
743
+ "rstrip": false,
744
+ "single_word": false,
745
+ "special": true
746
+ },
747
+ "32090": {
748
+ "content": "<extra_id_9>",
749
+ "lstrip": false,
750
+ "normalized": false,
751
+ "rstrip": false,
752
+ "single_word": false,
753
+ "special": true
754
+ },
755
+ "32091": {
756
+ "content": "<extra_id_8>",
757
+ "lstrip": false,
758
+ "normalized": false,
759
+ "rstrip": false,
760
+ "single_word": false,
761
+ "special": true
762
+ },
763
+ "32092": {
764
+ "content": "<extra_id_7>",
765
+ "lstrip": false,
766
+ "normalized": false,
767
+ "rstrip": false,
768
+ "single_word": false,
769
+ "special": true
770
+ },
771
+ "32093": {
772
+ "content": "<extra_id_6>",
773
+ "lstrip": false,
774
+ "normalized": false,
775
+ "rstrip": false,
776
+ "single_word": false,
777
+ "special": true
778
+ },
779
+ "32094": {
780
+ "content": "<extra_id_5>",
781
+ "lstrip": false,
782
+ "normalized": false,
783
+ "rstrip": false,
784
+ "single_word": false,
785
+ "special": true
786
+ },
787
+ "32095": {
788
+ "content": "<extra_id_4>",
789
+ "lstrip": false,
790
+ "normalized": false,
791
+ "rstrip": false,
792
+ "single_word": false,
793
+ "special": true
794
+ },
795
+ "32096": {
796
+ "content": "<extra_id_3>",
797
+ "lstrip": false,
798
+ "normalized": false,
799
+ "rstrip": false,
800
+ "single_word": false,
801
+ "special": true
802
+ },
803
+ "32097": {
804
+ "content": "<extra_id_2>",
805
+ "lstrip": false,
806
+ "normalized": false,
807
+ "rstrip": false,
808
+ "single_word": false,
809
+ "special": true
810
+ },
811
+ "32098": {
812
+ "content": "<extra_id_1>",
813
+ "lstrip": false,
814
+ "normalized": false,
815
+ "rstrip": false,
816
+ "single_word": false,
817
+ "special": true
818
+ },
819
+ "32099": {
820
+ "content": "<extra_id_0>",
821
+ "lstrip": false,
822
+ "normalized": false,
823
+ "rstrip": false,
824
+ "single_word": false,
825
+ "special": true
826
+ }
827
+ },
828
+ "additional_special_tokens": [
829
+ "<extra_id_0>",
830
+ "<extra_id_1>",
831
+ "<extra_id_2>",
832
+ "<extra_id_3>",
833
+ "<extra_id_4>",
834
+ "<extra_id_5>",
835
+ "<extra_id_6>",
836
+ "<extra_id_7>",
837
+ "<extra_id_8>",
838
+ "<extra_id_9>",
839
+ "<extra_id_10>",
840
+ "<extra_id_11>",
841
+ "<extra_id_12>",
842
+ "<extra_id_13>",
843
+ "<extra_id_14>",
844
+ "<extra_id_15>",
845
+ "<extra_id_16>",
846
+ "<extra_id_17>",
847
+ "<extra_id_18>",
848
+ "<extra_id_19>",
849
+ "<extra_id_20>",
850
+ "<extra_id_21>",
851
+ "<extra_id_22>",
852
+ "<extra_id_23>",
853
+ "<extra_id_24>",
854
+ "<extra_id_25>",
855
+ "<extra_id_26>",
856
+ "<extra_id_27>",
857
+ "<extra_id_28>",
858
+ "<extra_id_29>",
859
+ "<extra_id_30>",
860
+ "<extra_id_31>",
861
+ "<extra_id_32>",
862
+ "<extra_id_33>",
863
+ "<extra_id_34>",
864
+ "<extra_id_35>",
865
+ "<extra_id_36>",
866
+ "<extra_id_37>",
867
+ "<extra_id_38>",
868
+ "<extra_id_39>",
869
+ "<extra_id_40>",
870
+ "<extra_id_41>",
871
+ "<extra_id_42>",
872
+ "<extra_id_43>",
873
+ "<extra_id_44>",
874
+ "<extra_id_45>",
875
+ "<extra_id_46>",
876
+ "<extra_id_47>",
877
+ "<extra_id_48>",
878
+ "<extra_id_49>",
879
+ "<extra_id_50>",
880
+ "<extra_id_51>",
881
+ "<extra_id_52>",
882
+ "<extra_id_53>",
883
+ "<extra_id_54>",
884
+ "<extra_id_55>",
885
+ "<extra_id_56>",
886
+ "<extra_id_57>",
887
+ "<extra_id_58>",
888
+ "<extra_id_59>",
889
+ "<extra_id_60>",
890
+ "<extra_id_61>",
891
+ "<extra_id_62>",
892
+ "<extra_id_63>",
893
+ "<extra_id_64>",
894
+ "<extra_id_65>",
895
+ "<extra_id_66>",
896
+ "<extra_id_67>",
897
+ "<extra_id_68>",
898
+ "<extra_id_69>",
899
+ "<extra_id_70>",
900
+ "<extra_id_71>",
901
+ "<extra_id_72>",
902
+ "<extra_id_73>",
903
+ "<extra_id_74>",
904
+ "<extra_id_75>",
905
+ "<extra_id_76>",
906
+ "<extra_id_77>",
907
+ "<extra_id_78>",
908
+ "<extra_id_79>",
909
+ "<extra_id_80>",
910
+ "<extra_id_81>",
911
+ "<extra_id_82>",
912
+ "<extra_id_83>",
913
+ "<extra_id_84>",
914
+ "<extra_id_85>",
915
+ "<extra_id_86>",
916
+ "<extra_id_87>",
917
+ "<extra_id_88>",
918
+ "<extra_id_89>",
919
+ "<extra_id_90>",
920
+ "<extra_id_91>",
921
+ "<extra_id_92>",
922
+ "<extra_id_93>",
923
+ "<extra_id_94>",
924
+ "<extra_id_95>",
925
+ "<extra_id_96>",
926
+ "<extra_id_97>",
927
+ "<extra_id_98>",
928
+ "<extra_id_99>"
929
+ ],
930
+ "clean_up_tokenization_spaces": true,
931
+ "eos_token": "</s>",
932
+ "extra_ids": 100,
933
+ "model_max_length": 512,
934
+ "pad_token": "<pad>",
935
+ "tokenizer_class": "T5Tokenizer",
936
+ "unk_token": "<unk>"
937
+ }
google_t5/t5_base_amazon/checkpoint-700/trainer_state.json ADDED
@@ -0,0 +1,665 @@
1
+ {
2
+ "best_metric": 0.5565423369407654,
3
+ "best_model_checkpoint": "../../experiments_checkpoints/MAdAiLab/google_t5/t5_base_amazon/checkpoint-700",
4
+ "epoch": 1.8421052631578947,
5
+ "eval_steps": 50,
6
+ "global_step": 700,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.03,
13
+ "grad_norm": 2.8495495319366455,
14
+ "learning_rate": 0.0004956140350877193,
15
+ "loss": 3.117,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.05,
20
+ "grad_norm": 2.5190072059631348,
21
+ "learning_rate": 0.0004912280701754386,
22
+ "loss": 2.7209,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.08,
27
+ "grad_norm": 2.294928789138794,
28
+ "learning_rate": 0.0004868421052631579,
29
+ "loss": 2.0205,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.11,
34
+ "grad_norm": 3.267091751098633,
35
+ "learning_rate": 0.0004824561403508772,
36
+ "loss": 1.2866,
37
+ "step": 40
38
+ },
39
+ {
40
+ "epoch": 0.13,
41
+ "grad_norm": 4.188276767730713,
42
+ "learning_rate": 0.00047807017543859647,
43
+ "loss": 1.2275,
44
+ "step": 50
45
+ },
46
+ {
47
+ "epoch": 0.13,
48
+ "eval_accuracy": 0.6949934123847167,
49
+ "eval_f1_macro": 0.6073038050199799,
50
+ "eval_f1_micro": 0.6949934123847167,
51
+ "eval_loss": 1.035279631614685,
52
+ "eval_runtime": 3.7814,
53
+ "eval_samples_per_second": 401.442,
54
+ "eval_steps_per_second": 12.694,
55
+ "step": 50
56
+ },
57
+ {
58
+ "epoch": 0.16,
59
+ "grad_norm": 6.240413665771484,
60
+ "learning_rate": 0.00047368421052631577,
61
+ "loss": 1.0172,
62
+ "step": 60
63
+ },
64
+ {
65
+ "epoch": 0.18,
66
+ "grad_norm": 3.5446884632110596,
67
+ "learning_rate": 0.0004692982456140351,
68
+ "loss": 1.0344,
69
+ "step": 70
70
+ },
71
+ {
72
+ "epoch": 0.21,
73
+ "grad_norm": 4.263936519622803,
74
+ "learning_rate": 0.00046491228070175437,
75
+ "loss": 0.8594,
76
+ "step": 80
77
+ },
78
+ {
79
+ "epoch": 0.24,
80
+ "grad_norm": 3.0901923179626465,
81
+ "learning_rate": 0.0004605263157894737,
82
+ "loss": 0.8781,
83
+ "step": 90
84
+ },
85
+ {
86
+ "epoch": 0.26,
87
+ "grad_norm": 2.737272262573242,
88
+ "learning_rate": 0.000456140350877193,
89
+ "loss": 0.8341,
90
+ "step": 100
91
+ },
92
+ {
93
+ "epoch": 0.26,
94
+ "eval_accuracy": 0.738471673254282,
95
+ "eval_f1_macro": 0.6814380216401201,
96
+ "eval_f1_micro": 0.738471673254282,
97
+ "eval_loss": 0.8838083744049072,
98
+ "eval_runtime": 3.8044,
99
+ "eval_samples_per_second": 399.016,
100
+ "eval_steps_per_second": 12.617,
101
+ "step": 100
102
+ },
103
+ {
104
+ "epoch": 0.29,
105
+ "grad_norm": 2.692996025085449,
106
+ "learning_rate": 0.00045175438596491233,
107
+ "loss": 0.9941,
108
+ "step": 110
109
+ },
110
+ {
111
+ "epoch": 0.32,
112
+ "grad_norm": 2.3376717567443848,
113
+ "learning_rate": 0.0004473684210526316,
114
+ "loss": 0.8984,
115
+ "step": 120
116
+ },
117
+ {
118
+ "epoch": 0.34,
119
+ "grad_norm": 2.112525224685669,
120
+ "learning_rate": 0.0004429824561403509,
121
+ "loss": 0.909,
122
+ "step": 130
123
+ },
124
+ {
125
+ "epoch": 0.37,
126
+ "grad_norm": 2.8785059452056885,
127
+ "learning_rate": 0.0004385964912280702,
128
+ "loss": 0.7654,
129
+ "step": 140
130
+ },
131
+ {
132
+ "epoch": 0.39,
133
+ "grad_norm": 2.3538177013397217,
134
+ "learning_rate": 0.0004342105263157895,
135
+ "loss": 0.7773,
136
+ "step": 150
137
+ },
138
+ {
139
+ "epoch": 0.39,
140
+ "eval_accuracy": 0.7832674571805006,
141
+ "eval_f1_macro": 0.7339618917892343,
142
+ "eval_f1_micro": 0.7832674571805006,
143
+ "eval_loss": 0.7473268508911133,
144
+ "eval_runtime": 3.8195,
145
+ "eval_samples_per_second": 397.435,
146
+ "eval_steps_per_second": 12.567,
147
+ "step": 150
148
+ },
149
+ {
150
+ "epoch": 0.42,
151
+ "grad_norm": 2.850787401199341,
152
+ "learning_rate": 0.0004298245614035088,
153
+ "loss": 0.7734,
154
+ "step": 160
155
+ },
156
+ {
157
+ "epoch": 0.45,
158
+ "grad_norm": 3.4294583797454834,
159
+ "learning_rate": 0.0004254385964912281,
160
+ "loss": 0.7876,
161
+ "step": 170
162
+ },
163
+ {
164
+ "epoch": 0.47,
165
+ "grad_norm": 2.0919501781463623,
166
+ "learning_rate": 0.00042105263157894734,
167
+ "loss": 0.7329,
168
+ "step": 180
169
+ },
170
+ {
171
+ "epoch": 0.5,
172
+ "grad_norm": 5.026761531829834,
173
+ "learning_rate": 0.0004166666666666667,
174
+ "loss": 0.8396,
175
+ "step": 190
176
+ },
177
+ {
178
+ "epoch": 0.53,
179
+ "grad_norm": 2.3940157890319824,
180
+ "learning_rate": 0.000412280701754386,
181
+ "loss": 0.7188,
182
+ "step": 200
183
+ },
184
+ {
185
+ "epoch": 0.53,
186
+ "eval_accuracy": 0.7924901185770751,
187
+ "eval_f1_macro": 0.7432732481576452,
188
+ "eval_f1_micro": 0.7924901185770751,
189
+ "eval_loss": 0.7023962140083313,
190
+ "eval_runtime": 3.8263,
191
+ "eval_samples_per_second": 396.73,
192
+ "eval_steps_per_second": 12.545,
193
+ "step": 200
194
+ },
195
+ {
196
+ "epoch": 0.55,
197
+ "grad_norm": 2.965258836746216,
198
+ "learning_rate": 0.00040789473684210524,
199
+ "loss": 0.66,
200
+ "step": 210
201
+ },
202
+ {
203
+ "epoch": 0.58,
204
+ "grad_norm": 3.2059409618377686,
205
+ "learning_rate": 0.00040350877192982455,
206
+ "loss": 0.7795,
207
+ "step": 220
208
+ },
209
+ {
210
+ "epoch": 0.61,
211
+ "grad_norm": 4.9381537437438965,
212
+ "learning_rate": 0.0003991228070175439,
213
+ "loss": 0.7163,
214
+ "step": 230
215
+ },
216
+ {
217
+ "epoch": 0.63,
218
+ "grad_norm": 4.937159061431885,
219
+ "learning_rate": 0.00039473684210526315,
220
+ "loss": 0.7546,
221
+ "step": 240
222
+ },
223
+ {
224
+ "epoch": 0.66,
225
+ "grad_norm": 2.786454439163208,
226
+ "learning_rate": 0.00039035087719298245,
227
+ "loss": 0.7483,
228
+ "step": 250
229
+ },
230
+ {
231
+ "epoch": 0.66,
232
+ "eval_accuracy": 0.7872200263504612,
233
+ "eval_f1_macro": 0.7396308270254102,
234
+ "eval_f1_micro": 0.7872200263504612,
235
+ "eval_loss": 0.7055577039718628,
236
+ "eval_runtime": 3.8209,
237
+ "eval_samples_per_second": 397.29,
238
+ "eval_steps_per_second": 12.563,
239
+ "step": 250
240
+ },
241
+ {
242
+ "epoch": 0.68,
243
+ "grad_norm": 2.2485697269439697,
244
+ "learning_rate": 0.00038596491228070175,
245
+ "loss": 0.7216,
246
+ "step": 260
247
+ },
248
+ {
249
+ "epoch": 0.71,
250
+ "grad_norm": 3.388631582260132,
251
+ "learning_rate": 0.00038157894736842105,
252
+ "loss": 0.7437,
253
+ "step": 270
254
+ },
255
+ {
256
+ "epoch": 0.74,
257
+ "grad_norm": 2.945599317550659,
258
+ "learning_rate": 0.00037719298245614036,
259
+ "loss": 0.6433,
260
+ "step": 280
261
+ },
262
+ {
263
+ "epoch": 0.76,
264
+ "grad_norm": 2.894361972808838,
265
+ "learning_rate": 0.00037280701754385966,
266
+ "loss": 0.6333,
267
+ "step": 290
268
+ },
269
+ {
270
+ "epoch": 0.79,
271
+ "grad_norm": 1.8753783702850342,
272
+ "learning_rate": 0.00036842105263157896,
273
+ "loss": 0.6228,
274
+ "step": 300
275
+ },
276
+ {
277
+ "epoch": 0.79,
278
+ "eval_accuracy": 0.8129117259552042,
279
+ "eval_f1_macro": 0.7636250391666534,
280
+ "eval_f1_micro": 0.8129117259552042,
281
+ "eval_loss": 0.6337724924087524,
282
+ "eval_runtime": 3.8242,
283
+ "eval_samples_per_second": 396.945,
284
+ "eval_steps_per_second": 12.552,
285
+ "step": 300
286
+ },
287
+ {
288
+ "epoch": 0.82,
289
+ "grad_norm": 3.340951442718506,
290
+ "learning_rate": 0.00036403508771929826,
291
+ "loss": 0.5978,
292
+ "step": 310
293
+ },
294
+ {
295
+ "epoch": 0.84,
296
+ "grad_norm": 3.2675557136535645,
297
+ "learning_rate": 0.00035964912280701756,
298
+ "loss": 0.7124,
299
+ "step": 320
300
+ },
301
+ {
302
+ "epoch": 0.87,
303
+ "grad_norm": 2.308924674987793,
304
+ "learning_rate": 0.00035526315789473687,
305
+ "loss": 0.7423,
306
+ "step": 330
307
+ },
308
+ {
309
+ "epoch": 0.89,
310
+ "grad_norm": 3.407076835632324,
311
+ "learning_rate": 0.0003508771929824561,
312
+ "loss": 0.6787,
313
+ "step": 340
314
+ },
315
+ {
316
+ "epoch": 0.92,
317
+ "grad_norm": 2.5976576805114746,
318
+ "learning_rate": 0.00034649122807017547,
319
+ "loss": 0.7089,
320
+ "step": 350
321
+ },
322
+ {
323
+ "epoch": 0.92,
324
+ "eval_accuracy": 0.8208168642951251,
325
+ "eval_f1_macro": 0.7962943499701793,
326
+ "eval_f1_micro": 0.8208168642951251,
327
+ "eval_loss": 0.6130083799362183,
328
+ "eval_runtime": 3.8233,
329
+ "eval_samples_per_second": 397.037,
330
+ "eval_steps_per_second": 12.555,
331
+ "step": 350
332
+ },
333
+ {
334
+ "epoch": 0.95,
335
+ "grad_norm": 2.053323745727539,
336
+ "learning_rate": 0.00034210526315789477,
337
+ "loss": 0.5299,
338
+ "step": 360
339
+ },
340
+ {
341
+ "epoch": 0.97,
342
+ "grad_norm": 3.201794147491455,
343
+ "learning_rate": 0.000337719298245614,
344
+ "loss": 0.7405,
345
+ "step": 370
346
+ },
347
+ {
348
+ "epoch": 1.0,
349
+ "grad_norm": 2.8295910358428955,
350
+ "learning_rate": 0.0003333333333333333,
351
+ "loss": 0.7584,
352
+ "step": 380
353
+ },
354
+ {
355
+ "epoch": 1.03,
356
+ "grad_norm": 1.9317399263381958,
357
+ "learning_rate": 0.0003289473684210527,
358
+ "loss": 0.4822,
359
+ "step": 390
360
+ },
361
+ {
362
+ "epoch": 1.05,
363
+ "grad_norm": 2.5840656757354736,
364
+ "learning_rate": 0.0003245614035087719,
365
+ "loss": 0.5055,
366
+ "step": 400
367
+ },
368
+ {
369
+ "epoch": 1.05,
370
+ "eval_accuracy": 0.8300395256916996,
371
+ "eval_f1_macro": 0.8075388232802375,
372
+ "eval_f1_micro": 0.8300395256916996,
373
+ "eval_loss": 0.5938563346862793,
374
+ "eval_runtime": 3.8245,
375
+ "eval_samples_per_second": 396.912,
376
+ "eval_steps_per_second": 12.551,
377
+ "step": 400
378
+ },
379
+ {
380
+ "epoch": 1.08,
381
+ "grad_norm": 2.4889354705810547,
382
+ "learning_rate": 0.00032017543859649123,
383
+ "loss": 0.425,
384
+ "step": 410
385
+ },
386
+ {
387
+ "epoch": 1.11,
388
+ "grad_norm": 2.618088722229004,
389
+ "learning_rate": 0.00031578947368421053,
390
+ "loss": 0.3865,
391
+ "step": 420
392
+ },
393
+ {
394
+ "epoch": 1.13,
395
+ "grad_norm": 2.8282623291015625,
396
+ "learning_rate": 0.00031140350877192983,
397
+ "loss": 0.4543,
398
+ "step": 430
399
+ },
400
+ {
401
+ "epoch": 1.16,
402
+ "grad_norm": 3.4801692962646484,
403
+ "learning_rate": 0.00030701754385964913,
404
+ "loss": 0.4152,
405
+ "step": 440
406
+ },
407
+ {
408
+ "epoch": 1.18,
409
+ "grad_norm": 1.7531658411026,
410
+ "learning_rate": 0.00030263157894736844,
411
+ "loss": 0.3942,
412
+ "step": 450
413
+ },
414
+ {
415
+ "epoch": 1.18,
416
+ "eval_accuracy": 0.8241106719367589,
417
+ "eval_f1_macro": 0.7915647113122625,
418
+ "eval_f1_micro": 0.8241106719367589,
419
+ "eval_loss": 0.6020949482917786,
420
+ "eval_runtime": 3.8282,
421
+ "eval_samples_per_second": 396.536,
422
+ "eval_steps_per_second": 12.539,
423
+ "step": 450
424
+ },
425
+ {
426
+ "epoch": 1.21,
427
+ "grad_norm": 2.6115176677703857,
428
+ "learning_rate": 0.0002982456140350877,
429
+ "loss": 0.4801,
430
+ "step": 460
431
+ },
432
+ {
433
+ "epoch": 1.24,
434
+ "grad_norm": 2.1855995655059814,
435
+ "learning_rate": 0.00029385964912280704,
436
+ "loss": 0.3942,
437
+ "step": 470
438
+ },
439
+ {
440
+ "epoch": 1.26,
441
+ "grad_norm": 2.3812382221221924,
442
+ "learning_rate": 0.00028947368421052634,
443
+ "loss": 0.485,
444
+ "step": 480
445
+ },
446
+ {
447
+ "epoch": 1.29,
448
+ "grad_norm": 2.102308750152588,
449
+ "learning_rate": 0.00028508771929824564,
450
+ "loss": 0.417,
451
+ "step": 490
452
+ },
453
+ {
454
+ "epoch": 1.32,
455
+ "grad_norm": 4.095526218414307,
456
+ "learning_rate": 0.0002807017543859649,
457
+ "loss": 0.4248,
458
+ "step": 500
459
+ },
460
+ {
461
+ "epoch": 1.32,
462
+ "eval_accuracy": 0.8300395256916996,
463
+ "eval_f1_macro": 0.8060251760691185,
464
+ "eval_f1_micro": 0.8300395256916996,
465
+ "eval_loss": 0.5956056714057922,
466
+ "eval_runtime": 3.8218,
467
+ "eval_samples_per_second": 397.192,
468
+ "eval_steps_per_second": 12.559,
469
+ "step": 500
470
+ },
471
+ {
472
+ "epoch": 1.34,
473
+ "grad_norm": 3.2039239406585693,
474
+ "learning_rate": 0.00027631578947368425,
475
+ "loss": 0.4209,
476
+ "step": 510
477
+ },
478
+ {
479
+ "epoch": 1.37,
480
+ "grad_norm": 1.9944714307785034,
481
+ "learning_rate": 0.00027192982456140355,
482
+ "loss": 0.5373,
483
+ "step": 520
484
+ },
485
+ {
486
+ "epoch": 1.39,
487
+ "grad_norm": 3.2802696228027344,
488
+ "learning_rate": 0.0002675438596491228,
489
+ "loss": 0.5216,
490
+ "step": 530
491
+ },
492
+ {
493
+ "epoch": 1.42,
494
+ "grad_norm": 1.4747893810272217,
495
+ "learning_rate": 0.0002631578947368421,
496
+ "loss": 0.3785,
497
+ "step": 540
498
+ },
499
+ {
500
+ "epoch": 1.45,
501
+ "grad_norm": 1.2378747463226318,
502
+ "learning_rate": 0.00025877192982456146,
503
+ "loss": 0.3595,
504
+ "step": 550
505
+ },
506
+ {
507
+ "epoch": 1.45,
508
+ "eval_accuracy": 0.8175230566534915,
509
+ "eval_f1_macro": 0.7897278945523275,
510
+ "eval_f1_micro": 0.8175230566534915,
511
+ "eval_loss": 0.6172593832015991,
512
+ "eval_runtime": 3.8184,
513
+ "eval_samples_per_second": 397.551,
514
+ "eval_steps_per_second": 12.571,
515
+ "step": 550
516
+ },
517
+ {
518
+ "epoch": 1.47,
519
+ "grad_norm": 2.6604156494140625,
520
+ "learning_rate": 0.0002543859649122807,
521
+ "loss": 0.4231,
522
+ "step": 560
523
+ },
524
+ {
525
+ "epoch": 1.5,
526
+ "grad_norm": 1.879952311515808,
527
+ "learning_rate": 0.00025,
528
+ "loss": 0.4555,
529
+ "step": 570
530
+ },
531
+ {
532
+ "epoch": 1.53,
533
+ "grad_norm": 3.9825170040130615,
534
+ "learning_rate": 0.0002456140350877193,
535
+ "loss": 0.3988,
536
+ "step": 580
537
+ },
538
+ {
539
+ "epoch": 1.55,
540
+ "grad_norm": 2.999025583267212,
541
+ "learning_rate": 0.0002412280701754386,
542
+ "loss": 0.3854,
543
+ "step": 590
544
+ },
545
+ {
546
+ "epoch": 1.58,
547
+ "grad_norm": 2.778930425643921,
548
+ "learning_rate": 0.00023684210526315788,
549
+ "loss": 0.5263,
550
+ "step": 600
551
+ },
552
+ {
553
+ "epoch": 1.58,
554
+ "eval_accuracy": 0.8162055335968379,
555
+ "eval_f1_macro": 0.7908253969964322,
556
+ "eval_f1_micro": 0.8162055335968379,
557
+ "eval_loss": 0.6170048117637634,
558
+ "eval_runtime": 3.838,
559
+ "eval_samples_per_second": 395.517,
560
+ "eval_steps_per_second": 12.506,
561
+ "step": 600
562
+ },
563
+ {
564
+ "epoch": 1.61,
565
+ "grad_norm": 2.2400004863739014,
566
+ "learning_rate": 0.00023245614035087719,
567
+ "loss": 0.443,
568
+ "step": 610
569
+ },
570
+ {
571
+ "epoch": 1.63,
572
+ "grad_norm": 2.5458765029907227,
573
+ "learning_rate": 0.0002280701754385965,
574
+ "loss": 0.4106,
575
+ "step": 620
576
+ },
577
+ {
578
+ "epoch": 1.66,
579
+ "grad_norm": 2.955345392227173,
580
+ "learning_rate": 0.0002236842105263158,
581
+ "loss": 0.4078,
582
+ "step": 630
583
+ },
584
+ {
585
+ "epoch": 1.68,
586
+ "grad_norm": 3.5653369426727295,
587
+ "learning_rate": 0.0002192982456140351,
588
+ "loss": 0.4746,
589
+ "step": 640
590
+ },
591
+ {
592
+ "epoch": 1.71,
593
+ "grad_norm": 1.5618356466293335,
594
+ "learning_rate": 0.0002149122807017544,
595
+ "loss": 0.5153,
596
+ "step": 650
597
+ },
598
+ {
599
+ "epoch": 1.71,
600
+ "eval_accuracy": 0.8326745718050066,
601
+ "eval_f1_macro": 0.8042732309505177,
602
+ "eval_f1_micro": 0.8326745718050066,
603
+ "eval_loss": 0.6007006168365479,
604
+ "eval_runtime": 3.8189,
605
+ "eval_samples_per_second": 397.495,
606
+ "eval_steps_per_second": 12.569,
607
+ "step": 650
608
+ },
609
+ {
610
+ "epoch": 1.74,
611
+ "grad_norm": 3.3417813777923584,
612
+ "learning_rate": 0.00021052631578947367,
613
+ "loss": 0.3366,
614
+ "step": 660
615
+ },
616
+ {
617
+ "epoch": 1.76,
618
+ "grad_norm": 3.232940912246704,
619
+ "learning_rate": 0.000206140350877193,
620
+ "loss": 0.4284,
621
+ "step": 670
622
+ },
623
+ {
624
+ "epoch": 1.79,
625
+ "grad_norm": 2.414170265197754,
626
+ "learning_rate": 0.00020175438596491227,
627
+ "loss": 0.4816,
628
+ "step": 680
629
+ },
630
+ {
631
+ "epoch": 1.82,
632
+ "grad_norm": 2.161409378051758,
633
+ "learning_rate": 0.00019736842105263157,
634
+ "loss": 0.398,
635
+ "step": 690
636
+ },
637
+ {
638
+ "epoch": 1.84,
639
+ "grad_norm": 2.6579811573028564,
640
+ "learning_rate": 0.00019298245614035088,
641
+ "loss": 0.4237,
642
+ "step": 700
643
+ },
644
+ {
645
+ "epoch": 1.84,
646
+ "eval_accuracy": 0.8399209486166008,
647
+ "eval_f1_macro": 0.8112898753723374,
648
+ "eval_f1_micro": 0.8399209486166008,
649
+ "eval_loss": 0.5565423369407654,
650
+ "eval_runtime": 3.8246,
651
+ "eval_samples_per_second": 396.904,
652
+ "eval_steps_per_second": 12.55,
653
+ "step": 700
654
+ }
655
+ ],
656
+ "logging_steps": 10,
657
+ "max_steps": 1140,
658
+ "num_input_tokens_seen": 0,
659
+ "num_train_epochs": 3,
660
+ "save_steps": 50,
661
+ "total_flos": 3420628430356480.0,
662
+ "train_batch_size": 16,
663
+ "trial_name": null,
664
+ "trial_params": null
665
+ }
google_t5/t5_base_amazon/checkpoint-700/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9058d2c55809c08dfe5d631fdecbc63ece4067a6cea137e1478fddc6e29958ed
+ size 5048
google_t5/t5_base_amazon/config.json ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google-t5/t5-base",
3
+ "architectures": [
4
+ "T5ForSequenceClassification"
5
+ ],
6
+ "classifier_dropout": 0.0,
7
+ "d_ff": 3072,
8
+ "d_kv": 64,
9
+ "d_model": 768,
10
+ "decoder_start_token_id": 0,
11
+ "dense_act_fn": "relu",
12
+ "dropout_rate": 0.1,
13
+ "eos_token_id": 1,
14
+ "feed_forward_proj": "relu",
15
+ "finetuning_task": "text-classification",
16
+ "id2label": {
17
+ "0": "0",
18
+ "1": "1",
19
+ "2": "10",
20
+ "3": "11",
21
+ "4": "12",
22
+ "5": "13",
23
+ "6": "14",
24
+ "7": "15",
25
+ "8": "16",
26
+ "9": "17",
27
+ "10": "18",
28
+ "11": "19",
29
+ "12": "2",
30
+ "13": "20",
31
+ "14": "21",
32
+ "15": "22",
33
+ "16": "3",
34
+ "17": "4",
35
+ "18": "5",
36
+ "19": "6",
37
+ "20": "7",
38
+ "21": "8",
39
+ "22": "9"
40
+ },
41
+ "initializer_factor": 1.0,
42
+ "is_encoder_decoder": true,
43
+ "is_gated_act": false,
44
+ "label2id": {
45
+ "0": 0,
46
+ "1": 1,
47
+ "10": 2,
48
+ "11": 3,
49
+ "12": 4,
50
+ "13": 5,
51
+ "14": 6,
52
+ "15": 7,
53
+ "16": 8,
54
+ "17": 9,
55
+ "18": 10,
56
+ "19": 11,
57
+ "2": 12,
58
+ "20": 13,
59
+ "21": 14,
60
+ "22": 15,
61
+ "3": 16,
62
+ "4": 17,
63
+ "5": 18,
64
+ "6": 19,
65
+ "7": 20,
66
+ "8": 21,
67
+ "9": 22
68
+ },
69
+ "layer_norm_epsilon": 1e-06,
70
+ "model_type": "t5",
71
+ "n_positions": 512,
72
+ "num_decoder_layers": 12,
73
+ "num_heads": 12,
74
+ "num_layers": 12,
75
+ "output_past": true,
76
+ "pad_token_id": 0,
77
+ "problem_type": "single_label_classification",
78
+ "relative_attention_max_distance": 128,
79
+ "relative_attention_num_buckets": 32,
80
+ "task_specific_params": {
81
+ "summarization": {
82
+ "early_stopping": true,
83
+ "length_penalty": 2.0,
84
+ "max_length": 200,
85
+ "min_length": 30,
86
+ "no_repeat_ngram_size": 3,
87
+ "num_beams": 4,
88
+ "prefix": "summarize: "
89
+ },
90
+ "translation_en_to_de": {
91
+ "early_stopping": true,
92
+ "max_length": 300,
93
+ "num_beams": 4,
94
+ "prefix": "translate English to German: "
95
+ },
96
+ "translation_en_to_fr": {
97
+ "early_stopping": true,
98
+ "max_length": 300,
99
+ "num_beams": 4,
100
+ "prefix": "translate English to French: "
101
+ },
102
+ "translation_en_to_ro": {
103
+ "early_stopping": true,
104
+ "max_length": 300,
105
+ "num_beams": 4,
106
+ "prefix": "translate English to Romanian: "
107
+ }
108
+ },
109
+ "torch_dtype": "float32",
110
+ "transformers_version": "4.39.0.dev0",
111
+ "use_cache": true,
112
+ "vocab_size": 32128
113
+ }
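The config above wires `google-t5/t5-base` up as a `T5ForSequenceClassification` head with 23 classes and a string `id2label` map. A minimal sketch of loading such a checkpoint and decoding a prediction follows; the local path and the example sentence are assumptions for illustration, not part of the commit.

```python
# Sketch only: load the fine-tuned classifier committed in this folder and map the
# argmax logit back through config.json's id2label. Path and input text are assumed.
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

model_dir = "google_t5/t5_base_amazon"  # assumed local checkpoint directory
tokenizer = AutoTokenizer.from_pretrained(model_dir)
model = AutoModelForSequenceClassification.from_pretrained(model_dir)  # resolves to T5ForSequenceClassification
model.eval()

inputs = tokenizer(
    "This blender stopped working after two weeks.",
    truncation=True, max_length=512, return_tensors="pt",
)
with torch.no_grad():
    logits = model(**inputs).logits        # shape (1, 23), one score per class
pred_id = int(logits.argmax(dim=-1))
print(model.config.id2label[pred_id])      # one of the label names "0".."22"
```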
google_t5/t5_base_amazon/eval_results.json ADDED
@@ -0,0 +1,11 @@
+ {
+ "epoch": 3.0,
+ "eval_accuracy": 0.8399209486166008,
+ "eval_f1_macro": 0.8112898753723374,
+ "eval_f1_micro": 0.8399209486166008,
+ "eval_loss": 0.5565423369407654,
+ "eval_runtime": 3.8107,
+ "eval_samples": 1518,
+ "eval_samples_per_second": 398.35,
+ "eval_steps_per_second": 12.596
+ }
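eval_results.json reports accuracy together with macro and micro F1. The exact `compute_metrics` function used by this run is not part of the commit, so the sketch below only illustrates how those three numbers are conventionally computed (here with scikit-learn on toy labels); micro F1 equals accuracy for single-label classification, which is why `eval_f1_micro` matches `eval_accuracy` above.

```python
# Illustration of the metric definitions behind eval_accuracy / eval_f1_macro /
# eval_f1_micro; the labels here are toy values, not data from this run.
from sklearn.metrics import accuracy_score, f1_score

y_true = [0, 2, 1, 2, 0]   # hypothetical gold class ids
y_pred = [0, 2, 1, 1, 0]   # hypothetical predicted class ids

print(accuracy_score(y_true, y_pred))              # exact-match rate
print(f1_score(y_true, y_pred, average="macro"))   # unweighted mean of per-class F1
print(f1_score(y_true, y_pred, average="micro"))   # global F1; equals accuracy for single-label tasks
```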
google_t5/t5_base_amazon/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:793ee165edf28806fd5d823c7f8559e2764d24d6a360fd01fcaab3c3b51a43b7
+ size 894081308
google_t5/t5_base_amazon/run.log ADDED
@@ -0,0 +1,4 @@
+ 03/15/2024 11:09:53 - WARNING - __main__ - Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: True, 16-bits training: False
+ 03/15/2024 11:09:53 - WARNING - __main__ - Process rank: 1, device: cuda:1, n_gpu: 1, distributed training: True, 16-bits training: False
+ 03/15/2024 11:09:56 - WARNING - __main__ - The label2id key in the model config.json is not equal to the label2id key of this run. You can ignore this if you are doing finetuning.
+ 03/15/2024 11:09:56 - WARNING - __main__ - The label2id key in the model config.json is not equal to the label2id key of this run. You can ignore this if you are doing finetuning.
google_t5/t5_base_amazon/special_tokens_map.json ADDED
@@ -0,0 +1,107 @@
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>",
99
+ "<extra_id_96>",
100
+ "<extra_id_97>",
101
+ "<extra_id_98>",
102
+ "<extra_id_99>"
103
+ ],
104
+ "eos_token": "</s>",
105
+ "pad_token": "<pad>",
106
+ "unk_token": "<unk>"
107
+ }
google_t5/t5_base_amazon/spiece.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
+ size 791656
google_t5/t5_base_amazon/test_results.json ADDED
@@ -0,0 +1,10 @@
+ {
+ "epoch": 3.0,
+ "test_accuracy": 0.857707509881423,
+ "test_f1_macro": 0.8293055942786999,
+ "test_f1_micro": 0.857707509881423,
+ "test_loss": 0.5306673645973206,
+ "test_runtime": 3.7633,
+ "test_samples_per_second": 403.37,
+ "test_steps_per_second": 12.755
+ }
google_t5/t5_base_amazon/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
google_t5/t5_base_amazon/tokenizer_config.json ADDED
@@ -0,0 +1,937 @@
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<pad>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "</s>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "<unk>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "32000": {
28
+ "content": "<extra_id_99>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "32001": {
36
+ "content": "<extra_id_98>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "32002": {
44
+ "content": "<extra_id_97>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "32003": {
52
+ "content": "<extra_id_96>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "32004": {
60
+ "content": "<extra_id_95>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "32005": {
68
+ "content": "<extra_id_94>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "32006": {
76
+ "content": "<extra_id_93>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "32007": {
84
+ "content": "<extra_id_92>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "32008": {
92
+ "content": "<extra_id_91>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "32009": {
100
+ "content": "<extra_id_90>",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "32010": {
108
+ "content": "<extra_id_89>",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "32011": {
116
+ "content": "<extra_id_88>",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "32012": {
124
+ "content": "<extra_id_87>",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "32013": {
132
+ "content": "<extra_id_86>",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": true
138
+ },
139
+ "32014": {
140
+ "content": "<extra_id_85>",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": true
146
+ },
147
+ "32015": {
148
+ "content": "<extra_id_84>",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": true
154
+ },
155
+ "32016": {
156
+ "content": "<extra_id_83>",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "32017": {
164
+ "content": "<extra_id_82>",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": true
170
+ },
171
+ "32018": {
172
+ "content": "<extra_id_81>",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": true
178
+ },
179
+ "32019": {
180
+ "content": "<extra_id_80>",
181
+ "lstrip": false,
182
+ "normalized": false,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": true
186
+ },
187
+ "32020": {
188
+ "content": "<extra_id_79>",
189
+ "lstrip": false,
190
+ "normalized": false,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": true
194
+ },
195
+ "32021": {
196
+ "content": "<extra_id_78>",
197
+ "lstrip": false,
198
+ "normalized": false,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": true
202
+ },
203
+ "32022": {
204
+ "content": "<extra_id_77>",
205
+ "lstrip": false,
206
+ "normalized": false,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": true
210
+ },
211
+ "32023": {
212
+ "content": "<extra_id_76>",
213
+ "lstrip": false,
214
+ "normalized": false,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": true
218
+ },
219
+ "32024": {
220
+ "content": "<extra_id_75>",
221
+ "lstrip": false,
222
+ "normalized": false,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": true
226
+ },
227
+ "32025": {
228
+ "content": "<extra_id_74>",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": true
234
+ },
235
+ "32026": {
236
+ "content": "<extra_id_73>",
237
+ "lstrip": false,
238
+ "normalized": false,
239
+ "rstrip": false,
240
+ "single_word": false,
241
+ "special": true
242
+ },
243
+ "32027": {
244
+ "content": "<extra_id_72>",
245
+ "lstrip": false,
246
+ "normalized": false,
247
+ "rstrip": false,
248
+ "single_word": false,
249
+ "special": true
250
+ },
251
+ "32028": {
252
+ "content": "<extra_id_71>",
253
+ "lstrip": false,
254
+ "normalized": false,
255
+ "rstrip": false,
256
+ "single_word": false,
257
+ "special": true
258
+ },
259
+ "32029": {
260
+ "content": "<extra_id_70>",
261
+ "lstrip": false,
262
+ "normalized": false,
263
+ "rstrip": false,
264
+ "single_word": false,
265
+ "special": true
266
+ },
267
+ "32030": {
268
+ "content": "<extra_id_69>",
269
+ "lstrip": false,
270
+ "normalized": false,
271
+ "rstrip": false,
272
+ "single_word": false,
273
+ "special": true
274
+ },
275
+ "32031": {
276
+ "content": "<extra_id_68>",
277
+ "lstrip": false,
278
+ "normalized": false,
279
+ "rstrip": false,
280
+ "single_word": false,
281
+ "special": true
282
+ },
283
+ "32032": {
284
+ "content": "<extra_id_67>",
285
+ "lstrip": false,
286
+ "normalized": false,
287
+ "rstrip": false,
288
+ "single_word": false,
289
+ "special": true
290
+ },
291
+ "32033": {
292
+ "content": "<extra_id_66>",
293
+ "lstrip": false,
294
+ "normalized": false,
295
+ "rstrip": false,
296
+ "single_word": false,
297
+ "special": true
298
+ },
299
+ "32034": {
300
+ "content": "<extra_id_65>",
301
+ "lstrip": false,
302
+ "normalized": false,
303
+ "rstrip": false,
304
+ "single_word": false,
305
+ "special": true
306
+ },
307
+ "32035": {
308
+ "content": "<extra_id_64>",
309
+ "lstrip": false,
310
+ "normalized": false,
311
+ "rstrip": false,
312
+ "single_word": false,
313
+ "special": true
314
+ },
315
+ "32036": {
316
+ "content": "<extra_id_63>",
317
+ "lstrip": false,
318
+ "normalized": false,
319
+ "rstrip": false,
320
+ "single_word": false,
321
+ "special": true
322
+ },
323
+ "32037": {
324
+ "content": "<extra_id_62>",
325
+ "lstrip": false,
326
+ "normalized": false,
327
+ "rstrip": false,
328
+ "single_word": false,
329
+ "special": true
330
+ },
331
+ "32038": {
332
+ "content": "<extra_id_61>",
333
+ "lstrip": false,
334
+ "normalized": false,
335
+ "rstrip": false,
336
+ "single_word": false,
337
+ "special": true
338
+ },
339
+ "32039": {
340
+ "content": "<extra_id_60>",
341
+ "lstrip": false,
342
+ "normalized": false,
343
+ "rstrip": false,
344
+ "single_word": false,
345
+ "special": true
346
+ },
347
+ "32040": {
348
+ "content": "<extra_id_59>",
349
+ "lstrip": false,
350
+ "normalized": false,
351
+ "rstrip": false,
352
+ "single_word": false,
353
+ "special": true
354
+ },
355
+ "32041": {
356
+ "content": "<extra_id_58>",
357
+ "lstrip": false,
358
+ "normalized": false,
359
+ "rstrip": false,
360
+ "single_word": false,
361
+ "special": true
362
+ },
363
+ "32042": {
364
+ "content": "<extra_id_57>",
365
+ "lstrip": false,
366
+ "normalized": false,
367
+ "rstrip": false,
368
+ "single_word": false,
369
+ "special": true
370
+ },
371
+ "32043": {
372
+ "content": "<extra_id_56>",
373
+ "lstrip": false,
374
+ "normalized": false,
375
+ "rstrip": false,
376
+ "single_word": false,
377
+ "special": true
378
+ },
379
+ "32044": {
380
+ "content": "<extra_id_55>",
381
+ "lstrip": false,
382
+ "normalized": false,
383
+ "rstrip": false,
384
+ "single_word": false,
385
+ "special": true
386
+ },
387
+ "32045": {
388
+ "content": "<extra_id_54>",
389
+ "lstrip": false,
390
+ "normalized": false,
391
+ "rstrip": false,
392
+ "single_word": false,
393
+ "special": true
394
+ },
395
+ "32046": {
396
+ "content": "<extra_id_53>",
397
+ "lstrip": false,
398
+ "normalized": false,
399
+ "rstrip": false,
400
+ "single_word": false,
401
+ "special": true
402
+ },
403
+ "32047": {
404
+ "content": "<extra_id_52>",
405
+ "lstrip": false,
406
+ "normalized": false,
407
+ "rstrip": false,
408
+ "single_word": false,
409
+ "special": true
410
+ },
411
+ "32048": {
412
+ "content": "<extra_id_51>",
413
+ "lstrip": false,
414
+ "normalized": false,
415
+ "rstrip": false,
416
+ "single_word": false,
417
+ "special": true
418
+ },
419
+ "32049": {
420
+ "content": "<extra_id_50>",
421
+ "lstrip": false,
422
+ "normalized": false,
423
+ "rstrip": false,
424
+ "single_word": false,
425
+ "special": true
426
+ },
427
+ "32050": {
428
+ "content": "<extra_id_49>",
429
+ "lstrip": false,
430
+ "normalized": false,
431
+ "rstrip": false,
432
+ "single_word": false,
433
+ "special": true
434
+ },
435
+ "32051": {
436
+ "content": "<extra_id_48>",
437
+ "lstrip": false,
438
+ "normalized": false,
439
+ "rstrip": false,
440
+ "single_word": false,
441
+ "special": true
442
+ },
443
+ "32052": {
444
+ "content": "<extra_id_47>",
445
+ "lstrip": false,
446
+ "normalized": false,
447
+ "rstrip": false,
448
+ "single_word": false,
449
+ "special": true
450
+ },
451
+ "32053": {
452
+ "content": "<extra_id_46>",
453
+ "lstrip": false,
454
+ "normalized": false,
455
+ "rstrip": false,
456
+ "single_word": false,
457
+ "special": true
458
+ },
459
+ "32054": {
460
+ "content": "<extra_id_45>",
461
+ "lstrip": false,
462
+ "normalized": false,
463
+ "rstrip": false,
464
+ "single_word": false,
465
+ "special": true
466
+ },
467
+ "32055": {
468
+ "content": "<extra_id_44>",
469
+ "lstrip": false,
470
+ "normalized": false,
471
+ "rstrip": false,
472
+ "single_word": false,
473
+ "special": true
474
+ },
475
+ "32056": {
476
+ "content": "<extra_id_43>",
477
+ "lstrip": false,
478
+ "normalized": false,
479
+ "rstrip": false,
480
+ "single_word": false,
481
+ "special": true
482
+ },
483
+ "32057": {
484
+ "content": "<extra_id_42>",
485
+ "lstrip": false,
486
+ "normalized": false,
487
+ "rstrip": false,
488
+ "single_word": false,
489
+ "special": true
490
+ },
491
+ "32058": {
492
+ "content": "<extra_id_41>",
493
+ "lstrip": false,
494
+ "normalized": false,
495
+ "rstrip": false,
496
+ "single_word": false,
497
+ "special": true
498
+ },
499
+ "32059": {
500
+ "content": "<extra_id_40>",
501
+ "lstrip": false,
502
+ "normalized": false,
503
+ "rstrip": false,
504
+ "single_word": false,
505
+ "special": true
506
+ },
507
+ "32060": {
508
+ "content": "<extra_id_39>",
509
+ "lstrip": false,
510
+ "normalized": false,
511
+ "rstrip": false,
512
+ "single_word": false,
513
+ "special": true
514
+ },
515
+ "32061": {
516
+ "content": "<extra_id_38>",
517
+ "lstrip": false,
518
+ "normalized": false,
519
+ "rstrip": false,
520
+ "single_word": false,
521
+ "special": true
522
+ },
523
+ "32062": {
524
+ "content": "<extra_id_37>",
525
+ "lstrip": false,
526
+ "normalized": false,
527
+ "rstrip": false,
528
+ "single_word": false,
529
+ "special": true
530
+ },
531
+ "32063": {
532
+ "content": "<extra_id_36>",
533
+ "lstrip": false,
534
+ "normalized": false,
535
+ "rstrip": false,
536
+ "single_word": false,
537
+ "special": true
538
+ },
539
+ "32064": {
540
+ "content": "<extra_id_35>",
541
+ "lstrip": false,
542
+ "normalized": false,
543
+ "rstrip": false,
544
+ "single_word": false,
545
+ "special": true
546
+ },
547
+ "32065": {
548
+ "content": "<extra_id_34>",
549
+ "lstrip": false,
550
+ "normalized": false,
551
+ "rstrip": false,
552
+ "single_word": false,
553
+ "special": true
554
+ },
555
+ "32066": {
556
+ "content": "<extra_id_33>",
557
+ "lstrip": false,
558
+ "normalized": false,
559
+ "rstrip": false,
560
+ "single_word": false,
561
+ "special": true
562
+ },
563
+ "32067": {
564
+ "content": "<extra_id_32>",
565
+ "lstrip": false,
566
+ "normalized": false,
567
+ "rstrip": false,
568
+ "single_word": false,
569
+ "special": true
570
+ },
571
+ "32068": {
572
+ "content": "<extra_id_31>",
573
+ "lstrip": false,
574
+ "normalized": false,
575
+ "rstrip": false,
576
+ "single_word": false,
577
+ "special": true
578
+ },
579
+ "32069": {
580
+ "content": "<extra_id_30>",
581
+ "lstrip": false,
582
+ "normalized": false,
583
+ "rstrip": false,
584
+ "single_word": false,
585
+ "special": true
586
+ },
587
+ "32070": {
588
+ "content": "<extra_id_29>",
589
+ "lstrip": false,
590
+ "normalized": false,
591
+ "rstrip": false,
592
+ "single_word": false,
593
+ "special": true
594
+ },
595
+ "32071": {
596
+ "content": "<extra_id_28>",
597
+ "lstrip": false,
598
+ "normalized": false,
599
+ "rstrip": false,
600
+ "single_word": false,
601
+ "special": true
602
+ },
603
+ "32072": {
604
+ "content": "<extra_id_27>",
605
+ "lstrip": false,
606
+ "normalized": false,
607
+ "rstrip": false,
608
+ "single_word": false,
609
+ "special": true
610
+ },
611
+ "32073": {
612
+ "content": "<extra_id_26>",
613
+ "lstrip": false,
614
+ "normalized": false,
615
+ "rstrip": false,
616
+ "single_word": false,
617
+ "special": true
618
+ },
619
+ "32074": {
620
+ "content": "<extra_id_25>",
621
+ "lstrip": false,
622
+ "normalized": false,
623
+ "rstrip": false,
624
+ "single_word": false,
625
+ "special": true
626
+ },
627
+ "32075": {
628
+ "content": "<extra_id_24>",
629
+ "lstrip": false,
630
+ "normalized": false,
631
+ "rstrip": false,
632
+ "single_word": false,
633
+ "special": true
634
+ },
635
+ "32076": {
636
+ "content": "<extra_id_23>",
637
+ "lstrip": false,
638
+ "normalized": false,
639
+ "rstrip": false,
640
+ "single_word": false,
641
+ "special": true
642
+ },
643
+ "32077": {
644
+ "content": "<extra_id_22>",
645
+ "lstrip": false,
646
+ "normalized": false,
647
+ "rstrip": false,
648
+ "single_word": false,
649
+ "special": true
650
+ },
651
+ "32078": {
652
+ "content": "<extra_id_21>",
653
+ "lstrip": false,
654
+ "normalized": false,
655
+ "rstrip": false,
656
+ "single_word": false,
657
+ "special": true
658
+ },
659
+ "32079": {
660
+ "content": "<extra_id_20>",
661
+ "lstrip": false,
662
+ "normalized": false,
663
+ "rstrip": false,
664
+ "single_word": false,
665
+ "special": true
666
+ },
667
+ "32080": {
668
+ "content": "<extra_id_19>",
669
+ "lstrip": false,
670
+ "normalized": false,
671
+ "rstrip": false,
672
+ "single_word": false,
673
+ "special": true
674
+ },
675
+ "32081": {
676
+ "content": "<extra_id_18>",
677
+ "lstrip": false,
678
+ "normalized": false,
679
+ "rstrip": false,
680
+ "single_word": false,
681
+ "special": true
682
+ },
683
+ "32082": {
684
+ "content": "<extra_id_17>",
685
+ "lstrip": false,
686
+ "normalized": false,
687
+ "rstrip": false,
688
+ "single_word": false,
689
+ "special": true
690
+ },
691
+ "32083": {
692
+ "content": "<extra_id_16>",
693
+ "lstrip": false,
694
+ "normalized": false,
695
+ "rstrip": false,
696
+ "single_word": false,
697
+ "special": true
698
+ },
699
+ "32084": {
700
+ "content": "<extra_id_15>",
701
+ "lstrip": false,
702
+ "normalized": false,
703
+ "rstrip": false,
704
+ "single_word": false,
705
+ "special": true
706
+ },
707
+ "32085": {
708
+ "content": "<extra_id_14>",
709
+ "lstrip": false,
710
+ "normalized": false,
711
+ "rstrip": false,
712
+ "single_word": false,
713
+ "special": true
714
+ },
715
+ "32086": {
716
+ "content": "<extra_id_13>",
717
+ "lstrip": false,
718
+ "normalized": false,
719
+ "rstrip": false,
720
+ "single_word": false,
721
+ "special": true
722
+ },
723
+ "32087": {
724
+ "content": "<extra_id_12>",
725
+ "lstrip": false,
726
+ "normalized": false,
727
+ "rstrip": false,
728
+ "single_word": false,
729
+ "special": true
730
+ },
731
+ "32088": {
732
+ "content": "<extra_id_11>",
733
+ "lstrip": false,
734
+ "normalized": false,
735
+ "rstrip": false,
736
+ "single_word": false,
737
+ "special": true
738
+ },
739
+ "32089": {
740
+ "content": "<extra_id_10>",
741
+ "lstrip": false,
742
+ "normalized": false,
743
+ "rstrip": false,
744
+ "single_word": false,
745
+ "special": true
746
+ },
747
+ "32090": {
748
+ "content": "<extra_id_9>",
749
+ "lstrip": false,
750
+ "normalized": false,
751
+ "rstrip": false,
752
+ "single_word": false,
753
+ "special": true
754
+ },
755
+ "32091": {
756
+ "content": "<extra_id_8>",
757
+ "lstrip": false,
758
+ "normalized": false,
759
+ "rstrip": false,
760
+ "single_word": false,
761
+ "special": true
762
+ },
763
+ "32092": {
764
+ "content": "<extra_id_7>",
765
+ "lstrip": false,
766
+ "normalized": false,
767
+ "rstrip": false,
768
+ "single_word": false,
769
+ "special": true
770
+ },
771
+ "32093": {
772
+ "content": "<extra_id_6>",
773
+ "lstrip": false,
774
+ "normalized": false,
775
+ "rstrip": false,
776
+ "single_word": false,
777
+ "special": true
778
+ },
779
+ "32094": {
780
+ "content": "<extra_id_5>",
781
+ "lstrip": false,
782
+ "normalized": false,
783
+ "rstrip": false,
784
+ "single_word": false,
785
+ "special": true
786
+ },
787
+ "32095": {
788
+ "content": "<extra_id_4>",
789
+ "lstrip": false,
790
+ "normalized": false,
791
+ "rstrip": false,
792
+ "single_word": false,
793
+ "special": true
794
+ },
795
+ "32096": {
796
+ "content": "<extra_id_3>",
797
+ "lstrip": false,
798
+ "normalized": false,
799
+ "rstrip": false,
800
+ "single_word": false,
801
+ "special": true
802
+ },
803
+ "32097": {
804
+ "content": "<extra_id_2>",
805
+ "lstrip": false,
806
+ "normalized": false,
807
+ "rstrip": false,
808
+ "single_word": false,
809
+ "special": true
810
+ },
811
+ "32098": {
812
+ "content": "<extra_id_1>",
813
+ "lstrip": false,
814
+ "normalized": false,
815
+ "rstrip": false,
816
+ "single_word": false,
817
+ "special": true
818
+ },
819
+ "32099": {
820
+ "content": "<extra_id_0>",
821
+ "lstrip": false,
822
+ "normalized": false,
823
+ "rstrip": false,
824
+ "single_word": false,
825
+ "special": true
826
+ }
827
+ },
828
+ "additional_special_tokens": [
829
+ "<extra_id_0>",
830
+ "<extra_id_1>",
831
+ "<extra_id_2>",
832
+ "<extra_id_3>",
833
+ "<extra_id_4>",
834
+ "<extra_id_5>",
835
+ "<extra_id_6>",
836
+ "<extra_id_7>",
837
+ "<extra_id_8>",
838
+ "<extra_id_9>",
839
+ "<extra_id_10>",
840
+ "<extra_id_11>",
841
+ "<extra_id_12>",
842
+ "<extra_id_13>",
843
+ "<extra_id_14>",
844
+ "<extra_id_15>",
845
+ "<extra_id_16>",
846
+ "<extra_id_17>",
847
+ "<extra_id_18>",
848
+ "<extra_id_19>",
849
+ "<extra_id_20>",
850
+ "<extra_id_21>",
851
+ "<extra_id_22>",
852
+ "<extra_id_23>",
853
+ "<extra_id_24>",
854
+ "<extra_id_25>",
855
+ "<extra_id_26>",
856
+ "<extra_id_27>",
857
+ "<extra_id_28>",
858
+ "<extra_id_29>",
859
+ "<extra_id_30>",
860
+ "<extra_id_31>",
861
+ "<extra_id_32>",
862
+ "<extra_id_33>",
863
+ "<extra_id_34>",
864
+ "<extra_id_35>",
865
+ "<extra_id_36>",
866
+ "<extra_id_37>",
867
+ "<extra_id_38>",
868
+ "<extra_id_39>",
869
+ "<extra_id_40>",
870
+ "<extra_id_41>",
871
+ "<extra_id_42>",
872
+ "<extra_id_43>",
873
+ "<extra_id_44>",
874
+ "<extra_id_45>",
875
+ "<extra_id_46>",
876
+ "<extra_id_47>",
877
+ "<extra_id_48>",
878
+ "<extra_id_49>",
879
+ "<extra_id_50>",
880
+ "<extra_id_51>",
881
+ "<extra_id_52>",
882
+ "<extra_id_53>",
883
+ "<extra_id_54>",
884
+ "<extra_id_55>",
885
+ "<extra_id_56>",
886
+ "<extra_id_57>",
887
+ "<extra_id_58>",
888
+ "<extra_id_59>",
889
+ "<extra_id_60>",
890
+ "<extra_id_61>",
891
+ "<extra_id_62>",
892
+ "<extra_id_63>",
893
+ "<extra_id_64>",
894
+ "<extra_id_65>",
895
+ "<extra_id_66>",
896
+ "<extra_id_67>",
897
+ "<extra_id_68>",
898
+ "<extra_id_69>",
899
+ "<extra_id_70>",
900
+ "<extra_id_71>",
901
+ "<extra_id_72>",
902
+ "<extra_id_73>",
903
+ "<extra_id_74>",
904
+ "<extra_id_75>",
905
+ "<extra_id_76>",
906
+ "<extra_id_77>",
907
+ "<extra_id_78>",
908
+ "<extra_id_79>",
909
+ "<extra_id_80>",
910
+ "<extra_id_81>",
911
+ "<extra_id_82>",
912
+ "<extra_id_83>",
913
+ "<extra_id_84>",
914
+ "<extra_id_85>",
915
+ "<extra_id_86>",
916
+ "<extra_id_87>",
917
+ "<extra_id_88>",
918
+ "<extra_id_89>",
919
+ "<extra_id_90>",
920
+ "<extra_id_91>",
921
+ "<extra_id_92>",
922
+ "<extra_id_93>",
923
+ "<extra_id_94>",
924
+ "<extra_id_95>",
925
+ "<extra_id_96>",
926
+ "<extra_id_97>",
927
+ "<extra_id_98>",
928
+ "<extra_id_99>"
929
+ ],
930
+ "clean_up_tokenization_spaces": true,
931
+ "eos_token": "</s>",
932
+ "extra_ids": 100,
933
+ "model_max_length": 512,
934
+ "pad_token": "<pad>",
935
+ "tokenizer_class": "T5Tokenizer",
936
+ "unk_token": "<unk>"
937
+ }
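tokenizer_config.json above declares the standard T5 SentencePiece setup: 100 `<extra_id_*>` sentinels, `model_max_length` 512, and `</s>`/`<pad>`/`<unk>` specials. A small sketch of loading these files and checking those properties, assuming the checkpoint folder path used in this repo:

```python
# Sketch: load spiece.model + tokenizer_config.json from the assumed local folder
# and confirm the settings recorded above.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("google_t5/t5_base_amazon")
print(tok.model_max_length)   # 512, from tokenizer_config.json
print(len(tok))               # 32100 = 32000 SentencePiece pieces + 100 <extra_id_*> sentinels
print(tok("great battery life").input_ids)   # ends with 1, the </s> eos id
```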
google_t5/t5_base_amazon/train_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+ "epoch": 3.0,
+ "train_loss": 0.5285837122222834,
+ "train_runtime": 447.0645,
+ "train_samples": 12144,
+ "train_samples_per_second": 81.492,
+ "train_steps_per_second": 2.55
+ }
google_t5/t5_base_amazon/trainer_state.json ADDED
@@ -0,0 +1,1070 @@
1
+ {
2
+ "best_metric": 0.5565423369407654,
3
+ "best_model_checkpoint": "../../experiments_checkpoints/MAdAiLab/google_t5/t5_base_amazon/checkpoint-700",
4
+ "epoch": 3.0,
5
+ "eval_steps": 50,
6
+ "global_step": 1140,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.03,
13
+ "grad_norm": 2.8495495319366455,
14
+ "learning_rate": 0.0004956140350877193,
15
+ "loss": 3.117,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.05,
20
+ "grad_norm": 2.5190072059631348,
21
+ "learning_rate": 0.0004912280701754386,
22
+ "loss": 2.7209,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.08,
27
+ "grad_norm": 2.294928789138794,
28
+ "learning_rate": 0.0004868421052631579,
29
+ "loss": 2.0205,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.11,
34
+ "grad_norm": 3.267091751098633,
35
+ "learning_rate": 0.0004824561403508772,
36
+ "loss": 1.2866,
37
+ "step": 40
38
+ },
39
+ {
40
+ "epoch": 0.13,
41
+ "grad_norm": 4.188276767730713,
42
+ "learning_rate": 0.00047807017543859647,
43
+ "loss": 1.2275,
44
+ "step": 50
45
+ },
46
+ {
47
+ "epoch": 0.13,
48
+ "eval_accuracy": 0.6949934123847167,
49
+ "eval_f1_macro": 0.6073038050199799,
50
+ "eval_f1_micro": 0.6949934123847167,
51
+ "eval_loss": 1.035279631614685,
52
+ "eval_runtime": 3.7814,
53
+ "eval_samples_per_second": 401.442,
54
+ "eval_steps_per_second": 12.694,
55
+ "step": 50
56
+ },
57
+ {
58
+ "epoch": 0.16,
59
+ "grad_norm": 6.240413665771484,
60
+ "learning_rate": 0.00047368421052631577,
61
+ "loss": 1.0172,
62
+ "step": 60
63
+ },
64
+ {
65
+ "epoch": 0.18,
66
+ "grad_norm": 3.5446884632110596,
67
+ "learning_rate": 0.0004692982456140351,
68
+ "loss": 1.0344,
69
+ "step": 70
70
+ },
71
+ {
72
+ "epoch": 0.21,
73
+ "grad_norm": 4.263936519622803,
74
+ "learning_rate": 0.00046491228070175437,
75
+ "loss": 0.8594,
76
+ "step": 80
77
+ },
78
+ {
79
+ "epoch": 0.24,
80
+ "grad_norm": 3.0901923179626465,
81
+ "learning_rate": 0.0004605263157894737,
82
+ "loss": 0.8781,
83
+ "step": 90
84
+ },
85
+ {
86
+ "epoch": 0.26,
87
+ "grad_norm": 2.737272262573242,
88
+ "learning_rate": 0.000456140350877193,
89
+ "loss": 0.8341,
90
+ "step": 100
91
+ },
92
+ {
93
+ "epoch": 0.26,
94
+ "eval_accuracy": 0.738471673254282,
95
+ "eval_f1_macro": 0.6814380216401201,
96
+ "eval_f1_micro": 0.738471673254282,
97
+ "eval_loss": 0.8838083744049072,
98
+ "eval_runtime": 3.8044,
99
+ "eval_samples_per_second": 399.016,
100
+ "eval_steps_per_second": 12.617,
101
+ "step": 100
102
+ },
103
+ {
104
+ "epoch": 0.29,
105
+ "grad_norm": 2.692996025085449,
106
+ "learning_rate": 0.00045175438596491233,
107
+ "loss": 0.9941,
108
+ "step": 110
109
+ },
110
+ {
111
+ "epoch": 0.32,
112
+ "grad_norm": 2.3376717567443848,
113
+ "learning_rate": 0.0004473684210526316,
114
+ "loss": 0.8984,
115
+ "step": 120
116
+ },
117
+ {
118
+ "epoch": 0.34,
119
+ "grad_norm": 2.112525224685669,
120
+ "learning_rate": 0.0004429824561403509,
121
+ "loss": 0.909,
122
+ "step": 130
123
+ },
124
+ {
125
+ "epoch": 0.37,
126
+ "grad_norm": 2.8785059452056885,
127
+ "learning_rate": 0.0004385964912280702,
128
+ "loss": 0.7654,
129
+ "step": 140
130
+ },
131
+ {
132
+ "epoch": 0.39,
133
+ "grad_norm": 2.3538177013397217,
134
+ "learning_rate": 0.0004342105263157895,
135
+ "loss": 0.7773,
136
+ "step": 150
137
+ },
138
+ {
139
+ "epoch": 0.39,
140
+ "eval_accuracy": 0.7832674571805006,
141
+ "eval_f1_macro": 0.7339618917892343,
142
+ "eval_f1_micro": 0.7832674571805006,
143
+ "eval_loss": 0.7473268508911133,
144
+ "eval_runtime": 3.8195,
145
+ "eval_samples_per_second": 397.435,
146
+ "eval_steps_per_second": 12.567,
147
+ "step": 150
148
+ },
149
+ {
150
+ "epoch": 0.42,
151
+ "grad_norm": 2.850787401199341,
152
+ "learning_rate": 0.0004298245614035088,
153
+ "loss": 0.7734,
154
+ "step": 160
155
+ },
156
+ {
157
+ "epoch": 0.45,
158
+ "grad_norm": 3.4294583797454834,
159
+ "learning_rate": 0.0004254385964912281,
160
+ "loss": 0.7876,
161
+ "step": 170
162
+ },
163
+ {
164
+ "epoch": 0.47,
165
+ "grad_norm": 2.0919501781463623,
166
+ "learning_rate": 0.00042105263157894734,
167
+ "loss": 0.7329,
168
+ "step": 180
169
+ },
170
+ {
171
+ "epoch": 0.5,
172
+ "grad_norm": 5.026761531829834,
173
+ "learning_rate": 0.0004166666666666667,
174
+ "loss": 0.8396,
175
+ "step": 190
176
+ },
177
+ {
178
+ "epoch": 0.53,
179
+ "grad_norm": 2.3940157890319824,
180
+ "learning_rate": 0.000412280701754386,
181
+ "loss": 0.7188,
182
+ "step": 200
183
+ },
184
+ {
185
+ "epoch": 0.53,
186
+ "eval_accuracy": 0.7924901185770751,
187
+ "eval_f1_macro": 0.7432732481576452,
188
+ "eval_f1_micro": 0.7924901185770751,
189
+ "eval_loss": 0.7023962140083313,
190
+ "eval_runtime": 3.8263,
191
+ "eval_samples_per_second": 396.73,
192
+ "eval_steps_per_second": 12.545,
193
+ "step": 200
194
+ },
195
+ {
196
+ "epoch": 0.55,
197
+ "grad_norm": 2.965258836746216,
198
+ "learning_rate": 0.00040789473684210524,
199
+ "loss": 0.66,
200
+ "step": 210
201
+ },
202
+ {
203
+ "epoch": 0.58,
204
+ "grad_norm": 3.2059409618377686,
205
+ "learning_rate": 0.00040350877192982455,
206
+ "loss": 0.7795,
207
+ "step": 220
208
+ },
209
+ {
210
+ "epoch": 0.61,
211
+ "grad_norm": 4.9381537437438965,
212
+ "learning_rate": 0.0003991228070175439,
213
+ "loss": 0.7163,
214
+ "step": 230
215
+ },
216
+ {
217
+ "epoch": 0.63,
218
+ "grad_norm": 4.937159061431885,
219
+ "learning_rate": 0.00039473684210526315,
220
+ "loss": 0.7546,
221
+ "step": 240
222
+ },
223
+ {
224
+ "epoch": 0.66,
225
+ "grad_norm": 2.786454439163208,
226
+ "learning_rate": 0.00039035087719298245,
227
+ "loss": 0.7483,
228
+ "step": 250
229
+ },
230
+ {
231
+ "epoch": 0.66,
232
+ "eval_accuracy": 0.7872200263504612,
233
+ "eval_f1_macro": 0.7396308270254102,
234
+ "eval_f1_micro": 0.7872200263504612,
235
+ "eval_loss": 0.7055577039718628,
236
+ "eval_runtime": 3.8209,
237
+ "eval_samples_per_second": 397.29,
238
+ "eval_steps_per_second": 12.563,
239
+ "step": 250
240
+ },
241
+ {
242
+ "epoch": 0.68,
243
+ "grad_norm": 2.2485697269439697,
244
+ "learning_rate": 0.00038596491228070175,
245
+ "loss": 0.7216,
246
+ "step": 260
247
+ },
248
+ {
249
+ "epoch": 0.71,
250
+ "grad_norm": 3.388631582260132,
251
+ "learning_rate": 0.00038157894736842105,
252
+ "loss": 0.7437,
253
+ "step": 270
254
+ },
255
+ {
256
+ "epoch": 0.74,
257
+ "grad_norm": 2.945599317550659,
258
+ "learning_rate": 0.00037719298245614036,
259
+ "loss": 0.6433,
260
+ "step": 280
261
+ },
262
+ {
263
+ "epoch": 0.76,
264
+ "grad_norm": 2.894361972808838,
265
+ "learning_rate": 0.00037280701754385966,
266
+ "loss": 0.6333,
267
+ "step": 290
268
+ },
269
+ {
270
+ "epoch": 0.79,
271
+ "grad_norm": 1.8753783702850342,
272
+ "learning_rate": 0.00036842105263157896,
273
+ "loss": 0.6228,
274
+ "step": 300
275
+ },
276
+ {
277
+ "epoch": 0.79,
278
+ "eval_accuracy": 0.8129117259552042,
279
+ "eval_f1_macro": 0.7636250391666534,
280
+ "eval_f1_micro": 0.8129117259552042,
281
+ "eval_loss": 0.6337724924087524,
282
+ "eval_runtime": 3.8242,
283
+ "eval_samples_per_second": 396.945,
284
+ "eval_steps_per_second": 12.552,
285
+ "step": 300
286
+ },
287
+ {
288
+ "epoch": 0.82,
289
+ "grad_norm": 3.340951442718506,
290
+ "learning_rate": 0.00036403508771929826,
291
+ "loss": 0.5978,
292
+ "step": 310
293
+ },
294
+ {
295
+ "epoch": 0.84,
296
+ "grad_norm": 3.2675557136535645,
297
+ "learning_rate": 0.00035964912280701756,
298
+ "loss": 0.7124,
299
+ "step": 320
300
+ },
301
+ {
302
+ "epoch": 0.87,
303
+ "grad_norm": 2.308924674987793,
304
+ "learning_rate": 0.00035526315789473687,
305
+ "loss": 0.7423,
306
+ "step": 330
307
+ },
308
+ {
309
+ "epoch": 0.89,
310
+ "grad_norm": 3.407076835632324,
311
+ "learning_rate": 0.0003508771929824561,
312
+ "loss": 0.6787,
313
+ "step": 340
314
+ },
315
+ {
316
+ "epoch": 0.92,
317
+ "grad_norm": 2.5976576805114746,
318
+ "learning_rate": 0.00034649122807017547,
319
+ "loss": 0.7089,
320
+ "step": 350
321
+ },
322
+ {
323
+ "epoch": 0.92,
324
+ "eval_accuracy": 0.8208168642951251,
325
+ "eval_f1_macro": 0.7962943499701793,
326
+ "eval_f1_micro": 0.8208168642951251,
327
+ "eval_loss": 0.6130083799362183,
328
+ "eval_runtime": 3.8233,
329
+ "eval_samples_per_second": 397.037,
330
+ "eval_steps_per_second": 12.555,
331
+ "step": 350
332
+ },
333
+ {
334
+ "epoch": 0.95,
335
+ "grad_norm": 2.053323745727539,
336
+ "learning_rate": 0.00034210526315789477,
337
+ "loss": 0.5299,
338
+ "step": 360
339
+ },
340
+ {
341
+ "epoch": 0.97,
342
+ "grad_norm": 3.201794147491455,
343
+ "learning_rate": 0.000337719298245614,
344
+ "loss": 0.7405,
345
+ "step": 370
346
+ },
347
+ {
348
+ "epoch": 1.0,
349
+ "grad_norm": 2.8295910358428955,
350
+ "learning_rate": 0.0003333333333333333,
351
+ "loss": 0.7584,
352
+ "step": 380
353
+ },
354
+ {
355
+ "epoch": 1.03,
356
+ "grad_norm": 1.9317399263381958,
357
+ "learning_rate": 0.0003289473684210527,
358
+ "loss": 0.4822,
359
+ "step": 390
360
+ },
361
+ {
362
+ "epoch": 1.05,
363
+ "grad_norm": 2.5840656757354736,
364
+ "learning_rate": 0.0003245614035087719,
365
+ "loss": 0.5055,
366
+ "step": 400
367
+ },
368
+ {
369
+ "epoch": 1.05,
370
+ "eval_accuracy": 0.8300395256916996,
371
+ "eval_f1_macro": 0.8075388232802375,
372
+ "eval_f1_micro": 0.8300395256916996,
373
+ "eval_loss": 0.5938563346862793,
374
+ "eval_runtime": 3.8245,
375
+ "eval_samples_per_second": 396.912,
376
+ "eval_steps_per_second": 12.551,
377
+ "step": 400
378
+ },
379
+ {
380
+ "epoch": 1.08,
381
+ "grad_norm": 2.4889354705810547,
382
+ "learning_rate": 0.00032017543859649123,
383
+ "loss": 0.425,
384
+ "step": 410
385
+ },
386
+ {
387
+ "epoch": 1.11,
388
+ "grad_norm": 2.618088722229004,
389
+ "learning_rate": 0.00031578947368421053,
390
+ "loss": 0.3865,
391
+ "step": 420
392
+ },
393
+ {
394
+ "epoch": 1.13,
395
+ "grad_norm": 2.8282623291015625,
396
+ "learning_rate": 0.00031140350877192983,
397
+ "loss": 0.4543,
398
+ "step": 430
399
+ },
400
+ {
401
+ "epoch": 1.16,
402
+ "grad_norm": 3.4801692962646484,
403
+ "learning_rate": 0.00030701754385964913,
404
+ "loss": 0.4152,
405
+ "step": 440
406
+ },
407
+ {
408
+ "epoch": 1.18,
409
+ "grad_norm": 1.7531658411026,
410
+ "learning_rate": 0.00030263157894736844,
411
+ "loss": 0.3942,
412
+ "step": 450
413
+ },
414
+ {
415
+ "epoch": 1.18,
416
+ "eval_accuracy": 0.8241106719367589,
417
+ "eval_f1_macro": 0.7915647113122625,
418
+ "eval_f1_micro": 0.8241106719367589,
419
+ "eval_loss": 0.6020949482917786,
420
+ "eval_runtime": 3.8282,
421
+ "eval_samples_per_second": 396.536,
422
+ "eval_steps_per_second": 12.539,
423
+ "step": 450
424
+ },
425
+ {
426
+ "epoch": 1.21,
427
+ "grad_norm": 2.6115176677703857,
428
+ "learning_rate": 0.0002982456140350877,
429
+ "loss": 0.4801,
430
+ "step": 460
431
+ },
432
+ {
433
+ "epoch": 1.24,
434
+ "grad_norm": 2.1855995655059814,
435
+ "learning_rate": 0.00029385964912280704,
436
+ "loss": 0.3942,
437
+ "step": 470
438
+ },
439
+ {
440
+ "epoch": 1.26,
441
+ "grad_norm": 2.3812382221221924,
442
+ "learning_rate": 0.00028947368421052634,
443
+ "loss": 0.485,
444
+ "step": 480
445
+ },
446
+ {
447
+ "epoch": 1.29,
448
+ "grad_norm": 2.102308750152588,
449
+ "learning_rate": 0.00028508771929824564,
450
+ "loss": 0.417,
451
+ "step": 490
452
+ },
453
+ {
454
+ "epoch": 1.32,
455
+ "grad_norm": 4.095526218414307,
456
+ "learning_rate": 0.0002807017543859649,
457
+ "loss": 0.4248,
458
+ "step": 500
459
+ },
460
+ {
461
+ "epoch": 1.32,
462
+ "eval_accuracy": 0.8300395256916996,
463
+ "eval_f1_macro": 0.8060251760691185,
464
+ "eval_f1_micro": 0.8300395256916996,
465
+ "eval_loss": 0.5956056714057922,
466
+ "eval_runtime": 3.8218,
467
+ "eval_samples_per_second": 397.192,
468
+ "eval_steps_per_second": 12.559,
469
+ "step": 500
470
+ },
471
+ {
472
+ "epoch": 1.34,
473
+ "grad_norm": 3.2039239406585693,
474
+ "learning_rate": 0.00027631578947368425,
475
+ "loss": 0.4209,
476
+ "step": 510
477
+ },
478
+ {
479
+ "epoch": 1.37,
480
+ "grad_norm": 1.9944714307785034,
481
+ "learning_rate": 0.00027192982456140355,
482
+ "loss": 0.5373,
483
+ "step": 520
484
+ },
485
+ {
486
+ "epoch": 1.39,
487
+ "grad_norm": 3.2802696228027344,
488
+ "learning_rate": 0.0002675438596491228,
489
+ "loss": 0.5216,
490
+ "step": 530
491
+ },
492
+ {
493
+ "epoch": 1.42,
494
+ "grad_norm": 1.4747893810272217,
495
+ "learning_rate": 0.0002631578947368421,
496
+ "loss": 0.3785,
497
+ "step": 540
498
+ },
499
+ {
500
+ "epoch": 1.45,
501
+ "grad_norm": 1.2378747463226318,
502
+ "learning_rate": 0.00025877192982456146,
503
+ "loss": 0.3595,
504
+ "step": 550
505
+ },
506
+ {
507
+ "epoch": 1.45,
508
+ "eval_accuracy": 0.8175230566534915,
509
+ "eval_f1_macro": 0.7897278945523275,
510
+ "eval_f1_micro": 0.8175230566534915,
511
+ "eval_loss": 0.6172593832015991,
512
+ "eval_runtime": 3.8184,
513
+ "eval_samples_per_second": 397.551,
514
+ "eval_steps_per_second": 12.571,
515
+ "step": 550
516
+ },
517
+ {
518
+ "epoch": 1.47,
519
+ "grad_norm": 2.6604156494140625,
520
+ "learning_rate": 0.0002543859649122807,
521
+ "loss": 0.4231,
522
+ "step": 560
523
+ },
524
+ {
525
+ "epoch": 1.5,
526
+ "grad_norm": 1.879952311515808,
527
+ "learning_rate": 0.00025,
528
+ "loss": 0.4555,
529
+ "step": 570
530
+ },
531
+ {
532
+ "epoch": 1.53,
533
+ "grad_norm": 3.9825170040130615,
534
+ "learning_rate": 0.0002456140350877193,
535
+ "loss": 0.3988,
536
+ "step": 580
537
+ },
538
+ {
539
+ "epoch": 1.55,
540
+ "grad_norm": 2.999025583267212,
541
+ "learning_rate": 0.0002412280701754386,
542
+ "loss": 0.3854,
543
+ "step": 590
544
+ },
545
+ {
546
+ "epoch": 1.58,
547
+ "grad_norm": 2.778930425643921,
548
+ "learning_rate": 0.00023684210526315788,
549
+ "loss": 0.5263,
550
+ "step": 600
551
+ },
552
+ {
553
+ "epoch": 1.58,
554
+ "eval_accuracy": 0.8162055335968379,
555
+ "eval_f1_macro": 0.7908253969964322,
556
+ "eval_f1_micro": 0.8162055335968379,
557
+ "eval_loss": 0.6170048117637634,
558
+ "eval_runtime": 3.838,
559
+ "eval_samples_per_second": 395.517,
560
+ "eval_steps_per_second": 12.506,
561
+ "step": 600
562
+ },
563
+ {
564
+ "epoch": 1.61,
565
+ "grad_norm": 2.2400004863739014,
566
+ "learning_rate": 0.00023245614035087719,
567
+ "loss": 0.443,
568
+ "step": 610
569
+ },
570
+ {
571
+ "epoch": 1.63,
572
+ "grad_norm": 2.5458765029907227,
573
+ "learning_rate": 0.0002280701754385965,
574
+ "loss": 0.4106,
575
+ "step": 620
576
+ },
577
+ {
578
+ "epoch": 1.66,
579
+ "grad_norm": 2.955345392227173,
580
+ "learning_rate": 0.0002236842105263158,
581
+ "loss": 0.4078,
582
+ "step": 630
583
+ },
584
+ {
585
+ "epoch": 1.68,
586
+ "grad_norm": 3.5653369426727295,
587
+ "learning_rate": 0.0002192982456140351,
588
+ "loss": 0.4746,
589
+ "step": 640
590
+ },
591
+ {
592
+ "epoch": 1.71,
593
+ "grad_norm": 1.5618356466293335,
594
+ "learning_rate": 0.0002149122807017544,
595
+ "loss": 0.5153,
596
+ "step": 650
597
+ },
598
+ {
599
+ "epoch": 1.71,
600
+ "eval_accuracy": 0.8326745718050066,
601
+ "eval_f1_macro": 0.8042732309505177,
602
+ "eval_f1_micro": 0.8326745718050066,
603
+ "eval_loss": 0.6007006168365479,
604
+ "eval_runtime": 3.8189,
605
+ "eval_samples_per_second": 397.495,
606
+ "eval_steps_per_second": 12.569,
607
+ "step": 650
608
+ },
609
+ {
610
+ "epoch": 1.74,
611
+ "grad_norm": 3.3417813777923584,
612
+ "learning_rate": 0.00021052631578947367,
613
+ "loss": 0.3366,
614
+ "step": 660
615
+ },
616
+ {
617
+ "epoch": 1.76,
618
+ "grad_norm": 3.232940912246704,
619
+ "learning_rate": 0.000206140350877193,
620
+ "loss": 0.4284,
621
+ "step": 670
622
+ },
623
+ {
624
+ "epoch": 1.79,
625
+ "grad_norm": 2.414170265197754,
626
+ "learning_rate": 0.00020175438596491227,
627
+ "loss": 0.4816,
628
+ "step": 680
629
+ },
630
+ {
631
+ "epoch": 1.82,
632
+ "grad_norm": 2.161409378051758,
633
+ "learning_rate": 0.00019736842105263157,
634
+ "loss": 0.398,
635
+ "step": 690
636
+ },
637
+ {
638
+ "epoch": 1.84,
639
+ "grad_norm": 2.6579811573028564,
640
+ "learning_rate": 0.00019298245614035088,
641
+ "loss": 0.4237,
642
+ "step": 700
643
+ },
644
+ {
645
+ "epoch": 1.84,
646
+ "eval_accuracy": 0.8399209486166008,
647
+ "eval_f1_macro": 0.8112898753723374,
648
+ "eval_f1_micro": 0.8399209486166008,
649
+ "eval_loss": 0.5565423369407654,
650
+ "eval_runtime": 3.8246,
651
+ "eval_samples_per_second": 396.904,
652
+ "eval_steps_per_second": 12.55,
653
+ "step": 700
654
+ },
655
+ {
656
+ "epoch": 1.87,
657
+ "grad_norm": 3.548602342605591,
658
+ "learning_rate": 0.00018859649122807018,
659
+ "loss": 0.4094,
660
+ "step": 710
661
+ },
662
+ {
663
+ "epoch": 1.89,
664
+ "grad_norm": 2.4683637619018555,
665
+ "learning_rate": 0.00018421052631578948,
666
+ "loss": 0.4157,
667
+ "step": 720
668
+ },
669
+ {
670
+ "epoch": 1.92,
671
+ "grad_norm": 3.2135560512542725,
672
+ "learning_rate": 0.00017982456140350878,
673
+ "loss": 0.4306,
674
+ "step": 730
675
+ },
676
+ {
677
+ "epoch": 1.95,
678
+ "grad_norm": 2.9333302974700928,
679
+ "learning_rate": 0.00017543859649122806,
680
+ "loss": 0.4584,
681
+ "step": 740
682
+ },
683
+ {
684
+ "epoch": 1.97,
685
+ "grad_norm": 3.854954957962036,
686
+ "learning_rate": 0.00017105263157894739,
687
+ "loss": 0.3852,
688
+ "step": 750
689
+ },
690
+ {
691
+ "epoch": 1.97,
692
+ "eval_accuracy": 0.8438735177865613,
693
+ "eval_f1_macro": 0.8146328190948308,
694
+ "eval_f1_micro": 0.8438735177865613,
695
+ "eval_loss": 0.563149094581604,
696
+ "eval_runtime": 3.8078,
697
+ "eval_samples_per_second": 398.657,
698
+ "eval_steps_per_second": 12.606,
699
+ "step": 750
700
+ },
701
+ {
702
+ "epoch": 2.0,
703
+ "grad_norm": 2.8746769428253174,
704
+ "learning_rate": 0.00016666666666666666,
705
+ "loss": 0.3997,
706
+ "step": 760
707
+ },
708
+ {
709
+ "epoch": 2.03,
710
+ "grad_norm": 1.5188621282577515,
711
+ "learning_rate": 0.00016228070175438596,
712
+ "loss": 0.2052,
713
+ "step": 770
714
+ },
715
+ {
716
+ "epoch": 2.05,
717
+ "grad_norm": 2.1245510578155518,
718
+ "learning_rate": 0.00015789473684210527,
719
+ "loss": 0.2673,
720
+ "step": 780
721
+ },
722
+ {
723
+ "epoch": 2.08,
724
+ "grad_norm": 1.595406174659729,
725
+ "learning_rate": 0.00015350877192982457,
726
+ "loss": 0.1844,
727
+ "step": 790
728
+ },
729
+ {
730
+ "epoch": 2.11,
731
+ "grad_norm": 1.294638991355896,
732
+ "learning_rate": 0.00014912280701754384,
733
+ "loss": 0.1916,
734
+ "step": 800
735
+ },
736
+ {
737
+ "epoch": 2.11,
738
+ "eval_accuracy": 0.8438735177865613,
739
+ "eval_f1_macro": 0.8132477856483462,
740
+ "eval_f1_micro": 0.8438735177865613,
741
+ "eval_loss": 0.5847834944725037,
742
+ "eval_runtime": 3.8164,
743
+ "eval_samples_per_second": 397.762,
744
+ "eval_steps_per_second": 12.577,
745
+ "step": 800
746
+ },
747
+ {
748
+ "epoch": 2.13,
749
+ "grad_norm": 1.5228568315505981,
750
+ "learning_rate": 0.00014473684210526317,
751
+ "loss": 0.203,
752
+ "step": 810
753
+ },
754
+ {
755
+ "epoch": 2.16,
756
+ "grad_norm": 2.10640025138855,
757
+ "learning_rate": 0.00014035087719298245,
758
+ "loss": 0.1871,
759
+ "step": 820
760
+ },
761
+ {
762
+ "epoch": 2.18,
763
+ "grad_norm": 2.23407244682312,
764
+ "learning_rate": 0.00013596491228070177,
765
+ "loss": 0.2381,
766
+ "step": 830
767
+ },
768
+ {
769
+ "epoch": 2.21,
770
+ "grad_norm": 4.305612564086914,
771
+ "learning_rate": 0.00013157894736842105,
772
+ "loss": 0.1792,
773
+ "step": 840
774
+ },
775
+ {
776
+ "epoch": 2.24,
777
+ "grad_norm": 2.1483376026153564,
778
+ "learning_rate": 0.00012719298245614035,
779
+ "loss": 0.2108,
780
+ "step": 850
781
+ },
782
+ {
783
+ "epoch": 2.24,
784
+ "eval_accuracy": 0.8432147562582345,
785
+ "eval_f1_macro": 0.8094003161642315,
786
+ "eval_f1_micro": 0.8432147562582345,
787
+ "eval_loss": 0.6054214239120483,
788
+ "eval_runtime": 3.8173,
789
+ "eval_samples_per_second": 397.661,
790
+ "eval_steps_per_second": 12.574,
791
+ "step": 850
792
+ },
793
+ {
794
+ "epoch": 2.26,
795
+ "grad_norm": 2.0426666736602783,
796
+ "learning_rate": 0.00012280701754385965,
797
+ "loss": 0.1777,
798
+ "step": 860
799
+ },
800
+ {
801
+ "epoch": 2.29,
802
+ "grad_norm": 1.9927353858947754,
803
+ "learning_rate": 0.00011842105263157894,
804
+ "loss": 0.1886,
805
+ "step": 870
806
+ },
807
+ {
808
+ "epoch": 2.32,
809
+ "grad_norm": 2.371792793273926,
810
+ "learning_rate": 0.00011403508771929824,
811
+ "loss": 0.1927,
812
+ "step": 880
813
+ },
814
+ {
815
+ "epoch": 2.34,
816
+ "grad_norm": 1.061587929725647,
817
+ "learning_rate": 0.00010964912280701755,
818
+ "loss": 0.1,
819
+ "step": 890
820
+ },
821
+ {
822
+ "epoch": 2.37,
823
+ "grad_norm": 3.100485324859619,
824
+ "learning_rate": 0.00010526315789473683,
825
+ "loss": 0.1752,
826
+ "step": 900
827
+ },
828
+ {
829
+ "epoch": 2.37,
830
+ "eval_accuracy": 0.8438735177865613,
831
+ "eval_f1_macro": 0.8131496787168201,
832
+ "eval_f1_micro": 0.8438735177865613,
833
+ "eval_loss": 0.6142441630363464,
834
+ "eval_runtime": 3.8158,
835
+ "eval_samples_per_second": 397.817,
836
+ "eval_steps_per_second": 12.579,
837
+ "step": 900
838
+ },
839
+ {
840
+ "epoch": 2.39,
841
+ "grad_norm": 4.032674789428711,
842
+ "learning_rate": 0.00010087719298245614,
843
+ "loss": 0.2409,
844
+ "step": 910
845
+ },
846
+ {
847
+ "epoch": 2.42,
848
+ "grad_norm": 1.537070393562317,
849
+ "learning_rate": 9.649122807017544e-05,
850
+ "loss": 0.2244,
851
+ "step": 920
852
+ },
853
+ {
854
+ "epoch": 2.45,
855
+ "grad_norm": 2.81392765045166,
856
+ "learning_rate": 9.210526315789474e-05,
857
+ "loss": 0.2367,
858
+ "step": 930
859
+ },
860
+ {
861
+ "epoch": 2.47,
862
+ "grad_norm": 4.410009860992432,
863
+ "learning_rate": 8.771929824561403e-05,
864
+ "loss": 0.2912,
865
+ "step": 940
866
+ },
867
+ {
868
+ "epoch": 2.5,
869
+ "grad_norm": 2.0353574752807617,
870
+ "learning_rate": 8.333333333333333e-05,
871
+ "loss": 0.1502,
872
+ "step": 950
873
+ },
874
+ {
875
+ "epoch": 2.5,
876
+ "eval_accuracy": 0.8451910408432147,
877
+ "eval_f1_macro": 0.8119149292307762,
878
+ "eval_f1_micro": 0.8451910408432147,
879
+ "eval_loss": 0.6100274324417114,
880
+ "eval_runtime": 3.8177,
881
+ "eval_samples_per_second": 397.617,
882
+ "eval_steps_per_second": 12.573,
883
+ "step": 950
884
+ },
885
+ {
886
+ "epoch": 2.53,
887
+ "grad_norm": 0.4018252193927765,
888
+ "learning_rate": 7.894736842105263e-05,
889
+ "loss": 0.2425,
890
+ "step": 960
891
+ },
892
+ {
893
+ "epoch": 2.55,
894
+ "grad_norm": 1.633636236190796,
895
+ "learning_rate": 7.456140350877192e-05,
896
+ "loss": 0.3121,
897
+ "step": 970
898
+ },
899
+ {
900
+ "epoch": 2.58,
901
+ "grad_norm": 1.6794798374176025,
902
+ "learning_rate": 7.017543859649122e-05,
903
+ "loss": 0.1456,
904
+ "step": 980
905
+ },
906
+ {
907
+ "epoch": 2.61,
908
+ "grad_norm": 1.684718132019043,
909
+ "learning_rate": 6.578947368421052e-05,
910
+ "loss": 0.1977,
911
+ "step": 990
912
+ },
913
+ {
914
+ "epoch": 2.63,
915
+ "grad_norm": 1.3666688203811646,
916
+ "learning_rate": 6.140350877192983e-05,
917
+ "loss": 0.2253,
918
+ "step": 1000
919
+ },
920
+ {
921
+ "epoch": 2.63,
922
+ "eval_accuracy": 0.8438735177865613,
923
+ "eval_f1_macro": 0.8227552343954732,
924
+ "eval_f1_micro": 0.8438735177865613,
925
+ "eval_loss": 0.6083930730819702,
926
+ "eval_runtime": 3.8094,
927
+ "eval_samples_per_second": 398.489,
928
+ "eval_steps_per_second": 12.6,
929
+ "step": 1000
930
+ },
931
+ {
932
+ "epoch": 2.66,
933
+ "grad_norm": 1.7882211208343506,
934
+ "learning_rate": 5.701754385964912e-05,
935
+ "loss": 0.1514,
936
+ "step": 1010
937
+ },
938
+ {
939
+ "epoch": 2.68,
940
+ "grad_norm": 1.7349433898925781,
941
+ "learning_rate": 5.263157894736842e-05,
942
+ "loss": 0.2528,
943
+ "step": 1020
944
+ },
945
+ {
946
+ "epoch": 2.71,
947
+ "grad_norm": 1.9312665462493896,
948
+ "learning_rate": 4.824561403508772e-05,
949
+ "loss": 0.238,
950
+ "step": 1030
951
+ },
952
+ {
953
+ "epoch": 2.74,
954
+ "grad_norm": 2.227630376815796,
955
+ "learning_rate": 4.3859649122807014e-05,
956
+ "loss": 0.2113,
957
+ "step": 1040
958
+ },
959
+ {
960
+ "epoch": 2.76,
961
+ "grad_norm": 1.098223328590393,
962
+ "learning_rate": 3.9473684210526316e-05,
963
+ "loss": 0.2193,
964
+ "step": 1050
965
+ },
966
+ {
967
+ "epoch": 2.76,
968
+ "eval_accuracy": 0.8484848484848485,
969
+ "eval_f1_macro": 0.817141995514716,
970
+ "eval_f1_micro": 0.8484848484848485,
971
+ "eval_loss": 0.6062248945236206,
972
+ "eval_runtime": 3.8206,
973
+ "eval_samples_per_second": 397.322,
974
+ "eval_steps_per_second": 12.564,
975
+ "step": 1050
976
+ },
977
+ {
978
+ "epoch": 2.79,
979
+ "grad_norm": 0.6382438540458679,
980
+ "learning_rate": 3.508771929824561e-05,
981
+ "loss": 0.2207,
982
+ "step": 1060
983
+ },
984
+ {
985
+ "epoch": 2.82,
986
+ "grad_norm": 2.2816574573516846,
987
+ "learning_rate": 3.0701754385964913e-05,
988
+ "loss": 0.2896,
989
+ "step": 1070
990
+ },
991
+ {
992
+ "epoch": 2.84,
993
+ "grad_norm": 1.8550561666488647,
994
+ "learning_rate": 2.631578947368421e-05,
995
+ "loss": 0.2776,
996
+ "step": 1080
997
+ },
998
+ {
999
+ "epoch": 2.87,
1000
+ "grad_norm": 1.517103672027588,
1001
+ "learning_rate": 2.1929824561403507e-05,
1002
+ "loss": 0.1744,
1003
+ "step": 1090
1004
+ },
1005
+ {
1006
+ "epoch": 2.89,
1007
+ "grad_norm": 1.9958040714263916,
1008
+ "learning_rate": 1.7543859649122806e-05,
1009
+ "loss": 0.2182,
1010
+ "step": 1100
1011
+ },
1012
+ {
1013
+ "epoch": 2.89,
1014
+ "eval_accuracy": 0.849802371541502,
1015
+ "eval_f1_macro": 0.8181612702287439,
1016
+ "eval_f1_micro": 0.849802371541502,
1017
+ "eval_loss": 0.5966492891311646,
1018
+ "eval_runtime": 3.822,
1019
+ "eval_samples_per_second": 397.173,
1020
+ "eval_steps_per_second": 12.559,
1021
+ "step": 1100
1022
+ },
1023
+ {
1024
+ "epoch": 2.92,
1025
+ "grad_norm": 1.0382195711135864,
1026
+ "learning_rate": 1.3157894736842104e-05,
1027
+ "loss": 0.2314,
1028
+ "step": 1110
1029
+ },
1030
+ {
1031
+ "epoch": 2.95,
1032
+ "grad_norm": 0.6311368346214294,
1033
+ "learning_rate": 8.771929824561403e-06,
1034
+ "loss": 0.2827,
1035
+ "step": 1120
1036
+ },
1037
+ {
1038
+ "epoch": 2.97,
1039
+ "grad_norm": 2.1355645656585693,
1040
+ "learning_rate": 4.3859649122807014e-06,
1041
+ "loss": 0.1823,
1042
+ "step": 1130
1043
+ },
1044
+ {
1045
+ "epoch": 3.0,
1046
+ "grad_norm": 0.8415520191192627,
1047
+ "learning_rate": 0.0,
1048
+ "loss": 0.168,
1049
+ "step": 1140
1050
+ },
1051
+ {
1052
+ "epoch": 3.0,
1053
+ "step": 1140,
1054
+ "total_flos": 5570737729437696.0,
1055
+ "train_loss": 0.5285837122222834,
1056
+ "train_runtime": 447.0645,
1057
+ "train_samples_per_second": 81.492,
1058
+ "train_steps_per_second": 2.55
1059
+ }
1060
+ ],
1061
+ "logging_steps": 10,
1062
+ "max_steps": 1140,
1063
+ "num_input_tokens_seen": 0,
1064
+ "num_train_epochs": 3,
1065
+ "save_steps": 50,
1066
+ "total_flos": 5570737729437696.0,
1067
+ "train_batch_size": 16,
1068
+ "trial_name": null,
1069
+ "trial_params": null
1070
+ }
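The `trainer_state.json` above is the raw `log_history` written by the Hugging Face `Trainer`. As a minimal sketch of how the evaluation checkpoints in that log can be pulled out, the snippet below reads the file and reports the best eval step; the file path and the choice of `eval_accuracy` as the selection metric are assumptions for illustration, not part of this commit.

```python
import json

# Path assumed from the folder layout shown in this listing; adjust as needed.
STATE_PATH = "google_t5/t5_base_amazon/trainer_state.json"

with open(STATE_PATH) as f:
    state = json.load(f)

# Evaluation entries are the log_history records that carry eval_* keys.
eval_logs = [rec for rec in state["log_history"] if "eval_accuracy" in rec]

best = max(eval_logs, key=lambda rec: rec["eval_accuracy"])
print(f"best step: {best['step']}, "
      f"accuracy: {best['eval_accuracy']:.4f}, "
      f"macro F1: {best['eval_f1_macro']:.4f}")
```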
google_t5/t5_base_amazon/training_args.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9058d2c55809c08dfe5d631fdecbc63ece4067a6cea137e1478fddc6e29958ed
3
+ size 5048
google_t5/t5_base_ledgar/README.md ADDED
@@ -0,0 +1,93 @@
1
+ ---
2
+ license: apache-2.0
3
+ base_model: google-t5/t5-base
4
+ tags:
5
+ - generated_from_trainer
6
+ metrics:
7
+ - accuracy
8
+ model-index:
9
+ - name: t5_base_ledgar
10
+ results: []
11
+ ---
12
+
13
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
14
+ should probably proofread and complete it, then remove this comment. -->
15
+
16
+ # t5_base_ledgar
17
+
18
+ This model is a fine-tuned version of [google-t5/t5-base](https://huggingface.co/google-t5/t5-base) on the LEDGAR contract-provision classification dataset (LexGLUE).
19
+ It achieves the following results on the evaluation set:
20
+ - Loss: 0.5004
21
+ - Accuracy: 0.8664
22
+ - F1 Macro: 0.7948
23
+ - F1 Micro: 0.8664
24
+
25
+ ## Model description
26
+
27
+ More information needed
28
+
29
+ ## Intended uses & limitations
30
+
31
+ More information needed
32
+
33
+ ## Training and evaluation data
34
+
35
+ More information needed
36
+
37
+ ## Training procedure
38
+
39
+ ### Training hyperparameters
40
+
41
+ The following hyperparameters were used during training:
42
+ - learning_rate: 0.0005
43
+ - train_batch_size: 32
44
+ - eval_batch_size: 32
45
+ - seed: 42
46
+ - distributed_type: multi-GPU
47
+ - num_devices: 2
48
+ - total_train_batch_size: 64
49
+ - total_eval_batch_size: 64
50
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
51
+ - lr_scheduler_type: linear
52
+ - num_epochs: 3.0
53
+
54
+ ### Training results
55
+
56
+ | Training Loss | Epoch | Step | Validation Loss | Accuracy | F1 Macro | F1 Micro |
57
+ |:-------------:|:-----:|:----:|:---------------:|:--------:|:--------:|:--------:|
58
+ | 1.443 | 0.11 | 100 | 1.1133 | 0.7291 | 0.5312 | 0.7291 |
59
+ | 0.8813 | 0.21 | 200 | 0.8404 | 0.7712 | 0.6296 | 0.7712 |
60
+ | 0.761 | 0.32 | 300 | 0.7386 | 0.8021 | 0.6789 | 0.8021 |
61
+ | 0.7358 | 0.43 | 400 | 0.7313 | 0.805 | 0.6787 | 0.805 |
62
+ | 0.7624 | 0.53 | 500 | 0.6561 | 0.8164 | 0.7134 | 0.8164 |
63
+ | 0.7067 | 0.64 | 600 | 0.6419 | 0.821 | 0.7273 | 0.821 |
64
+ | 0.6298 | 0.75 | 700 | 0.6412 | 0.8254 | 0.7230 | 0.8254 |
65
+ | 0.6544 | 0.85 | 800 | 0.6277 | 0.8217 | 0.7223 | 0.8217 |
66
+ | 0.5781 | 0.96 | 900 | 0.6054 | 0.8305 | 0.7420 | 0.8305 |
67
+ | 0.4674 | 1.07 | 1000 | 0.6210 | 0.8346 | 0.7371 | 0.8346 |
68
+ | 0.4929 | 1.17 | 1100 | 0.5876 | 0.8387 | 0.7423 | 0.8387 |
69
+ | 0.566 | 1.28 | 1200 | 0.5779 | 0.8475 | 0.7633 | 0.8475 |
70
+ | 0.4577 | 1.39 | 1300 | 0.5772 | 0.8435 | 0.7508 | 0.8435 |
71
+ | 0.4233 | 1.49 | 1400 | 0.5581 | 0.8476 | 0.7625 | 0.8476 |
72
+ | 0.4567 | 1.6 | 1500 | 0.5688 | 0.8462 | 0.7576 | 0.8462 |
73
+ | 0.483 | 1.71 | 1600 | 0.5547 | 0.8478 | 0.7609 | 0.8478 |
74
+ | 0.4649 | 1.81 | 1700 | 0.5396 | 0.851 | 0.7680 | 0.851 |
75
+ | 0.4288 | 1.92 | 1800 | 0.5235 | 0.8577 | 0.7759 | 0.8577 |
76
+ | 0.3445 | 2.03 | 1900 | 0.5204 | 0.8603 | 0.7791 | 0.8603 |
77
+ | 0.3014 | 2.13 | 2000 | 0.5269 | 0.8607 | 0.7862 | 0.8607 |
78
+ | 0.3301 | 2.24 | 2100 | 0.5234 | 0.8591 | 0.7826 | 0.8591 |
79
+ | 0.3069 | 2.35 | 2200 | 0.5266 | 0.8624 | 0.7851 | 0.8624 |
80
+ | 0.3095 | 2.45 | 2300 | 0.5155 | 0.8629 | 0.7846 | 0.8629 |
81
+ | 0.3164 | 2.56 | 2400 | 0.5106 | 0.8646 | 0.7909 | 0.8646 |
82
+ | 0.2914 | 2.67 | 2500 | 0.5055 | 0.8647 | 0.7934 | 0.8647 |
83
+ | 0.2946 | 2.77 | 2600 | 0.5027 | 0.8643 | 0.7917 | 0.8643 |
84
+ | 0.3012 | 2.88 | 2700 | 0.5009 | 0.8671 | 0.7953 | 0.8671 |
85
+ | 0.3181 | 2.99 | 2800 | 0.5004 | 0.8664 | 0.7948 | 0.8664 |
86
+
87
+
88
+ ### Framework versions
89
+
90
+ - Transformers 4.39.0.dev0
91
+ - Pytorch 2.2.1+cu121
92
+ - Datasets 2.18.0
93
+ - Tokenizers 0.15.2
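The model card above lists the fine-tuning hyperparameters and results but no usage snippet. As a hedged sketch, the classifier can be loaded through the standard `transformers` sequence-classification API; the directory name is taken from this commit's folder layout (replace it with a local path or published Hub repo id), and the example clause text is purely illustrative.

```python
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Folder name taken from this commit; replace with a local path or Hub repo id.
MODEL_DIR = "google_t5/t5_base_ledgar"

tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_DIR)
model.eval()

# Example clause text is purely illustrative.
text = "This Agreement shall be governed by the laws of the State of New York."
inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)

with torch.no_grad():
    logits = model(**inputs).logits

pred_id = logits.argmax(dim=-1).item()
print("predicted label:", model.config.id2label[pred_id])
```

Note that the checkpoint's `id2label` entries are stringified class indices rather than human-readable provision names, so the printed label is the LEDGAR class id.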
google_t5/t5_base_ledgar/all_results.json ADDED
@@ -0,0 +1,23 @@
1
+ {
2
+ "epoch": 3.0,
3
+ "eval_accuracy": 0.8664,
4
+ "eval_f1_macro": 0.7947908970820687,
5
+ "eval_f1_micro": 0.8664,
6
+ "eval_loss": 0.5004217028617859,
7
+ "eval_runtime": 23.9278,
8
+ "eval_samples": 10000,
9
+ "eval_samples_per_second": 417.924,
10
+ "eval_steps_per_second": 6.561,
11
+ "test_accuracy": 0.871,
12
+ "test_f1_macro": 0.7998698141352754,
13
+ "test_f1_micro": 0.871,
14
+ "test_loss": 0.5104538798332214,
15
+ "test_runtime": 23.9487,
16
+ "test_samples_per_second": 417.56,
17
+ "test_steps_per_second": 6.556,
18
+ "train_loss": 0.5752294131348806,
19
+ "train_runtime": 2000.5798,
20
+ "train_samples": 60000,
21
+ "train_samples_per_second": 89.974,
22
+ "train_steps_per_second": 1.407
23
+ }
google_t5/t5_base_ledgar/checkpoint-2800/config.json ADDED
@@ -0,0 +1,267 @@
1
+ {
2
+ "_name_or_path": "google-t5/t5-base",
3
+ "architectures": [
4
+ "T5ForSequenceClassification"
5
+ ],
6
+ "classifier_dropout": 0.0,
7
+ "d_ff": 3072,
8
+ "d_kv": 64,
9
+ "d_model": 768,
10
+ "decoder_start_token_id": 0,
11
+ "dense_act_fn": "relu",
12
+ "dropout_rate": 0.1,
13
+ "eos_token_id": 1,
14
+ "feed_forward_proj": "relu",
15
+ "finetuning_task": "text-classification",
16
+ "id2label": {
17
+ "0": "0",
18
+ "1": "1",
19
+ "2": "10",
20
+ "3": "11",
21
+ "4": "12",
22
+ "5": "13",
23
+ "6": "14",
24
+ "7": "15",
25
+ "8": "16",
26
+ "9": "17",
27
+ "10": "18",
28
+ "11": "19",
29
+ "12": "2",
30
+ "13": "20",
31
+ "14": "21",
32
+ "15": "22",
33
+ "16": "23",
34
+ "17": "24",
35
+ "18": "25",
36
+ "19": "26",
37
+ "20": "27",
38
+ "21": "28",
39
+ "22": "29",
40
+ "23": "3",
41
+ "24": "30",
42
+ "25": "31",
43
+ "26": "32",
44
+ "27": "33",
45
+ "28": "34",
46
+ "29": "35",
47
+ "30": "36",
48
+ "31": "37",
49
+ "32": "38",
50
+ "33": "39",
51
+ "34": "4",
52
+ "35": "40",
53
+ "36": "41",
54
+ "37": "42",
55
+ "38": "43",
56
+ "39": "44",
57
+ "40": "45",
58
+ "41": "46",
59
+ "42": "47",
60
+ "43": "48",
61
+ "44": "49",
62
+ "45": "5",
63
+ "46": "50",
64
+ "47": "51",
65
+ "48": "52",
66
+ "49": "53",
67
+ "50": "54",
68
+ "51": "55",
69
+ "52": "56",
70
+ "53": "57",
71
+ "54": "58",
72
+ "55": "59",
73
+ "56": "6",
74
+ "57": "60",
75
+ "58": "61",
76
+ "59": "62",
77
+ "60": "63",
78
+ "61": "64",
79
+ "62": "65",
80
+ "63": "66",
81
+ "64": "67",
82
+ "65": "68",
83
+ "66": "69",
84
+ "67": "7",
85
+ "68": "70",
86
+ "69": "71",
87
+ "70": "72",
88
+ "71": "73",
89
+ "72": "74",
90
+ "73": "75",
91
+ "74": "76",
92
+ "75": "77",
93
+ "76": "78",
94
+ "77": "79",
95
+ "78": "8",
96
+ "79": "80",
97
+ "80": "81",
98
+ "81": "82",
99
+ "82": "83",
100
+ "83": "84",
101
+ "84": "85",
102
+ "85": "86",
103
+ "86": "87",
104
+ "87": "88",
105
+ "88": "89",
106
+ "89": "9",
107
+ "90": "90",
108
+ "91": "91",
109
+ "92": "92",
110
+ "93": "93",
111
+ "94": "94",
112
+ "95": "95",
113
+ "96": "96",
114
+ "97": "97",
115
+ "98": "98",
116
+ "99": "99"
117
+ },
118
+ "initializer_factor": 1.0,
119
+ "is_encoder_decoder": true,
120
+ "is_gated_act": false,
121
+ "label2id": {
122
+ "0": 0,
123
+ "1": 1,
124
+ "10": 2,
125
+ "11": 3,
126
+ "12": 4,
127
+ "13": 5,
128
+ "14": 6,
129
+ "15": 7,
130
+ "16": 8,
131
+ "17": 9,
132
+ "18": 10,
133
+ "19": 11,
134
+ "2": 12,
135
+ "20": 13,
136
+ "21": 14,
137
+ "22": 15,
138
+ "23": 16,
139
+ "24": 17,
140
+ "25": 18,
141
+ "26": 19,
142
+ "27": 20,
143
+ "28": 21,
144
+ "29": 22,
145
+ "3": 23,
146
+ "30": 24,
147
+ "31": 25,
148
+ "32": 26,
149
+ "33": 27,
150
+ "34": 28,
151
+ "35": 29,
152
+ "36": 30,
153
+ "37": 31,
154
+ "38": 32,
155
+ "39": 33,
156
+ "4": 34,
157
+ "40": 35,
158
+ "41": 36,
159
+ "42": 37,
160
+ "43": 38,
161
+ "44": 39,
162
+ "45": 40,
163
+ "46": 41,
164
+ "47": 42,
165
+ "48": 43,
166
+ "49": 44,
167
+ "5": 45,
168
+ "50": 46,
169
+ "51": 47,
170
+ "52": 48,
171
+ "53": 49,
172
+ "54": 50,
173
+ "55": 51,
174
+ "56": 52,
175
+ "57": 53,
176
+ "58": 54,
177
+ "59": 55,
178
+ "6": 56,
179
+ "60": 57,
180
+ "61": 58,
181
+ "62": 59,
182
+ "63": 60,
183
+ "64": 61,
184
+ "65": 62,
185
+ "66": 63,
186
+ "67": 64,
187
+ "68": 65,
188
+ "69": 66,
189
+ "7": 67,
190
+ "70": 68,
191
+ "71": 69,
192
+ "72": 70,
193
+ "73": 71,
194
+ "74": 72,
195
+ "75": 73,
196
+ "76": 74,
197
+ "77": 75,
198
+ "78": 76,
199
+ "79": 77,
200
+ "8": 78,
201
+ "80": 79,
202
+ "81": 80,
203
+ "82": 81,
204
+ "83": 82,
205
+ "84": 83,
206
+ "85": 84,
207
+ "86": 85,
208
+ "87": 86,
209
+ "88": 87,
210
+ "89": 88,
211
+ "9": 89,
212
+ "90": 90,
213
+ "91": 91,
214
+ "92": 92,
215
+ "93": 93,
216
+ "94": 94,
217
+ "95": 95,
218
+ "96": 96,
219
+ "97": 97,
220
+ "98": 98,
221
+ "99": 99
222
+ },
223
+ "layer_norm_epsilon": 1e-06,
224
+ "model_type": "t5",
225
+ "n_positions": 512,
226
+ "num_decoder_layers": 12,
227
+ "num_heads": 12,
228
+ "num_layers": 12,
229
+ "output_past": true,
230
+ "pad_token_id": 0,
231
+ "problem_type": "single_label_classification",
232
+ "relative_attention_max_distance": 128,
233
+ "relative_attention_num_buckets": 32,
234
+ "task_specific_params": {
235
+ "summarization": {
236
+ "early_stopping": true,
237
+ "length_penalty": 2.0,
238
+ "max_length": 200,
239
+ "min_length": 30,
240
+ "no_repeat_ngram_size": 3,
241
+ "num_beams": 4,
242
+ "prefix": "summarize: "
243
+ },
244
+ "translation_en_to_de": {
245
+ "early_stopping": true,
246
+ "max_length": 300,
247
+ "num_beams": 4,
248
+ "prefix": "translate English to German: "
249
+ },
250
+ "translation_en_to_fr": {
251
+ "early_stopping": true,
252
+ "max_length": 300,
253
+ "num_beams": 4,
254
+ "prefix": "translate English to French: "
255
+ },
256
+ "translation_en_to_ro": {
257
+ "early_stopping": true,
258
+ "max_length": 300,
259
+ "num_beams": 4,
260
+ "prefix": "translate English to Romanian: "
261
+ }
262
+ },
263
+ "torch_dtype": "float32",
264
+ "transformers_version": "4.39.0.dev0",
265
+ "use_cache": true,
266
+ "vocab_size": 32128
267
+ }
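The `id2label` / `label2id` tables in the config above store the 100 LEDGAR classes as stringified indices sorted lexicographically ("0", "1", "10", "11", ...), so a model output index does not equal the original dataset label number; the mapping has to go through `id2label`. A minimal sketch of that lookup follows; the config path is assumed from this checkpoint's folder layout.

```python
import json

# Path assumed from this checkpoint's folder layout in the listing above.
CONFIG_PATH = "google_t5/t5_base_ledgar/checkpoint-2800/config.json"

with open(CONFIG_PATH) as f:
    config = json.load(f)

id2label = config["id2label"]   # e.g. "2" -> "10"
label2id = config["label2id"]   # e.g. "10" -> 2

# Output index 2 corresponds to dataset label "10" because the label names
# were sorted lexicographically when the mapping was built.
assert id2label["2"] == "10"
assert label2id["10"] == 2
print(id2label["2"], label2id["10"])
```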
google_t5/t5_base_ledgar/checkpoint-2800/model.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bec396a5ee38fb93b45dcbfe6b134f5c7adba0ea2c64f0184a3f24a69faff1e
3
+ size 894318160
google_t5/t5_base_ledgar/checkpoint-2800/optimizer.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1189c70b050342748d8acf188a98f72bb43308b8b1a6fd8d7f850b2185ea93c3
3
+ size 1788788090
google_t5/t5_base_ledgar/checkpoint-2800/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24ba53d7978dbe7a0c204e81111c055f86c339fdfaab61d75b7ccaed5cd1388d
3
+ size 14512
google_t5/t5_base_ledgar/checkpoint-2800/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb62001198093461d4e1250452b59b09d0426f9b616a6c5afff748dc51f021be
3
+ size 14512
google_t5/t5_base_ledgar/checkpoint-2800/scheduler.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51e284703b00465acd87a34da92d3e2f6ab78a159fbf2682d3512ef8b4721d1e
3
+ size 1064
google_t5/t5_base_ledgar/checkpoint-2800/special_tokens_map.json ADDED
@@ -0,0 +1,107 @@
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>",
99
+ "<extra_id_96>",
100
+ "<extra_id_97>",
101
+ "<extra_id_98>",
102
+ "<extra_id_99>"
103
+ ],
104
+ "eos_token": "</s>",
105
+ "pad_token": "<pad>",
106
+ "unk_token": "<unk>"
107
+ }
google_t5/t5_base_ledgar/checkpoint-2800/spiece.model ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
3
+ size 791656
google_t5/t5_base_ledgar/checkpoint-2800/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
google_t5/t5_base_ledgar/checkpoint-2800/tokenizer_config.json ADDED
@@ -0,0 +1,937 @@
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<pad>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "</s>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "<unk>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "32000": {
28
+ "content": "<extra_id_99>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "32001": {
36
+ "content": "<extra_id_98>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "32002": {
44
+ "content": "<extra_id_97>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "32003": {
52
+ "content": "<extra_id_96>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "32004": {
60
+ "content": "<extra_id_95>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "32005": {
68
+ "content": "<extra_id_94>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "32006": {
76
+ "content": "<extra_id_93>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "32007": {
84
+ "content": "<extra_id_92>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "32008": {
92
+ "content": "<extra_id_91>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "32009": {
100
+ "content": "<extra_id_90>",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "32010": {
108
+ "content": "<extra_id_89>",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "32011": {
116
+ "content": "<extra_id_88>",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "32012": {
124
+ "content": "<extra_id_87>",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "32013": {
132
+ "content": "<extra_id_86>",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": true
138
+ },
139
+ "32014": {
140
+ "content": "<extra_id_85>",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": true
146
+ },
147
+ "32015": {
148
+ "content": "<extra_id_84>",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": true
154
+ },
155
+ "32016": {
156
+ "content": "<extra_id_83>",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "32017": {
164
+ "content": "<extra_id_82>",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": true
170
+ },
171
+ "32018": {
172
+ "content": "<extra_id_81>",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": true
178
+ },
179
+ "32019": {
180
+ "content": "<extra_id_80>",
181
+ "lstrip": false,
182
+ "normalized": false,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": true
186
+ },
187
+ "32020": {
188
+ "content": "<extra_id_79>",
189
+ "lstrip": false,
190
+ "normalized": false,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": true
194
+ },
195
+ "32021": {
196
+ "content": "<extra_id_78>",
197
+ "lstrip": false,
198
+ "normalized": false,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": true
202
+ },
203
+ "32022": {
204
+ "content": "<extra_id_77>",
205
+ "lstrip": false,
206
+ "normalized": false,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": true
210
+ },
211
+ "32023": {
212
+ "content": "<extra_id_76>",
213
+ "lstrip": false,
214
+ "normalized": false,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": true
218
+ },
219
+ "32024": {
220
+ "content": "<extra_id_75>",
221
+ "lstrip": false,
222
+ "normalized": false,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": true
226
+ },
227
+ "32025": {
228
+ "content": "<extra_id_74>",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": true
234
+ },
235
+ "32026": {
236
+ "content": "<extra_id_73>",
237
+ "lstrip": false,
238
+ "normalized": false,
239
+ "rstrip": false,
240
+ "single_word": false,
241
+ "special": true
242
+ },
243
+ "32027": {
244
+ "content": "<extra_id_72>",
245
+ "lstrip": false,
246
+ "normalized": false,
247
+ "rstrip": false,
248
+ "single_word": false,
249
+ "special": true
250
+ },
251
+ "32028": {
252
+ "content": "<extra_id_71>",
253
+ "lstrip": false,
254
+ "normalized": false,
255
+ "rstrip": false,
256
+ "single_word": false,
257
+ "special": true
258
+ },
259
+ "32029": {
260
+ "content": "<extra_id_70>",
261
+ "lstrip": false,
262
+ "normalized": false,
263
+ "rstrip": false,
264
+ "single_word": false,
265
+ "special": true
266
+ },
267
+ "32030": {
268
+ "content": "<extra_id_69>",
269
+ "lstrip": false,
270
+ "normalized": false,
271
+ "rstrip": false,
272
+ "single_word": false,
273
+ "special": true
274
+ },
275
+ "32031": {
276
+ "content": "<extra_id_68>",
277
+ "lstrip": false,
278
+ "normalized": false,
279
+ "rstrip": false,
280
+ "single_word": false,
281
+ "special": true
282
+ },
283
+ "32032": {
284
+ "content": "<extra_id_67>",
285
+ "lstrip": false,
286
+ "normalized": false,
287
+ "rstrip": false,
288
+ "single_word": false,
289
+ "special": true
290
+ },
291
+ "32033": {
292
+ "content": "<extra_id_66>",
293
+ "lstrip": false,
294
+ "normalized": false,
295
+ "rstrip": false,
296
+ "single_word": false,
297
+ "special": true
298
+ },
299
+ "32034": {
300
+ "content": "<extra_id_65>",
301
+ "lstrip": false,
302
+ "normalized": false,
303
+ "rstrip": false,
304
+ "single_word": false,
305
+ "special": true
306
+ },
307
+ "32035": {
308
+ "content": "<extra_id_64>",
309
+ "lstrip": false,
310
+ "normalized": false,
311
+ "rstrip": false,
312
+ "single_word": false,
313
+ "special": true
314
+ },
315
+ "32036": {
316
+ "content": "<extra_id_63>",
317
+ "lstrip": false,
318
+ "normalized": false,
319
+ "rstrip": false,
320
+ "single_word": false,
321
+ "special": true
322
+ },
323
+ "32037": {
324
+ "content": "<extra_id_62>",
325
+ "lstrip": false,
326
+ "normalized": false,
327
+ "rstrip": false,
328
+ "single_word": false,
329
+ "special": true
330
+ },
331
+ "32038": {
332
+ "content": "<extra_id_61>",
333
+ "lstrip": false,
334
+ "normalized": false,
335
+ "rstrip": false,
336
+ "single_word": false,
337
+ "special": true
338
+ },
339
+ "32039": {
340
+ "content": "<extra_id_60>",
341
+ "lstrip": false,
342
+ "normalized": false,
343
+ "rstrip": false,
344
+ "single_word": false,
345
+ "special": true
346
+ },
347
+ "32040": {
348
+ "content": "<extra_id_59>",
349
+ "lstrip": false,
350
+ "normalized": false,
351
+ "rstrip": false,
352
+ "single_word": false,
353
+ "special": true
354
+ },
355
+ "32041": {
356
+ "content": "<extra_id_58>",
357
+ "lstrip": false,
358
+ "normalized": false,
359
+ "rstrip": false,
360
+ "single_word": false,
361
+ "special": true
362
+ },
363
+ "32042": {
364
+ "content": "<extra_id_57>",
365
+ "lstrip": false,
366
+ "normalized": false,
367
+ "rstrip": false,
368
+ "single_word": false,
369
+ "special": true
370
+ },
371
+ "32043": {
372
+ "content": "<extra_id_56>",
373
+ "lstrip": false,
374
+ "normalized": false,
375
+ "rstrip": false,
376
+ "single_word": false,
377
+ "special": true
378
+ },
379
+ "32044": {
380
+ "content": "<extra_id_55>",
381
+ "lstrip": false,
382
+ "normalized": false,
383
+ "rstrip": false,
384
+ "single_word": false,
385
+ "special": true
386
+ },
387
+ "32045": {
388
+ "content": "<extra_id_54>",
389
+ "lstrip": false,
390
+ "normalized": false,
391
+ "rstrip": false,
392
+ "single_word": false,
393
+ "special": true
394
+ },
395
+ "32046": {
396
+ "content": "<extra_id_53>",
397
+ "lstrip": false,
398
+ "normalized": false,
399
+ "rstrip": false,
400
+ "single_word": false,
401
+ "special": true
402
+ },
403
+ "32047": {
404
+ "content": "<extra_id_52>",
405
+ "lstrip": false,
406
+ "normalized": false,
407
+ "rstrip": false,
408
+ "single_word": false,
409
+ "special": true
410
+ },
411
+ "32048": {
412
+ "content": "<extra_id_51>",
413
+ "lstrip": false,
414
+ "normalized": false,
415
+ "rstrip": false,
416
+ "single_word": false,
417
+ "special": true
418
+ },
419
+ "32049": {
420
+ "content": "<extra_id_50>",
421
+ "lstrip": false,
422
+ "normalized": false,
423
+ "rstrip": false,
424
+ "single_word": false,
425
+ "special": true
426
+ },
427
+ "32050": {
428
+ "content": "<extra_id_49>",
429
+ "lstrip": false,
430
+ "normalized": false,
431
+ "rstrip": false,
432
+ "single_word": false,
433
+ "special": true
434
+ },
435
+ "32051": {
436
+ "content": "<extra_id_48>",
437
+ "lstrip": false,
438
+ "normalized": false,
439
+ "rstrip": false,
440
+ "single_word": false,
441
+ "special": true
442
+ },
443
+ "32052": {
444
+ "content": "<extra_id_47>",
445
+ "lstrip": false,
446
+ "normalized": false,
447
+ "rstrip": false,
448
+ "single_word": false,
449
+ "special": true
450
+ },
451
+ "32053": {
452
+ "content": "<extra_id_46>",
453
+ "lstrip": false,
454
+ "normalized": false,
455
+ "rstrip": false,
456
+ "single_word": false,
457
+ "special": true
458
+ },
459
+ "32054": {
460
+ "content": "<extra_id_45>",
461
+ "lstrip": false,
462
+ "normalized": false,
463
+ "rstrip": false,
464
+ "single_word": false,
465
+ "special": true
466
+ },
467
+ "32055": {
468
+ "content": "<extra_id_44>",
469
+ "lstrip": false,
470
+ "normalized": false,
471
+ "rstrip": false,
472
+ "single_word": false,
473
+ "special": true
474
+ },
475
+ "32056": {
476
+ "content": "<extra_id_43>",
477
+ "lstrip": false,
478
+ "normalized": false,
479
+ "rstrip": false,
480
+ "single_word": false,
481
+ "special": true
482
+ },
483
+ "32057": {
484
+ "content": "<extra_id_42>",
485
+ "lstrip": false,
486
+ "normalized": false,
487
+ "rstrip": false,
488
+ "single_word": false,
489
+ "special": true
490
+ },
491
+ "32058": {
492
+ "content": "<extra_id_41>",
493
+ "lstrip": false,
494
+ "normalized": false,
495
+ "rstrip": false,
496
+ "single_word": false,
497
+ "special": true
498
+ },
499
+ "32059": {
500
+ "content": "<extra_id_40>",
501
+ "lstrip": false,
502
+ "normalized": false,
503
+ "rstrip": false,
504
+ "single_word": false,
505
+ "special": true
506
+ },
507
+ "32060": {
508
+ "content": "<extra_id_39>",
509
+ "lstrip": false,
510
+ "normalized": false,
511
+ "rstrip": false,
512
+ "single_word": false,
513
+ "special": true
514
+ },
515
+ "32061": {
516
+ "content": "<extra_id_38>",
517
+ "lstrip": false,
518
+ "normalized": false,
519
+ "rstrip": false,
520
+ "single_word": false,
521
+ "special": true
522
+ },
523
+ "32062": {
524
+ "content": "<extra_id_37>",
525
+ "lstrip": false,
526
+ "normalized": false,
527
+ "rstrip": false,
528
+ "single_word": false,
529
+ "special": true
530
+ },
531
+ "32063": {
532
+ "content": "<extra_id_36>",
533
+ "lstrip": false,
534
+ "normalized": false,
535
+ "rstrip": false,
536
+ "single_word": false,
537
+ "special": true
538
+ },
539
+ "32064": {
540
+ "content": "<extra_id_35>",
541
+ "lstrip": false,
542
+ "normalized": false,
543
+ "rstrip": false,
544
+ "single_word": false,
545
+ "special": true
546
+ },
547
+ "32065": {
548
+ "content": "<extra_id_34>",
549
+ "lstrip": false,
550
+ "normalized": false,
551
+ "rstrip": false,
552
+ "single_word": false,
553
+ "special": true
554
+ },
555
+ "32066": {
556
+ "content": "<extra_id_33>",
557
+ "lstrip": false,
558
+ "normalized": false,
559
+ "rstrip": false,
560
+ "single_word": false,
561
+ "special": true
562
+ },
563
+ "32067": {
564
+ "content": "<extra_id_32>",
565
+ "lstrip": false,
566
+ "normalized": false,
567
+ "rstrip": false,
568
+ "single_word": false,
569
+ "special": true
570
+ },
571
+ "32068": {
572
+ "content": "<extra_id_31>",
573
+ "lstrip": false,
574
+ "normalized": false,
575
+ "rstrip": false,
576
+ "single_word": false,
577
+ "special": true
578
+ },
579
+ "32069": {
580
+ "content": "<extra_id_30>",
581
+ "lstrip": false,
582
+ "normalized": false,
583
+ "rstrip": false,
584
+ "single_word": false,
585
+ "special": true
586
+ },
587
+ "32070": {
588
+ "content": "<extra_id_29>",
589
+ "lstrip": false,
590
+ "normalized": false,
591
+ "rstrip": false,
592
+ "single_word": false,
593
+ "special": true
594
+ },
595
+ "32071": {
596
+ "content": "<extra_id_28>",
597
+ "lstrip": false,
598
+ "normalized": false,
599
+ "rstrip": false,
600
+ "single_word": false,
601
+ "special": true
602
+ },
603
+ "32072": {
604
+ "content": "<extra_id_27>",
605
+ "lstrip": false,
606
+ "normalized": false,
607
+ "rstrip": false,
608
+ "single_word": false,
609
+ "special": true
610
+ },
611
+ "32073": {
612
+ "content": "<extra_id_26>",
613
+ "lstrip": false,
614
+ "normalized": false,
615
+ "rstrip": false,
616
+ "single_word": false,
617
+ "special": true
618
+ },
619
+ "32074": {
620
+ "content": "<extra_id_25>",
621
+ "lstrip": false,
622
+ "normalized": false,
623
+ "rstrip": false,
624
+ "single_word": false,
625
+ "special": true
626
+ },
627
+ "32075": {
628
+ "content": "<extra_id_24>",
629
+ "lstrip": false,
630
+ "normalized": false,
631
+ "rstrip": false,
632
+ "single_word": false,
633
+ "special": true
634
+ },
635
+ "32076": {
636
+ "content": "<extra_id_23>",
637
+ "lstrip": false,
638
+ "normalized": false,
639
+ "rstrip": false,
640
+ "single_word": false,
641
+ "special": true
642
+ },
643
+ "32077": {
644
+ "content": "<extra_id_22>",
645
+ "lstrip": false,
646
+ "normalized": false,
647
+ "rstrip": false,
648
+ "single_word": false,
649
+ "special": true
650
+ },
651
+ "32078": {
652
+ "content": "<extra_id_21>",
653
+ "lstrip": false,
654
+ "normalized": false,
655
+ "rstrip": false,
656
+ "single_word": false,
657
+ "special": true
658
+ },
659
+ "32079": {
660
+ "content": "<extra_id_20>",
661
+ "lstrip": false,
662
+ "normalized": false,
663
+ "rstrip": false,
664
+ "single_word": false,
665
+ "special": true
666
+ },
667
+ "32080": {
668
+ "content": "<extra_id_19>",
669
+ "lstrip": false,
670
+ "normalized": false,
671
+ "rstrip": false,
672
+ "single_word": false,
673
+ "special": true
674
+ },
675
+ "32081": {
676
+ "content": "<extra_id_18>",
677
+ "lstrip": false,
678
+ "normalized": false,
679
+ "rstrip": false,
680
+ "single_word": false,
681
+ "special": true
682
+ },
683
+ "32082": {
684
+ "content": "<extra_id_17>",
685
+ "lstrip": false,
686
+ "normalized": false,
687
+ "rstrip": false,
688
+ "single_word": false,
689
+ "special": true
690
+ },
691
+ "32083": {
692
+ "content": "<extra_id_16>",
693
+ "lstrip": false,
694
+ "normalized": false,
695
+ "rstrip": false,
696
+ "single_word": false,
697
+ "special": true
698
+ },
699
+ "32084": {
700
+ "content": "<extra_id_15>",
701
+ "lstrip": false,
702
+ "normalized": false,
703
+ "rstrip": false,
704
+ "single_word": false,
705
+ "special": true
706
+ },
707
+ "32085": {
708
+ "content": "<extra_id_14>",
709
+ "lstrip": false,
710
+ "normalized": false,
711
+ "rstrip": false,
712
+ "single_word": false,
713
+ "special": true
714
+ },
715
+ "32086": {
716
+ "content": "<extra_id_13>",
717
+ "lstrip": false,
718
+ "normalized": false,
719
+ "rstrip": false,
720
+ "single_word": false,
721
+ "special": true
722
+ },
723
+ "32087": {
724
+ "content": "<extra_id_12>",
725
+ "lstrip": false,
726
+ "normalized": false,
727
+ "rstrip": false,
728
+ "single_word": false,
729
+ "special": true
730
+ },
731
+ "32088": {
732
+ "content": "<extra_id_11>",
733
+ "lstrip": false,
734
+ "normalized": false,
735
+ "rstrip": false,
736
+ "single_word": false,
737
+ "special": true
738
+ },
739
+ "32089": {
740
+ "content": "<extra_id_10>",
741
+ "lstrip": false,
742
+ "normalized": false,
743
+ "rstrip": false,
744
+ "single_word": false,
745
+ "special": true
746
+ },
747
+ "32090": {
748
+ "content": "<extra_id_9>",
749
+ "lstrip": false,
750
+ "normalized": false,
751
+ "rstrip": false,
752
+ "single_word": false,
753
+ "special": true
754
+ },
755
+ "32091": {
756
+ "content": "<extra_id_8>",
757
+ "lstrip": false,
758
+ "normalized": false,
759
+ "rstrip": false,
760
+ "single_word": false,
761
+ "special": true
762
+ },
763
+ "32092": {
764
+ "content": "<extra_id_7>",
765
+ "lstrip": false,
766
+ "normalized": false,
767
+ "rstrip": false,
768
+ "single_word": false,
769
+ "special": true
770
+ },
771
+ "32093": {
772
+ "content": "<extra_id_6>",
773
+ "lstrip": false,
774
+ "normalized": false,
775
+ "rstrip": false,
776
+ "single_word": false,
777
+ "special": true
778
+ },
779
+ "32094": {
780
+ "content": "<extra_id_5>",
781
+ "lstrip": false,
782
+ "normalized": false,
783
+ "rstrip": false,
784
+ "single_word": false,
785
+ "special": true
786
+ },
787
+ "32095": {
788
+ "content": "<extra_id_4>",
789
+ "lstrip": false,
790
+ "normalized": false,
791
+ "rstrip": false,
792
+ "single_word": false,
793
+ "special": true
794
+ },
795
+ "32096": {
796
+ "content": "<extra_id_3>",
797
+ "lstrip": false,
798
+ "normalized": false,
799
+ "rstrip": false,
800
+ "single_word": false,
801
+ "special": true
802
+ },
803
+ "32097": {
804
+ "content": "<extra_id_2>",
805
+ "lstrip": false,
806
+ "normalized": false,
807
+ "rstrip": false,
808
+ "single_word": false,
809
+ "special": true
810
+ },
811
+ "32098": {
812
+ "content": "<extra_id_1>",
813
+ "lstrip": false,
814
+ "normalized": false,
815
+ "rstrip": false,
816
+ "single_word": false,
817
+ "special": true
818
+ },
819
+ "32099": {
820
+ "content": "<extra_id_0>",
821
+ "lstrip": false,
822
+ "normalized": false,
823
+ "rstrip": false,
824
+ "single_word": false,
825
+ "special": true
826
+ }
827
+ },
828
+ "additional_special_tokens": [
829
+ "<extra_id_0>",
830
+ "<extra_id_1>",
831
+ "<extra_id_2>",
832
+ "<extra_id_3>",
833
+ "<extra_id_4>",
834
+ "<extra_id_5>",
835
+ "<extra_id_6>",
836
+ "<extra_id_7>",
837
+ "<extra_id_8>",
838
+ "<extra_id_9>",
839
+ "<extra_id_10>",
840
+ "<extra_id_11>",
841
+ "<extra_id_12>",
842
+ "<extra_id_13>",
843
+ "<extra_id_14>",
844
+ "<extra_id_15>",
845
+ "<extra_id_16>",
846
+ "<extra_id_17>",
847
+ "<extra_id_18>",
848
+ "<extra_id_19>",
849
+ "<extra_id_20>",
850
+ "<extra_id_21>",
851
+ "<extra_id_22>",
852
+ "<extra_id_23>",
853
+ "<extra_id_24>",
854
+ "<extra_id_25>",
855
+ "<extra_id_26>",
856
+ "<extra_id_27>",
857
+ "<extra_id_28>",
858
+ "<extra_id_29>",
859
+ "<extra_id_30>",
860
+ "<extra_id_31>",
861
+ "<extra_id_32>",
862
+ "<extra_id_33>",
863
+ "<extra_id_34>",
864
+ "<extra_id_35>",
865
+ "<extra_id_36>",
866
+ "<extra_id_37>",
867
+ "<extra_id_38>",
868
+ "<extra_id_39>",
869
+ "<extra_id_40>",
870
+ "<extra_id_41>",
871
+ "<extra_id_42>",
872
+ "<extra_id_43>",
873
+ "<extra_id_44>",
874
+ "<extra_id_45>",
875
+ "<extra_id_46>",
876
+ "<extra_id_47>",
877
+ "<extra_id_48>",
878
+ "<extra_id_49>",
879
+ "<extra_id_50>",
880
+ "<extra_id_51>",
881
+ "<extra_id_52>",
882
+ "<extra_id_53>",
883
+ "<extra_id_54>",
884
+ "<extra_id_55>",
885
+ "<extra_id_56>",
886
+ "<extra_id_57>",
887
+ "<extra_id_58>",
888
+ "<extra_id_59>",
889
+ "<extra_id_60>",
890
+ "<extra_id_61>",
891
+ "<extra_id_62>",
892
+ "<extra_id_63>",
893
+ "<extra_id_64>",
894
+ "<extra_id_65>",
895
+ "<extra_id_66>",
896
+ "<extra_id_67>",
897
+ "<extra_id_68>",
898
+ "<extra_id_69>",
899
+ "<extra_id_70>",
900
+ "<extra_id_71>",
901
+ "<extra_id_72>",
902
+ "<extra_id_73>",
903
+ "<extra_id_74>",
904
+ "<extra_id_75>",
905
+ "<extra_id_76>",
906
+ "<extra_id_77>",
907
+ "<extra_id_78>",
908
+ "<extra_id_79>",
909
+ "<extra_id_80>",
910
+ "<extra_id_81>",
911
+ "<extra_id_82>",
912
+ "<extra_id_83>",
913
+ "<extra_id_84>",
914
+ "<extra_id_85>",
915
+ "<extra_id_86>",
916
+ "<extra_id_87>",
917
+ "<extra_id_88>",
918
+ "<extra_id_89>",
919
+ "<extra_id_90>",
920
+ "<extra_id_91>",
921
+ "<extra_id_92>",
922
+ "<extra_id_93>",
923
+ "<extra_id_94>",
924
+ "<extra_id_95>",
925
+ "<extra_id_96>",
926
+ "<extra_id_97>",
927
+ "<extra_id_98>",
928
+ "<extra_id_99>"
929
+ ],
930
+ "clean_up_tokenization_spaces": true,
931
+ "eos_token": "</s>",
932
+ "extra_ids": 100,
933
+ "model_max_length": 512,
934
+ "pad_token": "<pad>",
935
+ "tokenizer_class": "T5Tokenizer",
936
+ "unk_token": "<unk>"
937
+ }
google_t5/t5_base_ledgar/checkpoint-2800/trainer_state.json ADDED
@@ -0,0 +1,1113 @@
1
+ {
2
+ "best_metric": 0.5004217028617859,
3
+ "best_model_checkpoint": "../../experiments_checkpoints/MAdAiLab/google_t5/t5_base_ledgar/checkpoint-2800",
4
+ "epoch": 2.9850746268656714,
5
+ "eval_steps": 100,
6
+ "global_step": 2800,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.03,
13
+ "grad_norm": 2.0540711879730225,
14
+ "learning_rate": 0.0004955579246624023,
15
+ "loss": 4.1527,
16
+ "step": 25
17
+ },
18
+ {
19
+ "epoch": 0.05,
20
+ "grad_norm": 2.241663694381714,
21
+ "learning_rate": 0.0004911158493248046,
22
+ "loss": 2.8808,
23
+ "step": 50
24
+ },
25
+ {
26
+ "epoch": 0.08,
27
+ "grad_norm": 2.093815803527832,
28
+ "learning_rate": 0.00048667377398720687,
29
+ "loss": 1.9075,
30
+ "step": 75
31
+ },
32
+ {
33
+ "epoch": 0.11,
34
+ "grad_norm": 2.396036148071289,
35
+ "learning_rate": 0.0004822316986496091,
36
+ "loss": 1.443,
37
+ "step": 100
38
+ },
39
+ {
40
+ "epoch": 0.11,
41
+ "eval_accuracy": 0.7291,
42
+ "eval_f1_macro": 0.5312263291596806,
43
+ "eval_f1_micro": 0.7291,
44
+ "eval_loss": 1.113275408744812,
45
+ "eval_runtime": 24.8696,
46
+ "eval_samples_per_second": 402.097,
47
+ "eval_steps_per_second": 6.313,
48
+ "step": 100
49
+ },
50
+ {
51
+ "epoch": 0.13,
52
+ "grad_norm": 2.6226117610931396,
53
+ "learning_rate": 0.0004777896233120114,
54
+ "loss": 1.1778,
55
+ "step": 125
56
+ },
57
+ {
58
+ "epoch": 0.16,
59
+ "grad_norm": 2.527249574661255,
60
+ "learning_rate": 0.00047334754797441367,
61
+ "loss": 1.0394,
62
+ "step": 150
63
+ },
64
+ {
65
+ "epoch": 0.19,
66
+ "grad_norm": 2.435964584350586,
67
+ "learning_rate": 0.00046890547263681595,
68
+ "loss": 0.9535,
69
+ "step": 175
70
+ },
71
+ {
72
+ "epoch": 0.21,
73
+ "grad_norm": 2.0345797538757324,
74
+ "learning_rate": 0.00046446339729921824,
75
+ "loss": 0.8813,
76
+ "step": 200
77
+ },
78
+ {
79
+ "epoch": 0.21,
80
+ "eval_accuracy": 0.7712,
81
+ "eval_f1_macro": 0.6296223926135491,
82
+ "eval_f1_micro": 0.7712,
83
+ "eval_loss": 0.8404093980789185,
84
+ "eval_runtime": 24.3258,
85
+ "eval_samples_per_second": 411.086,
86
+ "eval_steps_per_second": 6.454,
87
+ "step": 200
88
+ },
89
+ {
90
+ "epoch": 0.24,
91
+ "grad_norm": 4.247000217437744,
92
+ "learning_rate": 0.0004600213219616205,
93
+ "loss": 0.8062,
94
+ "step": 225
95
+ },
96
+ {
97
+ "epoch": 0.27,
98
+ "grad_norm": 1.9799127578735352,
99
+ "learning_rate": 0.00045557924662402275,
100
+ "loss": 0.8654,
101
+ "step": 250
102
+ },
103
+ {
104
+ "epoch": 0.29,
105
+ "grad_norm": 2.0902786254882812,
106
+ "learning_rate": 0.000451137171286425,
107
+ "loss": 0.8073,
108
+ "step": 275
109
+ },
110
+ {
111
+ "epoch": 0.32,
112
+ "grad_norm": 1.7158573865890503,
113
+ "learning_rate": 0.00044669509594882727,
114
+ "loss": 0.761,
115
+ "step": 300
116
+ },
117
+ {
118
+ "epoch": 0.32,
119
+ "eval_accuracy": 0.8021,
120
+ "eval_f1_macro": 0.6788544961571827,
121
+ "eval_f1_micro": 0.8021,
122
+ "eval_loss": 0.738567590713501,
123
+ "eval_runtime": 24.2955,
124
+ "eval_samples_per_second": 411.599,
125
+ "eval_steps_per_second": 6.462,
126
+ "step": 300
127
+ },
128
+ {
129
+ "epoch": 0.35,
130
+ "grad_norm": 2.2180001735687256,
131
+ "learning_rate": 0.00044225302061122956,
132
+ "loss": 0.7375,
133
+ "step": 325
134
+ },
135
+ {
136
+ "epoch": 0.37,
137
+ "grad_norm": 2.029712677001953,
138
+ "learning_rate": 0.00043781094527363184,
139
+ "loss": 0.7601,
140
+ "step": 350
141
+ },
142
+ {
143
+ "epoch": 0.4,
144
+ "grad_norm": 2.0835516452789307,
145
+ "learning_rate": 0.0004333688699360341,
146
+ "loss": 0.6685,
147
+ "step": 375
148
+ },
149
+ {
150
+ "epoch": 0.43,
151
+ "grad_norm": 2.2809343338012695,
152
+ "learning_rate": 0.0004289267945984364,
153
+ "loss": 0.7358,
154
+ "step": 400
155
+ },
156
+ {
157
+ "epoch": 0.43,
158
+ "eval_accuracy": 0.805,
159
+ "eval_f1_macro": 0.6786916719278343,
160
+ "eval_f1_micro": 0.805,
161
+ "eval_loss": 0.731271505355835,
162
+ "eval_runtime": 24.2168,
163
+ "eval_samples_per_second": 412.937,
164
+ "eval_steps_per_second": 6.483,
165
+ "step": 400
166
+ },
167
+ {
168
+ "epoch": 0.45,
169
+ "grad_norm": 2.0364644527435303,
170
+ "learning_rate": 0.00042448471926083864,
171
+ "loss": 0.7414,
172
+ "step": 425
173
+ },
174
+ {
175
+ "epoch": 0.48,
176
+ "grad_norm": 2.7770190238952637,
177
+ "learning_rate": 0.00042004264392324093,
178
+ "loss": 0.8142,
179
+ "step": 450
180
+ },
181
+ {
182
+ "epoch": 0.51,
183
+ "grad_norm": 9.388680458068848,
184
+ "learning_rate": 0.0004156005685856432,
185
+ "loss": 0.7282,
186
+ "step": 475
187
+ },
188
+ {
189
+ "epoch": 0.53,
190
+ "grad_norm": 2.113584041595459,
191
+ "learning_rate": 0.0004111584932480455,
192
+ "loss": 0.7624,
193
+ "step": 500
194
+ },
195
+ {
196
+ "epoch": 0.53,
197
+ "eval_accuracy": 0.8164,
198
+ "eval_f1_macro": 0.7134072796381032,
199
+ "eval_f1_micro": 0.8164,
200
+ "eval_loss": 0.6560911536216736,
201
+ "eval_runtime": 24.2135,
202
+ "eval_samples_per_second": 412.993,
203
+ "eval_steps_per_second": 6.484,
204
+ "step": 500
205
+ },
206
+ {
207
+ "epoch": 0.56,
208
+ "grad_norm": 1.490932822227478,
209
+ "learning_rate": 0.0004067164179104478,
210
+ "loss": 0.7123,
211
+ "step": 525
212
+ },
213
+ {
214
+ "epoch": 0.59,
215
+ "grad_norm": 1.1674153804779053,
216
+ "learning_rate": 0.00040227434257285007,
217
+ "loss": 0.692,
218
+ "step": 550
219
+ },
220
+ {
221
+ "epoch": 0.61,
222
+ "grad_norm": 2.5101075172424316,
223
+ "learning_rate": 0.00039783226723525235,
224
+ "loss": 0.6392,
225
+ "step": 575
226
+ },
227
+ {
228
+ "epoch": 0.64,
229
+ "grad_norm": 1.797776460647583,
230
+ "learning_rate": 0.0003933901918976546,
231
+ "loss": 0.7067,
232
+ "step": 600
233
+ },
234
+ {
235
+ "epoch": 0.64,
236
+ "eval_accuracy": 0.821,
237
+ "eval_f1_macro": 0.7273350797174626,
238
+ "eval_f1_micro": 0.821,
239
+ "eval_loss": 0.6418954133987427,
240
+ "eval_runtime": 24.1496,
241
+ "eval_samples_per_second": 414.086,
242
+ "eval_steps_per_second": 6.501,
243
+ "step": 600
244
+ },
245
+ {
246
+ "epoch": 0.67,
247
+ "grad_norm": 1.9408563375473022,
248
+ "learning_rate": 0.00038894811656005687,
249
+ "loss": 0.6762,
250
+ "step": 625
251
+ },
252
+ {
253
+ "epoch": 0.69,
254
+ "grad_norm": 1.9674413204193115,
255
+ "learning_rate": 0.00038450604122245916,
256
+ "loss": 0.6784,
257
+ "step": 650
258
+ },
259
+ {
260
+ "epoch": 0.72,
261
+ "grad_norm": 2.5168087482452393,
262
+ "learning_rate": 0.00038006396588486144,
263
+ "loss": 0.7106,
264
+ "step": 675
265
+ },
266
+ {
267
+ "epoch": 0.75,
268
+ "grad_norm": 1.7439873218536377,
269
+ "learning_rate": 0.0003756218905472637,
270
+ "loss": 0.6298,
271
+ "step": 700
272
+ },
273
+ {
274
+ "epoch": 0.75,
275
+ "eval_accuracy": 0.8254,
276
+ "eval_f1_macro": 0.7229632924236448,
277
+ "eval_f1_micro": 0.8254,
278
+ "eval_loss": 0.6412187218666077,
279
+ "eval_runtime": 24.0401,
280
+ "eval_samples_per_second": 415.971,
281
+ "eval_steps_per_second": 6.531,
282
+ "step": 700
283
+ },
284
+ {
285
+ "epoch": 0.77,
286
+ "grad_norm": 1.6693323850631714,
287
+ "learning_rate": 0.00037117981520966596,
288
+ "loss": 0.6267,
289
+ "step": 725
290
+ },
291
+ {
292
+ "epoch": 0.8,
293
+ "grad_norm": 2.042109251022339,
294
+ "learning_rate": 0.00036673773987206824,
295
+ "loss": 0.7153,
296
+ "step": 750
297
+ },
298
+ {
299
+ "epoch": 0.83,
300
+ "grad_norm": 1.5407096147537231,
301
+ "learning_rate": 0.0003622956645344705,
302
+ "loss": 0.6641,
303
+ "step": 775
304
+ },
305
+ {
306
+ "epoch": 0.85,
307
+ "grad_norm": 1.5067718029022217,
308
+ "learning_rate": 0.00035785358919687276,
309
+ "loss": 0.6544,
310
+ "step": 800
311
+ },
312
+ {
313
+ "epoch": 0.85,
314
+ "eval_accuracy": 0.8217,
315
+ "eval_f1_macro": 0.7223377361999187,
316
+ "eval_f1_micro": 0.8217,
317
+ "eval_loss": 0.6277242302894592,
318
+ "eval_runtime": 24.1366,
319
+ "eval_samples_per_second": 414.309,
320
+ "eval_steps_per_second": 6.505,
321
+ "step": 800
322
+ },
323
+ {
324
+ "epoch": 0.88,
325
+ "grad_norm": 2.1698966026306152,
326
+ "learning_rate": 0.00035341151385927504,
327
+ "loss": 0.7108,
328
+ "step": 825
329
+ },
330
+ {
331
+ "epoch": 0.91,
332
+ "grad_norm": 1.2698220014572144,
333
+ "learning_rate": 0.00034896943852167733,
334
+ "loss": 0.5897,
335
+ "step": 850
336
+ },
337
+ {
338
+ "epoch": 0.93,
339
+ "grad_norm": 2.0006988048553467,
340
+ "learning_rate": 0.0003445273631840796,
341
+ "loss": 0.6386,
342
+ "step": 875
343
+ },
344
+ {
345
+ "epoch": 0.96,
346
+ "grad_norm": 1.7718091011047363,
347
+ "learning_rate": 0.0003400852878464819,
348
+ "loss": 0.5781,
349
+ "step": 900
350
+ },
351
+ {
352
+ "epoch": 0.96,
353
+ "eval_accuracy": 0.8305,
354
+ "eval_f1_macro": 0.7420217283556048,
355
+ "eval_f1_micro": 0.8305,
356
+ "eval_loss": 0.6054204702377319,
357
+ "eval_runtime": 24.1901,
358
+ "eval_samples_per_second": 413.393,
359
+ "eval_steps_per_second": 6.49,
360
+ "step": 900
361
+ },
362
+ {
363
+ "epoch": 0.99,
364
+ "grad_norm": 1.9609644412994385,
365
+ "learning_rate": 0.00033564321250888413,
366
+ "loss": 0.6415,
367
+ "step": 925
368
+ },
369
+ {
370
+ "epoch": 1.01,
371
+ "grad_norm": 1.4652588367462158,
372
+ "learning_rate": 0.0003312011371712864,
373
+ "loss": 0.5699,
374
+ "step": 950
375
+ },
376
+ {
377
+ "epoch": 1.04,
378
+ "grad_norm": 1.6652113199234009,
379
+ "learning_rate": 0.0003267590618336887,
380
+ "loss": 0.486,
381
+ "step": 975
382
+ },
383
+ {
384
+ "epoch": 1.07,
385
+ "grad_norm": 1.3951687812805176,
386
+ "learning_rate": 0.000322316986496091,
387
+ "loss": 0.4674,
388
+ "step": 1000
389
+ },
390
+ {
391
+ "epoch": 1.07,
392
+ "eval_accuracy": 0.8346,
393
+ "eval_f1_macro": 0.737133370653897,
394
+ "eval_f1_micro": 0.8346,
395
+ "eval_loss": 0.6210275292396545,
396
+ "eval_runtime": 24.1347,
397
+ "eval_samples_per_second": 414.342,
398
+ "eval_steps_per_second": 6.505,
399
+ "step": 1000
400
+ },
401
+ {
402
+ "epoch": 1.09,
403
+ "grad_norm": 1.52338445186615,
404
+ "learning_rate": 0.00031787491115849327,
405
+ "loss": 0.4979,
406
+ "step": 1025
407
+ },
408
+ {
409
+ "epoch": 1.12,
410
+ "grad_norm": 1.9127336740493774,
411
+ "learning_rate": 0.00031343283582089556,
412
+ "loss": 0.4658,
413
+ "step": 1050
414
+ },
415
+ {
416
+ "epoch": 1.15,
417
+ "grad_norm": 1.5914901494979858,
418
+ "learning_rate": 0.00030899076048329784,
419
+ "loss": 0.5625,
420
+ "step": 1075
421
+ },
422
+ {
423
+ "epoch": 1.17,
424
+ "grad_norm": 2.0196564197540283,
425
+ "learning_rate": 0.0003045486851457001,
426
+ "loss": 0.4929,
427
+ "step": 1100
428
+ },
429
+ {
430
+ "epoch": 1.17,
431
+ "eval_accuracy": 0.8387,
432
+ "eval_f1_macro": 0.7423130077227065,
433
+ "eval_f1_micro": 0.8387,
434
+ "eval_loss": 0.5875550508499146,
435
+ "eval_runtime": 24.1428,
436
+ "eval_samples_per_second": 414.202,
437
+ "eval_steps_per_second": 6.503,
438
+ "step": 1100
439
+ },
440
+ {
441
+ "epoch": 1.2,
442
+ "grad_norm": 1.3181229829788208,
443
+ "learning_rate": 0.00030010660980810236,
444
+ "loss": 0.4999,
445
+ "step": 1125
446
+ },
447
+ {
448
+ "epoch": 1.23,
449
+ "grad_norm": 1.321296215057373,
450
+ "learning_rate": 0.00029566453447050464,
451
+ "loss": 0.4456,
452
+ "step": 1150
453
+ },
454
+ {
455
+ "epoch": 1.25,
456
+ "grad_norm": 1.4967468976974487,
457
+ "learning_rate": 0.0002912224591329069,
458
+ "loss": 0.443,
459
+ "step": 1175
460
+ },
461
+ {
462
+ "epoch": 1.28,
463
+ "grad_norm": 1.622883915901184,
464
+ "learning_rate": 0.00028678038379530916,
465
+ "loss": 0.566,
466
+ "step": 1200
467
+ },
468
+ {
469
+ "epoch": 1.28,
470
+ "eval_accuracy": 0.8475,
471
+ "eval_f1_macro": 0.7633448680546241,
472
+ "eval_f1_micro": 0.8475,
473
+ "eval_loss": 0.5779463648796082,
474
+ "eval_runtime": 24.1848,
475
+ "eval_samples_per_second": 413.483,
476
+ "eval_steps_per_second": 6.492,
477
+ "step": 1200
478
+ },
479
+ {
480
+ "epoch": 1.31,
481
+ "grad_norm": 2.2820820808410645,
482
+ "learning_rate": 0.00028233830845771145,
483
+ "loss": 0.5382,
484
+ "step": 1225
485
+ },
486
+ {
487
+ "epoch": 1.33,
488
+ "grad_norm": 1.5108492374420166,
489
+ "learning_rate": 0.00027789623312011373,
490
+ "loss": 0.5314,
491
+ "step": 1250
492
+ },
493
+ {
494
+ "epoch": 1.36,
495
+ "grad_norm": 1.2868916988372803,
496
+ "learning_rate": 0.00027345415778251596,
497
+ "loss": 0.4951,
498
+ "step": 1275
499
+ },
500
+ {
501
+ "epoch": 1.39,
502
+ "grad_norm": 2.0262043476104736,
503
+ "learning_rate": 0.00026901208244491825,
504
+ "loss": 0.4577,
505
+ "step": 1300
506
+ },
507
+ {
508
+ "epoch": 1.39,
509
+ "eval_accuracy": 0.8435,
510
+ "eval_f1_macro": 0.7507843808294652,
511
+ "eval_f1_micro": 0.8435,
512
+ "eval_loss": 0.5771787762641907,
513
+ "eval_runtime": 24.1138,
514
+ "eval_samples_per_second": 414.7,
515
+ "eval_steps_per_second": 6.511,
516
+ "step": 1300
517
+ },
518
+ {
519
+ "epoch": 1.41,
520
+ "grad_norm": 1.7890022993087769,
521
+ "learning_rate": 0.00026457000710732053,
522
+ "loss": 0.5097,
523
+ "step": 1325
524
+ },
525
+ {
526
+ "epoch": 1.44,
527
+ "grad_norm": 1.5777374505996704,
528
+ "learning_rate": 0.0002601279317697228,
529
+ "loss": 0.4894,
530
+ "step": 1350
531
+ },
532
+ {
533
+ "epoch": 1.47,
534
+ "grad_norm": 1.6835675239562988,
535
+ "learning_rate": 0.0002556858564321251,
536
+ "loss": 0.4487,
537
+ "step": 1375
538
+ },
539
+ {
540
+ "epoch": 1.49,
541
+ "grad_norm": 1.7419664859771729,
542
+ "learning_rate": 0.0002512437810945274,
543
+ "loss": 0.4233,
544
+ "step": 1400
545
+ },
546
+ {
547
+ "epoch": 1.49,
548
+ "eval_accuracy": 0.8476,
549
+ "eval_f1_macro": 0.7624728239220356,
550
+ "eval_f1_micro": 0.8476,
551
+ "eval_loss": 0.5580869913101196,
552
+ "eval_runtime": 24.1623,
553
+ "eval_samples_per_second": 413.868,
554
+ "eval_steps_per_second": 6.498,
555
+ "step": 1400
556
+ },
557
+ {
558
+ "epoch": 1.52,
559
+ "grad_norm": 1.6330546140670776,
560
+ "learning_rate": 0.0002468017057569296,
561
+ "loss": 0.4985,
562
+ "step": 1425
563
+ },
564
+ {
565
+ "epoch": 1.55,
566
+ "grad_norm": 1.5560388565063477,
567
+ "learning_rate": 0.00024235963041933193,
568
+ "loss": 0.4902,
569
+ "step": 1450
570
+ },
571
+ {
572
+ "epoch": 1.57,
573
+ "grad_norm": 1.1412893533706665,
574
+ "learning_rate": 0.0002379175550817342,
575
+ "loss": 0.4811,
576
+ "step": 1475
577
+ },
578
+ {
579
+ "epoch": 1.6,
580
+ "grad_norm": 1.4423662424087524,
581
+ "learning_rate": 0.00023347547974413648,
582
+ "loss": 0.4567,
583
+ "step": 1500
584
+ },
585
+ {
586
+ "epoch": 1.6,
587
+ "eval_accuracy": 0.8462,
588
+ "eval_f1_macro": 0.7575976290013776,
589
+ "eval_f1_micro": 0.8462,
590
+ "eval_loss": 0.5687991976737976,
591
+ "eval_runtime": 24.159,
592
+ "eval_samples_per_second": 413.925,
593
+ "eval_steps_per_second": 6.499,
594
+ "step": 1500
595
+ },
596
+ {
597
+ "epoch": 1.63,
598
+ "grad_norm": 1.9873683452606201,
599
+ "learning_rate": 0.00022903340440653876,
600
+ "loss": 0.4455,
601
+ "step": 1525
602
+ },
603
+ {
604
+ "epoch": 1.65,
605
+ "grad_norm": 1.4635204076766968,
606
+ "learning_rate": 0.000224591329068941,
607
+ "loss": 0.4457,
608
+ "step": 1550
609
+ },
610
+ {
611
+ "epoch": 1.68,
612
+ "grad_norm": 1.56647789478302,
613
+ "learning_rate": 0.00022014925373134328,
614
+ "loss": 0.4223,
615
+ "step": 1575
616
+ },
617
+ {
618
+ "epoch": 1.71,
619
+ "grad_norm": 1.8017354011535645,
620
+ "learning_rate": 0.00021570717839374556,
621
+ "loss": 0.483,
622
+ "step": 1600
623
+ },
624
+ {
625
+ "epoch": 1.71,
626
+ "eval_accuracy": 0.8478,
627
+ "eval_f1_macro": 0.7608717953670078,
628
+ "eval_f1_micro": 0.8478,
629
+ "eval_loss": 0.5547010898590088,
630
+ "eval_runtime": 24.1023,
631
+ "eval_samples_per_second": 414.897,
632
+ "eval_steps_per_second": 6.514,
633
+ "step": 1600
634
+ },
635
+ {
636
+ "epoch": 1.73,
637
+ "grad_norm": 1.768385887145996,
638
+ "learning_rate": 0.00021126510305614785,
639
+ "loss": 0.4593,
640
+ "step": 1625
641
+ },
642
+ {
643
+ "epoch": 1.76,
644
+ "grad_norm": 1.8453024625778198,
645
+ "learning_rate": 0.0002068230277185501,
646
+ "loss": 0.4378,
647
+ "step": 1650
648
+ },
649
+ {
650
+ "epoch": 1.79,
651
+ "grad_norm": 1.5103825330734253,
652
+ "learning_rate": 0.0002023809523809524,
653
+ "loss": 0.455,
654
+ "step": 1675
655
+ },
656
+ {
657
+ "epoch": 1.81,
658
+ "grad_norm": 1.8187497854232788,
659
+ "learning_rate": 0.00019793887704335468,
660
+ "loss": 0.4649,
661
+ "step": 1700
662
+ },
663
+ {
664
+ "epoch": 1.81,
665
+ "eval_accuracy": 0.851,
666
+ "eval_f1_macro": 0.7680217546192462,
667
+ "eval_f1_micro": 0.851,
668
+ "eval_loss": 0.5395861864089966,
669
+ "eval_runtime": 24.1531,
670
+ "eval_samples_per_second": 414.025,
671
+ "eval_steps_per_second": 6.5,
672
+ "step": 1700
673
+ },
674
+ {
675
+ "epoch": 1.84,
676
+ "grad_norm": 2.1151626110076904,
677
+ "learning_rate": 0.00019349680170575694,
678
+ "loss": 0.5057,
679
+ "step": 1725
680
+ },
681
+ {
682
+ "epoch": 1.87,
683
+ "grad_norm": 1.4344931840896606,
684
+ "learning_rate": 0.00018905472636815922,
685
+ "loss": 0.4275,
686
+ "step": 1750
687
+ },
688
+ {
689
+ "epoch": 1.89,
690
+ "grad_norm": 3.410400152206421,
691
+ "learning_rate": 0.00018461265103056148,
692
+ "loss": 0.4973,
693
+ "step": 1775
694
+ },
695
+ {
696
+ "epoch": 1.92,
697
+ "grad_norm": 2.3107552528381348,
698
+ "learning_rate": 0.00018017057569296374,
699
+ "loss": 0.4288,
700
+ "step": 1800
701
+ },
702
+ {
703
+ "epoch": 1.92,
704
+ "eval_accuracy": 0.8577,
705
+ "eval_f1_macro": 0.7759139835888773,
706
+ "eval_f1_micro": 0.8577,
707
+ "eval_loss": 0.5235319137573242,
708
+ "eval_runtime": 24.1535,
709
+ "eval_samples_per_second": 414.018,
710
+ "eval_steps_per_second": 6.5,
711
+ "step": 1800
712
+ },
713
+ {
714
+ "epoch": 1.95,
715
+ "grad_norm": 1.2763726711273193,
716
+ "learning_rate": 0.00017572850035536602,
717
+ "loss": 0.5008,
718
+ "step": 1825
719
+ },
720
+ {
721
+ "epoch": 1.97,
722
+ "grad_norm": 1.5869358777999878,
723
+ "learning_rate": 0.0001712864250177683,
724
+ "loss": 0.4581,
725
+ "step": 1850
726
+ },
727
+ {
728
+ "epoch": 2.0,
729
+ "grad_norm": 1.450799822807312,
730
+ "learning_rate": 0.0001668443496801706,
731
+ "loss": 0.4103,
732
+ "step": 1875
733
+ },
734
+ {
735
+ "epoch": 2.03,
736
+ "grad_norm": 1.3206963539123535,
737
+ "learning_rate": 0.00016240227434257285,
738
+ "loss": 0.3445,
739
+ "step": 1900
740
+ },
741
+ {
742
+ "epoch": 2.03,
743
+ "eval_accuracy": 0.8603,
744
+ "eval_f1_macro": 0.7790782413257752,
745
+ "eval_f1_micro": 0.8603,
746
+ "eval_loss": 0.520423948764801,
747
+ "eval_runtime": 24.0544,
748
+ "eval_samples_per_second": 415.724,
749
+ "eval_steps_per_second": 6.527,
750
+ "step": 1900
751
+ },
752
+ {
753
+ "epoch": 2.05,
754
+ "grad_norm": 1.3110324144363403,
755
+ "learning_rate": 0.00015796019900497514,
756
+ "loss": 0.3278,
757
+ "step": 1925
758
+ },
759
+ {
760
+ "epoch": 2.08,
761
+ "grad_norm": 1.4223313331604004,
762
+ "learning_rate": 0.00015351812366737742,
763
+ "loss": 0.3376,
764
+ "step": 1950
765
+ },
766
+ {
767
+ "epoch": 2.11,
768
+ "grad_norm": 1.6518670320510864,
769
+ "learning_rate": 0.00014907604832977968,
770
+ "loss": 0.3519,
771
+ "step": 1975
772
+ },
773
+ {
774
+ "epoch": 2.13,
775
+ "grad_norm": 1.5389341115951538,
776
+ "learning_rate": 0.00014463397299218194,
777
+ "loss": 0.3014,
778
+ "step": 2000
779
+ },
780
+ {
781
+ "epoch": 2.13,
782
+ "eval_accuracy": 0.8607,
783
+ "eval_f1_macro": 0.786161944348828,
784
+ "eval_f1_micro": 0.8607,
785
+ "eval_loss": 0.5268532037734985,
786
+ "eval_runtime": 24.1182,
787
+ "eval_samples_per_second": 414.624,
788
+ "eval_steps_per_second": 6.51,
789
+ "step": 2000
790
+ },
791
+ {
792
+ "epoch": 2.16,
793
+ "grad_norm": 1.810062289237976,
794
+ "learning_rate": 0.00014019189765458422,
795
+ "loss": 0.3184,
796
+ "step": 2025
797
+ },
798
+ {
799
+ "epoch": 2.19,
800
+ "grad_norm": 1.4731919765472412,
801
+ "learning_rate": 0.00013574982231698648,
802
+ "loss": 0.3457,
803
+ "step": 2050
804
+ },
805
+ {
806
+ "epoch": 2.21,
807
+ "grad_norm": 1.8230247497558594,
808
+ "learning_rate": 0.00013130774697938877,
809
+ "loss": 0.3107,
810
+ "step": 2075
811
+ },
812
+ {
813
+ "epoch": 2.24,
814
+ "grad_norm": 1.9620997905731201,
815
+ "learning_rate": 0.00012686567164179105,
816
+ "loss": 0.3301,
817
+ "step": 2100
818
+ },
819
+ {
820
+ "epoch": 2.24,
821
+ "eval_accuracy": 0.8591,
822
+ "eval_f1_macro": 0.7826263587368261,
823
+ "eval_f1_micro": 0.8591,
824
+ "eval_loss": 0.5233541131019592,
825
+ "eval_runtime": 24.1529,
826
+ "eval_samples_per_second": 414.03,
827
+ "eval_steps_per_second": 6.5,
828
+ "step": 2100
829
+ },
830
+ {
831
+ "epoch": 2.27,
832
+ "grad_norm": 1.9270436763763428,
833
+ "learning_rate": 0.0001224235963041933,
834
+ "loss": 0.3273,
835
+ "step": 2125
836
+ },
837
+ {
838
+ "epoch": 2.29,
839
+ "grad_norm": 1.2540065050125122,
840
+ "learning_rate": 0.0001179815209665956,
841
+ "loss": 0.2682,
842
+ "step": 2150
843
+ },
844
+ {
845
+ "epoch": 2.32,
846
+ "grad_norm": 0.6836864948272705,
847
+ "learning_rate": 0.00011353944562899787,
848
+ "loss": 0.2787,
849
+ "step": 2175
850
+ },
851
+ {
852
+ "epoch": 2.35,
853
+ "grad_norm": 1.7758402824401855,
854
+ "learning_rate": 0.00010909737029140014,
855
+ "loss": 0.3069,
856
+ "step": 2200
857
+ },
858
+ {
859
+ "epoch": 2.35,
860
+ "eval_accuracy": 0.8624,
861
+ "eval_f1_macro": 0.785058711126236,
862
+ "eval_f1_micro": 0.8624,
863
+ "eval_loss": 0.5265922546386719,
864
+ "eval_runtime": 24.117,
865
+ "eval_samples_per_second": 414.644,
866
+ "eval_steps_per_second": 6.51,
867
+ "step": 2200
868
+ },
869
+ {
870
+ "epoch": 2.37,
871
+ "grad_norm": 1.3381538391113281,
872
+ "learning_rate": 0.00010465529495380242,
873
+ "loss": 0.2866,
874
+ "step": 2225
875
+ },
876
+ {
877
+ "epoch": 2.4,
878
+ "grad_norm": 1.759792685508728,
879
+ "learning_rate": 0.0001002132196162047,
880
+ "loss": 0.3031,
881
+ "step": 2250
882
+ },
883
+ {
884
+ "epoch": 2.43,
885
+ "grad_norm": 1.2064933776855469,
886
+ "learning_rate": 9.577114427860697e-05,
887
+ "loss": 0.3182,
888
+ "step": 2275
889
+ },
890
+ {
891
+ "epoch": 2.45,
892
+ "grad_norm": 1.0604907274246216,
893
+ "learning_rate": 9.132906894100924e-05,
894
+ "loss": 0.3095,
895
+ "step": 2300
896
+ },
897
+ {
898
+ "epoch": 2.45,
899
+ "eval_accuracy": 0.8629,
900
+ "eval_f1_macro": 0.7846414495209991,
901
+ "eval_f1_micro": 0.8629,
902
+ "eval_loss": 0.5154865980148315,
903
+ "eval_runtime": 24.1319,
904
+ "eval_samples_per_second": 414.389,
905
+ "eval_steps_per_second": 6.506,
906
+ "step": 2300
907
+ },
908
+ {
909
+ "epoch": 2.48,
910
+ "grad_norm": 1.9324097633361816,
911
+ "learning_rate": 8.688699360341151e-05,
912
+ "loss": 0.3321,
913
+ "step": 2325
914
+ },
915
+ {
916
+ "epoch": 2.51,
917
+ "grad_norm": 1.9070733785629272,
918
+ "learning_rate": 8.24449182658138e-05,
919
+ "loss": 0.3165,
920
+ "step": 2350
921
+ },
922
+ {
923
+ "epoch": 2.53,
924
+ "grad_norm": 1.3031100034713745,
925
+ "learning_rate": 7.800284292821607e-05,
926
+ "loss": 0.2939,
927
+ "step": 2375
928
+ },
929
+ {
930
+ "epoch": 2.56,
931
+ "grad_norm": 1.479374647140503,
932
+ "learning_rate": 7.356076759061834e-05,
933
+ "loss": 0.3164,
934
+ "step": 2400
935
+ },
936
+ {
937
+ "epoch": 2.56,
938
+ "eval_accuracy": 0.8646,
939
+ "eval_f1_macro": 0.7909189563960074,
940
+ "eval_f1_micro": 0.8646,
941
+ "eval_loss": 0.5106394290924072,
942
+ "eval_runtime": 24.1589,
943
+ "eval_samples_per_second": 413.927,
944
+ "eval_steps_per_second": 6.499,
945
+ "step": 2400
946
+ },
947
+ {
948
+ "epoch": 2.59,
949
+ "grad_norm": 1.6215256452560425,
950
+ "learning_rate": 6.911869225302061e-05,
951
+ "loss": 0.3524,
952
+ "step": 2425
953
+ },
954
+ {
955
+ "epoch": 2.61,
956
+ "grad_norm": 1.5538333654403687,
957
+ "learning_rate": 6.467661691542288e-05,
958
+ "loss": 0.3354,
959
+ "step": 2450
960
+ },
961
+ {
962
+ "epoch": 2.64,
963
+ "grad_norm": 1.515882134437561,
964
+ "learning_rate": 6.023454157782516e-05,
965
+ "loss": 0.2992,
966
+ "step": 2475
967
+ },
968
+ {
969
+ "epoch": 2.67,
970
+ "grad_norm": 2.216226100921631,
971
+ "learning_rate": 5.579246624022743e-05,
972
+ "loss": 0.2914,
973
+ "step": 2500
974
+ },
975
+ {
976
+ "epoch": 2.67,
977
+ "eval_accuracy": 0.8647,
978
+ "eval_f1_macro": 0.7934095832580423,
979
+ "eval_f1_micro": 0.8647,
980
+ "eval_loss": 0.5055064558982849,
981
+ "eval_runtime": 24.1523,
982
+ "eval_samples_per_second": 414.039,
983
+ "eval_steps_per_second": 6.5,
984
+ "step": 2500
985
+ },
986
+ {
987
+ "epoch": 2.69,
988
+ "grad_norm": 2.8203630447387695,
989
+ "learning_rate": 5.135039090262971e-05,
990
+ "loss": 0.321,
991
+ "step": 2525
992
+ },
993
+ {
994
+ "epoch": 2.72,
995
+ "grad_norm": 2.173407793045044,
996
+ "learning_rate": 4.690831556503199e-05,
997
+ "loss": 0.3,
998
+ "step": 2550
999
+ },
1000
+ {
1001
+ "epoch": 2.75,
1002
+ "grad_norm": 1.960857629776001,
1003
+ "learning_rate": 4.2466240227434255e-05,
1004
+ "loss": 0.3181,
1005
+ "step": 2575
1006
+ },
1007
+ {
1008
+ "epoch": 2.77,
1009
+ "grad_norm": 1.7531359195709229,
1010
+ "learning_rate": 3.802416488983653e-05,
1011
+ "loss": 0.2946,
1012
+ "step": 2600
1013
+ },
1014
+ {
1015
+ "epoch": 2.77,
1016
+ "eval_accuracy": 0.8643,
1017
+ "eval_f1_macro": 0.7917093314024903,
1018
+ "eval_f1_micro": 0.8643,
1019
+ "eval_loss": 0.502694308757782,
1020
+ "eval_runtime": 24.1319,
1021
+ "eval_samples_per_second": 414.389,
1022
+ "eval_steps_per_second": 6.506,
1023
+ "step": 2600
1024
+ },
1025
+ {
1026
+ "epoch": 2.8,
1027
+ "grad_norm": 1.2817922830581665,
1028
+ "learning_rate": 3.3582089552238805e-05,
1029
+ "loss": 0.2963,
1030
+ "step": 2625
1031
+ },
1032
+ {
1033
+ "epoch": 2.83,
1034
+ "grad_norm": 2.3058297634124756,
1035
+ "learning_rate": 2.9140014214641083e-05,
1036
+ "loss": 0.33,
1037
+ "step": 2650
1038
+ },
1039
+ {
1040
+ "epoch": 2.85,
1041
+ "grad_norm": 1.1777921915054321,
1042
+ "learning_rate": 2.4697938877043355e-05,
1043
+ "loss": 0.3122,
1044
+ "step": 2675
1045
+ },
1046
+ {
1047
+ "epoch": 2.88,
1048
+ "grad_norm": 1.6172949075698853,
1049
+ "learning_rate": 2.025586353944563e-05,
1050
+ "loss": 0.3012,
1051
+ "step": 2700
1052
+ },
1053
+ {
1054
+ "epoch": 2.88,
1055
+ "eval_accuracy": 0.8671,
1056
+ "eval_f1_macro": 0.7953004112768677,
1057
+ "eval_f1_micro": 0.8671,
1058
+ "eval_loss": 0.500918984413147,
1059
+ "eval_runtime": 24.1653,
1060
+ "eval_samples_per_second": 413.817,
1061
+ "eval_steps_per_second": 6.497,
1062
+ "step": 2700
1063
+ },
1064
+ {
1065
+ "epoch": 2.91,
1066
+ "grad_norm": 1.0941059589385986,
1067
+ "learning_rate": 1.5813788201847902e-05,
1068
+ "loss": 0.2869,
1069
+ "step": 2725
1070
+ },
1071
+ {
1072
+ "epoch": 2.93,
1073
+ "grad_norm": 1.700844168663025,
1074
+ "learning_rate": 1.1371712864250177e-05,
1075
+ "loss": 0.2968,
1076
+ "step": 2750
1077
+ },
1078
+ {
1079
+ "epoch": 2.96,
1080
+ "grad_norm": 1.8403632640838623,
1081
+ "learning_rate": 6.929637526652452e-06,
1082
+ "loss": 0.272,
1083
+ "step": 2775
1084
+ },
1085
+ {
1086
+ "epoch": 2.99,
1087
+ "grad_norm": 1.8172844648361206,
1088
+ "learning_rate": 2.4875621890547264e-06,
1089
+ "loss": 0.3181,
1090
+ "step": 2800
1091
+ },
1092
+ {
1093
+ "epoch": 2.99,
1094
+ "eval_accuracy": 0.8664,
1095
+ "eval_f1_macro": 0.7947908970820687,
1096
+ "eval_f1_micro": 0.8664,
1097
+ "eval_loss": 0.5004217028617859,
1098
+ "eval_runtime": 24.1769,
1099
+ "eval_samples_per_second": 413.617,
1100
+ "eval_steps_per_second": 6.494,
1101
+ "step": 2800
1102
+ }
1103
+ ],
1104
+ "logging_steps": 25,
1105
+ "max_steps": 2814,
1106
+ "num_input_tokens_seen": 0,
1107
+ "num_train_epochs": 3,
1108
+ "save_steps": 100,
1109
+ "total_flos": 2.737317734462259e+16,
1110
+ "train_batch_size": 32,
1111
+ "trial_name": null,
1112
+ "trial_params": null
1113
+ }
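The block above is the tail of a Hugging Face `Trainer` state file: `log_history` entries with training loss and periodic eval metrics, followed by the run-level fields. A minimal sketch for extracting the evaluation curve from such a file, assuming a hypothetical local copy named `trainer_state.json` (not part of this upload):

```python
# Sketch: pull the eval curve out of a locally downloaded trainer_state.json
# (the path is hypothetical, not part of this commit).
import json

with open("trainer_state.json") as f:
    state = json.load(f)

# Only some log_history entries carry eval metrics; the rest are training-loss logs.
for entry in state["log_history"]:
    if "eval_accuracy" in entry:
        print(f'step {entry["step"]:>4}  '
              f'acc {entry["eval_accuracy"]:.4f}  '
              f'f1_macro {entry["eval_f1_macro"]:.4f}')
```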
google_t5/t5_base_ledgar/checkpoint-2800/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:986d82ea9721f626a5daebec5a86078b62c60c449ec04e11dee9e747b5a7de49
+ size 5048
google_t5/t5_base_ledgar/config.json ADDED
@@ -0,0 +1,267 @@
1
+ {
2
+ "_name_or_path": "google-t5/t5-base",
3
+ "architectures": [
4
+ "T5ForSequenceClassification"
5
+ ],
6
+ "classifier_dropout": 0.0,
7
+ "d_ff": 3072,
8
+ "d_kv": 64,
9
+ "d_model": 768,
10
+ "decoder_start_token_id": 0,
11
+ "dense_act_fn": "relu",
12
+ "dropout_rate": 0.1,
13
+ "eos_token_id": 1,
14
+ "feed_forward_proj": "relu",
15
+ "finetuning_task": "text-classification",
16
+ "id2label": {
17
+ "0": "0",
18
+ "1": "1",
19
+ "2": "10",
20
+ "3": "11",
21
+ "4": "12",
22
+ "5": "13",
23
+ "6": "14",
24
+ "7": "15",
25
+ "8": "16",
26
+ "9": "17",
27
+ "10": "18",
28
+ "11": "19",
29
+ "12": "2",
30
+ "13": "20",
31
+ "14": "21",
32
+ "15": "22",
33
+ "16": "23",
34
+ "17": "24",
35
+ "18": "25",
36
+ "19": "26",
37
+ "20": "27",
38
+ "21": "28",
39
+ "22": "29",
40
+ "23": "3",
41
+ "24": "30",
42
+ "25": "31",
43
+ "26": "32",
44
+ "27": "33",
45
+ "28": "34",
46
+ "29": "35",
47
+ "30": "36",
48
+ "31": "37",
49
+ "32": "38",
50
+ "33": "39",
51
+ "34": "4",
52
+ "35": "40",
53
+ "36": "41",
54
+ "37": "42",
55
+ "38": "43",
56
+ "39": "44",
57
+ "40": "45",
58
+ "41": "46",
59
+ "42": "47",
60
+ "43": "48",
61
+ "44": "49",
62
+ "45": "5",
63
+ "46": "50",
64
+ "47": "51",
65
+ "48": "52",
66
+ "49": "53",
67
+ "50": "54",
68
+ "51": "55",
69
+ "52": "56",
70
+ "53": "57",
71
+ "54": "58",
72
+ "55": "59",
73
+ "56": "6",
74
+ "57": "60",
75
+ "58": "61",
76
+ "59": "62",
77
+ "60": "63",
78
+ "61": "64",
79
+ "62": "65",
80
+ "63": "66",
81
+ "64": "67",
82
+ "65": "68",
83
+ "66": "69",
84
+ "67": "7",
85
+ "68": "70",
86
+ "69": "71",
87
+ "70": "72",
88
+ "71": "73",
89
+ "72": "74",
90
+ "73": "75",
91
+ "74": "76",
92
+ "75": "77",
93
+ "76": "78",
94
+ "77": "79",
95
+ "78": "8",
96
+ "79": "80",
97
+ "80": "81",
98
+ "81": "82",
99
+ "82": "83",
100
+ "83": "84",
101
+ "84": "85",
102
+ "85": "86",
103
+ "86": "87",
104
+ "87": "88",
105
+ "88": "89",
106
+ "89": "9",
107
+ "90": "90",
108
+ "91": "91",
109
+ "92": "92",
110
+ "93": "93",
111
+ "94": "94",
112
+ "95": "95",
113
+ "96": "96",
114
+ "97": "97",
115
+ "98": "98",
116
+ "99": "99"
117
+ },
118
+ "initializer_factor": 1.0,
119
+ "is_encoder_decoder": true,
120
+ "is_gated_act": false,
121
+ "label2id": {
122
+ "0": 0,
123
+ "1": 1,
124
+ "10": 2,
125
+ "11": 3,
126
+ "12": 4,
127
+ "13": 5,
128
+ "14": 6,
129
+ "15": 7,
130
+ "16": 8,
131
+ "17": 9,
132
+ "18": 10,
133
+ "19": 11,
134
+ "2": 12,
135
+ "20": 13,
136
+ "21": 14,
137
+ "22": 15,
138
+ "23": 16,
139
+ "24": 17,
140
+ "25": 18,
141
+ "26": 19,
142
+ "27": 20,
143
+ "28": 21,
144
+ "29": 22,
145
+ "3": 23,
146
+ "30": 24,
147
+ "31": 25,
148
+ "32": 26,
149
+ "33": 27,
150
+ "34": 28,
151
+ "35": 29,
152
+ "36": 30,
153
+ "37": 31,
154
+ "38": 32,
155
+ "39": 33,
156
+ "4": 34,
157
+ "40": 35,
158
+ "41": 36,
159
+ "42": 37,
160
+ "43": 38,
161
+ "44": 39,
162
+ "45": 40,
163
+ "46": 41,
164
+ "47": 42,
165
+ "48": 43,
166
+ "49": 44,
167
+ "5": 45,
168
+ "50": 46,
169
+ "51": 47,
170
+ "52": 48,
171
+ "53": 49,
172
+ "54": 50,
173
+ "55": 51,
174
+ "56": 52,
175
+ "57": 53,
176
+ "58": 54,
177
+ "59": 55,
178
+ "6": 56,
179
+ "60": 57,
180
+ "61": 58,
181
+ "62": 59,
182
+ "63": 60,
183
+ "64": 61,
184
+ "65": 62,
185
+ "66": 63,
186
+ "67": 64,
187
+ "68": 65,
188
+ "69": 66,
189
+ "7": 67,
190
+ "70": 68,
191
+ "71": 69,
192
+ "72": 70,
193
+ "73": 71,
194
+ "74": 72,
195
+ "75": 73,
196
+ "76": 74,
197
+ "77": 75,
198
+ "78": 76,
199
+ "79": 77,
200
+ "8": 78,
201
+ "80": 79,
202
+ "81": 80,
203
+ "82": 81,
204
+ "83": 82,
205
+ "84": 83,
206
+ "85": 84,
207
+ "86": 85,
208
+ "87": 86,
209
+ "88": 87,
210
+ "89": 88,
211
+ "9": 89,
212
+ "90": 90,
213
+ "91": 91,
214
+ "92": 92,
215
+ "93": 93,
216
+ "94": 94,
217
+ "95": 95,
218
+ "96": 96,
219
+ "97": 97,
220
+ "98": 98,
221
+ "99": 99
222
+ },
223
+ "layer_norm_epsilon": 1e-06,
224
+ "model_type": "t5",
225
+ "n_positions": 512,
226
+ "num_decoder_layers": 12,
227
+ "num_heads": 12,
228
+ "num_layers": 12,
229
+ "output_past": true,
230
+ "pad_token_id": 0,
231
+ "problem_type": "single_label_classification",
232
+ "relative_attention_max_distance": 128,
233
+ "relative_attention_num_buckets": 32,
234
+ "task_specific_params": {
235
+ "summarization": {
236
+ "early_stopping": true,
237
+ "length_penalty": 2.0,
238
+ "max_length": 200,
239
+ "min_length": 30,
240
+ "no_repeat_ngram_size": 3,
241
+ "num_beams": 4,
242
+ "prefix": "summarize: "
243
+ },
244
+ "translation_en_to_de": {
245
+ "early_stopping": true,
246
+ "max_length": 300,
247
+ "num_beams": 4,
248
+ "prefix": "translate English to German: "
249
+ },
250
+ "translation_en_to_fr": {
251
+ "early_stopping": true,
252
+ "max_length": 300,
253
+ "num_beams": 4,
254
+ "prefix": "translate English to French: "
255
+ },
256
+ "translation_en_to_ro": {
257
+ "early_stopping": true,
258
+ "max_length": 300,
259
+ "num_beams": 4,
260
+ "prefix": "translate English to Romanian: "
261
+ }
262
+ },
263
+ "torch_dtype": "float32",
264
+ "transformers_version": "4.39.0.dev0",
265
+ "use_cache": true,
266
+ "vocab_size": 32128
267
+ }
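This config registers the checkpoint as `T5ForSequenceClassification` with 100 LEDGAR clause labels (the label names are simply the class indices as strings). A minimal loading sketch, assuming the folder has been cloned locally; the path and example sentence are illustrative only:

```python
# Sketch only: hypothetical local path to this folder, not part of the commit.
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

path = "google_t5/t5_base_ledgar"
tokenizer = AutoTokenizer.from_pretrained(path)
model = AutoModelForSequenceClassification.from_pretrained(path)  # T5ForSequenceClassification

text = "This Agreement shall be governed by the laws of the State of Delaware."
inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
with torch.no_grad():
    pred = model(**inputs).logits.argmax(dim=-1).item()
print(model.config.id2label[pred])  # class index as a string, per id2label above
```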
google_t5/t5_base_ledgar/eval_results.json ADDED
@@ -0,0 +1,11 @@
+ {
+ "epoch": 3.0,
+ "eval_accuracy": 0.8664,
+ "eval_f1_macro": 0.7947908970820687,
+ "eval_f1_micro": 0.8664,
+ "eval_loss": 0.5004217028617859,
+ "eval_runtime": 23.9278,
+ "eval_samples": 10000,
+ "eval_samples_per_second": 417.924,
+ "eval_steps_per_second": 6.561
+ }
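For reference, `eval_f1_macro` and `eval_f1_micro` above are the usual macro- and micro-averaged F1 scores; a tiny illustration on made-up labels (not the actual eval data):

```python
# Macro vs. micro F1 on dummy multiclass labels.
from sklearn.metrics import f1_score

y_true = [0, 1, 2, 2, 1, 0]
y_pred = [0, 1, 2, 1, 1, 0]
print(f1_score(y_true, y_pred, average="macro"))  # unweighted mean of per-class F1
print(f1_score(y_true, y_pred, average="micro"))  # global counts; equals accuracy for single-label tasks
```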
google_t5/t5_base_ledgar/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3bec396a5ee38fb93b45dcbfe6b134f5c7adba0ea2c64f0184a3f24a69faff1e
+ size 894318160
google_t5/t5_base_ledgar/run.log ADDED
@@ -0,0 +1,4 @@
+ 03/15/2024 18:45:09 - WARNING - __main__ - Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: True, 16-bits training: False
+ 03/15/2024 18:45:09 - WARNING - __main__ - Process rank: 1, device: cuda:1, n_gpu: 1, distributed training: True, 16-bits training: False
+ 03/15/2024 18:45:13 - WARNING - __main__ - The label2id key in the model config.json is not equal to the label2id key of this run. You can ignore this if you are doing finetuning.
+ 03/15/2024 18:45:13 - WARNING - __main__ - The label2id key in the model config.json is not equal to the label2id key of this run. You can ignore this if you are doing finetuning.
google_t5/t5_base_ledgar/special_tokens_map.json ADDED
@@ -0,0 +1,107 @@
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>",
99
+ "<extra_id_96>",
100
+ "<extra_id_97>",
101
+ "<extra_id_98>",
102
+ "<extra_id_99>"
103
+ ],
104
+ "eos_token": "</s>",
105
+ "pad_token": "<pad>",
106
+ "unk_token": "<unk>"
107
+ }
google_t5/t5_base_ledgar/spiece.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
+ size 791656
google_t5/t5_base_ledgar/test_results.json ADDED
@@ -0,0 +1,10 @@
+ {
+ "epoch": 3.0,
+ "test_accuracy": 0.871,
+ "test_f1_macro": 0.7998698141352754,
+ "test_f1_micro": 0.871,
+ "test_loss": 0.5104538798332214,
+ "test_runtime": 23.9487,
+ "test_samples_per_second": 417.56,
+ "test_steps_per_second": 6.556
+ }
google_t5/t5_base_ledgar/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
google_t5/t5_base_ledgar/tokenizer_config.json ADDED
@@ -0,0 +1,937 @@
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<pad>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "</s>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "<unk>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "32000": {
28
+ "content": "<extra_id_99>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "32001": {
36
+ "content": "<extra_id_98>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "32002": {
44
+ "content": "<extra_id_97>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "32003": {
52
+ "content": "<extra_id_96>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "32004": {
60
+ "content": "<extra_id_95>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "32005": {
68
+ "content": "<extra_id_94>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "32006": {
76
+ "content": "<extra_id_93>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "32007": {
84
+ "content": "<extra_id_92>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "32008": {
92
+ "content": "<extra_id_91>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "32009": {
100
+ "content": "<extra_id_90>",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "32010": {
108
+ "content": "<extra_id_89>",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "32011": {
116
+ "content": "<extra_id_88>",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "32012": {
124
+ "content": "<extra_id_87>",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "32013": {
132
+ "content": "<extra_id_86>",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": true
138
+ },
139
+ "32014": {
140
+ "content": "<extra_id_85>",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": true
146
+ },
147
+ "32015": {
148
+ "content": "<extra_id_84>",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": true
154
+ },
155
+ "32016": {
156
+ "content": "<extra_id_83>",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "32017": {
164
+ "content": "<extra_id_82>",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": true
170
+ },
171
+ "32018": {
172
+ "content": "<extra_id_81>",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": true
178
+ },
179
+ "32019": {
180
+ "content": "<extra_id_80>",
181
+ "lstrip": false,
182
+ "normalized": false,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": true
186
+ },
187
+ "32020": {
188
+ "content": "<extra_id_79>",
189
+ "lstrip": false,
190
+ "normalized": false,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": true
194
+ },
195
+ "32021": {
196
+ "content": "<extra_id_78>",
197
+ "lstrip": false,
198
+ "normalized": false,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": true
202
+ },
203
+ "32022": {
204
+ "content": "<extra_id_77>",
205
+ "lstrip": false,
206
+ "normalized": false,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": true
210
+ },
211
+ "32023": {
212
+ "content": "<extra_id_76>",
213
+ "lstrip": false,
214
+ "normalized": false,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": true
218
+ },
219
+ "32024": {
220
+ "content": "<extra_id_75>",
221
+ "lstrip": false,
222
+ "normalized": false,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": true
226
+ },
227
+ "32025": {
228
+ "content": "<extra_id_74>",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": true
234
+ },
235
+ "32026": {
236
+ "content": "<extra_id_73>",
237
+ "lstrip": false,
238
+ "normalized": false,
239
+ "rstrip": false,
240
+ "single_word": false,
241
+ "special": true
242
+ },
243
+ "32027": {
244
+ "content": "<extra_id_72>",
245
+ "lstrip": false,
246
+ "normalized": false,
247
+ "rstrip": false,
248
+ "single_word": false,
249
+ "special": true
250
+ },
251
+ "32028": {
252
+ "content": "<extra_id_71>",
253
+ "lstrip": false,
254
+ "normalized": false,
255
+ "rstrip": false,
256
+ "single_word": false,
257
+ "special": true
258
+ },
259
+ "32029": {
260
+ "content": "<extra_id_70>",
261
+ "lstrip": false,
262
+ "normalized": false,
263
+ "rstrip": false,
264
+ "single_word": false,
265
+ "special": true
266
+ },
267
+ "32030": {
268
+ "content": "<extra_id_69>",
269
+ "lstrip": false,
270
+ "normalized": false,
271
+ "rstrip": false,
272
+ "single_word": false,
273
+ "special": true
274
+ },
275
+ "32031": {
276
+ "content": "<extra_id_68>",
277
+ "lstrip": false,
278
+ "normalized": false,
279
+ "rstrip": false,
280
+ "single_word": false,
281
+ "special": true
282
+ },
283
+ "32032": {
284
+ "content": "<extra_id_67>",
285
+ "lstrip": false,
286
+ "normalized": false,
287
+ "rstrip": false,
288
+ "single_word": false,
289
+ "special": true
290
+ },
291
+ "32033": {
292
+ "content": "<extra_id_66>",
293
+ "lstrip": false,
294
+ "normalized": false,
295
+ "rstrip": false,
296
+ "single_word": false,
297
+ "special": true
298
+ },
299
+ "32034": {
300
+ "content": "<extra_id_65>",
301
+ "lstrip": false,
302
+ "normalized": false,
303
+ "rstrip": false,
304
+ "single_word": false,
305
+ "special": true
306
+ },
307
+ "32035": {
308
+ "content": "<extra_id_64>",
309
+ "lstrip": false,
310
+ "normalized": false,
311
+ "rstrip": false,
312
+ "single_word": false,
313
+ "special": true
314
+ },
315
+ "32036": {
316
+ "content": "<extra_id_63>",
317
+ "lstrip": false,
318
+ "normalized": false,
319
+ "rstrip": false,
320
+ "single_word": false,
321
+ "special": true
322
+ },
323
+ "32037": {
324
+ "content": "<extra_id_62>",
325
+ "lstrip": false,
326
+ "normalized": false,
327
+ "rstrip": false,
328
+ "single_word": false,
329
+ "special": true
330
+ },
331
+ "32038": {
332
+ "content": "<extra_id_61>",
333
+ "lstrip": false,
334
+ "normalized": false,
335
+ "rstrip": false,
336
+ "single_word": false,
337
+ "special": true
338
+ },
339
+ "32039": {
340
+ "content": "<extra_id_60>",
341
+ "lstrip": false,
342
+ "normalized": false,
343
+ "rstrip": false,
344
+ "single_word": false,
345
+ "special": true
346
+ },
347
+ "32040": {
348
+ "content": "<extra_id_59>",
349
+ "lstrip": false,
350
+ "normalized": false,
351
+ "rstrip": false,
352
+ "single_word": false,
353
+ "special": true
354
+ },
355
+ "32041": {
356
+ "content": "<extra_id_58>",
357
+ "lstrip": false,
358
+ "normalized": false,
359
+ "rstrip": false,
360
+ "single_word": false,
361
+ "special": true
362
+ },
363
+ "32042": {
364
+ "content": "<extra_id_57>",
365
+ "lstrip": false,
366
+ "normalized": false,
367
+ "rstrip": false,
368
+ "single_word": false,
369
+ "special": true
370
+ },
371
+ "32043": {
372
+ "content": "<extra_id_56>",
373
+ "lstrip": false,
374
+ "normalized": false,
375
+ "rstrip": false,
376
+ "single_word": false,
377
+ "special": true
378
+ },
379
+ "32044": {
380
+ "content": "<extra_id_55>",
381
+ "lstrip": false,
382
+ "normalized": false,
383
+ "rstrip": false,
384
+ "single_word": false,
385
+ "special": true
386
+ },
387
+ "32045": {
388
+ "content": "<extra_id_54>",
389
+ "lstrip": false,
390
+ "normalized": false,
391
+ "rstrip": false,
392
+ "single_word": false,
393
+ "special": true
394
+ },
395
+ "32046": {
396
+ "content": "<extra_id_53>",
397
+ "lstrip": false,
398
+ "normalized": false,
399
+ "rstrip": false,
400
+ "single_word": false,
401
+ "special": true
402
+ },
403
+ "32047": {
404
+ "content": "<extra_id_52>",
405
+ "lstrip": false,
406
+ "normalized": false,
407
+ "rstrip": false,
408
+ "single_word": false,
409
+ "special": true
410
+ },
411
+ "32048": {
412
+ "content": "<extra_id_51>",
413
+ "lstrip": false,
414
+ "normalized": false,
415
+ "rstrip": false,
416
+ "single_word": false,
417
+ "special": true
418
+ },
419
+ "32049": {
420
+ "content": "<extra_id_50>",
421
+ "lstrip": false,
422
+ "normalized": false,
423
+ "rstrip": false,
424
+ "single_word": false,
425
+ "special": true
426
+ },
427
+ "32050": {
428
+ "content": "<extra_id_49>",
429
+ "lstrip": false,
430
+ "normalized": false,
431
+ "rstrip": false,
432
+ "single_word": false,
433
+ "special": true
434
+ },
435
+ "32051": {
436
+ "content": "<extra_id_48>",
437
+ "lstrip": false,
438
+ "normalized": false,
439
+ "rstrip": false,
440
+ "single_word": false,
441
+ "special": true
442
+ },
443
+ "32052": {
444
+ "content": "<extra_id_47>",
445
+ "lstrip": false,
446
+ "normalized": false,
447
+ "rstrip": false,
448
+ "single_word": false,
449
+ "special": true
450
+ },
451
+ "32053": {
452
+ "content": "<extra_id_46>",
453
+ "lstrip": false,
454
+ "normalized": false,
455
+ "rstrip": false,
456
+ "single_word": false,
457
+ "special": true
458
+ },
459
+ "32054": {
460
+ "content": "<extra_id_45>",
461
+ "lstrip": false,
462
+ "normalized": false,
463
+ "rstrip": false,
464
+ "single_word": false,
465
+ "special": true
466
+ },
467
+ "32055": {
468
+ "content": "<extra_id_44>",
469
+ "lstrip": false,
470
+ "normalized": false,
471
+ "rstrip": false,
472
+ "single_word": false,
473
+ "special": true
474
+ },
475
+ "32056": {
476
+ "content": "<extra_id_43>",
477
+ "lstrip": false,
478
+ "normalized": false,
479
+ "rstrip": false,
480
+ "single_word": false,
481
+ "special": true
482
+ },
483
+ "32057": {
484
+ "content": "<extra_id_42>",
485
+ "lstrip": false,
486
+ "normalized": false,
487
+ "rstrip": false,
488
+ "single_word": false,
489
+ "special": true
490
+ },
491
+ "32058": {
492
+ "content": "<extra_id_41>",
493
+ "lstrip": false,
494
+ "normalized": false,
495
+ "rstrip": false,
496
+ "single_word": false,
497
+ "special": true
498
+ },
499
+ "32059": {
500
+ "content": "<extra_id_40>",
501
+ "lstrip": false,
502
+ "normalized": false,
503
+ "rstrip": false,
504
+ "single_word": false,
505
+ "special": true
506
+ },
507
+ "32060": {
508
+ "content": "<extra_id_39>",
509
+ "lstrip": false,
510
+ "normalized": false,
511
+ "rstrip": false,
512
+ "single_word": false,
513
+ "special": true
514
+ },
515
+ "32061": {
516
+ "content": "<extra_id_38>",
517
+ "lstrip": false,
518
+ "normalized": false,
519
+ "rstrip": false,
520
+ "single_word": false,
521
+ "special": true
522
+ },
523
+ "32062": {
524
+ "content": "<extra_id_37>",
525
+ "lstrip": false,
526
+ "normalized": false,
527
+ "rstrip": false,
528
+ "single_word": false,
529
+ "special": true
530
+ },
531
+ "32063": {
532
+ "content": "<extra_id_36>",
533
+ "lstrip": false,
534
+ "normalized": false,
535
+ "rstrip": false,
536
+ "single_word": false,
537
+ "special": true
538
+ },
539
+ "32064": {
540
+ "content": "<extra_id_35>",
541
+ "lstrip": false,
542
+ "normalized": false,
543
+ "rstrip": false,
544
+ "single_word": false,
545
+ "special": true
546
+ },
547
+ "32065": {
548
+ "content": "<extra_id_34>",
549
+ "lstrip": false,
550
+ "normalized": false,
551
+ "rstrip": false,
552
+ "single_word": false,
553
+ "special": true
554
+ },
555
+ "32066": {
556
+ "content": "<extra_id_33>",
557
+ "lstrip": false,
558
+ "normalized": false,
559
+ "rstrip": false,
560
+ "single_word": false,
561
+ "special": true
562
+ },
563
+ "32067": {
564
+ "content": "<extra_id_32>",
565
+ "lstrip": false,
566
+ "normalized": false,
567
+ "rstrip": false,
568
+ "single_word": false,
569
+ "special": true
570
+ },
571
+ "32068": {
572
+ "content": "<extra_id_31>",
573
+ "lstrip": false,
574
+ "normalized": false,
575
+ "rstrip": false,
576
+ "single_word": false,
577
+ "special": true
578
+ },
579
+ "32069": {
580
+ "content": "<extra_id_30>",
581
+ "lstrip": false,
582
+ "normalized": false,
583
+ "rstrip": false,
584
+ "single_word": false,
585
+ "special": true
586
+ },
587
+ "32070": {
588
+ "content": "<extra_id_29>",
589
+ "lstrip": false,
590
+ "normalized": false,
591
+ "rstrip": false,
592
+ "single_word": false,
593
+ "special": true
594
+ },
595
+ "32071": {
596
+ "content": "<extra_id_28>",
597
+ "lstrip": false,
598
+ "normalized": false,
599
+ "rstrip": false,
600
+ "single_word": false,
601
+ "special": true
602
+ },
603
+ "32072": {
604
+ "content": "<extra_id_27>",
605
+ "lstrip": false,
606
+ "normalized": false,
607
+ "rstrip": false,
608
+ "single_word": false,
609
+ "special": true
610
+ },
611
+ "32073": {
612
+ "content": "<extra_id_26>",
613
+ "lstrip": false,
614
+ "normalized": false,
615
+ "rstrip": false,
616
+ "single_word": false,
617
+ "special": true
618
+ },
619
+ "32074": {
620
+ "content": "<extra_id_25>",
621
+ "lstrip": false,
622
+ "normalized": false,
623
+ "rstrip": false,
624
+ "single_word": false,
625
+ "special": true
626
+ },
627
+ "32075": {
628
+ "content": "<extra_id_24>",
629
+ "lstrip": false,
630
+ "normalized": false,
631
+ "rstrip": false,
632
+ "single_word": false,
633
+ "special": true
634
+ },
635
+ "32076": {
636
+ "content": "<extra_id_23>",
637
+ "lstrip": false,
638
+ "normalized": false,
639
+ "rstrip": false,
640
+ "single_word": false,
641
+ "special": true
642
+ },
643
+ "32077": {
644
+ "content": "<extra_id_22>",
645
+ "lstrip": false,
646
+ "normalized": false,
647
+ "rstrip": false,
648
+ "single_word": false,
649
+ "special": true
650
+ },
651
+ "32078": {
652
+ "content": "<extra_id_21>",
653
+ "lstrip": false,
654
+ "normalized": false,
655
+ "rstrip": false,
656
+ "single_word": false,
657
+ "special": true
658
+ },
659
+ "32079": {
660
+ "content": "<extra_id_20>",
661
+ "lstrip": false,
662
+ "normalized": false,
663
+ "rstrip": false,
664
+ "single_word": false,
665
+ "special": true
666
+ },
667
+ "32080": {
668
+ "content": "<extra_id_19>",
669
+ "lstrip": false,
670
+ "normalized": false,
671
+ "rstrip": false,
672
+ "single_word": false,
673
+ "special": true
674
+ },
675
+ "32081": {
676
+ "content": "<extra_id_18>",
677
+ "lstrip": false,
678
+ "normalized": false,
679
+ "rstrip": false,
680
+ "single_word": false,
681
+ "special": true
682
+ },
683
+ "32082": {
684
+ "content": "<extra_id_17>",
685
+ "lstrip": false,
686
+ "normalized": false,
687
+ "rstrip": false,
688
+ "single_word": false,
689
+ "special": true
690
+ },
691
+ "32083": {
692
+ "content": "<extra_id_16>",
693
+ "lstrip": false,
694
+ "normalized": false,
695
+ "rstrip": false,
696
+ "single_word": false,
697
+ "special": true
698
+ },
699
+ "32084": {
700
+ "content": "<extra_id_15>",
701
+ "lstrip": false,
702
+ "normalized": false,
703
+ "rstrip": false,
704
+ "single_word": false,
705
+ "special": true
706
+ },
707
+ "32085": {
708
+ "content": "<extra_id_14>",
709
+ "lstrip": false,
710
+ "normalized": false,
711
+ "rstrip": false,
712
+ "single_word": false,
713
+ "special": true
714
+ },
715
+ "32086": {
716
+ "content": "<extra_id_13>",
717
+ "lstrip": false,
718
+ "normalized": false,
719
+ "rstrip": false,
720
+ "single_word": false,
721
+ "special": true
722
+ },
723
+ "32087": {
724
+ "content": "<extra_id_12>",
725
+ "lstrip": false,
726
+ "normalized": false,
727
+ "rstrip": false,
728
+ "single_word": false,
729
+ "special": true
730
+ },
731
+ "32088": {
732
+ "content": "<extra_id_11>",
733
+ "lstrip": false,
734
+ "normalized": false,
735
+ "rstrip": false,
736
+ "single_word": false,
737
+ "special": true
738
+ },
739
+ "32089": {
740
+ "content": "<extra_id_10>",
741
+ "lstrip": false,
742
+ "normalized": false,
743
+ "rstrip": false,
744
+ "single_word": false,
745
+ "special": true
746
+ },
747
+ "32090": {
748
+ "content": "<extra_id_9>",
749
+ "lstrip": false,
750
+ "normalized": false,
751
+ "rstrip": false,
752
+ "single_word": false,
753
+ "special": true
754
+ },
755
+ "32091": {
756
+ "content": "<extra_id_8>",
757
+ "lstrip": false,
758
+ "normalized": false,
759
+ "rstrip": false,
760
+ "single_word": false,
761
+ "special": true
762
+ },
763
+ "32092": {
764
+ "content": "<extra_id_7>",
765
+ "lstrip": false,
766
+ "normalized": false,
767
+ "rstrip": false,
768
+ "single_word": false,
769
+ "special": true
770
+ },
771
+ "32093": {
772
+ "content": "<extra_id_6>",
773
+ "lstrip": false,
774
+ "normalized": false,
775
+ "rstrip": false,
776
+ "single_word": false,
777
+ "special": true
778
+ },
779
+ "32094": {
780
+ "content": "<extra_id_5>",
781
+ "lstrip": false,
782
+ "normalized": false,
783
+ "rstrip": false,
784
+ "single_word": false,
785
+ "special": true
786
+ },
787
+ "32095": {
788
+ "content": "<extra_id_4>",
789
+ "lstrip": false,
790
+ "normalized": false,
791
+ "rstrip": false,
792
+ "single_word": false,
793
+ "special": true
794
+ },
795
+ "32096": {
796
+ "content": "<extra_id_3>",
797
+ "lstrip": false,
798
+ "normalized": false,
799
+ "rstrip": false,
800
+ "single_word": false,
801
+ "special": true
802
+ },
803
+ "32097": {
804
+ "content": "<extra_id_2>",
805
+ "lstrip": false,
806
+ "normalized": false,
807
+ "rstrip": false,
808
+ "single_word": false,
809
+ "special": true
810
+ },
811
+ "32098": {
812
+ "content": "<extra_id_1>",
813
+ "lstrip": false,
814
+ "normalized": false,
815
+ "rstrip": false,
816
+ "single_word": false,
817
+ "special": true
818
+ },
819
+ "32099": {
820
+ "content": "<extra_id_0>",
821
+ "lstrip": false,
822
+ "normalized": false,
823
+ "rstrip": false,
824
+ "single_word": false,
825
+ "special": true
826
+ }
827
+ },
828
+ "additional_special_tokens": [
829
+ "<extra_id_0>",
830
+ "<extra_id_1>",
831
+ "<extra_id_2>",
832
+ "<extra_id_3>",
833
+ "<extra_id_4>",
834
+ "<extra_id_5>",
835
+ "<extra_id_6>",
836
+ "<extra_id_7>",
837
+ "<extra_id_8>",
838
+ "<extra_id_9>",
839
+ "<extra_id_10>",
840
+ "<extra_id_11>",
841
+ "<extra_id_12>",
842
+ "<extra_id_13>",
843
+ "<extra_id_14>",
844
+ "<extra_id_15>",
845
+ "<extra_id_16>",
846
+ "<extra_id_17>",
847
+ "<extra_id_18>",
848
+ "<extra_id_19>",
849
+ "<extra_id_20>",
850
+ "<extra_id_21>",
851
+ "<extra_id_22>",
852
+ "<extra_id_23>",
853
+ "<extra_id_24>",
854
+ "<extra_id_25>",
855
+ "<extra_id_26>",
856
+ "<extra_id_27>",
857
+ "<extra_id_28>",
858
+ "<extra_id_29>",
859
+ "<extra_id_30>",
860
+ "<extra_id_31>",
861
+ "<extra_id_32>",
862
+ "<extra_id_33>",
863
+ "<extra_id_34>",
864
+ "<extra_id_35>",
865
+ "<extra_id_36>",
866
+ "<extra_id_37>",
867
+ "<extra_id_38>",
868
+ "<extra_id_39>",
869
+ "<extra_id_40>",
870
+ "<extra_id_41>",
871
+ "<extra_id_42>",
872
+ "<extra_id_43>",
873
+ "<extra_id_44>",
874
+ "<extra_id_45>",
875
+ "<extra_id_46>",
876
+ "<extra_id_47>",
877
+ "<extra_id_48>",
878
+ "<extra_id_49>",
879
+ "<extra_id_50>",
880
+ "<extra_id_51>",
881
+ "<extra_id_52>",
882
+ "<extra_id_53>",
883
+ "<extra_id_54>",
884
+ "<extra_id_55>",
885
+ "<extra_id_56>",
886
+ "<extra_id_57>",
887
+ "<extra_id_58>",
888
+ "<extra_id_59>",
889
+ "<extra_id_60>",
890
+ "<extra_id_61>",
891
+ "<extra_id_62>",
892
+ "<extra_id_63>",
893
+ "<extra_id_64>",
894
+ "<extra_id_65>",
895
+ "<extra_id_66>",
896
+ "<extra_id_67>",
897
+ "<extra_id_68>",
898
+ "<extra_id_69>",
899
+ "<extra_id_70>",
900
+ "<extra_id_71>",
901
+ "<extra_id_72>",
902
+ "<extra_id_73>",
903
+ "<extra_id_74>",
904
+ "<extra_id_75>",
905
+ "<extra_id_76>",
906
+ "<extra_id_77>",
907
+ "<extra_id_78>",
908
+ "<extra_id_79>",
909
+ "<extra_id_80>",
910
+ "<extra_id_81>",
911
+ "<extra_id_82>",
912
+ "<extra_id_83>",
913
+ "<extra_id_84>",
914
+ "<extra_id_85>",
915
+ "<extra_id_86>",
916
+ "<extra_id_87>",
917
+ "<extra_id_88>",
918
+ "<extra_id_89>",
919
+ "<extra_id_90>",
920
+ "<extra_id_91>",
921
+ "<extra_id_92>",
922
+ "<extra_id_93>",
923
+ "<extra_id_94>",
924
+ "<extra_id_95>",
925
+ "<extra_id_96>",
926
+ "<extra_id_97>",
927
+ "<extra_id_98>",
928
+ "<extra_id_99>"
929
+ ],
930
+ "clean_up_tokenization_spaces": true,
931
+ "eos_token": "</s>",
932
+ "extra_ids": 100,
933
+ "model_max_length": 512,
934
+ "pad_token": "<pad>",
935
+ "tokenizer_class": "T5Tokenizer",
936
+ "unk_token": "<unk>"
937
+ }
google_t5/t5_base_ledgar/train_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+ "epoch": 3.0,
+ "train_loss": 0.5752294131348806,
+ "train_runtime": 2000.5798,
+ "train_samples": 60000,
+ "train_samples_per_second": 89.974,
+ "train_steps_per_second": 1.407
+ }
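As a quick consistency check on these figures (not part of the logged output): 60,000 training samples × 3 epochs ÷ 2000.58 s ≈ 89.97 samples/s, and 1.407 steps/s × 2000.58 s ≈ 2815 optimizer steps, in line with the `max_steps` of 2814 recorded in the trainer state above.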