akkky02 committed on
Commit
aa5b5e2
1 Parent(s): ca5f7f5

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff.
Files changed (50) hide show
  1. .gitattributes +10 -0
  2. google/flan_t5_base_amazon/README.md +87 -0
  3. google/flan_t5_base_amazon/all_results.json +23 -0
  4. google/flan_t5_base_amazon/checkpoint-750/config.json +114 -0
  5. google/flan_t5_base_amazon/checkpoint-750/model.safetensors +3 -0
  6. google/flan_t5_base_amazon/checkpoint-750/optimizer.pt +3 -0
  7. google/flan_t5_base_amazon/checkpoint-750/rng_state_0.pth +3 -0
  8. google/flan_t5_base_amazon/checkpoint-750/rng_state_1.pth +3 -0
  9. google/flan_t5_base_amazon/checkpoint-750/scheduler.pt +3 -0
  10. google/flan_t5_base_amazon/checkpoint-750/special_tokens_map.json +125 -0
  11. google/flan_t5_base_amazon/checkpoint-750/spiece.model +3 -0
  12. google/flan_t5_base_amazon/checkpoint-750/tokenizer.json +0 -0
  13. google/flan_t5_base_amazon/checkpoint-750/tokenizer_config.json +938 -0
  14. google/flan_t5_base_amazon/checkpoint-750/trainer_state.json +711 -0
  15. google/flan_t5_base_amazon/checkpoint-750/training_args.bin +3 -0
  16. google/flan_t5_base_amazon/config.json +114 -0
  17. google/flan_t5_base_amazon/eval_results.json +11 -0
  18. google/flan_t5_base_amazon/model.safetensors +3 -0
  19. google/flan_t5_base_amazon/run.log +4 -0
  20. google/flan_t5_base_amazon/special_tokens_map.json +125 -0
  21. google/flan_t5_base_amazon/spiece.model +3 -0
  22. google/flan_t5_base_amazon/test_results.json +10 -0
  23. google/flan_t5_base_amazon/tokenizer.json +0 -0
  24. google/flan_t5_base_amazon/tokenizer_config.json +938 -0
  25. google/flan_t5_base_amazon/train_results.json +8 -0
  26. google/flan_t5_base_amazon/trainer_state.json +1070 -0
  27. google/flan_t5_base_amazon/training_args.bin +3 -0
  28. google/flan_t5_base_ledgar/README.md +93 -0
  29. google/flan_t5_base_ledgar/all_results.json +23 -0
  30. google/flan_t5_base_ledgar/checkpoint-2800/config.json +268 -0
  31. google/flan_t5_base_ledgar/checkpoint-2800/model.safetensors +3 -0
  32. google/flan_t5_base_ledgar/checkpoint-2800/optimizer.pt +3 -0
  33. google/flan_t5_base_ledgar/checkpoint-2800/rng_state_0.pth +3 -0
  34. google/flan_t5_base_ledgar/checkpoint-2800/rng_state_1.pth +3 -0
  35. google/flan_t5_base_ledgar/checkpoint-2800/scheduler.pt +3 -0
  36. google/flan_t5_base_ledgar/checkpoint-2800/special_tokens_map.json +125 -0
  37. google/flan_t5_base_ledgar/checkpoint-2800/spiece.model +3 -0
  38. google/flan_t5_base_ledgar/checkpoint-2800/tokenizer.json +0 -0
  39. google/flan_t5_base_ledgar/checkpoint-2800/tokenizer_config.json +938 -0
  40. google/flan_t5_base_ledgar/checkpoint-2800/trainer_state.json +1113 -0
  41. google/flan_t5_base_ledgar/checkpoint-2800/training_args.bin +3 -0
  42. google/flan_t5_base_ledgar/config.json +268 -0
  43. google/flan_t5_base_ledgar/eval_results.json +11 -0
  44. google/flan_t5_base_ledgar/model.safetensors +3 -0
  45. google/flan_t5_base_ledgar/run.log +4 -0
  46. google/flan_t5_base_ledgar/special_tokens_map.json +125 -0
  47. google/flan_t5_base_ledgar/spiece.model +3 -0
  48. google/flan_t5_base_ledgar/test_results.json +10 -0
  49. google/flan_t5_base_ledgar/tokenizer.json +0 -0
  50. google/flan_t5_base_ledgar/tokenizer_config.json +938 -0
.gitattributes CHANGED
@@ -33,3 +33,13 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ google/gemma_2b_amazon/checkpoint-350/tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
+ google/gemma_2b_amazon/tokenizer.json filter=lfs diff=lfs merge=lfs -text
38
+ google/gemma_2b_ledgar/checkpoint-1800/tokenizer.json filter=lfs diff=lfs merge=lfs -text
39
+ google/gemma_2b_ledgar/tokenizer.json filter=lfs diff=lfs merge=lfs -text
40
+ google/gemma_2b_patent/checkpoint-400/tokenizer.json filter=lfs diff=lfs merge=lfs -text
41
+ google/gemma_2b_patent/tokenizer.json filter=lfs diff=lfs merge=lfs -text
42
+ google/gemma_2b_scotus/checkpoint-300/tokenizer.json filter=lfs diff=lfs merge=lfs -text
43
+ google/gemma_2b_scotus/tokenizer.json filter=lfs diff=lfs merge=lfs -text
44
+ google/gemma_2b_twitter/checkpoint-250/tokenizer.json filter=lfs diff=lfs merge=lfs -text
45
+ google/gemma_2b_twitter/tokenizer.json filter=lfs diff=lfs merge=lfs -text
google/flan_t5_base_amazon/README.md ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ base_model: google/flan-t5-base
4
+ tags:
5
+ - generated_from_trainer
6
+ metrics:
7
+ - accuracy
8
+ model-index:
9
+ - name: flan_t5_base_amazon
10
+ results: []
11
+ ---
12
+
13
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
14
+ should probably proofread and complete it, then remove this comment. -->
15
+
16
+ # flan_t5_base_amazon
17
+
18
+ This model is a fine-tuned version of [google/flan-t5-base](https://huggingface.co/google/flan-t5-base) on an unknown dataset.
19
+ It achieves the following results on the evaluation set:
20
+ - Loss: 0.5448
21
+ - Accuracy: 0.8412
22
+ - F1 Macro: 0.8142
23
+ - F1 Micro: 0.8412
24
+
25
+ ## Model description
26
+
27
+ More information needed
28
+
29
+ ## Intended uses & limitations
30
+
31
+ More information needed
32
+
33
+ ## Training and evaluation data
34
+
35
+ More information needed
36
+
37
+ ## Training procedure
38
+
39
+ ### Training hyperparameters
40
+
41
+ The following hyperparameters were used during training:
42
+ - learning_rate: 0.0005
43
+ - train_batch_size: 16
44
+ - eval_batch_size: 16
45
+ - seed: 42
46
+ - distributed_type: multi-GPU
47
+ - num_devices: 2
48
+ - total_train_batch_size: 32
49
+ - total_eval_batch_size: 32
50
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
51
+ - lr_scheduler_type: linear
52
+ - num_epochs: 3.0
53
+
54
+ ### Training results
55
+
56
+ | Training Loss | Epoch | Step | Validation Loss | Accuracy | F1 Macro | F1 Micro |
57
+ |:-------------:|:-----:|:----:|:---------------:|:--------:|:--------:|:--------:|
58
+ | 1.1669 | 0.13 | 50 | 0.9142 | 0.7404 | 0.6916 | 0.7404 |
59
+ | 0.8536 | 0.26 | 100 | 0.8417 | 0.7569 | 0.7197 | 0.7569 |
60
+ | 0.827 | 0.39 | 150 | 0.6893 | 0.7905 | 0.7471 | 0.7905 |
61
+ | 0.672 | 0.53 | 200 | 0.7235 | 0.7984 | 0.7730 | 0.7984 |
62
+ | 0.7424 | 0.66 | 250 | 0.6684 | 0.7945 | 0.7461 | 0.7945 |
63
+ | 0.6802 | 0.79 | 300 | 0.6008 | 0.8215 | 0.8014 | 0.8215 |
64
+ | 0.7847 | 0.92 | 350 | 0.6225 | 0.8123 | 0.7925 | 0.8123 |
65
+ | 0.5258 | 1.05 | 400 | 0.6656 | 0.8215 | 0.8000 | 0.8215 |
66
+ | 0.4945 | 1.18 | 450 | 0.6410 | 0.8235 | 0.7983 | 0.8235 |
67
+ | 0.4097 | 1.32 | 500 | 0.5937 | 0.8347 | 0.8110 | 0.8347 |
68
+ | 0.4116 | 1.45 | 550 | 0.5966 | 0.8314 | 0.8061 | 0.8314 |
69
+ | 0.4785 | 1.58 | 600 | 0.5696 | 0.8347 | 0.8107 | 0.8347 |
70
+ | 0.4821 | 1.71 | 650 | 0.5536 | 0.8366 | 0.8098 | 0.8366 |
71
+ | 0.4137 | 1.84 | 700 | 0.5612 | 0.8373 | 0.8116 | 0.8373 |
72
+ | 0.4623 | 1.97 | 750 | 0.5448 | 0.8412 | 0.8142 | 0.8412 |
73
+ | 0.1953 | 2.11 | 800 | 0.5984 | 0.8472 | 0.8201 | 0.8472 |
74
+ | 0.2114 | 2.24 | 850 | 0.6189 | 0.8432 | 0.8177 | 0.8432 |
75
+ | 0.2252 | 2.37 | 900 | 0.6411 | 0.8465 | 0.8199 | 0.8465 |
76
+ | 0.1937 | 2.5 | 950 | 0.6044 | 0.8524 | 0.8245 | 0.8524 |
77
+ | 0.2611 | 2.63 | 1000 | 0.6188 | 0.8472 | 0.8189 | 0.8472 |
78
+ | 0.3021 | 2.76 | 1050 | 0.6018 | 0.8472 | 0.8189 | 0.8472 |
79
+ | 0.2309 | 2.89 | 1100 | 0.5804 | 0.8478 | 0.8186 | 0.8478 |
80
+
81
+
82
+ ### Framework versions
83
+
84
+ - Transformers 4.39.0.dev0
85
+ - Pytorch 2.2.1+cu121
86
+ - Datasets 2.18.0
87
+ - Tokenizers 0.15.2
google/flan_t5_base_amazon/all_results.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "eval_accuracy": 0.8412384716732543,
4
+ "eval_f1_macro": 0.8142445361734595,
5
+ "eval_f1_micro": 0.8412384716732543,
6
+ "eval_loss": 0.5448094010353088,
7
+ "eval_runtime": 4.3522,
8
+ "eval_samples": 1518,
9
+ "eval_samples_per_second": 348.786,
10
+ "eval_steps_per_second": 11.029,
11
+ "test_accuracy": 0.8636363636363636,
12
+ "test_f1_macro": 0.8457818676414356,
13
+ "test_f1_micro": 0.8636363636363636,
14
+ "test_loss": 0.5014536380767822,
15
+ "test_runtime": 4.3576,
16
+ "test_samples_per_second": 348.358,
17
+ "test_steps_per_second": 11.015,
18
+ "train_loss": 0.5365497335007317,
19
+ "train_runtime": 501.2901,
20
+ "train_samples": 12144,
21
+ "train_samples_per_second": 72.676,
22
+ "train_steps_per_second": 2.274
23
+ }
google/flan_t5_base_amazon/checkpoint-750/config.json ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/flan-t5-base",
3
+ "architectures": [
4
+ "T5ForSequenceClassification"
5
+ ],
6
+ "classifier_dropout": 0.0,
7
+ "d_ff": 2048,
8
+ "d_kv": 64,
9
+ "d_model": 768,
10
+ "decoder_start_token_id": 0,
11
+ "dense_act_fn": "gelu_new",
12
+ "dropout_rate": 0.1,
13
+ "eos_token_id": 1,
14
+ "feed_forward_proj": "gated-gelu",
15
+ "finetuning_task": "text-classification",
16
+ "id2label": {
17
+ "0": "0",
18
+ "1": "1",
19
+ "2": "10",
20
+ "3": "11",
21
+ "4": "12",
22
+ "5": "13",
23
+ "6": "14",
24
+ "7": "15",
25
+ "8": "16",
26
+ "9": "17",
27
+ "10": "18",
28
+ "11": "19",
29
+ "12": "2",
30
+ "13": "20",
31
+ "14": "21",
32
+ "15": "22",
33
+ "16": "3",
34
+ "17": "4",
35
+ "18": "5",
36
+ "19": "6",
37
+ "20": "7",
38
+ "21": "8",
39
+ "22": "9"
40
+ },
41
+ "initializer_factor": 1.0,
42
+ "is_encoder_decoder": true,
43
+ "is_gated_act": true,
44
+ "label2id": {
45
+ "0": 0,
46
+ "1": 1,
47
+ "10": 2,
48
+ "11": 3,
49
+ "12": 4,
50
+ "13": 5,
51
+ "14": 6,
52
+ "15": 7,
53
+ "16": 8,
54
+ "17": 9,
55
+ "18": 10,
56
+ "19": 11,
57
+ "2": 12,
58
+ "20": 13,
59
+ "21": 14,
60
+ "22": 15,
61
+ "3": 16,
62
+ "4": 17,
63
+ "5": 18,
64
+ "6": 19,
65
+ "7": 20,
66
+ "8": 21,
67
+ "9": 22
68
+ },
69
+ "layer_norm_epsilon": 1e-06,
70
+ "model_type": "t5",
71
+ "n_positions": 512,
72
+ "num_decoder_layers": 12,
73
+ "num_heads": 12,
74
+ "num_layers": 12,
75
+ "output_past": true,
76
+ "pad_token_id": 0,
77
+ "problem_type": "single_label_classification",
78
+ "relative_attention_max_distance": 128,
79
+ "relative_attention_num_buckets": 32,
80
+ "task_specific_params": {
81
+ "summarization": {
82
+ "early_stopping": true,
83
+ "length_penalty": 2.0,
84
+ "max_length": 200,
85
+ "min_length": 30,
86
+ "no_repeat_ngram_size": 3,
87
+ "num_beams": 4,
88
+ "prefix": "summarize: "
89
+ },
90
+ "translation_en_to_de": {
91
+ "early_stopping": true,
92
+ "max_length": 300,
93
+ "num_beams": 4,
94
+ "prefix": "translate English to German: "
95
+ },
96
+ "translation_en_to_fr": {
97
+ "early_stopping": true,
98
+ "max_length": 300,
99
+ "num_beams": 4,
100
+ "prefix": "translate English to French: "
101
+ },
102
+ "translation_en_to_ro": {
103
+ "early_stopping": true,
104
+ "max_length": 300,
105
+ "num_beams": 4,
106
+ "prefix": "translate English to Romanian: "
107
+ }
108
+ },
109
+ "tie_word_embeddings": false,
110
+ "torch_dtype": "float32",
111
+ "transformers_version": "4.39.0.dev0",
112
+ "use_cache": true,
113
+ "vocab_size": 32128
114
+ }
google/flan_t5_base_amazon/checkpoint-750/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cdf12b0177b17b6c82d42f25a5f17f318b90eccd70379adbc953121e7f3287cc
3
+ size 894084644
google/flan_t5_base_amazon/checkpoint-750/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:efdb66602a3ac1b1c21d673bb284224a361eec06bb14f5a710996a91010facb3
3
+ size 1788334650
google/flan_t5_base_amazon/checkpoint-750/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:875907df07bcaac90a9a56086559d38cbc9e0d66b5b14ca26338a6e0081be67d
3
+ size 14512
google/flan_t5_base_amazon/checkpoint-750/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2d49c121ab04b6a6fd45104646c36e5dca61d84078ecdff37f3ba9abe2ca89e
3
+ size 14512
google/flan_t5_base_amazon/checkpoint-750/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f060f9f1855ee025338c89df5296e093ae36daf48431a1326ecd5dd5d217e68
3
+ size 1064
google/flan_t5_base_amazon/checkpoint-750/special_tokens_map.json ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>",
99
+ "<extra_id_96>",
100
+ "<extra_id_97>",
101
+ "<extra_id_98>",
102
+ "<extra_id_99>"
103
+ ],
104
+ "eos_token": {
105
+ "content": "</s>",
106
+ "lstrip": false,
107
+ "normalized": false,
108
+ "rstrip": false,
109
+ "single_word": false
110
+ },
111
+ "pad_token": {
112
+ "content": "<pad>",
113
+ "lstrip": false,
114
+ "normalized": false,
115
+ "rstrip": false,
116
+ "single_word": false
117
+ },
118
+ "unk_token": {
119
+ "content": "<unk>",
120
+ "lstrip": false,
121
+ "normalized": false,
122
+ "rstrip": false,
123
+ "single_word": false
124
+ }
125
+ }
google/flan_t5_base_amazon/checkpoint-750/spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
3
+ size 791656
google/flan_t5_base_amazon/checkpoint-750/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
google/flan_t5_base_amazon/checkpoint-750/tokenizer_config.json ADDED
@@ -0,0 +1,938 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<pad>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "</s>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "<unk>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "32000": {
28
+ "content": "<extra_id_99>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "32001": {
36
+ "content": "<extra_id_98>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "32002": {
44
+ "content": "<extra_id_97>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "32003": {
52
+ "content": "<extra_id_96>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "32004": {
60
+ "content": "<extra_id_95>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "32005": {
68
+ "content": "<extra_id_94>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "32006": {
76
+ "content": "<extra_id_93>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "32007": {
84
+ "content": "<extra_id_92>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "32008": {
92
+ "content": "<extra_id_91>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "32009": {
100
+ "content": "<extra_id_90>",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "32010": {
108
+ "content": "<extra_id_89>",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "32011": {
116
+ "content": "<extra_id_88>",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "32012": {
124
+ "content": "<extra_id_87>",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "32013": {
132
+ "content": "<extra_id_86>",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": true
138
+ },
139
+ "32014": {
140
+ "content": "<extra_id_85>",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": true
146
+ },
147
+ "32015": {
148
+ "content": "<extra_id_84>",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": true
154
+ },
155
+ "32016": {
156
+ "content": "<extra_id_83>",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "32017": {
164
+ "content": "<extra_id_82>",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": true
170
+ },
171
+ "32018": {
172
+ "content": "<extra_id_81>",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": true
178
+ },
179
+ "32019": {
180
+ "content": "<extra_id_80>",
181
+ "lstrip": false,
182
+ "normalized": false,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": true
186
+ },
187
+ "32020": {
188
+ "content": "<extra_id_79>",
189
+ "lstrip": false,
190
+ "normalized": false,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": true
194
+ },
195
+ "32021": {
196
+ "content": "<extra_id_78>",
197
+ "lstrip": false,
198
+ "normalized": false,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": true
202
+ },
203
+ "32022": {
204
+ "content": "<extra_id_77>",
205
+ "lstrip": false,
206
+ "normalized": false,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": true
210
+ },
211
+ "32023": {
212
+ "content": "<extra_id_76>",
213
+ "lstrip": false,
214
+ "normalized": false,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": true
218
+ },
219
+ "32024": {
220
+ "content": "<extra_id_75>",
221
+ "lstrip": false,
222
+ "normalized": false,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": true
226
+ },
227
+ "32025": {
228
+ "content": "<extra_id_74>",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": true
234
+ },
235
+ "32026": {
236
+ "content": "<extra_id_73>",
237
+ "lstrip": false,
238
+ "normalized": false,
239
+ "rstrip": false,
240
+ "single_word": false,
241
+ "special": true
242
+ },
243
+ "32027": {
244
+ "content": "<extra_id_72>",
245
+ "lstrip": false,
246
+ "normalized": false,
247
+ "rstrip": false,
248
+ "single_word": false,
249
+ "special": true
250
+ },
251
+ "32028": {
252
+ "content": "<extra_id_71>",
253
+ "lstrip": false,
254
+ "normalized": false,
255
+ "rstrip": false,
256
+ "single_word": false,
257
+ "special": true
258
+ },
259
+ "32029": {
260
+ "content": "<extra_id_70>",
261
+ "lstrip": false,
262
+ "normalized": false,
263
+ "rstrip": false,
264
+ "single_word": false,
265
+ "special": true
266
+ },
267
+ "32030": {
268
+ "content": "<extra_id_69>",
269
+ "lstrip": false,
270
+ "normalized": false,
271
+ "rstrip": false,
272
+ "single_word": false,
273
+ "special": true
274
+ },
275
+ "32031": {
276
+ "content": "<extra_id_68>",
277
+ "lstrip": false,
278
+ "normalized": false,
279
+ "rstrip": false,
280
+ "single_word": false,
281
+ "special": true
282
+ },
283
+ "32032": {
284
+ "content": "<extra_id_67>",
285
+ "lstrip": false,
286
+ "normalized": false,
287
+ "rstrip": false,
288
+ "single_word": false,
289
+ "special": true
290
+ },
291
+ "32033": {
292
+ "content": "<extra_id_66>",
293
+ "lstrip": false,
294
+ "normalized": false,
295
+ "rstrip": false,
296
+ "single_word": false,
297
+ "special": true
298
+ },
299
+ "32034": {
300
+ "content": "<extra_id_65>",
301
+ "lstrip": false,
302
+ "normalized": false,
303
+ "rstrip": false,
304
+ "single_word": false,
305
+ "special": true
306
+ },
307
+ "32035": {
308
+ "content": "<extra_id_64>",
309
+ "lstrip": false,
310
+ "normalized": false,
311
+ "rstrip": false,
312
+ "single_word": false,
313
+ "special": true
314
+ },
315
+ "32036": {
316
+ "content": "<extra_id_63>",
317
+ "lstrip": false,
318
+ "normalized": false,
319
+ "rstrip": false,
320
+ "single_word": false,
321
+ "special": true
322
+ },
323
+ "32037": {
324
+ "content": "<extra_id_62>",
325
+ "lstrip": false,
326
+ "normalized": false,
327
+ "rstrip": false,
328
+ "single_word": false,
329
+ "special": true
330
+ },
331
+ "32038": {
332
+ "content": "<extra_id_61>",
333
+ "lstrip": false,
334
+ "normalized": false,
335
+ "rstrip": false,
336
+ "single_word": false,
337
+ "special": true
338
+ },
339
+ "32039": {
340
+ "content": "<extra_id_60>",
341
+ "lstrip": false,
342
+ "normalized": false,
343
+ "rstrip": false,
344
+ "single_word": false,
345
+ "special": true
346
+ },
347
+ "32040": {
348
+ "content": "<extra_id_59>",
349
+ "lstrip": false,
350
+ "normalized": false,
351
+ "rstrip": false,
352
+ "single_word": false,
353
+ "special": true
354
+ },
355
+ "32041": {
356
+ "content": "<extra_id_58>",
357
+ "lstrip": false,
358
+ "normalized": false,
359
+ "rstrip": false,
360
+ "single_word": false,
361
+ "special": true
362
+ },
363
+ "32042": {
364
+ "content": "<extra_id_57>",
365
+ "lstrip": false,
366
+ "normalized": false,
367
+ "rstrip": false,
368
+ "single_word": false,
369
+ "special": true
370
+ },
371
+ "32043": {
372
+ "content": "<extra_id_56>",
373
+ "lstrip": false,
374
+ "normalized": false,
375
+ "rstrip": false,
376
+ "single_word": false,
377
+ "special": true
378
+ },
379
+ "32044": {
380
+ "content": "<extra_id_55>",
381
+ "lstrip": false,
382
+ "normalized": false,
383
+ "rstrip": false,
384
+ "single_word": false,
385
+ "special": true
386
+ },
387
+ "32045": {
388
+ "content": "<extra_id_54>",
389
+ "lstrip": false,
390
+ "normalized": false,
391
+ "rstrip": false,
392
+ "single_word": false,
393
+ "special": true
394
+ },
395
+ "32046": {
396
+ "content": "<extra_id_53>",
397
+ "lstrip": false,
398
+ "normalized": false,
399
+ "rstrip": false,
400
+ "single_word": false,
401
+ "special": true
402
+ },
403
+ "32047": {
404
+ "content": "<extra_id_52>",
405
+ "lstrip": false,
406
+ "normalized": false,
407
+ "rstrip": false,
408
+ "single_word": false,
409
+ "special": true
410
+ },
411
+ "32048": {
412
+ "content": "<extra_id_51>",
413
+ "lstrip": false,
414
+ "normalized": false,
415
+ "rstrip": false,
416
+ "single_word": false,
417
+ "special": true
418
+ },
419
+ "32049": {
420
+ "content": "<extra_id_50>",
421
+ "lstrip": false,
422
+ "normalized": false,
423
+ "rstrip": false,
424
+ "single_word": false,
425
+ "special": true
426
+ },
427
+ "32050": {
428
+ "content": "<extra_id_49>",
429
+ "lstrip": false,
430
+ "normalized": false,
431
+ "rstrip": false,
432
+ "single_word": false,
433
+ "special": true
434
+ },
435
+ "32051": {
436
+ "content": "<extra_id_48>",
437
+ "lstrip": false,
438
+ "normalized": false,
439
+ "rstrip": false,
440
+ "single_word": false,
441
+ "special": true
442
+ },
443
+ "32052": {
444
+ "content": "<extra_id_47>",
445
+ "lstrip": false,
446
+ "normalized": false,
447
+ "rstrip": false,
448
+ "single_word": false,
449
+ "special": true
450
+ },
451
+ "32053": {
452
+ "content": "<extra_id_46>",
453
+ "lstrip": false,
454
+ "normalized": false,
455
+ "rstrip": false,
456
+ "single_word": false,
457
+ "special": true
458
+ },
459
+ "32054": {
460
+ "content": "<extra_id_45>",
461
+ "lstrip": false,
462
+ "normalized": false,
463
+ "rstrip": false,
464
+ "single_word": false,
465
+ "special": true
466
+ },
467
+ "32055": {
468
+ "content": "<extra_id_44>",
469
+ "lstrip": false,
470
+ "normalized": false,
471
+ "rstrip": false,
472
+ "single_word": false,
473
+ "special": true
474
+ },
475
+ "32056": {
476
+ "content": "<extra_id_43>",
477
+ "lstrip": false,
478
+ "normalized": false,
479
+ "rstrip": false,
480
+ "single_word": false,
481
+ "special": true
482
+ },
483
+ "32057": {
484
+ "content": "<extra_id_42>",
485
+ "lstrip": false,
486
+ "normalized": false,
487
+ "rstrip": false,
488
+ "single_word": false,
489
+ "special": true
490
+ },
491
+ "32058": {
492
+ "content": "<extra_id_41>",
493
+ "lstrip": false,
494
+ "normalized": false,
495
+ "rstrip": false,
496
+ "single_word": false,
497
+ "special": true
498
+ },
499
+ "32059": {
500
+ "content": "<extra_id_40>",
501
+ "lstrip": false,
502
+ "normalized": false,
503
+ "rstrip": false,
504
+ "single_word": false,
505
+ "special": true
506
+ },
507
+ "32060": {
508
+ "content": "<extra_id_39>",
509
+ "lstrip": false,
510
+ "normalized": false,
511
+ "rstrip": false,
512
+ "single_word": false,
513
+ "special": true
514
+ },
515
+ "32061": {
516
+ "content": "<extra_id_38>",
517
+ "lstrip": false,
518
+ "normalized": false,
519
+ "rstrip": false,
520
+ "single_word": false,
521
+ "special": true
522
+ },
523
+ "32062": {
524
+ "content": "<extra_id_37>",
525
+ "lstrip": false,
526
+ "normalized": false,
527
+ "rstrip": false,
528
+ "single_word": false,
529
+ "special": true
530
+ },
531
+ "32063": {
532
+ "content": "<extra_id_36>",
533
+ "lstrip": false,
534
+ "normalized": false,
535
+ "rstrip": false,
536
+ "single_word": false,
537
+ "special": true
538
+ },
539
+ "32064": {
540
+ "content": "<extra_id_35>",
541
+ "lstrip": false,
542
+ "normalized": false,
543
+ "rstrip": false,
544
+ "single_word": false,
545
+ "special": true
546
+ },
547
+ "32065": {
548
+ "content": "<extra_id_34>",
549
+ "lstrip": false,
550
+ "normalized": false,
551
+ "rstrip": false,
552
+ "single_word": false,
553
+ "special": true
554
+ },
555
+ "32066": {
556
+ "content": "<extra_id_33>",
557
+ "lstrip": false,
558
+ "normalized": false,
559
+ "rstrip": false,
560
+ "single_word": false,
561
+ "special": true
562
+ },
563
+ "32067": {
564
+ "content": "<extra_id_32>",
565
+ "lstrip": false,
566
+ "normalized": false,
567
+ "rstrip": false,
568
+ "single_word": false,
569
+ "special": true
570
+ },
571
+ "32068": {
572
+ "content": "<extra_id_31>",
573
+ "lstrip": false,
574
+ "normalized": false,
575
+ "rstrip": false,
576
+ "single_word": false,
577
+ "special": true
578
+ },
579
+ "32069": {
580
+ "content": "<extra_id_30>",
581
+ "lstrip": false,
582
+ "normalized": false,
583
+ "rstrip": false,
584
+ "single_word": false,
585
+ "special": true
586
+ },
587
+ "32070": {
588
+ "content": "<extra_id_29>",
589
+ "lstrip": false,
590
+ "normalized": false,
591
+ "rstrip": false,
592
+ "single_word": false,
593
+ "special": true
594
+ },
595
+ "32071": {
596
+ "content": "<extra_id_28>",
597
+ "lstrip": false,
598
+ "normalized": false,
599
+ "rstrip": false,
600
+ "single_word": false,
601
+ "special": true
602
+ },
603
+ "32072": {
604
+ "content": "<extra_id_27>",
605
+ "lstrip": false,
606
+ "normalized": false,
607
+ "rstrip": false,
608
+ "single_word": false,
609
+ "special": true
610
+ },
611
+ "32073": {
612
+ "content": "<extra_id_26>",
613
+ "lstrip": false,
614
+ "normalized": false,
615
+ "rstrip": false,
616
+ "single_word": false,
617
+ "special": true
618
+ },
619
+ "32074": {
620
+ "content": "<extra_id_25>",
621
+ "lstrip": false,
622
+ "normalized": false,
623
+ "rstrip": false,
624
+ "single_word": false,
625
+ "special": true
626
+ },
627
+ "32075": {
628
+ "content": "<extra_id_24>",
629
+ "lstrip": false,
630
+ "normalized": false,
631
+ "rstrip": false,
632
+ "single_word": false,
633
+ "special": true
634
+ },
635
+ "32076": {
636
+ "content": "<extra_id_23>",
637
+ "lstrip": false,
638
+ "normalized": false,
639
+ "rstrip": false,
640
+ "single_word": false,
641
+ "special": true
642
+ },
643
+ "32077": {
644
+ "content": "<extra_id_22>",
645
+ "lstrip": false,
646
+ "normalized": false,
647
+ "rstrip": false,
648
+ "single_word": false,
649
+ "special": true
650
+ },
651
+ "32078": {
652
+ "content": "<extra_id_21>",
653
+ "lstrip": false,
654
+ "normalized": false,
655
+ "rstrip": false,
656
+ "single_word": false,
657
+ "special": true
658
+ },
659
+ "32079": {
660
+ "content": "<extra_id_20>",
661
+ "lstrip": false,
662
+ "normalized": false,
663
+ "rstrip": false,
664
+ "single_word": false,
665
+ "special": true
666
+ },
667
+ "32080": {
668
+ "content": "<extra_id_19>",
669
+ "lstrip": false,
670
+ "normalized": false,
671
+ "rstrip": false,
672
+ "single_word": false,
673
+ "special": true
674
+ },
675
+ "32081": {
676
+ "content": "<extra_id_18>",
677
+ "lstrip": false,
678
+ "normalized": false,
679
+ "rstrip": false,
680
+ "single_word": false,
681
+ "special": true
682
+ },
683
+ "32082": {
684
+ "content": "<extra_id_17>",
685
+ "lstrip": false,
686
+ "normalized": false,
687
+ "rstrip": false,
688
+ "single_word": false,
689
+ "special": true
690
+ },
691
+ "32083": {
692
+ "content": "<extra_id_16>",
693
+ "lstrip": false,
694
+ "normalized": false,
695
+ "rstrip": false,
696
+ "single_word": false,
697
+ "special": true
698
+ },
699
+ "32084": {
700
+ "content": "<extra_id_15>",
701
+ "lstrip": false,
702
+ "normalized": false,
703
+ "rstrip": false,
704
+ "single_word": false,
705
+ "special": true
706
+ },
707
+ "32085": {
708
+ "content": "<extra_id_14>",
709
+ "lstrip": false,
710
+ "normalized": false,
711
+ "rstrip": false,
712
+ "single_word": false,
713
+ "special": true
714
+ },
715
+ "32086": {
716
+ "content": "<extra_id_13>",
717
+ "lstrip": false,
718
+ "normalized": false,
719
+ "rstrip": false,
720
+ "single_word": false,
721
+ "special": true
722
+ },
723
+ "32087": {
724
+ "content": "<extra_id_12>",
725
+ "lstrip": false,
726
+ "normalized": false,
727
+ "rstrip": false,
728
+ "single_word": false,
729
+ "special": true
730
+ },
731
+ "32088": {
732
+ "content": "<extra_id_11>",
733
+ "lstrip": false,
734
+ "normalized": false,
735
+ "rstrip": false,
736
+ "single_word": false,
737
+ "special": true
738
+ },
739
+ "32089": {
740
+ "content": "<extra_id_10>",
741
+ "lstrip": false,
742
+ "normalized": false,
743
+ "rstrip": false,
744
+ "single_word": false,
745
+ "special": true
746
+ },
747
+ "32090": {
748
+ "content": "<extra_id_9>",
749
+ "lstrip": false,
750
+ "normalized": false,
751
+ "rstrip": false,
752
+ "single_word": false,
753
+ "special": true
754
+ },
755
+ "32091": {
756
+ "content": "<extra_id_8>",
757
+ "lstrip": false,
758
+ "normalized": false,
759
+ "rstrip": false,
760
+ "single_word": false,
761
+ "special": true
762
+ },
763
+ "32092": {
764
+ "content": "<extra_id_7>",
765
+ "lstrip": false,
766
+ "normalized": false,
767
+ "rstrip": false,
768
+ "single_word": false,
769
+ "special": true
770
+ },
771
+ "32093": {
772
+ "content": "<extra_id_6>",
773
+ "lstrip": false,
774
+ "normalized": false,
775
+ "rstrip": false,
776
+ "single_word": false,
777
+ "special": true
778
+ },
779
+ "32094": {
780
+ "content": "<extra_id_5>",
781
+ "lstrip": false,
782
+ "normalized": false,
783
+ "rstrip": false,
784
+ "single_word": false,
785
+ "special": true
786
+ },
787
+ "32095": {
788
+ "content": "<extra_id_4>",
789
+ "lstrip": false,
790
+ "normalized": false,
791
+ "rstrip": false,
792
+ "single_word": false,
793
+ "special": true
794
+ },
795
+ "32096": {
796
+ "content": "<extra_id_3>",
797
+ "lstrip": false,
798
+ "normalized": false,
799
+ "rstrip": false,
800
+ "single_word": false,
801
+ "special": true
802
+ },
803
+ "32097": {
804
+ "content": "<extra_id_2>",
805
+ "lstrip": false,
806
+ "normalized": false,
807
+ "rstrip": false,
808
+ "single_word": false,
809
+ "special": true
810
+ },
811
+ "32098": {
812
+ "content": "<extra_id_1>",
813
+ "lstrip": false,
814
+ "normalized": false,
815
+ "rstrip": false,
816
+ "single_word": false,
817
+ "special": true
818
+ },
819
+ "32099": {
820
+ "content": "<extra_id_0>",
821
+ "lstrip": false,
822
+ "normalized": false,
823
+ "rstrip": false,
824
+ "single_word": false,
825
+ "special": true
826
+ }
827
+ },
828
+ "additional_special_tokens": [
829
+ "<extra_id_0>",
830
+ "<extra_id_1>",
831
+ "<extra_id_2>",
832
+ "<extra_id_3>",
833
+ "<extra_id_4>",
834
+ "<extra_id_5>",
835
+ "<extra_id_6>",
836
+ "<extra_id_7>",
837
+ "<extra_id_8>",
838
+ "<extra_id_9>",
839
+ "<extra_id_10>",
840
+ "<extra_id_11>",
841
+ "<extra_id_12>",
842
+ "<extra_id_13>",
843
+ "<extra_id_14>",
844
+ "<extra_id_15>",
845
+ "<extra_id_16>",
846
+ "<extra_id_17>",
847
+ "<extra_id_18>",
848
+ "<extra_id_19>",
849
+ "<extra_id_20>",
850
+ "<extra_id_21>",
851
+ "<extra_id_22>",
852
+ "<extra_id_23>",
853
+ "<extra_id_24>",
854
+ "<extra_id_25>",
855
+ "<extra_id_26>",
856
+ "<extra_id_27>",
857
+ "<extra_id_28>",
858
+ "<extra_id_29>",
859
+ "<extra_id_30>",
860
+ "<extra_id_31>",
861
+ "<extra_id_32>",
862
+ "<extra_id_33>",
863
+ "<extra_id_34>",
864
+ "<extra_id_35>",
865
+ "<extra_id_36>",
866
+ "<extra_id_37>",
867
+ "<extra_id_38>",
868
+ "<extra_id_39>",
869
+ "<extra_id_40>",
870
+ "<extra_id_41>",
871
+ "<extra_id_42>",
872
+ "<extra_id_43>",
873
+ "<extra_id_44>",
874
+ "<extra_id_45>",
875
+ "<extra_id_46>",
876
+ "<extra_id_47>",
877
+ "<extra_id_48>",
878
+ "<extra_id_49>",
879
+ "<extra_id_50>",
880
+ "<extra_id_51>",
881
+ "<extra_id_52>",
882
+ "<extra_id_53>",
883
+ "<extra_id_54>",
884
+ "<extra_id_55>",
885
+ "<extra_id_56>",
886
+ "<extra_id_57>",
887
+ "<extra_id_58>",
888
+ "<extra_id_59>",
889
+ "<extra_id_60>",
890
+ "<extra_id_61>",
891
+ "<extra_id_62>",
892
+ "<extra_id_63>",
893
+ "<extra_id_64>",
894
+ "<extra_id_65>",
895
+ "<extra_id_66>",
896
+ "<extra_id_67>",
897
+ "<extra_id_68>",
898
+ "<extra_id_69>",
899
+ "<extra_id_70>",
900
+ "<extra_id_71>",
901
+ "<extra_id_72>",
902
+ "<extra_id_73>",
903
+ "<extra_id_74>",
904
+ "<extra_id_75>",
905
+ "<extra_id_76>",
906
+ "<extra_id_77>",
907
+ "<extra_id_78>",
908
+ "<extra_id_79>",
909
+ "<extra_id_80>",
910
+ "<extra_id_81>",
911
+ "<extra_id_82>",
912
+ "<extra_id_83>",
913
+ "<extra_id_84>",
914
+ "<extra_id_85>",
915
+ "<extra_id_86>",
916
+ "<extra_id_87>",
917
+ "<extra_id_88>",
918
+ "<extra_id_89>",
919
+ "<extra_id_90>",
920
+ "<extra_id_91>",
921
+ "<extra_id_92>",
922
+ "<extra_id_93>",
923
+ "<extra_id_94>",
924
+ "<extra_id_95>",
925
+ "<extra_id_96>",
926
+ "<extra_id_97>",
927
+ "<extra_id_98>",
928
+ "<extra_id_99>"
929
+ ],
930
+ "clean_up_tokenization_spaces": true,
931
+ "eos_token": "</s>",
932
+ "extra_ids": 100,
933
+ "model_max_length": 512,
934
+ "pad_token": "<pad>",
935
+ "sp_model_kwargs": {},
936
+ "tokenizer_class": "T5Tokenizer",
937
+ "unk_token": "<unk>"
938
+ }
google/flan_t5_base_amazon/checkpoint-750/trainer_state.json ADDED
@@ -0,0 +1,711 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.5448094010353088,
3
+ "best_model_checkpoint": "../../experiments_checkpoints/MAdAiLab/google/flan_t5_base_amazon/checkpoint-750",
4
+ "epoch": 1.973684210526316,
5
+ "eval_steps": 50,
6
+ "global_step": 750,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.03,
13
+ "grad_norm": 2.0010387897491455,
14
+ "learning_rate": 0.0004956140350877193,
15
+ "loss": 3.0662,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.05,
20
+ "grad_norm": 3.5069518089294434,
21
+ "learning_rate": 0.0004912280701754386,
22
+ "loss": 2.6518,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.08,
27
+ "grad_norm": 3.5161848068237305,
28
+ "learning_rate": 0.0004868421052631579,
29
+ "loss": 1.6574,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.11,
34
+ "grad_norm": 4.639632701873779,
35
+ "learning_rate": 0.0004824561403508772,
36
+ "loss": 1.121,
37
+ "step": 40
38
+ },
39
+ {
40
+ "epoch": 0.13,
41
+ "grad_norm": 4.880507469177246,
42
+ "learning_rate": 0.00047807017543859647,
43
+ "loss": 1.1669,
44
+ "step": 50
45
+ },
46
+ {
47
+ "epoch": 0.13,
48
+ "eval_accuracy": 0.7404479578392622,
49
+ "eval_f1_macro": 0.691578760708662,
50
+ "eval_f1_micro": 0.7404479578392622,
51
+ "eval_loss": 0.9141963124275208,
52
+ "eval_runtime": 4.3214,
53
+ "eval_samples_per_second": 351.273,
54
+ "eval_steps_per_second": 11.107,
55
+ "step": 50
56
+ },
57
+ {
58
+ "epoch": 0.16,
59
+ "grad_norm": 5.10620641708374,
60
+ "learning_rate": 0.00047368421052631577,
61
+ "loss": 0.9721,
62
+ "step": 60
63
+ },
64
+ {
65
+ "epoch": 0.18,
66
+ "grad_norm": 3.84094500541687,
67
+ "learning_rate": 0.0004692982456140351,
68
+ "loss": 0.9353,
69
+ "step": 70
70
+ },
71
+ {
72
+ "epoch": 0.21,
73
+ "grad_norm": 4.975834846496582,
74
+ "learning_rate": 0.00046491228070175437,
75
+ "loss": 0.9117,
76
+ "step": 80
77
+ },
78
+ {
79
+ "epoch": 0.24,
80
+ "grad_norm": 4.519538402557373,
81
+ "learning_rate": 0.0004605263157894737,
82
+ "loss": 0.909,
83
+ "step": 90
84
+ },
85
+ {
86
+ "epoch": 0.26,
87
+ "grad_norm": 4.041190147399902,
88
+ "learning_rate": 0.000456140350877193,
89
+ "loss": 0.8536,
90
+ "step": 100
91
+ },
92
+ {
93
+ "epoch": 0.26,
94
+ "eval_accuracy": 0.7569169960474308,
95
+ "eval_f1_macro": 0.7196785257049705,
96
+ "eval_f1_micro": 0.7569169960474308,
97
+ "eval_loss": 0.8417074680328369,
98
+ "eval_runtime": 4.3701,
99
+ "eval_samples_per_second": 347.362,
100
+ "eval_steps_per_second": 10.984,
101
+ "step": 100
102
+ },
103
+ {
104
+ "epoch": 0.29,
105
+ "grad_norm": 3.605381965637207,
106
+ "learning_rate": 0.00045175438596491233,
107
+ "loss": 0.9664,
108
+ "step": 110
109
+ },
110
+ {
111
+ "epoch": 0.32,
112
+ "grad_norm": 2.9544260501861572,
113
+ "learning_rate": 0.0004473684210526316,
114
+ "loss": 0.8692,
115
+ "step": 120
116
+ },
117
+ {
118
+ "epoch": 0.34,
119
+ "grad_norm": 2.32454514503479,
120
+ "learning_rate": 0.0004429824561403509,
121
+ "loss": 0.874,
122
+ "step": 130
123
+ },
124
+ {
125
+ "epoch": 0.37,
126
+ "grad_norm": 2.802645206451416,
127
+ "learning_rate": 0.0004385964912280702,
128
+ "loss": 0.748,
129
+ "step": 140
130
+ },
131
+ {
132
+ "epoch": 0.39,
133
+ "grad_norm": 2.7617292404174805,
134
+ "learning_rate": 0.0004342105263157895,
135
+ "loss": 0.827,
136
+ "step": 150
137
+ },
138
+ {
139
+ "epoch": 0.39,
140
+ "eval_accuracy": 0.7905138339920948,
141
+ "eval_f1_macro": 0.7470749543158418,
142
+ "eval_f1_micro": 0.7905138339920948,
143
+ "eval_loss": 0.6893027424812317,
144
+ "eval_runtime": 4.3681,
145
+ "eval_samples_per_second": 347.523,
146
+ "eval_steps_per_second": 10.989,
147
+ "step": 150
148
+ },
149
+ {
150
+ "epoch": 0.42,
151
+ "grad_norm": 4.82970666885376,
152
+ "learning_rate": 0.0004298245614035088,
153
+ "loss": 0.806,
154
+ "step": 160
155
+ },
156
+ {
157
+ "epoch": 0.45,
158
+ "grad_norm": 3.695923089981079,
159
+ "learning_rate": 0.0004254385964912281,
160
+ "loss": 0.7636,
161
+ "step": 170
162
+ },
163
+ {
164
+ "epoch": 0.47,
165
+ "grad_norm": 2.7026147842407227,
166
+ "learning_rate": 0.00042105263157894734,
167
+ "loss": 0.7973,
168
+ "step": 180
169
+ },
170
+ {
171
+ "epoch": 0.5,
172
+ "grad_norm": 4.265638828277588,
173
+ "learning_rate": 0.0004166666666666667,
174
+ "loss": 0.8075,
175
+ "step": 190
176
+ },
177
+ {
178
+ "epoch": 0.53,
179
+ "grad_norm": 3.372328281402588,
180
+ "learning_rate": 0.000412280701754386,
181
+ "loss": 0.672,
182
+ "step": 200
183
+ },
184
+ {
185
+ "epoch": 0.53,
186
+ "eval_accuracy": 0.7984189723320159,
187
+ "eval_f1_macro": 0.773043998913531,
188
+ "eval_f1_micro": 0.7984189723320159,
189
+ "eval_loss": 0.7234821319580078,
190
+ "eval_runtime": 4.3741,
191
+ "eval_samples_per_second": 347.043,
192
+ "eval_steps_per_second": 10.974,
193
+ "step": 200
194
+ },
195
+ {
196
+ "epoch": 0.55,
197
+ "grad_norm": 3.093799114227295,
198
+ "learning_rate": 0.00040789473684210524,
199
+ "loss": 0.6653,
200
+ "step": 210
201
+ },
202
+ {
203
+ "epoch": 0.58,
204
+ "grad_norm": 4.268807888031006,
205
+ "learning_rate": 0.00040350877192982455,
206
+ "loss": 0.7851,
207
+ "step": 220
208
+ },
209
+ {
210
+ "epoch": 0.61,
211
+ "grad_norm": 3.9077024459838867,
212
+ "learning_rate": 0.0003991228070175439,
213
+ "loss": 0.779,
214
+ "step": 230
215
+ },
216
+ {
217
+ "epoch": 0.63,
218
+ "grad_norm": 3.523000955581665,
219
+ "learning_rate": 0.00039473684210526315,
220
+ "loss": 0.8036,
221
+ "step": 240
222
+ },
223
+ {
224
+ "epoch": 0.66,
225
+ "grad_norm": 3.328284502029419,
226
+ "learning_rate": 0.00039035087719298245,
227
+ "loss": 0.7424,
228
+ "step": 250
229
+ },
230
+ {
231
+ "epoch": 0.66,
232
+ "eval_accuracy": 0.7944664031620553,
233
+ "eval_f1_macro": 0.7460722463431328,
234
+ "eval_f1_micro": 0.7944664031620553,
235
+ "eval_loss": 0.6683638691902161,
236
+ "eval_runtime": 4.3992,
237
+ "eval_samples_per_second": 345.06,
238
+ "eval_steps_per_second": 10.911,
239
+ "step": 250
240
+ },
241
+ {
242
+ "epoch": 0.68,
243
+ "grad_norm": 3.9189491271972656,
244
+ "learning_rate": 0.00038596491228070175,
245
+ "loss": 0.7463,
246
+ "step": 260
247
+ },
248
+ {
249
+ "epoch": 0.71,
250
+ "grad_norm": 2.302865505218506,
251
+ "learning_rate": 0.00038157894736842105,
252
+ "loss": 0.7241,
253
+ "step": 270
254
+ },
255
+ {
256
+ "epoch": 0.74,
257
+ "grad_norm": 3.9664320945739746,
258
+ "learning_rate": 0.00037719298245614036,
259
+ "loss": 0.6228,
260
+ "step": 280
261
+ },
262
+ {
263
+ "epoch": 0.76,
264
+ "grad_norm": 5.590970516204834,
265
+ "learning_rate": 0.00037280701754385966,
266
+ "loss": 0.6144,
267
+ "step": 290
268
+ },
269
+ {
270
+ "epoch": 0.79,
271
+ "grad_norm": 3.0977087020874023,
272
+ "learning_rate": 0.00036842105263157896,
273
+ "loss": 0.6802,
274
+ "step": 300
275
+ },
276
+ {
277
+ "epoch": 0.79,
278
+ "eval_accuracy": 0.8214756258234519,
279
+ "eval_f1_macro": 0.8013684848189496,
280
+ "eval_f1_micro": 0.8214756258234519,
281
+ "eval_loss": 0.60084068775177,
282
+ "eval_runtime": 4.3717,
283
+ "eval_samples_per_second": 347.237,
284
+ "eval_steps_per_second": 10.98,
285
+ "step": 300
286
+ },
287
+ {
288
+ "epoch": 0.82,
289
+ "grad_norm": 4.899272441864014,
290
+ "learning_rate": 0.00036403508771929826,
291
+ "loss": 0.6185,
292
+ "step": 310
293
+ },
294
+ {
295
+ "epoch": 0.84,
296
+ "grad_norm": 4.385756969451904,
297
+ "learning_rate": 0.00035964912280701756,
298
+ "loss": 0.7105,
299
+ "step": 320
300
+ },
301
+ {
302
+ "epoch": 0.87,
303
+ "grad_norm": 4.001315593719482,
304
+ "learning_rate": 0.00035526315789473687,
305
+ "loss": 0.7194,
306
+ "step": 330
307
+ },
308
+ {
309
+ "epoch": 0.89,
310
+ "grad_norm": 4.703561782836914,
311
+ "learning_rate": 0.0003508771929824561,
312
+ "loss": 0.6602,
313
+ "step": 340
314
+ },
315
+ {
316
+ "epoch": 0.92,
317
+ "grad_norm": 3.3888790607452393,
318
+ "learning_rate": 0.00034649122807017547,
319
+ "loss": 0.7847,
320
+ "step": 350
321
+ },
322
+ {
323
+ "epoch": 0.92,
324
+ "eval_accuracy": 0.8122529644268774,
325
+ "eval_f1_macro": 0.7925111022998313,
326
+ "eval_f1_micro": 0.8122529644268774,
327
+ "eval_loss": 0.6225090622901917,
328
+ "eval_runtime": 4.3793,
329
+ "eval_samples_per_second": 346.634,
330
+ "eval_steps_per_second": 10.961,
331
+ "step": 350
332
+ },
333
+ {
334
+ "epoch": 0.95,
335
+ "grad_norm": 2.835796594619751,
336
+ "learning_rate": 0.00034210526315789477,
337
+ "loss": 0.5313,
338
+ "step": 360
339
+ },
340
+ {
341
+ "epoch": 0.97,
342
+ "grad_norm": 4.493590354919434,
343
+ "learning_rate": 0.000337719298245614,
344
+ "loss": 0.7641,
345
+ "step": 370
346
+ },
347
+ {
348
+ "epoch": 1.0,
349
+ "grad_norm": 3.594285726547241,
350
+ "learning_rate": 0.0003333333333333333,
351
+ "loss": 0.7089,
352
+ "step": 380
353
+ },
354
+ {
355
+ "epoch": 1.03,
356
+ "grad_norm": 5.686199188232422,
357
+ "learning_rate": 0.0003289473684210527,
358
+ "loss": 0.4651,
359
+ "step": 390
360
+ },
361
+ {
362
+ "epoch": 1.05,
363
+ "grad_norm": 3.385310173034668,
364
+ "learning_rate": 0.0003245614035087719,
365
+ "loss": 0.5258,
366
+ "step": 400
367
+ },
368
+ {
369
+ "epoch": 1.05,
370
+ "eval_accuracy": 0.8214756258234519,
371
+ "eval_f1_macro": 0.8000274267607091,
372
+ "eval_f1_micro": 0.8214756258234519,
373
+ "eval_loss": 0.665559709072113,
374
+ "eval_runtime": 4.3743,
375
+ "eval_samples_per_second": 347.024,
376
+ "eval_steps_per_second": 10.973,
377
+ "step": 400
378
+ },
379
+ {
380
+ "epoch": 1.08,
381
+ "grad_norm": 2.027313709259033,
382
+ "learning_rate": 0.00032017543859649123,
383
+ "loss": 0.4222,
384
+ "step": 410
385
+ },
386
+ {
387
+ "epoch": 1.11,
388
+ "grad_norm": 4.47696590423584,
389
+ "learning_rate": 0.00031578947368421053,
390
+ "loss": 0.3952,
391
+ "step": 420
392
+ },
393
+ {
394
+ "epoch": 1.13,
395
+ "grad_norm": 3.5233242511749268,
396
+ "learning_rate": 0.00031140350877192983,
397
+ "loss": 0.4377,
398
+ "step": 430
399
+ },
400
+ {
401
+ "epoch": 1.16,
402
+ "grad_norm": 6.409312725067139,
403
+ "learning_rate": 0.00030701754385964913,
404
+ "loss": 0.4199,
405
+ "step": 440
406
+ },
407
+ {
408
+ "epoch": 1.18,
409
+ "grad_norm": 5.276035308837891,
410
+ "learning_rate": 0.00030263157894736844,
411
+ "loss": 0.4945,
412
+ "step": 450
413
+ },
414
+ {
415
+ "epoch": 1.18,
416
+ "eval_accuracy": 0.8234519104084321,
417
+ "eval_f1_macro": 0.7982973998883867,
418
+ "eval_f1_micro": 0.8234519104084321,
419
+ "eval_loss": 0.6409708857536316,
420
+ "eval_runtime": 4.371,
421
+ "eval_samples_per_second": 347.286,
422
+ "eval_steps_per_second": 10.981,
423
+ "step": 450
424
+ },
425
+ {
426
+ "epoch": 1.21,
427
+ "grad_norm": 2.9710564613342285,
428
+ "learning_rate": 0.0002982456140350877,
429
+ "loss": 0.5226,
430
+ "step": 460
431
+ },
432
+ {
433
+ "epoch": 1.24,
434
+ "grad_norm": 2.901766777038574,
435
+ "learning_rate": 0.00029385964912280704,
436
+ "loss": 0.4043,
437
+ "step": 470
438
+ },
439
+ {
440
+ "epoch": 1.26,
441
+ "grad_norm": 3.143082857131958,
442
+ "learning_rate": 0.00028947368421052634,
443
+ "loss": 0.5251,
444
+ "step": 480
445
+ },
446
+ {
447
+ "epoch": 1.29,
448
+ "grad_norm": 3.326885461807251,
449
+ "learning_rate": 0.00028508771929824564,
450
+ "loss": 0.4457,
451
+ "step": 490
452
+ },
453
+ {
454
+ "epoch": 1.32,
455
+ "grad_norm": 3.5045642852783203,
456
+ "learning_rate": 0.0002807017543859649,
457
+ "loss": 0.4097,
458
+ "step": 500
459
+ },
460
+ {
461
+ "epoch": 1.32,
462
+ "eval_accuracy": 0.8346508563899868,
463
+ "eval_f1_macro": 0.8110031397771374,
464
+ "eval_f1_micro": 0.8346508563899868,
465
+ "eval_loss": 0.593744158744812,
466
+ "eval_runtime": 4.365,
467
+ "eval_samples_per_second": 347.766,
468
+ "eval_steps_per_second": 10.997,
469
+ "step": 500
470
+ },
471
+ {
472
+ "epoch": 1.34,
473
+ "grad_norm": 2.686901807785034,
474
+ "learning_rate": 0.00027631578947368425,
475
+ "loss": 0.4494,
476
+ "step": 510
477
+ },
478
+ {
479
+ "epoch": 1.37,
480
+ "grad_norm": 3.4056475162506104,
481
+ "learning_rate": 0.00027192982456140355,
482
+ "loss": 0.5187,
483
+ "step": 520
484
+ },
485
+ {
486
+ "epoch": 1.39,
487
+ "grad_norm": 3.7124712467193604,
488
+ "learning_rate": 0.0002675438596491228,
489
+ "loss": 0.514,
490
+ "step": 530
491
+ },
492
+ {
493
+ "epoch": 1.42,
494
+ "grad_norm": 2.042414665222168,
495
+ "learning_rate": 0.0002631578947368421,
496
+ "loss": 0.4129,
497
+ "step": 540
498
+ },
499
+ {
500
+ "epoch": 1.45,
501
+ "grad_norm": 2.4515717029571533,
502
+ "learning_rate": 0.00025877192982456146,
503
+ "loss": 0.4116,
504
+ "step": 550
505
+ },
506
+ {
507
+ "epoch": 1.45,
508
+ "eval_accuracy": 0.8313570487483531,
509
+ "eval_f1_macro": 0.8061050086131921,
510
+ "eval_f1_micro": 0.8313570487483531,
511
+ "eval_loss": 0.5965825319290161,
512
+ "eval_runtime": 4.3699,
513
+ "eval_samples_per_second": 347.379,
514
+ "eval_steps_per_second": 10.984,
515
+ "step": 550
516
+ },
517
+ {
518
+ "epoch": 1.47,
519
+ "grad_norm": 2.718313694000244,
520
+ "learning_rate": 0.0002543859649122807,
521
+ "loss": 0.4797,
522
+ "step": 560
523
+ },
524
+ {
525
+ "epoch": 1.5,
526
+ "grad_norm": 3.2749569416046143,
527
+ "learning_rate": 0.00025,
528
+ "loss": 0.4551,
529
+ "step": 570
530
+ },
531
+ {
532
+ "epoch": 1.53,
533
+ "grad_norm": 4.085339069366455,
534
+ "learning_rate": 0.0002456140350877193,
535
+ "loss": 0.4578,
536
+ "step": 580
537
+ },
538
+ {
539
+ "epoch": 1.55,
540
+ "grad_norm": 2.8316428661346436,
541
+ "learning_rate": 0.0002412280701754386,
542
+ "loss": 0.3912,
543
+ "step": 590
544
+ },
545
+ {
546
+ "epoch": 1.58,
547
+ "grad_norm": 4.069477081298828,
548
+ "learning_rate": 0.00023684210526315788,
549
+ "loss": 0.4785,
550
+ "step": 600
551
+ },
552
+ {
553
+ "epoch": 1.58,
554
+ "eval_accuracy": 0.8346508563899868,
555
+ "eval_f1_macro": 0.8106718201042379,
556
+ "eval_f1_micro": 0.8346508563899868,
557
+ "eval_loss": 0.569642186164856,
558
+ "eval_runtime": 4.3698,
559
+ "eval_samples_per_second": 347.382,
560
+ "eval_steps_per_second": 10.984,
561
+ "step": 600
562
+ },
563
+ {
564
+ "epoch": 1.61,
565
+ "grad_norm": 2.7763497829437256,
566
+ "learning_rate": 0.00023245614035087719,
567
+ "loss": 0.4104,
568
+ "step": 610
569
+ },
570
+ {
571
+ "epoch": 1.63,
572
+ "grad_norm": 4.348814964294434,
573
+ "learning_rate": 0.0002280701754385965,
574
+ "loss": 0.4651,
575
+ "step": 620
576
+ },
577
+ {
578
+ "epoch": 1.66,
579
+ "grad_norm": 5.929917335510254,
580
+ "learning_rate": 0.0002236842105263158,
581
+ "loss": 0.4392,
582
+ "step": 630
583
+ },
584
+ {
585
+ "epoch": 1.68,
586
+ "grad_norm": 3.6419291496276855,
587
+ "learning_rate": 0.0002192982456140351,
588
+ "loss": 0.5543,
589
+ "step": 640
590
+ },
591
+ {
592
+ "epoch": 1.71,
593
+ "grad_norm": 3.2523300647735596,
594
+ "learning_rate": 0.0002149122807017544,
595
+ "loss": 0.4821,
596
+ "step": 650
597
+ },
598
+ {
599
+ "epoch": 1.71,
600
+ "eval_accuracy": 0.836627140974967,
601
+ "eval_f1_macro": 0.8097882964707892,
602
+ "eval_f1_micro": 0.836627140974967,
603
+ "eval_loss": 0.5536289811134338,
604
+ "eval_runtime": 4.3709,
605
+ "eval_samples_per_second": 347.301,
606
+ "eval_steps_per_second": 10.982,
607
+ "step": 650
608
+ },
609
+ {
610
+ "epoch": 1.74,
611
+ "grad_norm": 1.602184534072876,
612
+ "learning_rate": 0.00021052631578947367,
613
+ "loss": 0.3797,
614
+ "step": 660
615
+ },
616
+ {
617
+ "epoch": 1.76,
618
+ "grad_norm": 4.410408020019531,
619
+ "learning_rate": 0.000206140350877193,
620
+ "loss": 0.4086,
621
+ "step": 670
622
+ },
623
+ {
624
+ "epoch": 1.79,
625
+ "grad_norm": 3.752227783203125,
626
+ "learning_rate": 0.00020175438596491227,
627
+ "loss": 0.5254,
628
+ "step": 680
629
+ },
630
+ {
631
+ "epoch": 1.82,
632
+ "grad_norm": 4.433966159820557,
633
+ "learning_rate": 0.00019736842105263157,
634
+ "loss": 0.4407,
635
+ "step": 690
636
+ },
637
+ {
638
+ "epoch": 1.84,
639
+ "grad_norm": 4.68394660949707,
640
+ "learning_rate": 0.00019298245614035088,
641
+ "loss": 0.4137,
642
+ "step": 700
643
+ },
644
+ {
645
+ "epoch": 1.84,
646
+ "eval_accuracy": 0.8372859025032938,
647
+ "eval_f1_macro": 0.8116236824672852,
648
+ "eval_f1_micro": 0.8372859025032938,
649
+ "eval_loss": 0.5611599087715149,
650
+ "eval_runtime": 4.3689,
651
+ "eval_samples_per_second": 347.459,
652
+ "eval_steps_per_second": 10.987,
653
+ "step": 700
654
+ },
655
+ {
656
+ "epoch": 1.87,
657
+ "grad_norm": 5.52586030960083,
658
+ "learning_rate": 0.00018859649122807018,
659
+ "loss": 0.382,
660
+ "step": 710
661
+ },
662
+ {
663
+ "epoch": 1.89,
664
+ "grad_norm": 3.272871732711792,
665
+ "learning_rate": 0.00018421052631578948,
666
+ "loss": 0.3665,
667
+ "step": 720
668
+ },
669
+ {
670
+ "epoch": 1.92,
671
+ "grad_norm": 3.226039171218872,
672
+ "learning_rate": 0.00017982456140350878,
673
+ "loss": 0.4256,
674
+ "step": 730
675
+ },
676
+ {
677
+ "epoch": 1.95,
678
+ "grad_norm": 2.4743871688842773,
679
+ "learning_rate": 0.00017543859649122806,
680
+ "loss": 0.4211,
681
+ "step": 740
682
+ },
683
+ {
684
+ "epoch": 1.97,
685
+ "grad_norm": 3.1402978897094727,
686
+ "learning_rate": 0.00017105263157894739,
687
+ "loss": 0.4623,
688
+ "step": 750
689
+ },
690
+ {
691
+ "epoch": 1.97,
692
+ "eval_accuracy": 0.8412384716732543,
693
+ "eval_f1_macro": 0.8142445361734595,
694
+ "eval_f1_micro": 0.8412384716732543,
695
+ "eval_loss": 0.5448094010353088,
696
+ "eval_runtime": 4.3716,
697
+ "eval_samples_per_second": 347.244,
698
+ "eval_steps_per_second": 10.98,
699
+ "step": 750
700
+ }
701
+ ],
702
+ "logging_steps": 10,
703
+ "max_steps": 1140,
704
+ "num_input_tokens_seen": 0,
705
+ "num_train_epochs": 3,
706
+ "save_steps": 50,
707
+ "total_flos": 3664959032524800.0,
708
+ "train_batch_size": 16,
709
+ "trial_name": null,
710
+ "trial_params": null
711
+ }
google/flan_t5_base_amazon/checkpoint-750/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98b958a6b782601339a0e8d2ce2a2580aff5af307653d8e7e2c3da10fefbebd9
3
+ size 5048
google/flan_t5_base_amazon/config.json ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/flan-t5-base",
3
+ "architectures": [
4
+ "T5ForSequenceClassification"
5
+ ],
6
+ "classifier_dropout": 0.0,
7
+ "d_ff": 2048,
8
+ "d_kv": 64,
9
+ "d_model": 768,
10
+ "decoder_start_token_id": 0,
11
+ "dense_act_fn": "gelu_new",
12
+ "dropout_rate": 0.1,
13
+ "eos_token_id": 1,
14
+ "feed_forward_proj": "gated-gelu",
15
+ "finetuning_task": "text-classification",
16
+ "id2label": {
17
+ "0": "0",
18
+ "1": "1",
19
+ "2": "10",
20
+ "3": "11",
21
+ "4": "12",
22
+ "5": "13",
23
+ "6": "14",
24
+ "7": "15",
25
+ "8": "16",
26
+ "9": "17",
27
+ "10": "18",
28
+ "11": "19",
29
+ "12": "2",
30
+ "13": "20",
31
+ "14": "21",
32
+ "15": "22",
33
+ "16": "3",
34
+ "17": "4",
35
+ "18": "5",
36
+ "19": "6",
37
+ "20": "7",
38
+ "21": "8",
39
+ "22": "9"
40
+ },
41
+ "initializer_factor": 1.0,
42
+ "is_encoder_decoder": true,
43
+ "is_gated_act": true,
44
+ "label2id": {
45
+ "0": 0,
46
+ "1": 1,
47
+ "10": 2,
48
+ "11": 3,
49
+ "12": 4,
50
+ "13": 5,
51
+ "14": 6,
52
+ "15": 7,
53
+ "16": 8,
54
+ "17": 9,
55
+ "18": 10,
56
+ "19": 11,
57
+ "2": 12,
58
+ "20": 13,
59
+ "21": 14,
60
+ "22": 15,
61
+ "3": 16,
62
+ "4": 17,
63
+ "5": 18,
64
+ "6": 19,
65
+ "7": 20,
66
+ "8": 21,
67
+ "9": 22
68
+ },
69
+ "layer_norm_epsilon": 1e-06,
70
+ "model_type": "t5",
71
+ "n_positions": 512,
72
+ "num_decoder_layers": 12,
73
+ "num_heads": 12,
74
+ "num_layers": 12,
75
+ "output_past": true,
76
+ "pad_token_id": 0,
77
+ "problem_type": "single_label_classification",
78
+ "relative_attention_max_distance": 128,
79
+ "relative_attention_num_buckets": 32,
80
+ "task_specific_params": {
81
+ "summarization": {
82
+ "early_stopping": true,
83
+ "length_penalty": 2.0,
84
+ "max_length": 200,
85
+ "min_length": 30,
86
+ "no_repeat_ngram_size": 3,
87
+ "num_beams": 4,
88
+ "prefix": "summarize: "
89
+ },
90
+ "translation_en_to_de": {
91
+ "early_stopping": true,
92
+ "max_length": 300,
93
+ "num_beams": 4,
94
+ "prefix": "translate English to German: "
95
+ },
96
+ "translation_en_to_fr": {
97
+ "early_stopping": true,
98
+ "max_length": 300,
99
+ "num_beams": 4,
100
+ "prefix": "translate English to French: "
101
+ },
102
+ "translation_en_to_ro": {
103
+ "early_stopping": true,
104
+ "max_length": 300,
105
+ "num_beams": 4,
106
+ "prefix": "translate English to Romanian: "
107
+ }
108
+ },
109
+ "tie_word_embeddings": false,
110
+ "torch_dtype": "float32",
111
+ "transformers_version": "4.39.0.dev0",
112
+ "use_cache": true,
113
+ "vocab_size": 32128
114
+ }
google/flan_t5_base_amazon/eval_results.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "eval_accuracy": 0.8412384716732543,
4
+ "eval_f1_macro": 0.8142445361734595,
5
+ "eval_f1_micro": 0.8412384716732543,
6
+ "eval_loss": 0.5448094010353088,
7
+ "eval_runtime": 4.3522,
8
+ "eval_samples": 1518,
9
+ "eval_samples_per_second": 348.786,
10
+ "eval_steps_per_second": 11.029
11
+ }
google/flan_t5_base_amazon/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cdf12b0177b17b6c82d42f25a5f17f318b90eccd70379adbc953121e7f3287cc
3
+ size 894084644
google/flan_t5_base_amazon/run.log ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ 03/15/2024 10:58:19 - WARNING - __main__ - Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: True, 16-bits training: False
2
+ 03/15/2024 10:58:19 - WARNING - __main__ - Process rank: 1, device: cuda:1, n_gpu: 1, distributed training: True, 16-bits training: False
3
+ 03/15/2024 10:58:22 - WARNING - __main__ - The label2id key in the model config.json is not equal to the label2id key of this run. You can ignore this if you are doing finetuning.
4
+ 03/15/2024 10:58:23 - WARNING - __main__ - The label2id key in the model config.json is not equal to the label2id key of this run. You can ignore this if you are doing finetuning.
google/flan_t5_base_amazon/special_tokens_map.json ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>",
99
+ "<extra_id_96>",
100
+ "<extra_id_97>",
101
+ "<extra_id_98>",
102
+ "<extra_id_99>"
103
+ ],
104
+ "eos_token": {
105
+ "content": "</s>",
106
+ "lstrip": false,
107
+ "normalized": false,
108
+ "rstrip": false,
109
+ "single_word": false
110
+ },
111
+ "pad_token": {
112
+ "content": "<pad>",
113
+ "lstrip": false,
114
+ "normalized": false,
115
+ "rstrip": false,
116
+ "single_word": false
117
+ },
118
+ "unk_token": {
119
+ "content": "<unk>",
120
+ "lstrip": false,
121
+ "normalized": false,
122
+ "rstrip": false,
123
+ "single_word": false
124
+ }
125
+ }
google/flan_t5_base_amazon/spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
3
+ size 791656
google/flan_t5_base_amazon/test_results.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "test_accuracy": 0.8636363636363636,
4
+ "test_f1_macro": 0.8457818676414356,
5
+ "test_f1_micro": 0.8636363636363636,
6
+ "test_loss": 0.5014536380767822,
7
+ "test_runtime": 4.3576,
8
+ "test_samples_per_second": 348.358,
9
+ "test_steps_per_second": 11.015
10
+ }
google/flan_t5_base_amazon/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
google/flan_t5_base_amazon/tokenizer_config.json ADDED
@@ -0,0 +1,938 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<pad>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "</s>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "<unk>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "32000": {
28
+ "content": "<extra_id_99>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "32001": {
36
+ "content": "<extra_id_98>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "32002": {
44
+ "content": "<extra_id_97>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "32003": {
52
+ "content": "<extra_id_96>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "32004": {
60
+ "content": "<extra_id_95>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "32005": {
68
+ "content": "<extra_id_94>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "32006": {
76
+ "content": "<extra_id_93>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "32007": {
84
+ "content": "<extra_id_92>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "32008": {
92
+ "content": "<extra_id_91>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "32009": {
100
+ "content": "<extra_id_90>",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "32010": {
108
+ "content": "<extra_id_89>",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "32011": {
116
+ "content": "<extra_id_88>",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "32012": {
124
+ "content": "<extra_id_87>",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "32013": {
132
+ "content": "<extra_id_86>",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": true
138
+ },
139
+ "32014": {
140
+ "content": "<extra_id_85>",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": true
146
+ },
147
+ "32015": {
148
+ "content": "<extra_id_84>",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": true
154
+ },
155
+ "32016": {
156
+ "content": "<extra_id_83>",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "32017": {
164
+ "content": "<extra_id_82>",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": true
170
+ },
171
+ "32018": {
172
+ "content": "<extra_id_81>",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": true
178
+ },
179
+ "32019": {
180
+ "content": "<extra_id_80>",
181
+ "lstrip": false,
182
+ "normalized": false,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": true
186
+ },
187
+ "32020": {
188
+ "content": "<extra_id_79>",
189
+ "lstrip": false,
190
+ "normalized": false,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": true
194
+ },
195
+ "32021": {
196
+ "content": "<extra_id_78>",
197
+ "lstrip": false,
198
+ "normalized": false,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": true
202
+ },
203
+ "32022": {
204
+ "content": "<extra_id_77>",
205
+ "lstrip": false,
206
+ "normalized": false,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": true
210
+ },
211
+ "32023": {
212
+ "content": "<extra_id_76>",
213
+ "lstrip": false,
214
+ "normalized": false,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": true
218
+ },
219
+ "32024": {
220
+ "content": "<extra_id_75>",
221
+ "lstrip": false,
222
+ "normalized": false,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": true
226
+ },
227
+ "32025": {
228
+ "content": "<extra_id_74>",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": true
234
+ },
235
+ "32026": {
236
+ "content": "<extra_id_73>",
237
+ "lstrip": false,
238
+ "normalized": false,
239
+ "rstrip": false,
240
+ "single_word": false,
241
+ "special": true
242
+ },
243
+ "32027": {
244
+ "content": "<extra_id_72>",
245
+ "lstrip": false,
246
+ "normalized": false,
247
+ "rstrip": false,
248
+ "single_word": false,
249
+ "special": true
250
+ },
251
+ "32028": {
252
+ "content": "<extra_id_71>",
253
+ "lstrip": false,
254
+ "normalized": false,
255
+ "rstrip": false,
256
+ "single_word": false,
257
+ "special": true
258
+ },
259
+ "32029": {
260
+ "content": "<extra_id_70>",
261
+ "lstrip": false,
262
+ "normalized": false,
263
+ "rstrip": false,
264
+ "single_word": false,
265
+ "special": true
266
+ },
267
+ "32030": {
268
+ "content": "<extra_id_69>",
269
+ "lstrip": false,
270
+ "normalized": false,
271
+ "rstrip": false,
272
+ "single_word": false,
273
+ "special": true
274
+ },
275
+ "32031": {
276
+ "content": "<extra_id_68>",
277
+ "lstrip": false,
278
+ "normalized": false,
279
+ "rstrip": false,
280
+ "single_word": false,
281
+ "special": true
282
+ },
283
+ "32032": {
284
+ "content": "<extra_id_67>",
285
+ "lstrip": false,
286
+ "normalized": false,
287
+ "rstrip": false,
288
+ "single_word": false,
289
+ "special": true
290
+ },
291
+ "32033": {
292
+ "content": "<extra_id_66>",
293
+ "lstrip": false,
294
+ "normalized": false,
295
+ "rstrip": false,
296
+ "single_word": false,
297
+ "special": true
298
+ },
299
+ "32034": {
300
+ "content": "<extra_id_65>",
301
+ "lstrip": false,
302
+ "normalized": false,
303
+ "rstrip": false,
304
+ "single_word": false,
305
+ "special": true
306
+ },
307
+ "32035": {
308
+ "content": "<extra_id_64>",
309
+ "lstrip": false,
310
+ "normalized": false,
311
+ "rstrip": false,
312
+ "single_word": false,
313
+ "special": true
314
+ },
315
+ "32036": {
316
+ "content": "<extra_id_63>",
317
+ "lstrip": false,
318
+ "normalized": false,
319
+ "rstrip": false,
320
+ "single_word": false,
321
+ "special": true
322
+ },
323
+ "32037": {
324
+ "content": "<extra_id_62>",
325
+ "lstrip": false,
326
+ "normalized": false,
327
+ "rstrip": false,
328
+ "single_word": false,
329
+ "special": true
330
+ },
331
+ "32038": {
332
+ "content": "<extra_id_61>",
333
+ "lstrip": false,
334
+ "normalized": false,
335
+ "rstrip": false,
336
+ "single_word": false,
337
+ "special": true
338
+ },
339
+ "32039": {
340
+ "content": "<extra_id_60>",
341
+ "lstrip": false,
342
+ "normalized": false,
343
+ "rstrip": false,
344
+ "single_word": false,
345
+ "special": true
346
+ },
347
+ "32040": {
348
+ "content": "<extra_id_59>",
349
+ "lstrip": false,
350
+ "normalized": false,
351
+ "rstrip": false,
352
+ "single_word": false,
353
+ "special": true
354
+ },
355
+ "32041": {
356
+ "content": "<extra_id_58>",
357
+ "lstrip": false,
358
+ "normalized": false,
359
+ "rstrip": false,
360
+ "single_word": false,
361
+ "special": true
362
+ },
363
+ "32042": {
364
+ "content": "<extra_id_57>",
365
+ "lstrip": false,
366
+ "normalized": false,
367
+ "rstrip": false,
368
+ "single_word": false,
369
+ "special": true
370
+ },
371
+ "32043": {
372
+ "content": "<extra_id_56>",
373
+ "lstrip": false,
374
+ "normalized": false,
375
+ "rstrip": false,
376
+ "single_word": false,
377
+ "special": true
378
+ },
379
+ "32044": {
380
+ "content": "<extra_id_55>",
381
+ "lstrip": false,
382
+ "normalized": false,
383
+ "rstrip": false,
384
+ "single_word": false,
385
+ "special": true
386
+ },
387
+ "32045": {
388
+ "content": "<extra_id_54>",
389
+ "lstrip": false,
390
+ "normalized": false,
391
+ "rstrip": false,
392
+ "single_word": false,
393
+ "special": true
394
+ },
395
+ "32046": {
396
+ "content": "<extra_id_53>",
397
+ "lstrip": false,
398
+ "normalized": false,
399
+ "rstrip": false,
400
+ "single_word": false,
401
+ "special": true
402
+ },
403
+ "32047": {
404
+ "content": "<extra_id_52>",
405
+ "lstrip": false,
406
+ "normalized": false,
407
+ "rstrip": false,
408
+ "single_word": false,
409
+ "special": true
410
+ },
411
+ "32048": {
412
+ "content": "<extra_id_51>",
413
+ "lstrip": false,
414
+ "normalized": false,
415
+ "rstrip": false,
416
+ "single_word": false,
417
+ "special": true
418
+ },
419
+ "32049": {
420
+ "content": "<extra_id_50>",
421
+ "lstrip": false,
422
+ "normalized": false,
423
+ "rstrip": false,
424
+ "single_word": false,
425
+ "special": true
426
+ },
427
+ "32050": {
428
+ "content": "<extra_id_49>",
429
+ "lstrip": false,
430
+ "normalized": false,
431
+ "rstrip": false,
432
+ "single_word": false,
433
+ "special": true
434
+ },
435
+ "32051": {
436
+ "content": "<extra_id_48>",
437
+ "lstrip": false,
438
+ "normalized": false,
439
+ "rstrip": false,
440
+ "single_word": false,
441
+ "special": true
442
+ },
443
+ "32052": {
444
+ "content": "<extra_id_47>",
445
+ "lstrip": false,
446
+ "normalized": false,
447
+ "rstrip": false,
448
+ "single_word": false,
449
+ "special": true
450
+ },
451
+ "32053": {
452
+ "content": "<extra_id_46>",
453
+ "lstrip": false,
454
+ "normalized": false,
455
+ "rstrip": false,
456
+ "single_word": false,
457
+ "special": true
458
+ },
459
+ "32054": {
460
+ "content": "<extra_id_45>",
461
+ "lstrip": false,
462
+ "normalized": false,
463
+ "rstrip": false,
464
+ "single_word": false,
465
+ "special": true
466
+ },
467
+ "32055": {
468
+ "content": "<extra_id_44>",
469
+ "lstrip": false,
470
+ "normalized": false,
471
+ "rstrip": false,
472
+ "single_word": false,
473
+ "special": true
474
+ },
475
+ "32056": {
476
+ "content": "<extra_id_43>",
477
+ "lstrip": false,
478
+ "normalized": false,
479
+ "rstrip": false,
480
+ "single_word": false,
481
+ "special": true
482
+ },
483
+ "32057": {
484
+ "content": "<extra_id_42>",
485
+ "lstrip": false,
486
+ "normalized": false,
487
+ "rstrip": false,
488
+ "single_word": false,
489
+ "special": true
490
+ },
491
+ "32058": {
492
+ "content": "<extra_id_41>",
493
+ "lstrip": false,
494
+ "normalized": false,
495
+ "rstrip": false,
496
+ "single_word": false,
497
+ "special": true
498
+ },
499
+ "32059": {
500
+ "content": "<extra_id_40>",
501
+ "lstrip": false,
502
+ "normalized": false,
503
+ "rstrip": false,
504
+ "single_word": false,
505
+ "special": true
506
+ },
507
+ "32060": {
508
+ "content": "<extra_id_39>",
509
+ "lstrip": false,
510
+ "normalized": false,
511
+ "rstrip": false,
512
+ "single_word": false,
513
+ "special": true
514
+ },
515
+ "32061": {
516
+ "content": "<extra_id_38>",
517
+ "lstrip": false,
518
+ "normalized": false,
519
+ "rstrip": false,
520
+ "single_word": false,
521
+ "special": true
522
+ },
523
+ "32062": {
524
+ "content": "<extra_id_37>",
525
+ "lstrip": false,
526
+ "normalized": false,
527
+ "rstrip": false,
528
+ "single_word": false,
529
+ "special": true
530
+ },
531
+ "32063": {
532
+ "content": "<extra_id_36>",
533
+ "lstrip": false,
534
+ "normalized": false,
535
+ "rstrip": false,
536
+ "single_word": false,
537
+ "special": true
538
+ },
539
+ "32064": {
540
+ "content": "<extra_id_35>",
541
+ "lstrip": false,
542
+ "normalized": false,
543
+ "rstrip": false,
544
+ "single_word": false,
545
+ "special": true
546
+ },
547
+ "32065": {
548
+ "content": "<extra_id_34>",
549
+ "lstrip": false,
550
+ "normalized": false,
551
+ "rstrip": false,
552
+ "single_word": false,
553
+ "special": true
554
+ },
555
+ "32066": {
556
+ "content": "<extra_id_33>",
557
+ "lstrip": false,
558
+ "normalized": false,
559
+ "rstrip": false,
560
+ "single_word": false,
561
+ "special": true
562
+ },
563
+ "32067": {
564
+ "content": "<extra_id_32>",
565
+ "lstrip": false,
566
+ "normalized": false,
567
+ "rstrip": false,
568
+ "single_word": false,
569
+ "special": true
570
+ },
571
+ "32068": {
572
+ "content": "<extra_id_31>",
573
+ "lstrip": false,
574
+ "normalized": false,
575
+ "rstrip": false,
576
+ "single_word": false,
577
+ "special": true
578
+ },
579
+ "32069": {
580
+ "content": "<extra_id_30>",
581
+ "lstrip": false,
582
+ "normalized": false,
583
+ "rstrip": false,
584
+ "single_word": false,
585
+ "special": true
586
+ },
587
+ "32070": {
588
+ "content": "<extra_id_29>",
589
+ "lstrip": false,
590
+ "normalized": false,
591
+ "rstrip": false,
592
+ "single_word": false,
593
+ "special": true
594
+ },
595
+ "32071": {
596
+ "content": "<extra_id_28>",
597
+ "lstrip": false,
598
+ "normalized": false,
599
+ "rstrip": false,
600
+ "single_word": false,
601
+ "special": true
602
+ },
603
+ "32072": {
604
+ "content": "<extra_id_27>",
605
+ "lstrip": false,
606
+ "normalized": false,
607
+ "rstrip": false,
608
+ "single_word": false,
609
+ "special": true
610
+ },
611
+ "32073": {
612
+ "content": "<extra_id_26>",
613
+ "lstrip": false,
614
+ "normalized": false,
615
+ "rstrip": false,
616
+ "single_word": false,
617
+ "special": true
618
+ },
619
+ "32074": {
620
+ "content": "<extra_id_25>",
621
+ "lstrip": false,
622
+ "normalized": false,
623
+ "rstrip": false,
624
+ "single_word": false,
625
+ "special": true
626
+ },
627
+ "32075": {
628
+ "content": "<extra_id_24>",
629
+ "lstrip": false,
630
+ "normalized": false,
631
+ "rstrip": false,
632
+ "single_word": false,
633
+ "special": true
634
+ },
635
+ "32076": {
636
+ "content": "<extra_id_23>",
637
+ "lstrip": false,
638
+ "normalized": false,
639
+ "rstrip": false,
640
+ "single_word": false,
641
+ "special": true
642
+ },
643
+ "32077": {
644
+ "content": "<extra_id_22>",
645
+ "lstrip": false,
646
+ "normalized": false,
647
+ "rstrip": false,
648
+ "single_word": false,
649
+ "special": true
650
+ },
651
+ "32078": {
652
+ "content": "<extra_id_21>",
653
+ "lstrip": false,
654
+ "normalized": false,
655
+ "rstrip": false,
656
+ "single_word": false,
657
+ "special": true
658
+ },
659
+ "32079": {
660
+ "content": "<extra_id_20>",
661
+ "lstrip": false,
662
+ "normalized": false,
663
+ "rstrip": false,
664
+ "single_word": false,
665
+ "special": true
666
+ },
667
+ "32080": {
668
+ "content": "<extra_id_19>",
669
+ "lstrip": false,
670
+ "normalized": false,
671
+ "rstrip": false,
672
+ "single_word": false,
673
+ "special": true
674
+ },
675
+ "32081": {
676
+ "content": "<extra_id_18>",
677
+ "lstrip": false,
678
+ "normalized": false,
679
+ "rstrip": false,
680
+ "single_word": false,
681
+ "special": true
682
+ },
683
+ "32082": {
684
+ "content": "<extra_id_17>",
685
+ "lstrip": false,
686
+ "normalized": false,
687
+ "rstrip": false,
688
+ "single_word": false,
689
+ "special": true
690
+ },
691
+ "32083": {
692
+ "content": "<extra_id_16>",
693
+ "lstrip": false,
694
+ "normalized": false,
695
+ "rstrip": false,
696
+ "single_word": false,
697
+ "special": true
698
+ },
699
+ "32084": {
700
+ "content": "<extra_id_15>",
701
+ "lstrip": false,
702
+ "normalized": false,
703
+ "rstrip": false,
704
+ "single_word": false,
705
+ "special": true
706
+ },
707
+ "32085": {
708
+ "content": "<extra_id_14>",
709
+ "lstrip": false,
710
+ "normalized": false,
711
+ "rstrip": false,
712
+ "single_word": false,
713
+ "special": true
714
+ },
715
+ "32086": {
716
+ "content": "<extra_id_13>",
717
+ "lstrip": false,
718
+ "normalized": false,
719
+ "rstrip": false,
720
+ "single_word": false,
721
+ "special": true
722
+ },
723
+ "32087": {
724
+ "content": "<extra_id_12>",
725
+ "lstrip": false,
726
+ "normalized": false,
727
+ "rstrip": false,
728
+ "single_word": false,
729
+ "special": true
730
+ },
731
+ "32088": {
732
+ "content": "<extra_id_11>",
733
+ "lstrip": false,
734
+ "normalized": false,
735
+ "rstrip": false,
736
+ "single_word": false,
737
+ "special": true
738
+ },
739
+ "32089": {
740
+ "content": "<extra_id_10>",
741
+ "lstrip": false,
742
+ "normalized": false,
743
+ "rstrip": false,
744
+ "single_word": false,
745
+ "special": true
746
+ },
747
+ "32090": {
748
+ "content": "<extra_id_9>",
749
+ "lstrip": false,
750
+ "normalized": false,
751
+ "rstrip": false,
752
+ "single_word": false,
753
+ "special": true
754
+ },
755
+ "32091": {
756
+ "content": "<extra_id_8>",
757
+ "lstrip": false,
758
+ "normalized": false,
759
+ "rstrip": false,
760
+ "single_word": false,
761
+ "special": true
762
+ },
763
+ "32092": {
764
+ "content": "<extra_id_7>",
765
+ "lstrip": false,
766
+ "normalized": false,
767
+ "rstrip": false,
768
+ "single_word": false,
769
+ "special": true
770
+ },
771
+ "32093": {
772
+ "content": "<extra_id_6>",
773
+ "lstrip": false,
774
+ "normalized": false,
775
+ "rstrip": false,
776
+ "single_word": false,
777
+ "special": true
778
+ },
779
+ "32094": {
780
+ "content": "<extra_id_5>",
781
+ "lstrip": false,
782
+ "normalized": false,
783
+ "rstrip": false,
784
+ "single_word": false,
785
+ "special": true
786
+ },
787
+ "32095": {
788
+ "content": "<extra_id_4>",
789
+ "lstrip": false,
790
+ "normalized": false,
791
+ "rstrip": false,
792
+ "single_word": false,
793
+ "special": true
794
+ },
795
+ "32096": {
796
+ "content": "<extra_id_3>",
797
+ "lstrip": false,
798
+ "normalized": false,
799
+ "rstrip": false,
800
+ "single_word": false,
801
+ "special": true
802
+ },
803
+ "32097": {
804
+ "content": "<extra_id_2>",
805
+ "lstrip": false,
806
+ "normalized": false,
807
+ "rstrip": false,
808
+ "single_word": false,
809
+ "special": true
810
+ },
811
+ "32098": {
812
+ "content": "<extra_id_1>",
813
+ "lstrip": false,
814
+ "normalized": false,
815
+ "rstrip": false,
816
+ "single_word": false,
817
+ "special": true
818
+ },
819
+ "32099": {
820
+ "content": "<extra_id_0>",
821
+ "lstrip": false,
822
+ "normalized": false,
823
+ "rstrip": false,
824
+ "single_word": false,
825
+ "special": true
826
+ }
827
+ },
828
+ "additional_special_tokens": [
829
+ "<extra_id_0>",
830
+ "<extra_id_1>",
831
+ "<extra_id_2>",
832
+ "<extra_id_3>",
833
+ "<extra_id_4>",
834
+ "<extra_id_5>",
835
+ "<extra_id_6>",
836
+ "<extra_id_7>",
837
+ "<extra_id_8>",
838
+ "<extra_id_9>",
839
+ "<extra_id_10>",
840
+ "<extra_id_11>",
841
+ "<extra_id_12>",
842
+ "<extra_id_13>",
843
+ "<extra_id_14>",
844
+ "<extra_id_15>",
845
+ "<extra_id_16>",
846
+ "<extra_id_17>",
847
+ "<extra_id_18>",
848
+ "<extra_id_19>",
849
+ "<extra_id_20>",
850
+ "<extra_id_21>",
851
+ "<extra_id_22>",
852
+ "<extra_id_23>",
853
+ "<extra_id_24>",
854
+ "<extra_id_25>",
855
+ "<extra_id_26>",
856
+ "<extra_id_27>",
857
+ "<extra_id_28>",
858
+ "<extra_id_29>",
859
+ "<extra_id_30>",
860
+ "<extra_id_31>",
861
+ "<extra_id_32>",
862
+ "<extra_id_33>",
863
+ "<extra_id_34>",
864
+ "<extra_id_35>",
865
+ "<extra_id_36>",
866
+ "<extra_id_37>",
867
+ "<extra_id_38>",
868
+ "<extra_id_39>",
869
+ "<extra_id_40>",
870
+ "<extra_id_41>",
871
+ "<extra_id_42>",
872
+ "<extra_id_43>",
873
+ "<extra_id_44>",
874
+ "<extra_id_45>",
875
+ "<extra_id_46>",
876
+ "<extra_id_47>",
877
+ "<extra_id_48>",
878
+ "<extra_id_49>",
879
+ "<extra_id_50>",
880
+ "<extra_id_51>",
881
+ "<extra_id_52>",
882
+ "<extra_id_53>",
883
+ "<extra_id_54>",
884
+ "<extra_id_55>",
885
+ "<extra_id_56>",
886
+ "<extra_id_57>",
887
+ "<extra_id_58>",
888
+ "<extra_id_59>",
889
+ "<extra_id_60>",
890
+ "<extra_id_61>",
891
+ "<extra_id_62>",
892
+ "<extra_id_63>",
893
+ "<extra_id_64>",
894
+ "<extra_id_65>",
895
+ "<extra_id_66>",
896
+ "<extra_id_67>",
897
+ "<extra_id_68>",
898
+ "<extra_id_69>",
899
+ "<extra_id_70>",
900
+ "<extra_id_71>",
901
+ "<extra_id_72>",
902
+ "<extra_id_73>",
903
+ "<extra_id_74>",
904
+ "<extra_id_75>",
905
+ "<extra_id_76>",
906
+ "<extra_id_77>",
907
+ "<extra_id_78>",
908
+ "<extra_id_79>",
909
+ "<extra_id_80>",
910
+ "<extra_id_81>",
911
+ "<extra_id_82>",
912
+ "<extra_id_83>",
913
+ "<extra_id_84>",
914
+ "<extra_id_85>",
915
+ "<extra_id_86>",
916
+ "<extra_id_87>",
917
+ "<extra_id_88>",
918
+ "<extra_id_89>",
919
+ "<extra_id_90>",
920
+ "<extra_id_91>",
921
+ "<extra_id_92>",
922
+ "<extra_id_93>",
923
+ "<extra_id_94>",
924
+ "<extra_id_95>",
925
+ "<extra_id_96>",
926
+ "<extra_id_97>",
927
+ "<extra_id_98>",
928
+ "<extra_id_99>"
929
+ ],
930
+ "clean_up_tokenization_spaces": true,
931
+ "eos_token": "</s>",
932
+ "extra_ids": 100,
933
+ "model_max_length": 512,
934
+ "pad_token": "<pad>",
935
+ "sp_model_kwargs": {},
936
+ "tokenizer_class": "T5Tokenizer",
937
+ "unk_token": "<unk>"
938
+ }
google/flan_t5_base_amazon/train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "train_loss": 0.5365497335007317,
4
+ "train_runtime": 501.2901,
5
+ "train_samples": 12144,
6
+ "train_samples_per_second": 72.676,
7
+ "train_steps_per_second": 2.274
8
+ }
google/flan_t5_base_amazon/trainer_state.json ADDED
@@ -0,0 +1,1070 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.5448094010353088,
3
+ "best_model_checkpoint": "../../experiments_checkpoints/MAdAiLab/google/flan_t5_base_amazon/checkpoint-750",
4
+ "epoch": 3.0,
5
+ "eval_steps": 50,
6
+ "global_step": 1140,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.03,
13
+ "grad_norm": 2.0010387897491455,
14
+ "learning_rate": 0.0004956140350877193,
15
+ "loss": 3.0662,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.05,
20
+ "grad_norm": 3.5069518089294434,
21
+ "learning_rate": 0.0004912280701754386,
22
+ "loss": 2.6518,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.08,
27
+ "grad_norm": 3.5161848068237305,
28
+ "learning_rate": 0.0004868421052631579,
29
+ "loss": 1.6574,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.11,
34
+ "grad_norm": 4.639632701873779,
35
+ "learning_rate": 0.0004824561403508772,
36
+ "loss": 1.121,
37
+ "step": 40
38
+ },
39
+ {
40
+ "epoch": 0.13,
41
+ "grad_norm": 4.880507469177246,
42
+ "learning_rate": 0.00047807017543859647,
43
+ "loss": 1.1669,
44
+ "step": 50
45
+ },
46
+ {
47
+ "epoch": 0.13,
48
+ "eval_accuracy": 0.7404479578392622,
49
+ "eval_f1_macro": 0.691578760708662,
50
+ "eval_f1_micro": 0.7404479578392622,
51
+ "eval_loss": 0.9141963124275208,
52
+ "eval_runtime": 4.3214,
53
+ "eval_samples_per_second": 351.273,
54
+ "eval_steps_per_second": 11.107,
55
+ "step": 50
56
+ },
57
+ {
58
+ "epoch": 0.16,
59
+ "grad_norm": 5.10620641708374,
60
+ "learning_rate": 0.00047368421052631577,
61
+ "loss": 0.9721,
62
+ "step": 60
63
+ },
64
+ {
65
+ "epoch": 0.18,
66
+ "grad_norm": 3.84094500541687,
67
+ "learning_rate": 0.0004692982456140351,
68
+ "loss": 0.9353,
69
+ "step": 70
70
+ },
71
+ {
72
+ "epoch": 0.21,
73
+ "grad_norm": 4.975834846496582,
74
+ "learning_rate": 0.00046491228070175437,
75
+ "loss": 0.9117,
76
+ "step": 80
77
+ },
78
+ {
79
+ "epoch": 0.24,
80
+ "grad_norm": 4.519538402557373,
81
+ "learning_rate": 0.0004605263157894737,
82
+ "loss": 0.909,
83
+ "step": 90
84
+ },
85
+ {
86
+ "epoch": 0.26,
87
+ "grad_norm": 4.041190147399902,
88
+ "learning_rate": 0.000456140350877193,
89
+ "loss": 0.8536,
90
+ "step": 100
91
+ },
92
+ {
93
+ "epoch": 0.26,
94
+ "eval_accuracy": 0.7569169960474308,
95
+ "eval_f1_macro": 0.7196785257049705,
96
+ "eval_f1_micro": 0.7569169960474308,
97
+ "eval_loss": 0.8417074680328369,
98
+ "eval_runtime": 4.3701,
99
+ "eval_samples_per_second": 347.362,
100
+ "eval_steps_per_second": 10.984,
101
+ "step": 100
102
+ },
103
+ {
104
+ "epoch": 0.29,
105
+ "grad_norm": 3.605381965637207,
106
+ "learning_rate": 0.00045175438596491233,
107
+ "loss": 0.9664,
108
+ "step": 110
109
+ },
110
+ {
111
+ "epoch": 0.32,
112
+ "grad_norm": 2.9544260501861572,
113
+ "learning_rate": 0.0004473684210526316,
114
+ "loss": 0.8692,
115
+ "step": 120
116
+ },
117
+ {
118
+ "epoch": 0.34,
119
+ "grad_norm": 2.32454514503479,
120
+ "learning_rate": 0.0004429824561403509,
121
+ "loss": 0.874,
122
+ "step": 130
123
+ },
124
+ {
125
+ "epoch": 0.37,
126
+ "grad_norm": 2.802645206451416,
127
+ "learning_rate": 0.0004385964912280702,
128
+ "loss": 0.748,
129
+ "step": 140
130
+ },
131
+ {
132
+ "epoch": 0.39,
133
+ "grad_norm": 2.7617292404174805,
134
+ "learning_rate": 0.0004342105263157895,
135
+ "loss": 0.827,
136
+ "step": 150
137
+ },
138
+ {
139
+ "epoch": 0.39,
140
+ "eval_accuracy": 0.7905138339920948,
141
+ "eval_f1_macro": 0.7470749543158418,
142
+ "eval_f1_micro": 0.7905138339920948,
143
+ "eval_loss": 0.6893027424812317,
144
+ "eval_runtime": 4.3681,
145
+ "eval_samples_per_second": 347.523,
146
+ "eval_steps_per_second": 10.989,
147
+ "step": 150
148
+ },
149
+ {
150
+ "epoch": 0.42,
151
+ "grad_norm": 4.82970666885376,
152
+ "learning_rate": 0.0004298245614035088,
153
+ "loss": 0.806,
154
+ "step": 160
155
+ },
156
+ {
157
+ "epoch": 0.45,
158
+ "grad_norm": 3.695923089981079,
159
+ "learning_rate": 0.0004254385964912281,
160
+ "loss": 0.7636,
161
+ "step": 170
162
+ },
163
+ {
164
+ "epoch": 0.47,
165
+ "grad_norm": 2.7026147842407227,
166
+ "learning_rate": 0.00042105263157894734,
167
+ "loss": 0.7973,
168
+ "step": 180
169
+ },
170
+ {
171
+ "epoch": 0.5,
172
+ "grad_norm": 4.265638828277588,
173
+ "learning_rate": 0.0004166666666666667,
174
+ "loss": 0.8075,
175
+ "step": 190
176
+ },
177
+ {
178
+ "epoch": 0.53,
179
+ "grad_norm": 3.372328281402588,
180
+ "learning_rate": 0.000412280701754386,
181
+ "loss": 0.672,
182
+ "step": 200
183
+ },
184
+ {
185
+ "epoch": 0.53,
186
+ "eval_accuracy": 0.7984189723320159,
187
+ "eval_f1_macro": 0.773043998913531,
188
+ "eval_f1_micro": 0.7984189723320159,
189
+ "eval_loss": 0.7234821319580078,
190
+ "eval_runtime": 4.3741,
191
+ "eval_samples_per_second": 347.043,
192
+ "eval_steps_per_second": 10.974,
193
+ "step": 200
194
+ },
195
+ {
196
+ "epoch": 0.55,
197
+ "grad_norm": 3.093799114227295,
198
+ "learning_rate": 0.00040789473684210524,
199
+ "loss": 0.6653,
200
+ "step": 210
201
+ },
202
+ {
203
+ "epoch": 0.58,
204
+ "grad_norm": 4.268807888031006,
205
+ "learning_rate": 0.00040350877192982455,
206
+ "loss": 0.7851,
207
+ "step": 220
208
+ },
209
+ {
210
+ "epoch": 0.61,
211
+ "grad_norm": 3.9077024459838867,
212
+ "learning_rate": 0.0003991228070175439,
213
+ "loss": 0.779,
214
+ "step": 230
215
+ },
216
+ {
217
+ "epoch": 0.63,
218
+ "grad_norm": 3.523000955581665,
219
+ "learning_rate": 0.00039473684210526315,
220
+ "loss": 0.8036,
221
+ "step": 240
222
+ },
223
+ {
224
+ "epoch": 0.66,
225
+ "grad_norm": 3.328284502029419,
226
+ "learning_rate": 0.00039035087719298245,
227
+ "loss": 0.7424,
228
+ "step": 250
229
+ },
230
+ {
231
+ "epoch": 0.66,
232
+ "eval_accuracy": 0.7944664031620553,
233
+ "eval_f1_macro": 0.7460722463431328,
234
+ "eval_f1_micro": 0.7944664031620553,
235
+ "eval_loss": 0.6683638691902161,
236
+ "eval_runtime": 4.3992,
237
+ "eval_samples_per_second": 345.06,
238
+ "eval_steps_per_second": 10.911,
239
+ "step": 250
240
+ },
241
+ {
242
+ "epoch": 0.68,
243
+ "grad_norm": 3.9189491271972656,
244
+ "learning_rate": 0.00038596491228070175,
245
+ "loss": 0.7463,
246
+ "step": 260
247
+ },
248
+ {
249
+ "epoch": 0.71,
250
+ "grad_norm": 2.302865505218506,
251
+ "learning_rate": 0.00038157894736842105,
252
+ "loss": 0.7241,
253
+ "step": 270
254
+ },
255
+ {
256
+ "epoch": 0.74,
257
+ "grad_norm": 3.9664320945739746,
258
+ "learning_rate": 0.00037719298245614036,
259
+ "loss": 0.6228,
260
+ "step": 280
261
+ },
262
+ {
263
+ "epoch": 0.76,
264
+ "grad_norm": 5.590970516204834,
265
+ "learning_rate": 0.00037280701754385966,
266
+ "loss": 0.6144,
267
+ "step": 290
268
+ },
269
+ {
270
+ "epoch": 0.79,
271
+ "grad_norm": 3.0977087020874023,
272
+ "learning_rate": 0.00036842105263157896,
273
+ "loss": 0.6802,
274
+ "step": 300
275
+ },
276
+ {
277
+ "epoch": 0.79,
278
+ "eval_accuracy": 0.8214756258234519,
279
+ "eval_f1_macro": 0.8013684848189496,
280
+ "eval_f1_micro": 0.8214756258234519,
281
+ "eval_loss": 0.60084068775177,
282
+ "eval_runtime": 4.3717,
283
+ "eval_samples_per_second": 347.237,
284
+ "eval_steps_per_second": 10.98,
285
+ "step": 300
286
+ },
287
+ {
288
+ "epoch": 0.82,
289
+ "grad_norm": 4.899272441864014,
290
+ "learning_rate": 0.00036403508771929826,
291
+ "loss": 0.6185,
292
+ "step": 310
293
+ },
294
+ {
295
+ "epoch": 0.84,
296
+ "grad_norm": 4.385756969451904,
297
+ "learning_rate": 0.00035964912280701756,
298
+ "loss": 0.7105,
299
+ "step": 320
300
+ },
301
+ {
302
+ "epoch": 0.87,
303
+ "grad_norm": 4.001315593719482,
304
+ "learning_rate": 0.00035526315789473687,
305
+ "loss": 0.7194,
306
+ "step": 330
307
+ },
308
+ {
309
+ "epoch": 0.89,
310
+ "grad_norm": 4.703561782836914,
311
+ "learning_rate": 0.0003508771929824561,
312
+ "loss": 0.6602,
313
+ "step": 340
314
+ },
315
+ {
316
+ "epoch": 0.92,
317
+ "grad_norm": 3.3888790607452393,
318
+ "learning_rate": 0.00034649122807017547,
319
+ "loss": 0.7847,
320
+ "step": 350
321
+ },
322
+ {
323
+ "epoch": 0.92,
324
+ "eval_accuracy": 0.8122529644268774,
325
+ "eval_f1_macro": 0.7925111022998313,
326
+ "eval_f1_micro": 0.8122529644268774,
327
+ "eval_loss": 0.6225090622901917,
328
+ "eval_runtime": 4.3793,
329
+ "eval_samples_per_second": 346.634,
330
+ "eval_steps_per_second": 10.961,
331
+ "step": 350
332
+ },
333
+ {
334
+ "epoch": 0.95,
335
+ "grad_norm": 2.835796594619751,
336
+ "learning_rate": 0.00034210526315789477,
337
+ "loss": 0.5313,
338
+ "step": 360
339
+ },
340
+ {
341
+ "epoch": 0.97,
342
+ "grad_norm": 4.493590354919434,
343
+ "learning_rate": 0.000337719298245614,
344
+ "loss": 0.7641,
345
+ "step": 370
346
+ },
347
+ {
348
+ "epoch": 1.0,
349
+ "grad_norm": 3.594285726547241,
350
+ "learning_rate": 0.0003333333333333333,
351
+ "loss": 0.7089,
352
+ "step": 380
353
+ },
354
+ {
355
+ "epoch": 1.03,
356
+ "grad_norm": 5.686199188232422,
357
+ "learning_rate": 0.0003289473684210527,
358
+ "loss": 0.4651,
359
+ "step": 390
360
+ },
361
+ {
362
+ "epoch": 1.05,
363
+ "grad_norm": 3.385310173034668,
364
+ "learning_rate": 0.0003245614035087719,
365
+ "loss": 0.5258,
366
+ "step": 400
367
+ },
368
+ {
369
+ "epoch": 1.05,
370
+ "eval_accuracy": 0.8214756258234519,
371
+ "eval_f1_macro": 0.8000274267607091,
372
+ "eval_f1_micro": 0.8214756258234519,
373
+ "eval_loss": 0.665559709072113,
374
+ "eval_runtime": 4.3743,
375
+ "eval_samples_per_second": 347.024,
376
+ "eval_steps_per_second": 10.973,
377
+ "step": 400
378
+ },
379
+ {
380
+ "epoch": 1.08,
381
+ "grad_norm": 2.027313709259033,
382
+ "learning_rate": 0.00032017543859649123,
383
+ "loss": 0.4222,
384
+ "step": 410
385
+ },
386
+ {
387
+ "epoch": 1.11,
388
+ "grad_norm": 4.47696590423584,
389
+ "learning_rate": 0.00031578947368421053,
390
+ "loss": 0.3952,
391
+ "step": 420
392
+ },
393
+ {
394
+ "epoch": 1.13,
395
+ "grad_norm": 3.5233242511749268,
396
+ "learning_rate": 0.00031140350877192983,
397
+ "loss": 0.4377,
398
+ "step": 430
399
+ },
400
+ {
401
+ "epoch": 1.16,
402
+ "grad_norm": 6.409312725067139,
403
+ "learning_rate": 0.00030701754385964913,
404
+ "loss": 0.4199,
405
+ "step": 440
406
+ },
407
+ {
408
+ "epoch": 1.18,
409
+ "grad_norm": 5.276035308837891,
410
+ "learning_rate": 0.00030263157894736844,
411
+ "loss": 0.4945,
412
+ "step": 450
413
+ },
414
+ {
415
+ "epoch": 1.18,
416
+ "eval_accuracy": 0.8234519104084321,
417
+ "eval_f1_macro": 0.7982973998883867,
418
+ "eval_f1_micro": 0.8234519104084321,
419
+ "eval_loss": 0.6409708857536316,
420
+ "eval_runtime": 4.371,
421
+ "eval_samples_per_second": 347.286,
422
+ "eval_steps_per_second": 10.981,
423
+ "step": 450
424
+ },
425
+ {
426
+ "epoch": 1.21,
427
+ "grad_norm": 2.9710564613342285,
428
+ "learning_rate": 0.0002982456140350877,
429
+ "loss": 0.5226,
430
+ "step": 460
431
+ },
432
+ {
433
+ "epoch": 1.24,
434
+ "grad_norm": 2.901766777038574,
435
+ "learning_rate": 0.00029385964912280704,
436
+ "loss": 0.4043,
437
+ "step": 470
438
+ },
439
+ {
440
+ "epoch": 1.26,
441
+ "grad_norm": 3.143082857131958,
442
+ "learning_rate": 0.00028947368421052634,
443
+ "loss": 0.5251,
444
+ "step": 480
445
+ },
446
+ {
447
+ "epoch": 1.29,
448
+ "grad_norm": 3.326885461807251,
449
+ "learning_rate": 0.00028508771929824564,
450
+ "loss": 0.4457,
451
+ "step": 490
452
+ },
453
+ {
454
+ "epoch": 1.32,
455
+ "grad_norm": 3.5045642852783203,
456
+ "learning_rate": 0.0002807017543859649,
457
+ "loss": 0.4097,
458
+ "step": 500
459
+ },
460
+ {
461
+ "epoch": 1.32,
462
+ "eval_accuracy": 0.8346508563899868,
463
+ "eval_f1_macro": 0.8110031397771374,
464
+ "eval_f1_micro": 0.8346508563899868,
465
+ "eval_loss": 0.593744158744812,
466
+ "eval_runtime": 4.365,
467
+ "eval_samples_per_second": 347.766,
468
+ "eval_steps_per_second": 10.997,
469
+ "step": 500
470
+ },
471
+ {
472
+ "epoch": 1.34,
473
+ "grad_norm": 2.686901807785034,
474
+ "learning_rate": 0.00027631578947368425,
475
+ "loss": 0.4494,
476
+ "step": 510
477
+ },
478
+ {
479
+ "epoch": 1.37,
480
+ "grad_norm": 3.4056475162506104,
481
+ "learning_rate": 0.00027192982456140355,
482
+ "loss": 0.5187,
483
+ "step": 520
484
+ },
485
+ {
486
+ "epoch": 1.39,
487
+ "grad_norm": 3.7124712467193604,
488
+ "learning_rate": 0.0002675438596491228,
489
+ "loss": 0.514,
490
+ "step": 530
491
+ },
492
+ {
493
+ "epoch": 1.42,
494
+ "grad_norm": 2.042414665222168,
495
+ "learning_rate": 0.0002631578947368421,
496
+ "loss": 0.4129,
497
+ "step": 540
498
+ },
499
+ {
500
+ "epoch": 1.45,
501
+ "grad_norm": 2.4515717029571533,
502
+ "learning_rate": 0.00025877192982456146,
503
+ "loss": 0.4116,
504
+ "step": 550
505
+ },
506
+ {
507
+ "epoch": 1.45,
508
+ "eval_accuracy": 0.8313570487483531,
509
+ "eval_f1_macro": 0.8061050086131921,
510
+ "eval_f1_micro": 0.8313570487483531,
511
+ "eval_loss": 0.5965825319290161,
512
+ "eval_runtime": 4.3699,
513
+ "eval_samples_per_second": 347.379,
514
+ "eval_steps_per_second": 10.984,
515
+ "step": 550
516
+ },
517
+ {
518
+ "epoch": 1.47,
519
+ "grad_norm": 2.718313694000244,
520
+ "learning_rate": 0.0002543859649122807,
521
+ "loss": 0.4797,
522
+ "step": 560
523
+ },
524
+ {
525
+ "epoch": 1.5,
526
+ "grad_norm": 3.2749569416046143,
527
+ "learning_rate": 0.00025,
528
+ "loss": 0.4551,
529
+ "step": 570
530
+ },
531
+ {
532
+ "epoch": 1.53,
533
+ "grad_norm": 4.085339069366455,
534
+ "learning_rate": 0.0002456140350877193,
535
+ "loss": 0.4578,
536
+ "step": 580
537
+ },
538
+ {
539
+ "epoch": 1.55,
540
+ "grad_norm": 2.8316428661346436,
541
+ "learning_rate": 0.0002412280701754386,
542
+ "loss": 0.3912,
543
+ "step": 590
544
+ },
545
+ {
546
+ "epoch": 1.58,
547
+ "grad_norm": 4.069477081298828,
548
+ "learning_rate": 0.00023684210526315788,
549
+ "loss": 0.4785,
550
+ "step": 600
551
+ },
552
+ {
553
+ "epoch": 1.58,
554
+ "eval_accuracy": 0.8346508563899868,
555
+ "eval_f1_macro": 0.8106718201042379,
556
+ "eval_f1_micro": 0.8346508563899868,
557
+ "eval_loss": 0.569642186164856,
558
+ "eval_runtime": 4.3698,
559
+ "eval_samples_per_second": 347.382,
560
+ "eval_steps_per_second": 10.984,
561
+ "step": 600
562
+ },
563
+ {
564
+ "epoch": 1.61,
565
+ "grad_norm": 2.7763497829437256,
566
+ "learning_rate": 0.00023245614035087719,
567
+ "loss": 0.4104,
568
+ "step": 610
569
+ },
570
+ {
571
+ "epoch": 1.63,
572
+ "grad_norm": 4.348814964294434,
573
+ "learning_rate": 0.0002280701754385965,
574
+ "loss": 0.4651,
575
+ "step": 620
576
+ },
577
+ {
578
+ "epoch": 1.66,
579
+ "grad_norm": 5.929917335510254,
580
+ "learning_rate": 0.0002236842105263158,
581
+ "loss": 0.4392,
582
+ "step": 630
583
+ },
584
+ {
585
+ "epoch": 1.68,
586
+ "grad_norm": 3.6419291496276855,
587
+ "learning_rate": 0.0002192982456140351,
588
+ "loss": 0.5543,
589
+ "step": 640
590
+ },
591
+ {
592
+ "epoch": 1.71,
593
+ "grad_norm": 3.2523300647735596,
594
+ "learning_rate": 0.0002149122807017544,
595
+ "loss": 0.4821,
596
+ "step": 650
597
+ },
598
+ {
599
+ "epoch": 1.71,
600
+ "eval_accuracy": 0.836627140974967,
601
+ "eval_f1_macro": 0.8097882964707892,
602
+ "eval_f1_micro": 0.836627140974967,
603
+ "eval_loss": 0.5536289811134338,
604
+ "eval_runtime": 4.3709,
605
+ "eval_samples_per_second": 347.301,
606
+ "eval_steps_per_second": 10.982,
607
+ "step": 650
608
+ },
609
+ {
610
+ "epoch": 1.74,
611
+ "grad_norm": 1.602184534072876,
612
+ "learning_rate": 0.00021052631578947367,
613
+ "loss": 0.3797,
614
+ "step": 660
615
+ },
616
+ {
617
+ "epoch": 1.76,
618
+ "grad_norm": 4.410408020019531,
619
+ "learning_rate": 0.000206140350877193,
620
+ "loss": 0.4086,
621
+ "step": 670
622
+ },
623
+ {
624
+ "epoch": 1.79,
625
+ "grad_norm": 3.752227783203125,
626
+ "learning_rate": 0.00020175438596491227,
627
+ "loss": 0.5254,
628
+ "step": 680
629
+ },
630
+ {
631
+ "epoch": 1.82,
632
+ "grad_norm": 4.433966159820557,
633
+ "learning_rate": 0.00019736842105263157,
634
+ "loss": 0.4407,
635
+ "step": 690
636
+ },
637
+ {
638
+ "epoch": 1.84,
639
+ "grad_norm": 4.68394660949707,
640
+ "learning_rate": 0.00019298245614035088,
641
+ "loss": 0.4137,
642
+ "step": 700
643
+ },
644
+ {
645
+ "epoch": 1.84,
646
+ "eval_accuracy": 0.8372859025032938,
647
+ "eval_f1_macro": 0.8116236824672852,
648
+ "eval_f1_micro": 0.8372859025032938,
649
+ "eval_loss": 0.5611599087715149,
650
+ "eval_runtime": 4.3689,
651
+ "eval_samples_per_second": 347.459,
652
+ "eval_steps_per_second": 10.987,
653
+ "step": 700
654
+ },
655
+ {
656
+ "epoch": 1.87,
657
+ "grad_norm": 5.52586030960083,
658
+ "learning_rate": 0.00018859649122807018,
659
+ "loss": 0.382,
660
+ "step": 710
661
+ },
662
+ {
663
+ "epoch": 1.89,
664
+ "grad_norm": 3.272871732711792,
665
+ "learning_rate": 0.00018421052631578948,
666
+ "loss": 0.3665,
667
+ "step": 720
668
+ },
669
+ {
670
+ "epoch": 1.92,
671
+ "grad_norm": 3.226039171218872,
672
+ "learning_rate": 0.00017982456140350878,
673
+ "loss": 0.4256,
674
+ "step": 730
675
+ },
676
+ {
677
+ "epoch": 1.95,
678
+ "grad_norm": 2.4743871688842773,
679
+ "learning_rate": 0.00017543859649122806,
680
+ "loss": 0.4211,
681
+ "step": 740
682
+ },
683
+ {
684
+ "epoch": 1.97,
685
+ "grad_norm": 3.1402978897094727,
686
+ "learning_rate": 0.00017105263157894739,
687
+ "loss": 0.4623,
688
+ "step": 750
689
+ },
690
+ {
691
+ "epoch": 1.97,
692
+ "eval_accuracy": 0.8412384716732543,
693
+ "eval_f1_macro": 0.8142445361734595,
694
+ "eval_f1_micro": 0.8412384716732543,
695
+ "eval_loss": 0.5448094010353088,
696
+ "eval_runtime": 4.3716,
697
+ "eval_samples_per_second": 347.244,
698
+ "eval_steps_per_second": 10.98,
699
+ "step": 750
700
+ },
701
+ {
702
+ "epoch": 2.0,
703
+ "grad_norm": 2.178222179412842,
704
+ "learning_rate": 0.00016666666666666666,
705
+ "loss": 0.4186,
706
+ "step": 760
707
+ },
708
+ {
709
+ "epoch": 2.03,
710
+ "grad_norm": 4.079526901245117,
711
+ "learning_rate": 0.00016228070175438596,
712
+ "loss": 0.243,
713
+ "step": 770
714
+ },
715
+ {
716
+ "epoch": 2.05,
717
+ "grad_norm": 2.3109004497528076,
718
+ "learning_rate": 0.00015789473684210527,
719
+ "loss": 0.2709,
720
+ "step": 780
721
+ },
722
+ {
723
+ "epoch": 2.08,
724
+ "grad_norm": 1.3835806846618652,
725
+ "learning_rate": 0.00015350877192982457,
726
+ "loss": 0.2158,
727
+ "step": 790
728
+ },
729
+ {
730
+ "epoch": 2.11,
731
+ "grad_norm": 2.3929009437561035,
732
+ "learning_rate": 0.00014912280701754384,
733
+ "loss": 0.1953,
734
+ "step": 800
735
+ },
736
+ {
737
+ "epoch": 2.11,
738
+ "eval_accuracy": 0.847167325428195,
739
+ "eval_f1_macro": 0.8200760184707007,
740
+ "eval_f1_micro": 0.847167325428195,
741
+ "eval_loss": 0.5984218716621399,
742
+ "eval_runtime": 4.3681,
743
+ "eval_samples_per_second": 347.517,
744
+ "eval_steps_per_second": 10.989,
745
+ "step": 800
746
+ },
747
+ {
748
+ "epoch": 2.13,
749
+ "grad_norm": 2.7699904441833496,
750
+ "learning_rate": 0.00014473684210526317,
751
+ "loss": 0.1903,
752
+ "step": 810
753
+ },
754
+ {
755
+ "epoch": 2.16,
756
+ "grad_norm": 2.251420259475708,
757
+ "learning_rate": 0.00014035087719298245,
758
+ "loss": 0.208,
759
+ "step": 820
760
+ },
761
+ {
762
+ "epoch": 2.18,
763
+ "grad_norm": 2.384957790374756,
764
+ "learning_rate": 0.00013596491228070177,
765
+ "loss": 0.2275,
766
+ "step": 830
767
+ },
768
+ {
769
+ "epoch": 2.21,
770
+ "grad_norm": 3.703787088394165,
771
+ "learning_rate": 0.00013157894736842105,
772
+ "loss": 0.2143,
773
+ "step": 840
774
+ },
775
+ {
776
+ "epoch": 2.24,
777
+ "grad_norm": 2.6974778175354004,
778
+ "learning_rate": 0.00012719298245614035,
779
+ "loss": 0.2114,
780
+ "step": 850
781
+ },
782
+ {
783
+ "epoch": 2.24,
784
+ "eval_accuracy": 0.8432147562582345,
785
+ "eval_f1_macro": 0.8177458999700649,
786
+ "eval_f1_micro": 0.8432147562582345,
787
+ "eval_loss": 0.6188944578170776,
788
+ "eval_runtime": 4.3688,
789
+ "eval_samples_per_second": 347.462,
790
+ "eval_steps_per_second": 10.987,
791
+ "step": 850
792
+ },
793
+ {
794
+ "epoch": 2.26,
795
+ "grad_norm": 3.122279405593872,
796
+ "learning_rate": 0.00012280701754385965,
797
+ "loss": 0.2197,
798
+ "step": 860
799
+ },
800
+ {
801
+ "epoch": 2.29,
802
+ "grad_norm": 2.320108652114868,
803
+ "learning_rate": 0.00011842105263157894,
804
+ "loss": 0.2411,
805
+ "step": 870
806
+ },
807
+ {
808
+ "epoch": 2.32,
809
+ "grad_norm": 2.8228366374969482,
810
+ "learning_rate": 0.00011403508771929824,
811
+ "loss": 0.22,
812
+ "step": 880
813
+ },
814
+ {
815
+ "epoch": 2.34,
816
+ "grad_norm": 2.751335620880127,
817
+ "learning_rate": 0.00010964912280701755,
818
+ "loss": 0.1177,
819
+ "step": 890
820
+ },
821
+ {
822
+ "epoch": 2.37,
823
+ "grad_norm": 1.8863749504089355,
824
+ "learning_rate": 0.00010526315789473683,
825
+ "loss": 0.2252,
826
+ "step": 900
827
+ },
828
+ {
829
+ "epoch": 2.37,
830
+ "eval_accuracy": 0.8465085638998683,
831
+ "eval_f1_macro": 0.8199104606211177,
832
+ "eval_f1_micro": 0.8465085638998683,
833
+ "eval_loss": 0.6411211490631104,
834
+ "eval_runtime": 4.3649,
835
+ "eval_samples_per_second": 347.774,
836
+ "eval_steps_per_second": 10.997,
837
+ "step": 900
838
+ },
839
+ {
840
+ "epoch": 2.39,
841
+ "grad_norm": 5.841412544250488,
842
+ "learning_rate": 0.00010087719298245614,
843
+ "loss": 0.3529,
844
+ "step": 910
845
+ },
846
+ {
847
+ "epoch": 2.42,
848
+ "grad_norm": 4.241788864135742,
849
+ "learning_rate": 9.649122807017544e-05,
850
+ "loss": 0.2798,
851
+ "step": 920
852
+ },
853
+ {
854
+ "epoch": 2.45,
855
+ "grad_norm": 6.861723899841309,
856
+ "learning_rate": 9.210526315789474e-05,
857
+ "loss": 0.1996,
858
+ "step": 930
859
+ },
860
+ {
861
+ "epoch": 2.47,
862
+ "grad_norm": 3.2943456172943115,
863
+ "learning_rate": 8.771929824561403e-05,
864
+ "loss": 0.2679,
865
+ "step": 940
866
+ },
867
+ {
868
+ "epoch": 2.5,
869
+ "grad_norm": 4.726749897003174,
870
+ "learning_rate": 8.333333333333333e-05,
871
+ "loss": 0.1937,
872
+ "step": 950
873
+ },
874
+ {
875
+ "epoch": 2.5,
876
+ "eval_accuracy": 0.852437417654809,
877
+ "eval_f1_macro": 0.8245014606904096,
878
+ "eval_f1_micro": 0.852437417654809,
879
+ "eval_loss": 0.6044110655784607,
880
+ "eval_runtime": 4.3703,
881
+ "eval_samples_per_second": 347.348,
882
+ "eval_steps_per_second": 10.983,
883
+ "step": 950
884
+ },
885
+ {
886
+ "epoch": 2.53,
887
+ "grad_norm": 2.2791786193847656,
888
+ "learning_rate": 7.894736842105263e-05,
889
+ "loss": 0.2626,
890
+ "step": 960
891
+ },
892
+ {
893
+ "epoch": 2.55,
894
+ "grad_norm": 2.8042545318603516,
895
+ "learning_rate": 7.456140350877192e-05,
896
+ "loss": 0.3088,
897
+ "step": 970
898
+ },
899
+ {
900
+ "epoch": 2.58,
901
+ "grad_norm": 1.3460626602172852,
902
+ "learning_rate": 7.017543859649122e-05,
903
+ "loss": 0.1946,
904
+ "step": 980
905
+ },
906
+ {
907
+ "epoch": 2.61,
908
+ "grad_norm": 2.3350930213928223,
909
+ "learning_rate": 6.578947368421052e-05,
910
+ "loss": 0.2197,
911
+ "step": 990
912
+ },
913
+ {
914
+ "epoch": 2.63,
915
+ "grad_norm": 1.6555417776107788,
916
+ "learning_rate": 6.140350877192983e-05,
917
+ "loss": 0.2611,
918
+ "step": 1000
919
+ },
920
+ {
921
+ "epoch": 2.63,
922
+ "eval_accuracy": 0.847167325428195,
923
+ "eval_f1_macro": 0.8188839915533529,
924
+ "eval_f1_micro": 0.847167325428195,
925
+ "eval_loss": 0.6188414692878723,
926
+ "eval_runtime": 4.3701,
927
+ "eval_samples_per_second": 347.363,
928
+ "eval_steps_per_second": 10.984,
929
+ "step": 1000
930
+ },
931
+ {
932
+ "epoch": 2.66,
933
+ "grad_norm": 4.139593601226807,
934
+ "learning_rate": 5.701754385964912e-05,
935
+ "loss": 0.205,
936
+ "step": 1010
937
+ },
938
+ {
939
+ "epoch": 2.68,
940
+ "grad_norm": 3.4049813747406006,
941
+ "learning_rate": 5.263157894736842e-05,
942
+ "loss": 0.3039,
943
+ "step": 1020
944
+ },
945
+ {
946
+ "epoch": 2.71,
947
+ "grad_norm": 1.3635283708572388,
948
+ "learning_rate": 4.824561403508772e-05,
949
+ "loss": 0.2475,
950
+ "step": 1030
951
+ },
952
+ {
953
+ "epoch": 2.74,
954
+ "grad_norm": 2.1325881481170654,
955
+ "learning_rate": 4.3859649122807014e-05,
956
+ "loss": 0.2558,
957
+ "step": 1040
958
+ },
959
+ {
960
+ "epoch": 2.76,
961
+ "grad_norm": 2.268704891204834,
962
+ "learning_rate": 3.9473684210526316e-05,
963
+ "loss": 0.3021,
964
+ "step": 1050
965
+ },
966
+ {
967
+ "epoch": 2.76,
968
+ "eval_accuracy": 0.847167325428195,
969
+ "eval_f1_macro": 0.8188945909657032,
970
+ "eval_f1_micro": 0.847167325428195,
971
+ "eval_loss": 0.6017727851867676,
972
+ "eval_runtime": 4.3648,
973
+ "eval_samples_per_second": 347.78,
974
+ "eval_steps_per_second": 10.997,
975
+ "step": 1050
976
+ },
977
+ {
978
+ "epoch": 2.79,
979
+ "grad_norm": 2.4843382835388184,
980
+ "learning_rate": 3.508771929824561e-05,
981
+ "loss": 0.2837,
982
+ "step": 1060
983
+ },
984
+ {
985
+ "epoch": 2.82,
986
+ "grad_norm": 3.8762059211730957,
987
+ "learning_rate": 3.0701754385964913e-05,
988
+ "loss": 0.3006,
989
+ "step": 1070
990
+ },
991
+ {
992
+ "epoch": 2.84,
993
+ "grad_norm": 2.935537338256836,
994
+ "learning_rate": 2.631578947368421e-05,
995
+ "loss": 0.2562,
996
+ "step": 1080
997
+ },
998
+ {
999
+ "epoch": 2.87,
1000
+ "grad_norm": 2.4143476486206055,
1001
+ "learning_rate": 2.1929824561403507e-05,
1002
+ "loss": 0.2221,
1003
+ "step": 1090
1004
+ },
1005
+ {
1006
+ "epoch": 2.89,
1007
+ "grad_norm": 1.7292215824127197,
1008
+ "learning_rate": 1.7543859649122806e-05,
1009
+ "loss": 0.2309,
1010
+ "step": 1100
1011
+ },
1012
+ {
1013
+ "epoch": 2.89,
1014
+ "eval_accuracy": 0.8478260869565217,
1015
+ "eval_f1_macro": 0.8186183275294698,
1016
+ "eval_f1_micro": 0.8478260869565217,
1017
+ "eval_loss": 0.5803697109222412,
1018
+ "eval_runtime": 4.367,
1019
+ "eval_samples_per_second": 347.609,
1020
+ "eval_steps_per_second": 10.992,
1021
+ "step": 1100
1022
+ },
1023
+ {
1024
+ "epoch": 2.92,
1025
+ "grad_norm": 6.305541038513184,
1026
+ "learning_rate": 1.3157894736842104e-05,
1027
+ "loss": 0.2758,
1028
+ "step": 1110
1029
+ },
1030
+ {
1031
+ "epoch": 2.95,
1032
+ "grad_norm": 1.2674870491027832,
1033
+ "learning_rate": 8.771929824561403e-06,
1034
+ "loss": 0.2609,
1035
+ "step": 1120
1036
+ },
1037
+ {
1038
+ "epoch": 2.97,
1039
+ "grad_norm": 2.013765335083008,
1040
+ "learning_rate": 4.3859649122807014e-06,
1041
+ "loss": 0.178,
1042
+ "step": 1130
1043
+ },
1044
+ {
1045
+ "epoch": 3.0,
1046
+ "grad_norm": 1.0598067045211792,
1047
+ "learning_rate": 0.0,
1048
+ "loss": 0.2345,
1049
+ "step": 1140
1050
+ },
1051
+ {
1052
+ "epoch": 3.0,
1053
+ "step": 1140,
1054
+ "total_flos": 5570737729437696.0,
1055
+ "train_loss": 0.5365497335007317,
1056
+ "train_runtime": 501.2901,
1057
+ "train_samples_per_second": 72.676,
1058
+ "train_steps_per_second": 2.274
1059
+ }
1060
+ ],
1061
+ "logging_steps": 10,
1062
+ "max_steps": 1140,
1063
+ "num_input_tokens_seen": 0,
1064
+ "num_train_epochs": 3,
1065
+ "save_steps": 50,
1066
+ "total_flos": 5570737729437696.0,
1067
+ "train_batch_size": 16,
1068
+ "trial_name": null,
1069
+ "trial_params": null
1070
+ }
google/flan_t5_base_amazon/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98b958a6b782601339a0e8d2ce2a2580aff5af307653d8e7e2c3da10fefbebd9
3
+ size 5048
google/flan_t5_base_ledgar/README.md ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ base_model: google/flan-t5-base
4
+ tags:
5
+ - generated_from_trainer
6
+ metrics:
7
+ - accuracy
8
+ model-index:
9
+ - name: flan_t5_base_ledgar
10
+ results: []
11
+ ---
12
+
13
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
14
+ should probably proofread and complete it, then remove this comment. -->
15
+
16
+ # flan_t5_base_ledgar
17
+
18
+ This model is a fine-tuned version of [google/flan-t5-base](https://huggingface.co/google/flan-t5-base) on an unknown dataset.
19
+ It achieves the following results on the evaluation set:
20
+ - Loss: 0.5092
21
+ - Accuracy: 0.8685
22
+ - F1 Macro: 0.7955
23
+ - F1 Micro: 0.8685
24
+
25
+ ## Model description
26
+
27
+ More information needed
28
+
29
+ ## Intended uses & limitations
30
+
31
+ More information needed
32
+
33
+ ## Training and evaluation data
34
+
35
+ More information needed
36
+
37
+ ## Training procedure
38
+
39
+ ### Training hyperparameters
40
+
41
+ The following hyperparameters were used during training:
42
+ - learning_rate: 0.0005
43
+ - train_batch_size: 32
44
+ - eval_batch_size: 32
45
+ - seed: 42
46
+ - distributed_type: multi-GPU
47
+ - num_devices: 2
48
+ - total_train_batch_size: 64
49
+ - total_eval_batch_size: 64
50
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
51
+ - lr_scheduler_type: linear
52
+ - num_epochs: 3.0
53
+
54
+ ### Training results
55
+
56
+ | Training Loss | Epoch | Step | Validation Loss | Accuracy | F1 Macro | F1 Micro |
57
+ |:-------------:|:-----:|:----:|:---------------:|:--------:|:--------:|:--------:|
58
+ | 1.2824 | 0.11 | 100 | 1.0243 | 0.7464 | 0.5723 | 0.7464 |
59
+ | 0.8961 | 0.21 | 200 | 0.8572 | 0.7743 | 0.6359 | 0.7743 |
60
+ | 0.8233 | 0.32 | 300 | 0.7788 | 0.7968 | 0.6756 | 0.7968 |
61
+ | 0.7722 | 0.43 | 400 | 0.7432 | 0.8014 | 0.6689 | 0.8014 |
62
+ | 0.7739 | 0.53 | 500 | 0.6933 | 0.8135 | 0.7020 | 0.8135 |
63
+ | 0.7435 | 0.64 | 600 | 0.6871 | 0.8137 | 0.7028 | 0.8137 |
64
+ | 0.6877 | 0.75 | 700 | 0.6751 | 0.8169 | 0.7012 | 0.8169 |
65
+ | 0.6968 | 0.85 | 800 | 0.6471 | 0.8225 | 0.7249 | 0.8225 |
66
+ | 0.6218 | 0.96 | 900 | 0.6366 | 0.8219 | 0.7264 | 0.8219 |
67
+ | 0.5101 | 1.07 | 1000 | 0.6130 | 0.8378 | 0.7388 | 0.8378 |
68
+ | 0.5411 | 1.17 | 1100 | 0.6088 | 0.8375 | 0.7392 | 0.8375 |
69
+ | 0.5918 | 1.28 | 1200 | 0.5864 | 0.8449 | 0.7577 | 0.8449 |
70
+ | 0.4785 | 1.39 | 1300 | 0.5917 | 0.8391 | 0.7460 | 0.8391 |
71
+ | 0.4372 | 1.49 | 1400 | 0.5790 | 0.8409 | 0.7563 | 0.8409 |
72
+ | 0.5022 | 1.6 | 1500 | 0.5868 | 0.8437 | 0.7524 | 0.8437 |
73
+ | 0.5391 | 1.71 | 1600 | 0.5613 | 0.8447 | 0.7520 | 0.8447 |
74
+ | 0.4971 | 1.81 | 1700 | 0.5466 | 0.8545 | 0.7702 | 0.8545 |
75
+ | 0.4686 | 1.92 | 1800 | 0.5260 | 0.8566 | 0.7774 | 0.8566 |
76
+ | 0.349 | 2.03 | 1900 | 0.5416 | 0.8565 | 0.7721 | 0.8565 |
77
+ | 0.3216 | 2.13 | 2000 | 0.5441 | 0.8565 | 0.7763 | 0.8565 |
78
+ | 0.3531 | 2.24 | 2100 | 0.5444 | 0.8553 | 0.7753 | 0.8553 |
79
+ | 0.3276 | 2.35 | 2200 | 0.5380 | 0.8604 | 0.7796 | 0.8604 |
80
+ | 0.3372 | 2.45 | 2300 | 0.5231 | 0.8634 | 0.7831 | 0.8634 |
81
+ | 0.3227 | 2.56 | 2400 | 0.5210 | 0.8651 | 0.7872 | 0.8651 |
82
+ | 0.2987 | 2.67 | 2500 | 0.5188 | 0.8665 | 0.7910 | 0.8665 |
83
+ | 0.3354 | 2.77 | 2600 | 0.5150 | 0.8666 | 0.7931 | 0.8666 |
84
+ | 0.3103 | 2.88 | 2700 | 0.5103 | 0.8681 | 0.7942 | 0.8681 |
85
+ | 0.3248 | 2.99 | 2800 | 0.5092 | 0.8685 | 0.7955 | 0.8685 |
86
+
87
+
88
+ ### Framework versions
89
+
90
+ - Transformers 4.39.0.dev0
91
+ - Pytorch 2.2.1+cu121
92
+ - Datasets 2.18.0
93
+ - Tokenizers 0.15.2
google/flan_t5_base_ledgar/all_results.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "eval_accuracy": 0.8685,
4
+ "eval_f1_macro": 0.7955270719340626,
5
+ "eval_f1_micro": 0.8685,
6
+ "eval_loss": 0.5091982483863831,
7
+ "eval_runtime": 27.379,
8
+ "eval_samples": 10000,
9
+ "eval_samples_per_second": 365.243,
10
+ "eval_steps_per_second": 5.734,
11
+ "test_accuracy": 0.8682,
12
+ "test_f1_macro": 0.7969025101161749,
13
+ "test_f1_micro": 0.8682,
14
+ "test_loss": 0.5303810834884644,
15
+ "test_runtime": 27.358,
16
+ "test_samples_per_second": 365.524,
17
+ "test_steps_per_second": 5.739,
18
+ "train_loss": 0.587905512435604,
19
+ "train_runtime": 2297.3592,
20
+ "train_samples": 60000,
21
+ "train_samples_per_second": 78.351,
22
+ "train_steps_per_second": 1.225
23
+ }
google/flan_t5_base_ledgar/checkpoint-2800/config.json ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/flan-t5-base",
3
+ "architectures": [
4
+ "T5ForSequenceClassification"
5
+ ],
6
+ "classifier_dropout": 0.0,
7
+ "d_ff": 2048,
8
+ "d_kv": 64,
9
+ "d_model": 768,
10
+ "decoder_start_token_id": 0,
11
+ "dense_act_fn": "gelu_new",
12
+ "dropout_rate": 0.1,
13
+ "eos_token_id": 1,
14
+ "feed_forward_proj": "gated-gelu",
15
+ "finetuning_task": "text-classification",
16
+ "id2label": {
17
+ "0": "0",
18
+ "1": "1",
19
+ "2": "10",
20
+ "3": "11",
21
+ "4": "12",
22
+ "5": "13",
23
+ "6": "14",
24
+ "7": "15",
25
+ "8": "16",
26
+ "9": "17",
27
+ "10": "18",
28
+ "11": "19",
29
+ "12": "2",
30
+ "13": "20",
31
+ "14": "21",
32
+ "15": "22",
33
+ "16": "23",
34
+ "17": "24",
35
+ "18": "25",
36
+ "19": "26",
37
+ "20": "27",
38
+ "21": "28",
39
+ "22": "29",
40
+ "23": "3",
41
+ "24": "30",
42
+ "25": "31",
43
+ "26": "32",
44
+ "27": "33",
45
+ "28": "34",
46
+ "29": "35",
47
+ "30": "36",
48
+ "31": "37",
49
+ "32": "38",
50
+ "33": "39",
51
+ "34": "4",
52
+ "35": "40",
53
+ "36": "41",
54
+ "37": "42",
55
+ "38": "43",
56
+ "39": "44",
57
+ "40": "45",
58
+ "41": "46",
59
+ "42": "47",
60
+ "43": "48",
61
+ "44": "49",
62
+ "45": "5",
63
+ "46": "50",
64
+ "47": "51",
65
+ "48": "52",
66
+ "49": "53",
67
+ "50": "54",
68
+ "51": "55",
69
+ "52": "56",
70
+ "53": "57",
71
+ "54": "58",
72
+ "55": "59",
73
+ "56": "6",
74
+ "57": "60",
75
+ "58": "61",
76
+ "59": "62",
77
+ "60": "63",
78
+ "61": "64",
79
+ "62": "65",
80
+ "63": "66",
81
+ "64": "67",
82
+ "65": "68",
83
+ "66": "69",
84
+ "67": "7",
85
+ "68": "70",
86
+ "69": "71",
87
+ "70": "72",
88
+ "71": "73",
89
+ "72": "74",
90
+ "73": "75",
91
+ "74": "76",
92
+ "75": "77",
93
+ "76": "78",
94
+ "77": "79",
95
+ "78": "8",
96
+ "79": "80",
97
+ "80": "81",
98
+ "81": "82",
99
+ "82": "83",
100
+ "83": "84",
101
+ "84": "85",
102
+ "85": "86",
103
+ "86": "87",
104
+ "87": "88",
105
+ "88": "89",
106
+ "89": "9",
107
+ "90": "90",
108
+ "91": "91",
109
+ "92": "92",
110
+ "93": "93",
111
+ "94": "94",
112
+ "95": "95",
113
+ "96": "96",
114
+ "97": "97",
115
+ "98": "98",
116
+ "99": "99"
117
+ },
118
+ "initializer_factor": 1.0,
119
+ "is_encoder_decoder": true,
120
+ "is_gated_act": true,
121
+ "label2id": {
122
+ "0": 0,
123
+ "1": 1,
124
+ "10": 2,
125
+ "11": 3,
126
+ "12": 4,
127
+ "13": 5,
128
+ "14": 6,
129
+ "15": 7,
130
+ "16": 8,
131
+ "17": 9,
132
+ "18": 10,
133
+ "19": 11,
134
+ "2": 12,
135
+ "20": 13,
136
+ "21": 14,
137
+ "22": 15,
138
+ "23": 16,
139
+ "24": 17,
140
+ "25": 18,
141
+ "26": 19,
142
+ "27": 20,
143
+ "28": 21,
144
+ "29": 22,
145
+ "3": 23,
146
+ "30": 24,
147
+ "31": 25,
148
+ "32": 26,
149
+ "33": 27,
150
+ "34": 28,
151
+ "35": 29,
152
+ "36": 30,
153
+ "37": 31,
154
+ "38": 32,
155
+ "39": 33,
156
+ "4": 34,
157
+ "40": 35,
158
+ "41": 36,
159
+ "42": 37,
160
+ "43": 38,
161
+ "44": 39,
162
+ "45": 40,
163
+ "46": 41,
164
+ "47": 42,
165
+ "48": 43,
166
+ "49": 44,
167
+ "5": 45,
168
+ "50": 46,
169
+ "51": 47,
170
+ "52": 48,
171
+ "53": 49,
172
+ "54": 50,
173
+ "55": 51,
174
+ "56": 52,
175
+ "57": 53,
176
+ "58": 54,
177
+ "59": 55,
178
+ "6": 56,
179
+ "60": 57,
180
+ "61": 58,
181
+ "62": 59,
182
+ "63": 60,
183
+ "64": 61,
184
+ "65": 62,
185
+ "66": 63,
186
+ "67": 64,
187
+ "68": 65,
188
+ "69": 66,
189
+ "7": 67,
190
+ "70": 68,
191
+ "71": 69,
192
+ "72": 70,
193
+ "73": 71,
194
+ "74": 72,
195
+ "75": 73,
196
+ "76": 74,
197
+ "77": 75,
198
+ "78": 76,
199
+ "79": 77,
200
+ "8": 78,
201
+ "80": 79,
202
+ "81": 80,
203
+ "82": 81,
204
+ "83": 82,
205
+ "84": 83,
206
+ "85": 84,
207
+ "86": 85,
208
+ "87": 86,
209
+ "88": 87,
210
+ "89": 88,
211
+ "9": 89,
212
+ "90": 90,
213
+ "91": 91,
214
+ "92": 92,
215
+ "93": 93,
216
+ "94": 94,
217
+ "95": 95,
218
+ "96": 96,
219
+ "97": 97,
220
+ "98": 98,
221
+ "99": 99
222
+ },
223
+ "layer_norm_epsilon": 1e-06,
224
+ "model_type": "t5",
225
+ "n_positions": 512,
226
+ "num_decoder_layers": 12,
227
+ "num_heads": 12,
228
+ "num_layers": 12,
229
+ "output_past": true,
230
+ "pad_token_id": 0,
231
+ "problem_type": "single_label_classification",
232
+ "relative_attention_max_distance": 128,
233
+ "relative_attention_num_buckets": 32,
234
+ "task_specific_params": {
235
+ "summarization": {
236
+ "early_stopping": true,
237
+ "length_penalty": 2.0,
238
+ "max_length": 200,
239
+ "min_length": 30,
240
+ "no_repeat_ngram_size": 3,
241
+ "num_beams": 4,
242
+ "prefix": "summarize: "
243
+ },
244
+ "translation_en_to_de": {
245
+ "early_stopping": true,
246
+ "max_length": 300,
247
+ "num_beams": 4,
248
+ "prefix": "translate English to German: "
249
+ },
250
+ "translation_en_to_fr": {
251
+ "early_stopping": true,
252
+ "max_length": 300,
253
+ "num_beams": 4,
254
+ "prefix": "translate English to French: "
255
+ },
256
+ "translation_en_to_ro": {
257
+ "early_stopping": true,
258
+ "max_length": 300,
259
+ "num_beams": 4,
260
+ "prefix": "translate English to Romanian: "
261
+ }
262
+ },
263
+ "tie_word_embeddings": false,
264
+ "torch_dtype": "float32",
265
+ "transformers_version": "4.39.0.dev0",
266
+ "use_cache": true,
267
+ "vocab_size": 32128
268
+ }
google/flan_t5_base_ledgar/checkpoint-2800/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50d3c719cea01d5d773a6a5e97ab546ca789a29b55b989470a22e26a7c3b2434
3
+ size 894321496
google/flan_t5_base_ledgar/checkpoint-2800/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1c903a2e0b05a402bc612f3a4b9f6b2f27f1d8aeebb873052ec784c6191f2d5
3
+ size 1788808378
google/flan_t5_base_ledgar/checkpoint-2800/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9dd582de799cdacb5e3e364f924477793329e2e52836c4871d5fe16cd3af038
3
+ size 14512
google/flan_t5_base_ledgar/checkpoint-2800/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e814126b0cc6943ea52ebfb9dd2c7f5476f84e3662511f6e016f6e258c964dd
3
+ size 14512
google/flan_t5_base_ledgar/checkpoint-2800/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51e284703b00465acd87a34da92d3e2f6ab78a159fbf2682d3512ef8b4721d1e
3
+ size 1064
google/flan_t5_base_ledgar/checkpoint-2800/special_tokens_map.json ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>",
99
+ "<extra_id_96>",
100
+ "<extra_id_97>",
101
+ "<extra_id_98>",
102
+ "<extra_id_99>"
103
+ ],
104
+ "eos_token": {
105
+ "content": "</s>",
106
+ "lstrip": false,
107
+ "normalized": false,
108
+ "rstrip": false,
109
+ "single_word": false
110
+ },
111
+ "pad_token": {
112
+ "content": "<pad>",
113
+ "lstrip": false,
114
+ "normalized": false,
115
+ "rstrip": false,
116
+ "single_word": false
117
+ },
118
+ "unk_token": {
119
+ "content": "<unk>",
120
+ "lstrip": false,
121
+ "normalized": false,
122
+ "rstrip": false,
123
+ "single_word": false
124
+ }
125
+ }
google/flan_t5_base_ledgar/checkpoint-2800/spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
3
+ size 791656
google/flan_t5_base_ledgar/checkpoint-2800/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
google/flan_t5_base_ledgar/checkpoint-2800/tokenizer_config.json ADDED
@@ -0,0 +1,938 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<pad>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "</s>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "<unk>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "32000": {
28
+ "content": "<extra_id_99>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "32001": {
36
+ "content": "<extra_id_98>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "32002": {
44
+ "content": "<extra_id_97>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "32003": {
52
+ "content": "<extra_id_96>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "32004": {
60
+ "content": "<extra_id_95>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "32005": {
68
+ "content": "<extra_id_94>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "32006": {
76
+ "content": "<extra_id_93>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "32007": {
84
+ "content": "<extra_id_92>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "32008": {
92
+ "content": "<extra_id_91>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "32009": {
100
+ "content": "<extra_id_90>",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "32010": {
108
+ "content": "<extra_id_89>",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "32011": {
116
+ "content": "<extra_id_88>",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "32012": {
124
+ "content": "<extra_id_87>",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "32013": {
132
+ "content": "<extra_id_86>",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": true
138
+ },
139
+ "32014": {
140
+ "content": "<extra_id_85>",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": true
146
+ },
147
+ "32015": {
148
+ "content": "<extra_id_84>",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": true
154
+ },
155
+ "32016": {
156
+ "content": "<extra_id_83>",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "32017": {
164
+ "content": "<extra_id_82>",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": true
170
+ },
171
+ "32018": {
172
+ "content": "<extra_id_81>",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": true
178
+ },
179
+ "32019": {
180
+ "content": "<extra_id_80>",
181
+ "lstrip": false,
182
+ "normalized": false,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": true
186
+ },
187
+ "32020": {
188
+ "content": "<extra_id_79>",
189
+ "lstrip": false,
190
+ "normalized": false,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": true
194
+ },
195
+ "32021": {
196
+ "content": "<extra_id_78>",
197
+ "lstrip": false,
198
+ "normalized": false,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": true
202
+ },
203
+ "32022": {
204
+ "content": "<extra_id_77>",
205
+ "lstrip": false,
206
+ "normalized": false,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": true
210
+ },
211
+ "32023": {
212
+ "content": "<extra_id_76>",
213
+ "lstrip": false,
214
+ "normalized": false,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": true
218
+ },
219
+ "32024": {
220
+ "content": "<extra_id_75>",
221
+ "lstrip": false,
222
+ "normalized": false,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": true
226
+ },
227
+ "32025": {
228
+ "content": "<extra_id_74>",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": true
234
+ },
235
+ "32026": {
236
+ "content": "<extra_id_73>",
237
+ "lstrip": false,
238
+ "normalized": false,
239
+ "rstrip": false,
240
+ "single_word": false,
241
+ "special": true
242
+ },
243
+ "32027": {
244
+ "content": "<extra_id_72>",
245
+ "lstrip": false,
246
+ "normalized": false,
247
+ "rstrip": false,
248
+ "single_word": false,
249
+ "special": true
250
+ },
251
+ "32028": {
252
+ "content": "<extra_id_71>",
253
+ "lstrip": false,
254
+ "normalized": false,
255
+ "rstrip": false,
256
+ "single_word": false,
257
+ "special": true
258
+ },
259
+ "32029": {
260
+ "content": "<extra_id_70>",
261
+ "lstrip": false,
262
+ "normalized": false,
263
+ "rstrip": false,
264
+ "single_word": false,
265
+ "special": true
266
+ },
267
+ "32030": {
268
+ "content": "<extra_id_69>",
269
+ "lstrip": false,
270
+ "normalized": false,
271
+ "rstrip": false,
272
+ "single_word": false,
273
+ "special": true
274
+ },
275
+ "32031": {
276
+ "content": "<extra_id_68>",
277
+ "lstrip": false,
278
+ "normalized": false,
279
+ "rstrip": false,
280
+ "single_word": false,
281
+ "special": true
282
+ },
283
+ "32032": {
284
+ "content": "<extra_id_67>",
285
+ "lstrip": false,
286
+ "normalized": false,
287
+ "rstrip": false,
288
+ "single_word": false,
289
+ "special": true
290
+ },
291
+ "32033": {
292
+ "content": "<extra_id_66>",
293
+ "lstrip": false,
294
+ "normalized": false,
295
+ "rstrip": false,
296
+ "single_word": false,
297
+ "special": true
298
+ },
299
+ "32034": {
300
+ "content": "<extra_id_65>",
301
+ "lstrip": false,
302
+ "normalized": false,
303
+ "rstrip": false,
304
+ "single_word": false,
305
+ "special": true
306
+ },
307
+ "32035": {
308
+ "content": "<extra_id_64>",
309
+ "lstrip": false,
310
+ "normalized": false,
311
+ "rstrip": false,
312
+ "single_word": false,
313
+ "special": true
314
+ },
315
+ "32036": {
316
+ "content": "<extra_id_63>",
317
+ "lstrip": false,
318
+ "normalized": false,
319
+ "rstrip": false,
320
+ "single_word": false,
321
+ "special": true
322
+ },
323
+ "32037": {
324
+ "content": "<extra_id_62>",
325
+ "lstrip": false,
326
+ "normalized": false,
327
+ "rstrip": false,
328
+ "single_word": false,
329
+ "special": true
330
+ },
331
+ "32038": {
332
+ "content": "<extra_id_61>",
333
+ "lstrip": false,
334
+ "normalized": false,
335
+ "rstrip": false,
336
+ "single_word": false,
337
+ "special": true
338
+ },
339
+ "32039": {
340
+ "content": "<extra_id_60>",
341
+ "lstrip": false,
342
+ "normalized": false,
343
+ "rstrip": false,
344
+ "single_word": false,
345
+ "special": true
346
+ },
347
+ "32040": {
348
+ "content": "<extra_id_59>",
349
+ "lstrip": false,
350
+ "normalized": false,
351
+ "rstrip": false,
352
+ "single_word": false,
353
+ "special": true
354
+ },
355
+ "32041": {
356
+ "content": "<extra_id_58>",
357
+ "lstrip": false,
358
+ "normalized": false,
359
+ "rstrip": false,
360
+ "single_word": false,
361
+ "special": true
362
+ },
363
+ "32042": {
364
+ "content": "<extra_id_57>",
365
+ "lstrip": false,
366
+ "normalized": false,
367
+ "rstrip": false,
368
+ "single_word": false,
369
+ "special": true
370
+ },
371
+ "32043": {
372
+ "content": "<extra_id_56>",
373
+ "lstrip": false,
374
+ "normalized": false,
375
+ "rstrip": false,
376
+ "single_word": false,
377
+ "special": true
378
+ },
379
+ "32044": {
380
+ "content": "<extra_id_55>",
381
+ "lstrip": false,
382
+ "normalized": false,
383
+ "rstrip": false,
384
+ "single_word": false,
385
+ "special": true
386
+ },
387
+ "32045": {
388
+ "content": "<extra_id_54>",
389
+ "lstrip": false,
390
+ "normalized": false,
391
+ "rstrip": false,
392
+ "single_word": false,
393
+ "special": true
394
+ },
395
+ "32046": {
396
+ "content": "<extra_id_53>",
397
+ "lstrip": false,
398
+ "normalized": false,
399
+ "rstrip": false,
400
+ "single_word": false,
401
+ "special": true
402
+ },
403
+ "32047": {
404
+ "content": "<extra_id_52>",
405
+ "lstrip": false,
406
+ "normalized": false,
407
+ "rstrip": false,
408
+ "single_word": false,
409
+ "special": true
410
+ },
411
+ "32048": {
412
+ "content": "<extra_id_51>",
413
+ "lstrip": false,
414
+ "normalized": false,
415
+ "rstrip": false,
416
+ "single_word": false,
417
+ "special": true
418
+ },
419
+ "32049": {
420
+ "content": "<extra_id_50>",
421
+ "lstrip": false,
422
+ "normalized": false,
423
+ "rstrip": false,
424
+ "single_word": false,
425
+ "special": true
426
+ },
427
+ "32050": {
428
+ "content": "<extra_id_49>",
429
+ "lstrip": false,
430
+ "normalized": false,
431
+ "rstrip": false,
432
+ "single_word": false,
433
+ "special": true
434
+ },
435
+ "32051": {
436
+ "content": "<extra_id_48>",
437
+ "lstrip": false,
438
+ "normalized": false,
439
+ "rstrip": false,
440
+ "single_word": false,
441
+ "special": true
442
+ },
443
+ "32052": {
444
+ "content": "<extra_id_47>",
445
+ "lstrip": false,
446
+ "normalized": false,
447
+ "rstrip": false,
448
+ "single_word": false,
449
+ "special": true
450
+ },
451
+ "32053": {
452
+ "content": "<extra_id_46>",
453
+ "lstrip": false,
454
+ "normalized": false,
455
+ "rstrip": false,
456
+ "single_word": false,
457
+ "special": true
458
+ },
459
+ "32054": {
460
+ "content": "<extra_id_45>",
461
+ "lstrip": false,
462
+ "normalized": false,
463
+ "rstrip": false,
464
+ "single_word": false,
465
+ "special": true
466
+ },
467
+ "32055": {
468
+ "content": "<extra_id_44>",
469
+ "lstrip": false,
470
+ "normalized": false,
471
+ "rstrip": false,
472
+ "single_word": false,
473
+ "special": true
474
+ },
475
+ "32056": {
476
+ "content": "<extra_id_43>",
477
+ "lstrip": false,
478
+ "normalized": false,
479
+ "rstrip": false,
480
+ "single_word": false,
481
+ "special": true
482
+ },
483
+ "32057": {
484
+ "content": "<extra_id_42>",
485
+ "lstrip": false,
486
+ "normalized": false,
487
+ "rstrip": false,
488
+ "single_word": false,
489
+ "special": true
490
+ },
491
+ "32058": {
492
+ "content": "<extra_id_41>",
493
+ "lstrip": false,
494
+ "normalized": false,
495
+ "rstrip": false,
496
+ "single_word": false,
497
+ "special": true
498
+ },
499
+ "32059": {
500
+ "content": "<extra_id_40>",
501
+ "lstrip": false,
502
+ "normalized": false,
503
+ "rstrip": false,
504
+ "single_word": false,
505
+ "special": true
506
+ },
507
+ "32060": {
508
+ "content": "<extra_id_39>",
509
+ "lstrip": false,
510
+ "normalized": false,
511
+ "rstrip": false,
512
+ "single_word": false,
513
+ "special": true
514
+ },
515
+ "32061": {
516
+ "content": "<extra_id_38>",
517
+ "lstrip": false,
518
+ "normalized": false,
519
+ "rstrip": false,
520
+ "single_word": false,
521
+ "special": true
522
+ },
523
+ "32062": {
524
+ "content": "<extra_id_37>",
525
+ "lstrip": false,
526
+ "normalized": false,
527
+ "rstrip": false,
528
+ "single_word": false,
529
+ "special": true
530
+ },
531
+ "32063": {
532
+ "content": "<extra_id_36>",
533
+ "lstrip": false,
534
+ "normalized": false,
535
+ "rstrip": false,
536
+ "single_word": false,
537
+ "special": true
538
+ },
539
+ "32064": {
540
+ "content": "<extra_id_35>",
541
+ "lstrip": false,
542
+ "normalized": false,
543
+ "rstrip": false,
544
+ "single_word": false,
545
+ "special": true
546
+ },
547
+ "32065": {
548
+ "content": "<extra_id_34>",
549
+ "lstrip": false,
550
+ "normalized": false,
551
+ "rstrip": false,
552
+ "single_word": false,
553
+ "special": true
554
+ },
555
+ "32066": {
556
+ "content": "<extra_id_33>",
557
+ "lstrip": false,
558
+ "normalized": false,
559
+ "rstrip": false,
560
+ "single_word": false,
561
+ "special": true
562
+ },
563
+ "32067": {
564
+ "content": "<extra_id_32>",
565
+ "lstrip": false,
566
+ "normalized": false,
567
+ "rstrip": false,
568
+ "single_word": false,
569
+ "special": true
570
+ },
571
+ "32068": {
572
+ "content": "<extra_id_31>",
573
+ "lstrip": false,
574
+ "normalized": false,
575
+ "rstrip": false,
576
+ "single_word": false,
577
+ "special": true
578
+ },
579
+ "32069": {
580
+ "content": "<extra_id_30>",
581
+ "lstrip": false,
582
+ "normalized": false,
583
+ "rstrip": false,
584
+ "single_word": false,
585
+ "special": true
586
+ },
587
+ "32070": {
588
+ "content": "<extra_id_29>",
589
+ "lstrip": false,
590
+ "normalized": false,
591
+ "rstrip": false,
592
+ "single_word": false,
593
+ "special": true
594
+ },
595
+ "32071": {
596
+ "content": "<extra_id_28>",
597
+ "lstrip": false,
598
+ "normalized": false,
599
+ "rstrip": false,
600
+ "single_word": false,
601
+ "special": true
602
+ },
603
+ "32072": {
604
+ "content": "<extra_id_27>",
605
+ "lstrip": false,
606
+ "normalized": false,
607
+ "rstrip": false,
608
+ "single_word": false,
609
+ "special": true
610
+ },
611
+ "32073": {
612
+ "content": "<extra_id_26>",
613
+ "lstrip": false,
614
+ "normalized": false,
615
+ "rstrip": false,
616
+ "single_word": false,
617
+ "special": true
618
+ },
619
+ "32074": {
620
+ "content": "<extra_id_25>",
621
+ "lstrip": false,
622
+ "normalized": false,
623
+ "rstrip": false,
624
+ "single_word": false,
625
+ "special": true
626
+ },
627
+ "32075": {
628
+ "content": "<extra_id_24>",
629
+ "lstrip": false,
630
+ "normalized": false,
631
+ "rstrip": false,
632
+ "single_word": false,
633
+ "special": true
634
+ },
635
+ "32076": {
636
+ "content": "<extra_id_23>",
637
+ "lstrip": false,
638
+ "normalized": false,
639
+ "rstrip": false,
640
+ "single_word": false,
641
+ "special": true
642
+ },
643
+ "32077": {
644
+ "content": "<extra_id_22>",
645
+ "lstrip": false,
646
+ "normalized": false,
647
+ "rstrip": false,
648
+ "single_word": false,
649
+ "special": true
650
+ },
651
+ "32078": {
652
+ "content": "<extra_id_21>",
653
+ "lstrip": false,
654
+ "normalized": false,
655
+ "rstrip": false,
656
+ "single_word": false,
657
+ "special": true
658
+ },
659
+ "32079": {
660
+ "content": "<extra_id_20>",
661
+ "lstrip": false,
662
+ "normalized": false,
663
+ "rstrip": false,
664
+ "single_word": false,
665
+ "special": true
666
+ },
667
+ "32080": {
668
+ "content": "<extra_id_19>",
669
+ "lstrip": false,
670
+ "normalized": false,
671
+ "rstrip": false,
672
+ "single_word": false,
673
+ "special": true
674
+ },
675
+ "32081": {
676
+ "content": "<extra_id_18>",
677
+ "lstrip": false,
678
+ "normalized": false,
679
+ "rstrip": false,
680
+ "single_word": false,
681
+ "special": true
682
+ },
683
+ "32082": {
684
+ "content": "<extra_id_17>",
685
+ "lstrip": false,
686
+ "normalized": false,
687
+ "rstrip": false,
688
+ "single_word": false,
689
+ "special": true
690
+ },
691
+ "32083": {
692
+ "content": "<extra_id_16>",
693
+ "lstrip": false,
694
+ "normalized": false,
695
+ "rstrip": false,
696
+ "single_word": false,
697
+ "special": true
698
+ },
699
+ "32084": {
700
+ "content": "<extra_id_15>",
701
+ "lstrip": false,
702
+ "normalized": false,
703
+ "rstrip": false,
704
+ "single_word": false,
705
+ "special": true
706
+ },
707
+ "32085": {
708
+ "content": "<extra_id_14>",
709
+ "lstrip": false,
710
+ "normalized": false,
711
+ "rstrip": false,
712
+ "single_word": false,
713
+ "special": true
714
+ },
715
+ "32086": {
716
+ "content": "<extra_id_13>",
717
+ "lstrip": false,
718
+ "normalized": false,
719
+ "rstrip": false,
720
+ "single_word": false,
721
+ "special": true
722
+ },
723
+ "32087": {
724
+ "content": "<extra_id_12>",
725
+ "lstrip": false,
726
+ "normalized": false,
727
+ "rstrip": false,
728
+ "single_word": false,
729
+ "special": true
730
+ },
731
+ "32088": {
732
+ "content": "<extra_id_11>",
733
+ "lstrip": false,
734
+ "normalized": false,
735
+ "rstrip": false,
736
+ "single_word": false,
737
+ "special": true
738
+ },
739
+ "32089": {
740
+ "content": "<extra_id_10>",
741
+ "lstrip": false,
742
+ "normalized": false,
743
+ "rstrip": false,
744
+ "single_word": false,
745
+ "special": true
746
+ },
747
+ "32090": {
748
+ "content": "<extra_id_9>",
749
+ "lstrip": false,
750
+ "normalized": false,
751
+ "rstrip": false,
752
+ "single_word": false,
753
+ "special": true
754
+ },
755
+ "32091": {
756
+ "content": "<extra_id_8>",
757
+ "lstrip": false,
758
+ "normalized": false,
759
+ "rstrip": false,
760
+ "single_word": false,
761
+ "special": true
762
+ },
763
+ "32092": {
764
+ "content": "<extra_id_7>",
765
+ "lstrip": false,
766
+ "normalized": false,
767
+ "rstrip": false,
768
+ "single_word": false,
769
+ "special": true
770
+ },
771
+ "32093": {
772
+ "content": "<extra_id_6>",
773
+ "lstrip": false,
774
+ "normalized": false,
775
+ "rstrip": false,
776
+ "single_word": false,
777
+ "special": true
778
+ },
779
+ "32094": {
780
+ "content": "<extra_id_5>",
781
+ "lstrip": false,
782
+ "normalized": false,
783
+ "rstrip": false,
784
+ "single_word": false,
785
+ "special": true
786
+ },
787
+ "32095": {
788
+ "content": "<extra_id_4>",
789
+ "lstrip": false,
790
+ "normalized": false,
791
+ "rstrip": false,
792
+ "single_word": false,
793
+ "special": true
794
+ },
795
+ "32096": {
796
+ "content": "<extra_id_3>",
797
+ "lstrip": false,
798
+ "normalized": false,
799
+ "rstrip": false,
800
+ "single_word": false,
801
+ "special": true
802
+ },
803
+ "32097": {
804
+ "content": "<extra_id_2>",
805
+ "lstrip": false,
806
+ "normalized": false,
807
+ "rstrip": false,
808
+ "single_word": false,
809
+ "special": true
810
+ },
811
+ "32098": {
812
+ "content": "<extra_id_1>",
813
+ "lstrip": false,
814
+ "normalized": false,
815
+ "rstrip": false,
816
+ "single_word": false,
817
+ "special": true
818
+ },
819
+ "32099": {
820
+ "content": "<extra_id_0>",
821
+ "lstrip": false,
822
+ "normalized": false,
823
+ "rstrip": false,
824
+ "single_word": false,
825
+ "special": true
826
+ }
827
+ },
828
+ "additional_special_tokens": [
829
+ "<extra_id_0>",
830
+ "<extra_id_1>",
831
+ "<extra_id_2>",
832
+ "<extra_id_3>",
833
+ "<extra_id_4>",
834
+ "<extra_id_5>",
835
+ "<extra_id_6>",
836
+ "<extra_id_7>",
837
+ "<extra_id_8>",
838
+ "<extra_id_9>",
839
+ "<extra_id_10>",
840
+ "<extra_id_11>",
841
+ "<extra_id_12>",
842
+ "<extra_id_13>",
843
+ "<extra_id_14>",
844
+ "<extra_id_15>",
845
+ "<extra_id_16>",
846
+ "<extra_id_17>",
847
+ "<extra_id_18>",
848
+ "<extra_id_19>",
849
+ "<extra_id_20>",
850
+ "<extra_id_21>",
851
+ "<extra_id_22>",
852
+ "<extra_id_23>",
853
+ "<extra_id_24>",
854
+ "<extra_id_25>",
855
+ "<extra_id_26>",
856
+ "<extra_id_27>",
857
+ "<extra_id_28>",
858
+ "<extra_id_29>",
859
+ "<extra_id_30>",
860
+ "<extra_id_31>",
861
+ "<extra_id_32>",
862
+ "<extra_id_33>",
863
+ "<extra_id_34>",
864
+ "<extra_id_35>",
865
+ "<extra_id_36>",
866
+ "<extra_id_37>",
867
+ "<extra_id_38>",
868
+ "<extra_id_39>",
869
+ "<extra_id_40>",
870
+ "<extra_id_41>",
871
+ "<extra_id_42>",
872
+ "<extra_id_43>",
873
+ "<extra_id_44>",
874
+ "<extra_id_45>",
875
+ "<extra_id_46>",
876
+ "<extra_id_47>",
877
+ "<extra_id_48>",
878
+ "<extra_id_49>",
879
+ "<extra_id_50>",
880
+ "<extra_id_51>",
881
+ "<extra_id_52>",
882
+ "<extra_id_53>",
883
+ "<extra_id_54>",
884
+ "<extra_id_55>",
885
+ "<extra_id_56>",
886
+ "<extra_id_57>",
887
+ "<extra_id_58>",
888
+ "<extra_id_59>",
889
+ "<extra_id_60>",
890
+ "<extra_id_61>",
891
+ "<extra_id_62>",
892
+ "<extra_id_63>",
893
+ "<extra_id_64>",
894
+ "<extra_id_65>",
895
+ "<extra_id_66>",
896
+ "<extra_id_67>",
897
+ "<extra_id_68>",
898
+ "<extra_id_69>",
899
+ "<extra_id_70>",
900
+ "<extra_id_71>",
901
+ "<extra_id_72>",
902
+ "<extra_id_73>",
903
+ "<extra_id_74>",
904
+ "<extra_id_75>",
905
+ "<extra_id_76>",
906
+ "<extra_id_77>",
907
+ "<extra_id_78>",
908
+ "<extra_id_79>",
909
+ "<extra_id_80>",
910
+ "<extra_id_81>",
911
+ "<extra_id_82>",
912
+ "<extra_id_83>",
913
+ "<extra_id_84>",
914
+ "<extra_id_85>",
915
+ "<extra_id_86>",
916
+ "<extra_id_87>",
917
+ "<extra_id_88>",
918
+ "<extra_id_89>",
919
+ "<extra_id_90>",
920
+ "<extra_id_91>",
921
+ "<extra_id_92>",
922
+ "<extra_id_93>",
923
+ "<extra_id_94>",
924
+ "<extra_id_95>",
925
+ "<extra_id_96>",
926
+ "<extra_id_97>",
927
+ "<extra_id_98>",
928
+ "<extra_id_99>"
929
+ ],
930
+ "clean_up_tokenization_spaces": true,
931
+ "eos_token": "</s>",
932
+ "extra_ids": 100,
933
+ "model_max_length": 512,
934
+ "pad_token": "<pad>",
935
+ "sp_model_kwargs": {},
936
+ "tokenizer_class": "T5Tokenizer",
937
+ "unk_token": "<unk>"
938
+ }
google/flan_t5_base_ledgar/checkpoint-2800/trainer_state.json ADDED
@@ -0,0 +1,1113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.5091982483863831,
3
+ "best_model_checkpoint": "../../experiments_checkpoints/MAdAiLab/google/flan_t5_base_ledgar/checkpoint-2800",
4
+ "epoch": 2.9850746268656714,
5
+ "eval_steps": 100,
6
+ "global_step": 2800,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.03,
13
+ "grad_norm": 3.0375444889068604,
14
+ "learning_rate": 0.0004955579246624023,
15
+ "loss": 3.9847,
16
+ "step": 25
17
+ },
18
+ {
19
+ "epoch": 0.05,
20
+ "grad_norm": 3.118692398071289,
21
+ "learning_rate": 0.0004911158493248046,
22
+ "loss": 2.3293,
23
+ "step": 50
24
+ },
25
+ {
26
+ "epoch": 0.08,
27
+ "grad_norm": 2.571154832839966,
28
+ "learning_rate": 0.00048667377398720687,
29
+ "loss": 1.5365,
30
+ "step": 75
31
+ },
32
+ {
33
+ "epoch": 0.11,
34
+ "grad_norm": 3.1888883113861084,
35
+ "learning_rate": 0.0004822316986496091,
36
+ "loss": 1.2824,
37
+ "step": 100
38
+ },
39
+ {
40
+ "epoch": 0.11,
41
+ "eval_accuracy": 0.7464,
42
+ "eval_f1_macro": 0.5723224995560797,
43
+ "eval_f1_micro": 0.7464,
44
+ "eval_loss": 1.0243279933929443,
45
+ "eval_runtime": 28.3848,
46
+ "eval_samples_per_second": 352.301,
47
+ "eval_steps_per_second": 5.531,
48
+ "step": 100
49
+ },
50
+ {
51
+ "epoch": 0.13,
52
+ "grad_norm": 2.843799114227295,
53
+ "learning_rate": 0.0004777896233120114,
54
+ "loss": 1.1305,
55
+ "step": 125
56
+ },
57
+ {
58
+ "epoch": 0.16,
59
+ "grad_norm": 3.0389671325683594,
60
+ "learning_rate": 0.00047334754797441367,
61
+ "loss": 1.0298,
62
+ "step": 150
63
+ },
64
+ {
65
+ "epoch": 0.19,
66
+ "grad_norm": 3.0453531742095947,
67
+ "learning_rate": 0.00046890547263681595,
68
+ "loss": 0.985,
69
+ "step": 175
70
+ },
71
+ {
72
+ "epoch": 0.21,
73
+ "grad_norm": 3.0529351234436035,
74
+ "learning_rate": 0.00046446339729921824,
75
+ "loss": 0.8961,
76
+ "step": 200
77
+ },
78
+ {
79
+ "epoch": 0.21,
80
+ "eval_accuracy": 0.7743,
81
+ "eval_f1_macro": 0.635888783601725,
82
+ "eval_f1_micro": 0.7743,
83
+ "eval_loss": 0.8571656346321106,
84
+ "eval_runtime": 27.7857,
85
+ "eval_samples_per_second": 359.897,
86
+ "eval_steps_per_second": 5.65,
87
+ "step": 200
88
+ },
89
+ {
90
+ "epoch": 0.24,
91
+ "grad_norm": 2.3809685707092285,
92
+ "learning_rate": 0.0004600213219616205,
93
+ "loss": 0.837,
94
+ "step": 225
95
+ },
96
+ {
97
+ "epoch": 0.27,
98
+ "grad_norm": 2.1598992347717285,
99
+ "learning_rate": 0.00045557924662402275,
100
+ "loss": 0.8985,
101
+ "step": 250
102
+ },
103
+ {
104
+ "epoch": 0.29,
105
+ "grad_norm": 2.3083159923553467,
106
+ "learning_rate": 0.000451137171286425,
107
+ "loss": 0.831,
108
+ "step": 275
109
+ },
110
+ {
111
+ "epoch": 0.32,
112
+ "grad_norm": 2.8257880210876465,
113
+ "learning_rate": 0.00044669509594882727,
114
+ "loss": 0.8233,
115
+ "step": 300
116
+ },
117
+ {
118
+ "epoch": 0.32,
119
+ "eval_accuracy": 0.7968,
120
+ "eval_f1_macro": 0.6756189978654267,
121
+ "eval_f1_micro": 0.7968,
122
+ "eval_loss": 0.7788468599319458,
123
+ "eval_runtime": 27.811,
124
+ "eval_samples_per_second": 359.57,
125
+ "eval_steps_per_second": 5.645,
126
+ "step": 300
127
+ },
128
+ {
129
+ "epoch": 0.35,
130
+ "grad_norm": 1.9764364957809448,
131
+ "learning_rate": 0.00044225302061122956,
132
+ "loss": 0.8006,
133
+ "step": 325
134
+ },
135
+ {
136
+ "epoch": 0.37,
137
+ "grad_norm": 2.6898722648620605,
138
+ "learning_rate": 0.00043781094527363184,
139
+ "loss": 0.8076,
140
+ "step": 350
141
+ },
142
+ {
143
+ "epoch": 0.4,
144
+ "grad_norm": 3.589167594909668,
145
+ "learning_rate": 0.0004333688699360341,
146
+ "loss": 0.7255,
147
+ "step": 375
148
+ },
149
+ {
150
+ "epoch": 0.43,
151
+ "grad_norm": 2.5616493225097656,
152
+ "learning_rate": 0.0004289267945984364,
153
+ "loss": 0.7722,
154
+ "step": 400
155
+ },
156
+ {
157
+ "epoch": 0.43,
158
+ "eval_accuracy": 0.8014,
159
+ "eval_f1_macro": 0.668886210908163,
160
+ "eval_f1_micro": 0.8014,
161
+ "eval_loss": 0.7432093024253845,
162
+ "eval_runtime": 27.5834,
163
+ "eval_samples_per_second": 362.537,
164
+ "eval_steps_per_second": 5.692,
165
+ "step": 400
166
+ },
167
+ {
168
+ "epoch": 0.45,
169
+ "grad_norm": 2.7185428142547607,
170
+ "learning_rate": 0.00042448471926083864,
171
+ "loss": 0.7946,
172
+ "step": 425
173
+ },
174
+ {
175
+ "epoch": 0.48,
176
+ "grad_norm": 2.6401193141937256,
177
+ "learning_rate": 0.00042004264392324093,
178
+ "loss": 0.854,
179
+ "step": 450
180
+ },
181
+ {
182
+ "epoch": 0.51,
183
+ "grad_norm": 2.0926613807678223,
184
+ "learning_rate": 0.0004156005685856432,
185
+ "loss": 0.7647,
186
+ "step": 475
187
+ },
188
+ {
189
+ "epoch": 0.53,
190
+ "grad_norm": 4.611965656280518,
191
+ "learning_rate": 0.0004111584932480455,
192
+ "loss": 0.7739,
193
+ "step": 500
194
+ },
195
+ {
196
+ "epoch": 0.53,
197
+ "eval_accuracy": 0.8135,
198
+ "eval_f1_macro": 0.7020159105418678,
199
+ "eval_f1_micro": 0.8135,
200
+ "eval_loss": 0.6933461427688599,
201
+ "eval_runtime": 27.6954,
202
+ "eval_samples_per_second": 361.071,
203
+ "eval_steps_per_second": 5.669,
204
+ "step": 500
205
+ },
206
+ {
207
+ "epoch": 0.56,
208
+ "grad_norm": 1.9536117315292358,
209
+ "learning_rate": 0.0004067164179104478,
210
+ "loss": 0.7353,
211
+ "step": 525
212
+ },
213
+ {
214
+ "epoch": 0.59,
215
+ "grad_norm": 1.7874054908752441,
216
+ "learning_rate": 0.00040227434257285007,
217
+ "loss": 0.7235,
218
+ "step": 550
219
+ },
220
+ {
221
+ "epoch": 0.61,
222
+ "grad_norm": 3.6973233222961426,
223
+ "learning_rate": 0.00039783226723525235,
224
+ "loss": 0.6628,
225
+ "step": 575
226
+ },
227
+ {
228
+ "epoch": 0.64,
229
+ "grad_norm": 2.1840732097625732,
230
+ "learning_rate": 0.0003933901918976546,
231
+ "loss": 0.7435,
232
+ "step": 600
233
+ },
234
+ {
235
+ "epoch": 0.64,
236
+ "eval_accuracy": 0.8137,
237
+ "eval_f1_macro": 0.70281855085721,
238
+ "eval_f1_micro": 0.8137,
239
+ "eval_loss": 0.6870977282524109,
240
+ "eval_runtime": 27.6429,
241
+ "eval_samples_per_second": 361.757,
242
+ "eval_steps_per_second": 5.68,
243
+ "step": 600
244
+ },
245
+ {
246
+ "epoch": 0.67,
247
+ "grad_norm": 2.505772829055786,
248
+ "learning_rate": 0.00038894811656005687,
249
+ "loss": 0.7198,
250
+ "step": 625
251
+ },
252
+ {
253
+ "epoch": 0.69,
254
+ "grad_norm": 2.3095903396606445,
255
+ "learning_rate": 0.00038450604122245916,
256
+ "loss": 0.7184,
257
+ "step": 650
258
+ },
259
+ {
260
+ "epoch": 0.72,
261
+ "grad_norm": 2.0681698322296143,
262
+ "learning_rate": 0.00038006396588486144,
263
+ "loss": 0.7438,
264
+ "step": 675
265
+ },
266
+ {
267
+ "epoch": 0.75,
268
+ "grad_norm": 2.28863525390625,
269
+ "learning_rate": 0.0003756218905472637,
270
+ "loss": 0.6877,
271
+ "step": 700
272
+ },
273
+ {
274
+ "epoch": 0.75,
275
+ "eval_accuracy": 0.8169,
276
+ "eval_f1_macro": 0.7012014533173265,
277
+ "eval_f1_micro": 0.8169,
278
+ "eval_loss": 0.6751404404640198,
279
+ "eval_runtime": 27.5466,
280
+ "eval_samples_per_second": 363.021,
281
+ "eval_steps_per_second": 5.699,
282
+ "step": 700
283
+ },
284
+ {
285
+ "epoch": 0.77,
286
+ "grad_norm": 2.2676467895507812,
287
+ "learning_rate": 0.00037117981520966596,
288
+ "loss": 0.6665,
289
+ "step": 725
290
+ },
291
+ {
292
+ "epoch": 0.8,
293
+ "grad_norm": 2.371417999267578,
294
+ "learning_rate": 0.00036673773987206824,
295
+ "loss": 0.7411,
296
+ "step": 750
297
+ },
298
+ {
299
+ "epoch": 0.83,
300
+ "grad_norm": 2.0145998001098633,
301
+ "learning_rate": 0.0003622956645344705,
302
+ "loss": 0.6908,
303
+ "step": 775
304
+ },
305
+ {
306
+ "epoch": 0.85,
307
+ "grad_norm": 1.5675983428955078,
308
+ "learning_rate": 0.00035785358919687276,
309
+ "loss": 0.6968,
310
+ "step": 800
311
+ },
312
+ {
313
+ "epoch": 0.85,
314
+ "eval_accuracy": 0.8225,
315
+ "eval_f1_macro": 0.7248847218448246,
316
+ "eval_f1_micro": 0.8225,
317
+ "eval_loss": 0.647091805934906,
318
+ "eval_runtime": 27.597,
319
+ "eval_samples_per_second": 362.358,
320
+ "eval_steps_per_second": 5.689,
321
+ "step": 800
322
+ },
323
+ {
324
+ "epoch": 0.88,
325
+ "grad_norm": 3.181079864501953,
326
+ "learning_rate": 0.00035341151385927504,
327
+ "loss": 0.7083,
328
+ "step": 825
329
+ },
330
+ {
331
+ "epoch": 0.91,
332
+ "grad_norm": 1.5370581150054932,
333
+ "learning_rate": 0.00034896943852167733,
334
+ "loss": 0.6106,
335
+ "step": 850
336
+ },
337
+ {
338
+ "epoch": 0.93,
339
+ "grad_norm": 2.5065078735351562,
340
+ "learning_rate": 0.0003445273631840796,
341
+ "loss": 0.643,
342
+ "step": 875
343
+ },
344
+ {
345
+ "epoch": 0.96,
346
+ "grad_norm": 2.2463412284851074,
347
+ "learning_rate": 0.0003400852878464819,
348
+ "loss": 0.6218,
349
+ "step": 900
350
+ },
351
+ {
352
+ "epoch": 0.96,
353
+ "eval_accuracy": 0.8219,
354
+ "eval_f1_macro": 0.7263716048034571,
355
+ "eval_f1_micro": 0.8219,
356
+ "eval_loss": 0.6366150379180908,
357
+ "eval_runtime": 27.5858,
358
+ "eval_samples_per_second": 362.506,
359
+ "eval_steps_per_second": 5.691,
360
+ "step": 900
361
+ },
362
+ {
363
+ "epoch": 0.99,
364
+ "grad_norm": 2.3242409229278564,
365
+ "learning_rate": 0.00033564321250888413,
366
+ "loss": 0.6641,
367
+ "step": 925
368
+ },
369
+ {
370
+ "epoch": 1.01,
371
+ "grad_norm": 1.760398507118225,
372
+ "learning_rate": 0.0003312011371712864,
373
+ "loss": 0.5893,
374
+ "step": 950
375
+ },
376
+ {
377
+ "epoch": 1.04,
378
+ "grad_norm": 2.2009265422821045,
379
+ "learning_rate": 0.0003267590618336887,
380
+ "loss": 0.5553,
381
+ "step": 975
382
+ },
383
+ {
384
+ "epoch": 1.07,
385
+ "grad_norm": 1.6668460369110107,
386
+ "learning_rate": 0.000322316986496091,
387
+ "loss": 0.5101,
388
+ "step": 1000
389
+ },
390
+ {
391
+ "epoch": 1.07,
392
+ "eval_accuracy": 0.8378,
393
+ "eval_f1_macro": 0.7387558451003193,
394
+ "eval_f1_micro": 0.8378,
395
+ "eval_loss": 0.6130378842353821,
396
+ "eval_runtime": 27.6606,
397
+ "eval_samples_per_second": 361.525,
398
+ "eval_steps_per_second": 5.676,
399
+ "step": 1000
400
+ },
401
+ {
402
+ "epoch": 1.09,
403
+ "grad_norm": 1.8703542947769165,
404
+ "learning_rate": 0.00031787491115849327,
405
+ "loss": 0.5136,
406
+ "step": 1025
407
+ },
408
+ {
409
+ "epoch": 1.12,
410
+ "grad_norm": 1.9426932334899902,
411
+ "learning_rate": 0.00031343283582089556,
412
+ "loss": 0.509,
413
+ "step": 1050
414
+ },
415
+ {
416
+ "epoch": 1.15,
417
+ "grad_norm": 1.8860669136047363,
418
+ "learning_rate": 0.00030899076048329784,
419
+ "loss": 0.5581,
420
+ "step": 1075
421
+ },
422
+ {
423
+ "epoch": 1.17,
424
+ "grad_norm": 2.720142126083374,
425
+ "learning_rate": 0.0003045486851457001,
426
+ "loss": 0.5411,
427
+ "step": 1100
428
+ },
429
+ {
430
+ "epoch": 1.17,
431
+ "eval_accuracy": 0.8375,
432
+ "eval_f1_macro": 0.7392131840260611,
433
+ "eval_f1_micro": 0.8375,
434
+ "eval_loss": 0.6088192462921143,
435
+ "eval_runtime": 27.6613,
436
+ "eval_samples_per_second": 361.515,
437
+ "eval_steps_per_second": 5.676,
438
+ "step": 1100
439
+ },
440
+ {
441
+ "epoch": 1.2,
442
+ "grad_norm": 1.402212142944336,
443
+ "learning_rate": 0.00030010660980810236,
444
+ "loss": 0.5344,
445
+ "step": 1125
446
+ },
447
+ {
448
+ "epoch": 1.23,
449
+ "grad_norm": 1.3328239917755127,
450
+ "learning_rate": 0.00029566453447050464,
451
+ "loss": 0.4701,
452
+ "step": 1150
453
+ },
454
+ {
455
+ "epoch": 1.25,
456
+ "grad_norm": 2.079735040664673,
457
+ "learning_rate": 0.0002912224591329069,
458
+ "loss": 0.4697,
459
+ "step": 1175
460
+ },
461
+ {
462
+ "epoch": 1.28,
463
+ "grad_norm": 1.532960295677185,
464
+ "learning_rate": 0.00028678038379530916,
465
+ "loss": 0.5918,
466
+ "step": 1200
467
+ },
468
+ {
469
+ "epoch": 1.28,
470
+ "eval_accuracy": 0.8449,
471
+ "eval_f1_macro": 0.7576612585996539,
472
+ "eval_f1_micro": 0.8449,
473
+ "eval_loss": 0.5863717198371887,
474
+ "eval_runtime": 27.5864,
475
+ "eval_samples_per_second": 362.498,
476
+ "eval_steps_per_second": 5.691,
477
+ "step": 1200
478
+ },
479
+ {
480
+ "epoch": 1.31,
481
+ "grad_norm": 2.5888640880584717,
482
+ "learning_rate": 0.00028233830845771145,
483
+ "loss": 0.5539,
484
+ "step": 1225
485
+ },
486
+ {
487
+ "epoch": 1.33,
488
+ "grad_norm": 1.938421607017517,
489
+ "learning_rate": 0.00027789623312011373,
490
+ "loss": 0.5477,
491
+ "step": 1250
492
+ },
493
+ {
494
+ "epoch": 1.36,
495
+ "grad_norm": 1.5092904567718506,
496
+ "learning_rate": 0.00027345415778251596,
497
+ "loss": 0.5302,
498
+ "step": 1275
499
+ },
500
+ {
501
+ "epoch": 1.39,
502
+ "grad_norm": 2.320207118988037,
503
+ "learning_rate": 0.00026901208244491825,
504
+ "loss": 0.4785,
505
+ "step": 1300
506
+ },
507
+ {
508
+ "epoch": 1.39,
509
+ "eval_accuracy": 0.8391,
510
+ "eval_f1_macro": 0.7459680921477214,
511
+ "eval_f1_micro": 0.8391,
512
+ "eval_loss": 0.5916772484779358,
513
+ "eval_runtime": 27.5459,
514
+ "eval_samples_per_second": 363.031,
515
+ "eval_steps_per_second": 5.7,
516
+ "step": 1300
517
+ },
518
+ {
519
+ "epoch": 1.41,
520
+ "grad_norm": 1.8500772714614868,
521
+ "learning_rate": 0.00026457000710732053,
522
+ "loss": 0.5136,
523
+ "step": 1325
524
+ },
525
+ {
526
+ "epoch": 1.44,
527
+ "grad_norm": 1.7255891561508179,
528
+ "learning_rate": 0.0002601279317697228,
529
+ "loss": 0.5197,
530
+ "step": 1350
531
+ },
532
+ {
533
+ "epoch": 1.47,
534
+ "grad_norm": 2.2105064392089844,
535
+ "learning_rate": 0.0002556858564321251,
536
+ "loss": 0.4928,
537
+ "step": 1375
538
+ },
539
+ {
540
+ "epoch": 1.49,
541
+ "grad_norm": 1.8511146306991577,
542
+ "learning_rate": 0.0002512437810945274,
543
+ "loss": 0.4372,
544
+ "step": 1400
545
+ },
546
+ {
547
+ "epoch": 1.49,
548
+ "eval_accuracy": 0.8409,
549
+ "eval_f1_macro": 0.756256814875695,
550
+ "eval_f1_micro": 0.8409,
551
+ "eval_loss": 0.5789840221405029,
552
+ "eval_runtime": 27.5648,
553
+ "eval_samples_per_second": 362.781,
554
+ "eval_steps_per_second": 5.696,
555
+ "step": 1400
556
+ },
557
+ {
558
+ "epoch": 1.52,
559
+ "grad_norm": 1.9807904958724976,
560
+ "learning_rate": 0.0002468017057569296,
561
+ "loss": 0.5143,
562
+ "step": 1425
563
+ },
564
+ {
565
+ "epoch": 1.55,
566
+ "grad_norm": 1.6307320594787598,
567
+ "learning_rate": 0.00024235963041933193,
568
+ "loss": 0.5054,
569
+ "step": 1450
570
+ },
571
+ {
572
+ "epoch": 1.57,
573
+ "grad_norm": 1.5893832445144653,
574
+ "learning_rate": 0.0002379175550817342,
575
+ "loss": 0.5472,
576
+ "step": 1475
577
+ },
578
+ {
579
+ "epoch": 1.6,
580
+ "grad_norm": 1.941535234451294,
581
+ "learning_rate": 0.00023347547974413648,
582
+ "loss": 0.5022,
583
+ "step": 1500
584
+ },
585
+ {
586
+ "epoch": 1.6,
587
+ "eval_accuracy": 0.8437,
588
+ "eval_f1_macro": 0.7523793710987492,
589
+ "eval_f1_micro": 0.8437,
590
+ "eval_loss": 0.5867593884468079,
591
+ "eval_runtime": 27.7007,
592
+ "eval_samples_per_second": 361.002,
593
+ "eval_steps_per_second": 5.668,
594
+ "step": 1500
595
+ },
596
+ {
597
+ "epoch": 1.63,
598
+ "grad_norm": 2.6769652366638184,
599
+ "learning_rate": 0.00022903340440653876,
600
+ "loss": 0.4809,
601
+ "step": 1525
602
+ },
603
+ {
604
+ "epoch": 1.65,
605
+ "grad_norm": 1.7916655540466309,
606
+ "learning_rate": 0.000224591329068941,
607
+ "loss": 0.4579,
608
+ "step": 1550
609
+ },
610
+ {
611
+ "epoch": 1.68,
612
+ "grad_norm": 2.0444118976593018,
613
+ "learning_rate": 0.00022014925373134328,
614
+ "loss": 0.4445,
615
+ "step": 1575
616
+ },
617
+ {
618
+ "epoch": 1.71,
619
+ "grad_norm": 1.6492923498153687,
620
+ "learning_rate": 0.00021570717839374556,
621
+ "loss": 0.5391,
622
+ "step": 1600
623
+ },
624
+ {
625
+ "epoch": 1.71,
626
+ "eval_accuracy": 0.8447,
627
+ "eval_f1_macro": 0.7520110229774188,
628
+ "eval_f1_micro": 0.8447,
629
+ "eval_loss": 0.5613449215888977,
630
+ "eval_runtime": 27.5562,
631
+ "eval_samples_per_second": 362.895,
632
+ "eval_steps_per_second": 5.697,
633
+ "step": 1600
634
+ },
635
+ {
636
+ "epoch": 1.73,
637
+ "grad_norm": 2.078364133834839,
638
+ "learning_rate": 0.00021126510305614785,
639
+ "loss": 0.4622,
640
+ "step": 1625
641
+ },
642
+ {
643
+ "epoch": 1.76,
644
+ "grad_norm": 1.9655555486679077,
645
+ "learning_rate": 0.0002068230277185501,
646
+ "loss": 0.4562,
647
+ "step": 1650
648
+ },
649
+ {
650
+ "epoch": 1.79,
651
+ "grad_norm": 1.832479476928711,
652
+ "learning_rate": 0.0002023809523809524,
653
+ "loss": 0.4593,
654
+ "step": 1675
655
+ },
656
+ {
657
+ "epoch": 1.81,
658
+ "grad_norm": 2.659235715866089,
659
+ "learning_rate": 0.00019793887704335468,
660
+ "loss": 0.4971,
661
+ "step": 1700
662
+ },
663
+ {
664
+ "epoch": 1.81,
665
+ "eval_accuracy": 0.8545,
666
+ "eval_f1_macro": 0.7702427494125708,
667
+ "eval_f1_micro": 0.8545,
668
+ "eval_loss": 0.5466107130050659,
669
+ "eval_runtime": 27.6669,
670
+ "eval_samples_per_second": 361.443,
671
+ "eval_steps_per_second": 5.675,
672
+ "step": 1700
673
+ },
674
+ {
675
+ "epoch": 1.84,
676
+ "grad_norm": 2.2711193561553955,
677
+ "learning_rate": 0.00019349680170575694,
678
+ "loss": 0.532,
679
+ "step": 1725
680
+ },
681
+ {
682
+ "epoch": 1.87,
683
+ "grad_norm": 2.0986814498901367,
684
+ "learning_rate": 0.00018905472636815922,
685
+ "loss": 0.4538,
686
+ "step": 1750
687
+ },
688
+ {
689
+ "epoch": 1.89,
690
+ "grad_norm": 1.9741218090057373,
691
+ "learning_rate": 0.00018461265103056148,
692
+ "loss": 0.5076,
693
+ "step": 1775
694
+ },
695
+ {
696
+ "epoch": 1.92,
697
+ "grad_norm": 2.6759016513824463,
698
+ "learning_rate": 0.00018017057569296374,
699
+ "loss": 0.4686,
700
+ "step": 1800
701
+ },
702
+ {
703
+ "epoch": 1.92,
704
+ "eval_accuracy": 0.8566,
705
+ "eval_f1_macro": 0.7773748377033531,
706
+ "eval_f1_micro": 0.8566,
707
+ "eval_loss": 0.5260170102119446,
708
+ "eval_runtime": 27.7008,
709
+ "eval_samples_per_second": 361.001,
710
+ "eval_steps_per_second": 5.668,
711
+ "step": 1800
712
+ },
713
+ {
714
+ "epoch": 1.95,
715
+ "grad_norm": 1.6573829650878906,
716
+ "learning_rate": 0.00017572850035536602,
717
+ "loss": 0.5213,
718
+ "step": 1825
719
+ },
720
+ {
721
+ "epoch": 1.97,
722
+ "grad_norm": 2.535517930984497,
723
+ "learning_rate": 0.0001712864250177683,
724
+ "loss": 0.4897,
725
+ "step": 1850
726
+ },
727
+ {
728
+ "epoch": 2.0,
729
+ "grad_norm": 2.5402016639709473,
730
+ "learning_rate": 0.0001668443496801706,
731
+ "loss": 0.4166,
732
+ "step": 1875
733
+ },
734
+ {
735
+ "epoch": 2.03,
736
+ "grad_norm": 1.4277065992355347,
737
+ "learning_rate": 0.00016240227434257285,
738
+ "loss": 0.349,
739
+ "step": 1900
740
+ },
741
+ {
742
+ "epoch": 2.03,
743
+ "eval_accuracy": 0.8565,
744
+ "eval_f1_macro": 0.7721481293624487,
745
+ "eval_f1_micro": 0.8565,
746
+ "eval_loss": 0.5416210889816284,
747
+ "eval_runtime": 27.6604,
748
+ "eval_samples_per_second": 361.527,
749
+ "eval_steps_per_second": 5.676,
750
+ "step": 1900
751
+ },
752
+ {
753
+ "epoch": 2.05,
754
+ "grad_norm": 1.4561047554016113,
755
+ "learning_rate": 0.00015796019900497514,
756
+ "loss": 0.3468,
757
+ "step": 1925
758
+ },
759
+ {
760
+ "epoch": 2.08,
761
+ "grad_norm": 1.3793047666549683,
762
+ "learning_rate": 0.00015351812366737742,
763
+ "loss": 0.3668,
764
+ "step": 1950
765
+ },
766
+ {
767
+ "epoch": 2.11,
768
+ "grad_norm": 2.6919455528259277,
769
+ "learning_rate": 0.00014907604832977968,
770
+ "loss": 0.3815,
771
+ "step": 1975
772
+ },
773
+ {
774
+ "epoch": 2.13,
775
+ "grad_norm": 1.4338033199310303,
776
+ "learning_rate": 0.00014463397299218194,
777
+ "loss": 0.3216,
778
+ "step": 2000
779
+ },
780
+ {
781
+ "epoch": 2.13,
782
+ "eval_accuracy": 0.8565,
783
+ "eval_f1_macro": 0.7762571037204617,
784
+ "eval_f1_micro": 0.8565,
785
+ "eval_loss": 0.5441261529922485,
786
+ "eval_runtime": 27.678,
787
+ "eval_samples_per_second": 361.298,
788
+ "eval_steps_per_second": 5.672,
789
+ "step": 2000
790
+ },
791
+ {
792
+ "epoch": 2.16,
793
+ "grad_norm": 2.7009971141815186,
794
+ "learning_rate": 0.00014019189765458422,
795
+ "loss": 0.3376,
796
+ "step": 2025
797
+ },
798
+ {
799
+ "epoch": 2.19,
800
+ "grad_norm": 1.5223674774169922,
801
+ "learning_rate": 0.00013574982231698648,
802
+ "loss": 0.3645,
803
+ "step": 2050
804
+ },
805
+ {
806
+ "epoch": 2.21,
807
+ "grad_norm": 1.8355497121810913,
808
+ "learning_rate": 0.00013130774697938877,
809
+ "loss": 0.3396,
810
+ "step": 2075
811
+ },
812
+ {
813
+ "epoch": 2.24,
814
+ "grad_norm": 2.0202701091766357,
815
+ "learning_rate": 0.00012686567164179105,
816
+ "loss": 0.3531,
817
+ "step": 2100
818
+ },
819
+ {
820
+ "epoch": 2.24,
821
+ "eval_accuracy": 0.8553,
822
+ "eval_f1_macro": 0.7752625760775129,
823
+ "eval_f1_micro": 0.8553,
824
+ "eval_loss": 0.5443636775016785,
825
+ "eval_runtime": 27.6999,
826
+ "eval_samples_per_second": 361.012,
827
+ "eval_steps_per_second": 5.668,
828
+ "step": 2100
829
+ },
830
+ {
831
+ "epoch": 2.27,
832
+ "grad_norm": 1.3385556936264038,
833
+ "learning_rate": 0.0001224235963041933,
834
+ "loss": 0.3412,
835
+ "step": 2125
836
+ },
837
+ {
838
+ "epoch": 2.29,
839
+ "grad_norm": 9.454978942871094,
840
+ "learning_rate": 0.0001179815209665956,
841
+ "loss": 0.3046,
842
+ "step": 2150
843
+ },
844
+ {
845
+ "epoch": 2.32,
846
+ "grad_norm": 0.8032639622688293,
847
+ "learning_rate": 0.00011353944562899787,
848
+ "loss": 0.2839,
849
+ "step": 2175
850
+ },
851
+ {
852
+ "epoch": 2.35,
853
+ "grad_norm": 2.128880262374878,
854
+ "learning_rate": 0.00010909737029140014,
855
+ "loss": 0.3276,
856
+ "step": 2200
857
+ },
858
+ {
859
+ "epoch": 2.35,
860
+ "eval_accuracy": 0.8604,
861
+ "eval_f1_macro": 0.7795999632093279,
862
+ "eval_f1_micro": 0.8604,
863
+ "eval_loss": 0.5380069613456726,
864
+ "eval_runtime": 27.6479,
865
+ "eval_samples_per_second": 361.691,
866
+ "eval_steps_per_second": 5.679,
867
+ "step": 2200
868
+ },
869
+ {
870
+ "epoch": 2.37,
871
+ "grad_norm": 1.680617094039917,
872
+ "learning_rate": 0.00010465529495380242,
873
+ "loss": 0.2938,
874
+ "step": 2225
875
+ },
876
+ {
877
+ "epoch": 2.4,
878
+ "grad_norm": 2.078551769256592,
879
+ "learning_rate": 0.0001002132196162047,
880
+ "loss": 0.3324,
881
+ "step": 2250
882
+ },
883
+ {
884
+ "epoch": 2.43,
885
+ "grad_norm": 1.5513458251953125,
886
+ "learning_rate": 9.577114427860697e-05,
887
+ "loss": 0.3228,
888
+ "step": 2275
889
+ },
890
+ {
891
+ "epoch": 2.45,
892
+ "grad_norm": 1.6078649759292603,
893
+ "learning_rate": 9.132906894100924e-05,
894
+ "loss": 0.3372,
895
+ "step": 2300
896
+ },
897
+ {
898
+ "epoch": 2.45,
899
+ "eval_accuracy": 0.8634,
900
+ "eval_f1_macro": 0.7830890796393112,
901
+ "eval_f1_micro": 0.8634,
902
+ "eval_loss": 0.5231262445449829,
903
+ "eval_runtime": 27.5435,
904
+ "eval_samples_per_second": 363.062,
905
+ "eval_steps_per_second": 5.7,
906
+ "step": 2300
907
+ },
908
+ {
909
+ "epoch": 2.48,
910
+ "grad_norm": 2.382270336151123,
911
+ "learning_rate": 8.688699360341151e-05,
912
+ "loss": 0.3475,
913
+ "step": 2325
914
+ },
915
+ {
916
+ "epoch": 2.51,
917
+ "grad_norm": 1.841912865638733,
918
+ "learning_rate": 8.24449182658138e-05,
919
+ "loss": 0.3141,
920
+ "step": 2350
921
+ },
922
+ {
923
+ "epoch": 2.53,
924
+ "grad_norm": 1.8098047971725464,
925
+ "learning_rate": 7.800284292821607e-05,
926
+ "loss": 0.3318,
927
+ "step": 2375
928
+ },
929
+ {
930
+ "epoch": 2.56,
931
+ "grad_norm": 1.394537329673767,
932
+ "learning_rate": 7.356076759061834e-05,
933
+ "loss": 0.3227,
934
+ "step": 2400
935
+ },
936
+ {
937
+ "epoch": 2.56,
938
+ "eval_accuracy": 0.8651,
939
+ "eval_f1_macro": 0.7871992688610419,
940
+ "eval_f1_micro": 0.8651,
941
+ "eval_loss": 0.5210258364677429,
942
+ "eval_runtime": 27.5907,
943
+ "eval_samples_per_second": 362.441,
944
+ "eval_steps_per_second": 5.69,
945
+ "step": 2400
946
+ },
947
+ {
948
+ "epoch": 2.59,
949
+ "grad_norm": 1.7788827419281006,
950
+ "learning_rate": 6.911869225302061e-05,
951
+ "loss": 0.3696,
952
+ "step": 2425
953
+ },
954
+ {
955
+ "epoch": 2.61,
956
+ "grad_norm": 1.6976556777954102,
957
+ "learning_rate": 6.467661691542288e-05,
958
+ "loss": 0.3437,
959
+ "step": 2450
960
+ },
961
+ {
962
+ "epoch": 2.64,
963
+ "grad_norm": 1.5766022205352783,
964
+ "learning_rate": 6.023454157782516e-05,
965
+ "loss": 0.3127,
966
+ "step": 2475
967
+ },
968
+ {
969
+ "epoch": 2.67,
970
+ "grad_norm": 2.058786630630493,
971
+ "learning_rate": 5.579246624022743e-05,
972
+ "loss": 0.2987,
973
+ "step": 2500
974
+ },
975
+ {
976
+ "epoch": 2.67,
977
+ "eval_accuracy": 0.8665,
978
+ "eval_f1_macro": 0.7909555456783894,
979
+ "eval_f1_micro": 0.8665,
980
+ "eval_loss": 0.5188292860984802,
981
+ "eval_runtime": 27.7075,
982
+ "eval_samples_per_second": 360.914,
983
+ "eval_steps_per_second": 5.666,
984
+ "step": 2500
985
+ },
986
+ {
987
+ "epoch": 2.69,
988
+ "grad_norm": 1.9031825065612793,
989
+ "learning_rate": 5.135039090262971e-05,
990
+ "loss": 0.3398,
991
+ "step": 2525
992
+ },
993
+ {
994
+ "epoch": 2.72,
995
+ "grad_norm": 2.6174018383026123,
996
+ "learning_rate": 4.690831556503199e-05,
997
+ "loss": 0.3067,
998
+ "step": 2550
999
+ },
1000
+ {
1001
+ "epoch": 2.75,
1002
+ "grad_norm": 2.1308956146240234,
1003
+ "learning_rate": 4.2466240227434255e-05,
1004
+ "loss": 0.32,
1005
+ "step": 2575
1006
+ },
1007
+ {
1008
+ "epoch": 2.77,
1009
+ "grad_norm": 1.593653678894043,
1010
+ "learning_rate": 3.802416488983653e-05,
1011
+ "loss": 0.3354,
1012
+ "step": 2600
1013
+ },
1014
+ {
1015
+ "epoch": 2.77,
1016
+ "eval_accuracy": 0.8666,
1017
+ "eval_f1_macro": 0.7931215028367249,
1018
+ "eval_f1_micro": 0.8666,
1019
+ "eval_loss": 0.5149548649787903,
1020
+ "eval_runtime": 27.5387,
1021
+ "eval_samples_per_second": 363.125,
1022
+ "eval_steps_per_second": 5.701,
1023
+ "step": 2600
1024
+ },
1025
+ {
1026
+ "epoch": 2.8,
1027
+ "grad_norm": 2.0844106674194336,
1028
+ "learning_rate": 3.3582089552238805e-05,
1029
+ "loss": 0.3017,
1030
+ "step": 2625
1031
+ },
1032
+ {
1033
+ "epoch": 2.83,
1034
+ "grad_norm": 2.6212213039398193,
1035
+ "learning_rate": 2.9140014214641083e-05,
1036
+ "loss": 0.3661,
1037
+ "step": 2650
1038
+ },
1039
+ {
1040
+ "epoch": 2.85,
1041
+ "grad_norm": 1.6480947732925415,
1042
+ "learning_rate": 2.4697938877043355e-05,
1043
+ "loss": 0.3467,
1044
+ "step": 2675
1045
+ },
1046
+ {
1047
+ "epoch": 2.88,
1048
+ "grad_norm": 1.996435284614563,
1049
+ "learning_rate": 2.025586353944563e-05,
1050
+ "loss": 0.3103,
1051
+ "step": 2700
1052
+ },
1053
+ {
1054
+ "epoch": 2.88,
1055
+ "eval_accuracy": 0.8681,
1056
+ "eval_f1_macro": 0.7942260448694155,
1057
+ "eval_f1_micro": 0.8681,
1058
+ "eval_loss": 0.5103040933609009,
1059
+ "eval_runtime": 27.6127,
1060
+ "eval_samples_per_second": 362.153,
1061
+ "eval_steps_per_second": 5.686,
1062
+ "step": 2700
1063
+ },
1064
+ {
1065
+ "epoch": 2.91,
1066
+ "grad_norm": 1.096625566482544,
1067
+ "learning_rate": 1.5813788201847902e-05,
1068
+ "loss": 0.3018,
1069
+ "step": 2725
1070
+ },
1071
+ {
1072
+ "epoch": 2.93,
1073
+ "grad_norm": 1.7524516582489014,
1074
+ "learning_rate": 1.1371712864250177e-05,
1075
+ "loss": 0.3169,
1076
+ "step": 2750
1077
+ },
1078
+ {
1079
+ "epoch": 2.96,
1080
+ "grad_norm": 2.6799604892730713,
1081
+ "learning_rate": 6.929637526652452e-06,
1082
+ "loss": 0.2976,
1083
+ "step": 2775
1084
+ },
1085
+ {
1086
+ "epoch": 2.99,
1087
+ "grad_norm": 1.9697155952453613,
1088
+ "learning_rate": 2.4875621890547264e-06,
1089
+ "loss": 0.3248,
1090
+ "step": 2800
1091
+ },
1092
+ {
1093
+ "epoch": 2.99,
1094
+ "eval_accuracy": 0.8685,
1095
+ "eval_f1_macro": 0.7955270719340626,
1096
+ "eval_f1_micro": 0.8685,
1097
+ "eval_loss": 0.5091982483863831,
1098
+ "eval_runtime": 27.6473,
1099
+ "eval_samples_per_second": 361.699,
1100
+ "eval_steps_per_second": 5.679,
1101
+ "step": 2800
1102
+ }
1103
+ ],
1104
+ "logging_steps": 25,
1105
+ "max_steps": 2814,
1106
+ "num_input_tokens_seen": 0,
1107
+ "num_train_epochs": 3,
1108
+ "save_steps": 100,
1109
+ "total_flos": 2.737317734462259e+16,
1110
+ "train_batch_size": 32,
1111
+ "trial_name": null,
1112
+ "trial_params": null
1113
+ }
google/flan_t5_base_ledgar/checkpoint-2800/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35a9f1d6602900782940561e0745f30feabd929d78d64e7b0b2b48682643bf8b
3
+ size 5048
google/flan_t5_base_ledgar/config.json ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/flan-t5-base",
3
+ "architectures": [
4
+ "T5ForSequenceClassification"
5
+ ],
6
+ "classifier_dropout": 0.0,
7
+ "d_ff": 2048,
8
+ "d_kv": 64,
9
+ "d_model": 768,
10
+ "decoder_start_token_id": 0,
11
+ "dense_act_fn": "gelu_new",
12
+ "dropout_rate": 0.1,
13
+ "eos_token_id": 1,
14
+ "feed_forward_proj": "gated-gelu",
15
+ "finetuning_task": "text-classification",
16
+ "id2label": {
17
+ "0": "0",
18
+ "1": "1",
19
+ "2": "10",
20
+ "3": "11",
21
+ "4": "12",
22
+ "5": "13",
23
+ "6": "14",
24
+ "7": "15",
25
+ "8": "16",
26
+ "9": "17",
27
+ "10": "18",
28
+ "11": "19",
29
+ "12": "2",
30
+ "13": "20",
31
+ "14": "21",
32
+ "15": "22",
33
+ "16": "23",
34
+ "17": "24",
35
+ "18": "25",
36
+ "19": "26",
37
+ "20": "27",
38
+ "21": "28",
39
+ "22": "29",
40
+ "23": "3",
41
+ "24": "30",
42
+ "25": "31",
43
+ "26": "32",
44
+ "27": "33",
45
+ "28": "34",
46
+ "29": "35",
47
+ "30": "36",
48
+ "31": "37",
49
+ "32": "38",
50
+ "33": "39",
51
+ "34": "4",
52
+ "35": "40",
53
+ "36": "41",
54
+ "37": "42",
55
+ "38": "43",
56
+ "39": "44",
57
+ "40": "45",
58
+ "41": "46",
59
+ "42": "47",
60
+ "43": "48",
61
+ "44": "49",
62
+ "45": "5",
63
+ "46": "50",
64
+ "47": "51",
65
+ "48": "52",
66
+ "49": "53",
67
+ "50": "54",
68
+ "51": "55",
69
+ "52": "56",
70
+ "53": "57",
71
+ "54": "58",
72
+ "55": "59",
73
+ "56": "6",
74
+ "57": "60",
75
+ "58": "61",
76
+ "59": "62",
77
+ "60": "63",
78
+ "61": "64",
79
+ "62": "65",
80
+ "63": "66",
81
+ "64": "67",
82
+ "65": "68",
83
+ "66": "69",
84
+ "67": "7",
85
+ "68": "70",
86
+ "69": "71",
87
+ "70": "72",
88
+ "71": "73",
89
+ "72": "74",
90
+ "73": "75",
91
+ "74": "76",
92
+ "75": "77",
93
+ "76": "78",
94
+ "77": "79",
95
+ "78": "8",
96
+ "79": "80",
97
+ "80": "81",
98
+ "81": "82",
99
+ "82": "83",
100
+ "83": "84",
101
+ "84": "85",
102
+ "85": "86",
103
+ "86": "87",
104
+ "87": "88",
105
+ "88": "89",
106
+ "89": "9",
107
+ "90": "90",
108
+ "91": "91",
109
+ "92": "92",
110
+ "93": "93",
111
+ "94": "94",
112
+ "95": "95",
113
+ "96": "96",
114
+ "97": "97",
115
+ "98": "98",
116
+ "99": "99"
117
+ },
118
+ "initializer_factor": 1.0,
119
+ "is_encoder_decoder": true,
120
+ "is_gated_act": true,
121
+ "label2id": {
122
+ "0": 0,
123
+ "1": 1,
124
+ "10": 2,
125
+ "11": 3,
126
+ "12": 4,
127
+ "13": 5,
128
+ "14": 6,
129
+ "15": 7,
130
+ "16": 8,
131
+ "17": 9,
132
+ "18": 10,
133
+ "19": 11,
134
+ "2": 12,
135
+ "20": 13,
136
+ "21": 14,
137
+ "22": 15,
138
+ "23": 16,
139
+ "24": 17,
140
+ "25": 18,
141
+ "26": 19,
142
+ "27": 20,
143
+ "28": 21,
144
+ "29": 22,
145
+ "3": 23,
146
+ "30": 24,
147
+ "31": 25,
148
+ "32": 26,
149
+ "33": 27,
150
+ "34": 28,
151
+ "35": 29,
152
+ "36": 30,
153
+ "37": 31,
154
+ "38": 32,
155
+ "39": 33,
156
+ "4": 34,
157
+ "40": 35,
158
+ "41": 36,
159
+ "42": 37,
160
+ "43": 38,
161
+ "44": 39,
162
+ "45": 40,
163
+ "46": 41,
164
+ "47": 42,
165
+ "48": 43,
166
+ "49": 44,
167
+ "5": 45,
168
+ "50": 46,
169
+ "51": 47,
170
+ "52": 48,
171
+ "53": 49,
172
+ "54": 50,
173
+ "55": 51,
174
+ "56": 52,
175
+ "57": 53,
176
+ "58": 54,
177
+ "59": 55,
178
+ "6": 56,
179
+ "60": 57,
180
+ "61": 58,
181
+ "62": 59,
182
+ "63": 60,
183
+ "64": 61,
184
+ "65": 62,
185
+ "66": 63,
186
+ "67": 64,
187
+ "68": 65,
188
+ "69": 66,
189
+ "7": 67,
190
+ "70": 68,
191
+ "71": 69,
192
+ "72": 70,
193
+ "73": 71,
194
+ "74": 72,
195
+ "75": 73,
196
+ "76": 74,
197
+ "77": 75,
198
+ "78": 76,
199
+ "79": 77,
200
+ "8": 78,
201
+ "80": 79,
202
+ "81": 80,
203
+ "82": 81,
204
+ "83": 82,
205
+ "84": 83,
206
+ "85": 84,
207
+ "86": 85,
208
+ "87": 86,
209
+ "88": 87,
210
+ "89": 88,
211
+ "9": 89,
212
+ "90": 90,
213
+ "91": 91,
214
+ "92": 92,
215
+ "93": 93,
216
+ "94": 94,
217
+ "95": 95,
218
+ "96": 96,
219
+ "97": 97,
220
+ "98": 98,
221
+ "99": 99
222
+ },
223
+ "layer_norm_epsilon": 1e-06,
224
+ "model_type": "t5",
225
+ "n_positions": 512,
226
+ "num_decoder_layers": 12,
227
+ "num_heads": 12,
228
+ "num_layers": 12,
229
+ "output_past": true,
230
+ "pad_token_id": 0,
231
+ "problem_type": "single_label_classification",
232
+ "relative_attention_max_distance": 128,
233
+ "relative_attention_num_buckets": 32,
234
+ "task_specific_params": {
235
+ "summarization": {
236
+ "early_stopping": true,
237
+ "length_penalty": 2.0,
238
+ "max_length": 200,
239
+ "min_length": 30,
240
+ "no_repeat_ngram_size": 3,
241
+ "num_beams": 4,
242
+ "prefix": "summarize: "
243
+ },
244
+ "translation_en_to_de": {
245
+ "early_stopping": true,
246
+ "max_length": 300,
247
+ "num_beams": 4,
248
+ "prefix": "translate English to German: "
249
+ },
250
+ "translation_en_to_fr": {
251
+ "early_stopping": true,
252
+ "max_length": 300,
253
+ "num_beams": 4,
254
+ "prefix": "translate English to French: "
255
+ },
256
+ "translation_en_to_ro": {
257
+ "early_stopping": true,
258
+ "max_length": 300,
259
+ "num_beams": 4,
260
+ "prefix": "translate English to Romanian: "
261
+ }
262
+ },
263
+ "tie_word_embeddings": false,
264
+ "torch_dtype": "float32",
265
+ "transformers_version": "4.39.0.dev0",
266
+ "use_cache": true,
267
+ "vocab_size": 32128
268
+ }
google/flan_t5_base_ledgar/eval_results.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "eval_accuracy": 0.8685,
4
+ "eval_f1_macro": 0.7955270719340626,
5
+ "eval_f1_micro": 0.8685,
6
+ "eval_loss": 0.5091982483863831,
7
+ "eval_runtime": 27.379,
8
+ "eval_samples": 10000,
9
+ "eval_samples_per_second": 365.243,
10
+ "eval_steps_per_second": 5.734
11
+ }
google/flan_t5_base_ledgar/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50d3c719cea01d5d773a6a5e97ab546ca789a29b55b989470a22e26a7c3b2434
3
+ size 894321496
google/flan_t5_base_ledgar/run.log ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ 03/15/2024 17:41:25 - WARNING - __main__ - Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: True, 16-bits training: False
2
+ 03/15/2024 17:41:25 - WARNING - __main__ - Process rank: 1, device: cuda:1, n_gpu: 1, distributed training: True, 16-bits training: False
3
+ 03/15/2024 17:41:28 - WARNING - __main__ - The label2id key in the model config.json is not equal to the label2id key of this run. You can ignore this if you are doing finetuning.
4
+ 03/15/2024 17:41:28 - WARNING - __main__ - The label2id key in the model config.json is not equal to the label2id key of this run. You can ignore this if you are doing finetuning.
google/flan_t5_base_ledgar/special_tokens_map.json ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>",
99
+ "<extra_id_96>",
100
+ "<extra_id_97>",
101
+ "<extra_id_98>",
102
+ "<extra_id_99>"
103
+ ],
104
+ "eos_token": {
105
+ "content": "</s>",
106
+ "lstrip": false,
107
+ "normalized": false,
108
+ "rstrip": false,
109
+ "single_word": false
110
+ },
111
+ "pad_token": {
112
+ "content": "<pad>",
113
+ "lstrip": false,
114
+ "normalized": false,
115
+ "rstrip": false,
116
+ "single_word": false
117
+ },
118
+ "unk_token": {
119
+ "content": "<unk>",
120
+ "lstrip": false,
121
+ "normalized": false,
122
+ "rstrip": false,
123
+ "single_word": false
124
+ }
125
+ }
google/flan_t5_base_ledgar/spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
3
+ size 791656
google/flan_t5_base_ledgar/test_results.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "test_accuracy": 0.8682,
4
+ "test_f1_macro": 0.7969025101161749,
5
+ "test_f1_micro": 0.8682,
6
+ "test_loss": 0.5303810834884644,
7
+ "test_runtime": 27.358,
8
+ "test_samples_per_second": 365.524,
9
+ "test_steps_per_second": 5.739
10
+ }
google/flan_t5_base_ledgar/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
google/flan_t5_base_ledgar/tokenizer_config.json ADDED
@@ -0,0 +1,938 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<pad>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "</s>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "<unk>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "32000": {
28
+ "content": "<extra_id_99>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "32001": {
36
+ "content": "<extra_id_98>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "32002": {
44
+ "content": "<extra_id_97>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "32003": {
52
+ "content": "<extra_id_96>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "32004": {
60
+ "content": "<extra_id_95>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "32005": {
68
+ "content": "<extra_id_94>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "32006": {
76
+ "content": "<extra_id_93>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "32007": {
84
+ "content": "<extra_id_92>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "32008": {
92
+ "content": "<extra_id_91>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "32009": {
100
+ "content": "<extra_id_90>",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "32010": {
108
+ "content": "<extra_id_89>",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "32011": {
116
+ "content": "<extra_id_88>",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "32012": {
124
+ "content": "<extra_id_87>",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "32013": {
132
+ "content": "<extra_id_86>",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": true
138
+ },
139
+ "32014": {
140
+ "content": "<extra_id_85>",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": true
146
+ },
147
+ "32015": {
148
+ "content": "<extra_id_84>",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": true
154
+ },
155
+ "32016": {
156
+ "content": "<extra_id_83>",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "32017": {
164
+ "content": "<extra_id_82>",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": true
170
+ },
171
+ "32018": {
172
+ "content": "<extra_id_81>",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": true
178
+ },
179
+ "32019": {
180
+ "content": "<extra_id_80>",
181
+ "lstrip": false,
182
+ "normalized": false,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": true
186
+ },
187
+ "32020": {
188
+ "content": "<extra_id_79>",
189
+ "lstrip": false,
190
+ "normalized": false,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": true
194
+ },
195
+ "32021": {
196
+ "content": "<extra_id_78>",
197
+ "lstrip": false,
198
+ "normalized": false,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": true
202
+ },
203
+ "32022": {
204
+ "content": "<extra_id_77>",
205
+ "lstrip": false,
206
+ "normalized": false,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": true
210
+ },
211
+ "32023": {
212
+ "content": "<extra_id_76>",
213
+ "lstrip": false,
214
+ "normalized": false,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": true
218
+ },
219
+ "32024": {
220
+ "content": "<extra_id_75>",
221
+ "lstrip": false,
222
+ "normalized": false,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": true
226
+ },
227
+ "32025": {
228
+ "content": "<extra_id_74>",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": true
234
+ },
235
+ "32026": {
236
+ "content": "<extra_id_73>",
237
+ "lstrip": false,
238
+ "normalized": false,
239
+ "rstrip": false,
240
+ "single_word": false,
241
+ "special": true
242
+ },
243
+ "32027": {
244
+ "content": "<extra_id_72>",
245
+ "lstrip": false,
246
+ "normalized": false,
247
+ "rstrip": false,
248
+ "single_word": false,
249
+ "special": true
250
+ },
251
+ "32028": {
252
+ "content": "<extra_id_71>",
253
+ "lstrip": false,
254
+ "normalized": false,
255
+ "rstrip": false,
256
+ "single_word": false,
257
+ "special": true
258
+ },
259
+ "32029": {
260
+ "content": "<extra_id_70>",
261
+ "lstrip": false,
262
+ "normalized": false,
263
+ "rstrip": false,
264
+ "single_word": false,
265
+ "special": true
266
+ },
267
+ "32030": {
268
+ "content": "<extra_id_69>",
269
+ "lstrip": false,
270
+ "normalized": false,
271
+ "rstrip": false,
272
+ "single_word": false,
273
+ "special": true
274
+ },
275
+ "32031": {
276
+ "content": "<extra_id_68>",
277
+ "lstrip": false,
278
+ "normalized": false,
279
+ "rstrip": false,
280
+ "single_word": false,
281
+ "special": true
282
+ },
283
+ "32032": {
284
+ "content": "<extra_id_67>",
285
+ "lstrip": false,
286
+ "normalized": false,
287
+ "rstrip": false,
288
+ "single_word": false,
289
+ "special": true
290
+ },
291
+ "32033": {
292
+ "content": "<extra_id_66>",
293
+ "lstrip": false,
294
+ "normalized": false,
295
+ "rstrip": false,
296
+ "single_word": false,
297
+ "special": true
298
+ },
299
+ "32034": {
300
+ "content": "<extra_id_65>",
301
+ "lstrip": false,
302
+ "normalized": false,
303
+ "rstrip": false,
304
+ "single_word": false,
305
+ "special": true
306
+ },
307
+ "32035": {
308
+ "content": "<extra_id_64>",
309
+ "lstrip": false,
310
+ "normalized": false,
311
+ "rstrip": false,
312
+ "single_word": false,
313
+ "special": true
314
+ },
315
+ "32036": {
316
+ "content": "<extra_id_63>",
317
+ "lstrip": false,
318
+ "normalized": false,
319
+ "rstrip": false,
320
+ "single_word": false,
321
+ "special": true
322
+ },
323
+ "32037": {
324
+ "content": "<extra_id_62>",
325
+ "lstrip": false,
326
+ "normalized": false,
327
+ "rstrip": false,
328
+ "single_word": false,
329
+ "special": true
330
+ },
331
+ "32038": {
332
+ "content": "<extra_id_61>",
333
+ "lstrip": false,
334
+ "normalized": false,
335
+ "rstrip": false,
336
+ "single_word": false,
337
+ "special": true
338
+ },
339
+ "32039": {
340
+ "content": "<extra_id_60>",
341
+ "lstrip": false,
342
+ "normalized": false,
343
+ "rstrip": false,
344
+ "single_word": false,
345
+ "special": true
346
+ },
347
+ "32040": {
348
+ "content": "<extra_id_59>",
349
+ "lstrip": false,
350
+ "normalized": false,
351
+ "rstrip": false,
352
+ "single_word": false,
353
+ "special": true
354
+ },
355
+ "32041": {
356
+ "content": "<extra_id_58>",
357
+ "lstrip": false,
358
+ "normalized": false,
359
+ "rstrip": false,
360
+ "single_word": false,
361
+ "special": true
362
+ },
363
+ "32042": {
364
+ "content": "<extra_id_57>",
365
+ "lstrip": false,
366
+ "normalized": false,
367
+ "rstrip": false,
368
+ "single_word": false,
369
+ "special": true
370
+ },
371
+ "32043": {
372
+ "content": "<extra_id_56>",
373
+ "lstrip": false,
374
+ "normalized": false,
375
+ "rstrip": false,
376
+ "single_word": false,
377
+ "special": true
378
+ },
379
+ "32044": {
380
+ "content": "<extra_id_55>",
381
+ "lstrip": false,
382
+ "normalized": false,
383
+ "rstrip": false,
384
+ "single_word": false,
385
+ "special": true
386
+ },
387
+ "32045": {
388
+ "content": "<extra_id_54>",
389
+ "lstrip": false,
390
+ "normalized": false,
391
+ "rstrip": false,
392
+ "single_word": false,
393
+ "special": true
394
+ },
395
+ "32046": {
396
+ "content": "<extra_id_53>",
397
+ "lstrip": false,
398
+ "normalized": false,
399
+ "rstrip": false,
400
+ "single_word": false,
401
+ "special": true
402
+ },
403
+ "32047": {
404
+ "content": "<extra_id_52>",
405
+ "lstrip": false,
406
+ "normalized": false,
407
+ "rstrip": false,
408
+ "single_word": false,
409
+ "special": true
410
+ },
411
+ "32048": {
412
+ "content": "<extra_id_51>",
413
+ "lstrip": false,
414
+ "normalized": false,
415
+ "rstrip": false,
416
+ "single_word": false,
417
+ "special": true
418
+ },
419
+ "32049": {
420
+ "content": "<extra_id_50>",
421
+ "lstrip": false,
422
+ "normalized": false,
423
+ "rstrip": false,
424
+ "single_word": false,
425
+ "special": true
426
+ },
427
+ "32050": {
428
+ "content": "<extra_id_49>",
429
+ "lstrip": false,
430
+ "normalized": false,
431
+ "rstrip": false,
432
+ "single_word": false,
433
+ "special": true
434
+ },
435
+ "32051": {
436
+ "content": "<extra_id_48>",
437
+ "lstrip": false,
438
+ "normalized": false,
439
+ "rstrip": false,
440
+ "single_word": false,
441
+ "special": true
442
+ },
443
+ "32052": {
444
+ "content": "<extra_id_47>",
445
+ "lstrip": false,
446
+ "normalized": false,
447
+ "rstrip": false,
448
+ "single_word": false,
449
+ "special": true
450
+ },
451
+ "32053": {
452
+ "content": "<extra_id_46>",
453
+ "lstrip": false,
454
+ "normalized": false,
455
+ "rstrip": false,
456
+ "single_word": false,
457
+ "special": true
458
+ },
459
+ "32054": {
460
+ "content": "<extra_id_45>",
461
+ "lstrip": false,
462
+ "normalized": false,
463
+ "rstrip": false,
464
+ "single_word": false,
465
+ "special": true
466
+ },
467
+ "32055": {
468
+ "content": "<extra_id_44>",
469
+ "lstrip": false,
470
+ "normalized": false,
471
+ "rstrip": false,
472
+ "single_word": false,
473
+ "special": true
474
+ },
475
+ "32056": {
476
+ "content": "<extra_id_43>",
477
+ "lstrip": false,
478
+ "normalized": false,
479
+ "rstrip": false,
480
+ "single_word": false,
481
+ "special": true
482
+ },
483
+ "32057": {
484
+ "content": "<extra_id_42>",
485
+ "lstrip": false,
486
+ "normalized": false,
487
+ "rstrip": false,
488
+ "single_word": false,
489
+ "special": true
490
+ },
491
+ "32058": {
492
+ "content": "<extra_id_41>",
493
+ "lstrip": false,
494
+ "normalized": false,
495
+ "rstrip": false,
496
+ "single_word": false,
497
+ "special": true
498
+ },
499
+ "32059": {
500
+ "content": "<extra_id_40>",
501
+ "lstrip": false,
502
+ "normalized": false,
503
+ "rstrip": false,
504
+ "single_word": false,
505
+ "special": true
506
+ },
507
+ "32060": {
508
+ "content": "<extra_id_39>",
509
+ "lstrip": false,
510
+ "normalized": false,
511
+ "rstrip": false,
512
+ "single_word": false,
513
+ "special": true
514
+ },
515
+ "32061": {
516
+ "content": "<extra_id_38>",
517
+ "lstrip": false,
518
+ "normalized": false,
519
+ "rstrip": false,
520
+ "single_word": false,
521
+ "special": true
522
+ },
523
+ "32062": {
524
+ "content": "<extra_id_37>",
525
+ "lstrip": false,
526
+ "normalized": false,
527
+ "rstrip": false,
528
+ "single_word": false,
529
+ "special": true
530
+ },
531
+ "32063": {
532
+ "content": "<extra_id_36>",
533
+ "lstrip": false,
534
+ "normalized": false,
535
+ "rstrip": false,
536
+ "single_word": false,
537
+ "special": true
538
+ },
539
+ "32064": {
540
+ "content": "<extra_id_35>",
541
+ "lstrip": false,
542
+ "normalized": false,
543
+ "rstrip": false,
544
+ "single_word": false,
545
+ "special": true
546
+ },
547
+ "32065": {
548
+ "content": "<extra_id_34>",
549
+ "lstrip": false,
550
+ "normalized": false,
551
+ "rstrip": false,
552
+ "single_word": false,
553
+ "special": true
554
+ },
555
+ "32066": {
556
+ "content": "<extra_id_33>",
557
+ "lstrip": false,
558
+ "normalized": false,
559
+ "rstrip": false,
560
+ "single_word": false,
561
+ "special": true
562
+ },
563
+ "32067": {
564
+ "content": "<extra_id_32>",
565
+ "lstrip": false,
566
+ "normalized": false,
567
+ "rstrip": false,
568
+ "single_word": false,
569
+ "special": true
570
+ },
571
+ "32068": {
572
+ "content": "<extra_id_31>",
573
+ "lstrip": false,
574
+ "normalized": false,
575
+ "rstrip": false,
576
+ "single_word": false,
577
+ "special": true
578
+ },
579
+ "32069": {
580
+ "content": "<extra_id_30>",
581
+ "lstrip": false,
582
+ "normalized": false,
583
+ "rstrip": false,
584
+ "single_word": false,
585
+ "special": true
586
+ },
587
+ "32070": {
588
+ "content": "<extra_id_29>",
589
+ "lstrip": false,
590
+ "normalized": false,
591
+ "rstrip": false,
592
+ "single_word": false,
593
+ "special": true
594
+ },
595
+ "32071": {
596
+ "content": "<extra_id_28>",
597
+ "lstrip": false,
598
+ "normalized": false,
599
+ "rstrip": false,
600
+ "single_word": false,
601
+ "special": true
602
+ },
603
+ "32072": {
604
+ "content": "<extra_id_27>",
605
+ "lstrip": false,
606
+ "normalized": false,
607
+ "rstrip": false,
608
+ "single_word": false,
609
+ "special": true
610
+ },
611
+ "32073": {
612
+ "content": "<extra_id_26>",
613
+ "lstrip": false,
614
+ "normalized": false,
615
+ "rstrip": false,
616
+ "single_word": false,
617
+ "special": true
618
+ },
619
+ "32074": {
620
+ "content": "<extra_id_25>",
621
+ "lstrip": false,
622
+ "normalized": false,
623
+ "rstrip": false,
624
+ "single_word": false,
625
+ "special": true
626
+ },
627
+ "32075": {
628
+ "content": "<extra_id_24>",
629
+ "lstrip": false,
630
+ "normalized": false,
631
+ "rstrip": false,
632
+ "single_word": false,
633
+ "special": true
634
+ },
635
+ "32076": {
636
+ "content": "<extra_id_23>",
637
+ "lstrip": false,
638
+ "normalized": false,
639
+ "rstrip": false,
640
+ "single_word": false,
641
+ "special": true
642
+ },
643
+ "32077": {
644
+ "content": "<extra_id_22>",
645
+ "lstrip": false,
646
+ "normalized": false,
647
+ "rstrip": false,
648
+ "single_word": false,
649
+ "special": true
650
+ },
651
+ "32078": {
652
+ "content": "<extra_id_21>",
653
+ "lstrip": false,
654
+ "normalized": false,
655
+ "rstrip": false,
656
+ "single_word": false,
657
+ "special": true
658
+ },
659
+ "32079": {
660
+ "content": "<extra_id_20>",
661
+ "lstrip": false,
662
+ "normalized": false,
663
+ "rstrip": false,
664
+ "single_word": false,
665
+ "special": true
666
+ },
667
+ "32080": {
668
+ "content": "<extra_id_19>",
669
+ "lstrip": false,
670
+ "normalized": false,
671
+ "rstrip": false,
672
+ "single_word": false,
673
+ "special": true
674
+ },
675
+ "32081": {
676
+ "content": "<extra_id_18>",
677
+ "lstrip": false,
678
+ "normalized": false,
679
+ "rstrip": false,
680
+ "single_word": false,
681
+ "special": true
682
+ },
683
+ "32082": {
684
+ "content": "<extra_id_17>",
685
+ "lstrip": false,
686
+ "normalized": false,
687
+ "rstrip": false,
688
+ "single_word": false,
689
+ "special": true
690
+ },
691
+ "32083": {
692
+ "content": "<extra_id_16>",
693
+ "lstrip": false,
694
+ "normalized": false,
695
+ "rstrip": false,
696
+ "single_word": false,
697
+ "special": true
698
+ },
699
+ "32084": {
700
+ "content": "<extra_id_15>",
701
+ "lstrip": false,
702
+ "normalized": false,
703
+ "rstrip": false,
704
+ "single_word": false,
705
+ "special": true
706
+ },
707
+ "32085": {
708
+ "content": "<extra_id_14>",
709
+ "lstrip": false,
710
+ "normalized": false,
711
+ "rstrip": false,
712
+ "single_word": false,
713
+ "special": true
714
+ },
715
+ "32086": {
716
+ "content": "<extra_id_13>",
717
+ "lstrip": false,
718
+ "normalized": false,
719
+ "rstrip": false,
720
+ "single_word": false,
721
+ "special": true
722
+ },
723
+ "32087": {
724
+ "content": "<extra_id_12>",
725
+ "lstrip": false,
726
+ "normalized": false,
727
+ "rstrip": false,
728
+ "single_word": false,
729
+ "special": true
730
+ },
731
+ "32088": {
732
+ "content": "<extra_id_11>",
733
+ "lstrip": false,
734
+ "normalized": false,
735
+ "rstrip": false,
736
+ "single_word": false,
737
+ "special": true
738
+ },
739
+ "32089": {
740
+ "content": "<extra_id_10>",
741
+ "lstrip": false,
742
+ "normalized": false,
743
+ "rstrip": false,
744
+ "single_word": false,
745
+ "special": true
746
+ },
747
+ "32090": {
748
+ "content": "<extra_id_9>",
749
+ "lstrip": false,
750
+ "normalized": false,
751
+ "rstrip": false,
752
+ "single_word": false,
753
+ "special": true
754
+ },
755
+ "32091": {
756
+ "content": "<extra_id_8>",
757
+ "lstrip": false,
758
+ "normalized": false,
759
+ "rstrip": false,
760
+ "single_word": false,
761
+ "special": true
762
+ },
763
+ "32092": {
764
+ "content": "<extra_id_7>",
765
+ "lstrip": false,
766
+ "normalized": false,
767
+ "rstrip": false,
768
+ "single_word": false,
769
+ "special": true
770
+ },
771
+ "32093": {
772
+ "content": "<extra_id_6>",
773
+ "lstrip": false,
774
+ "normalized": false,
775
+ "rstrip": false,
776
+ "single_word": false,
777
+ "special": true
778
+ },
779
+ "32094": {
780
+ "content": "<extra_id_5>",
781
+ "lstrip": false,
782
+ "normalized": false,
783
+ "rstrip": false,
784
+ "single_word": false,
785
+ "special": true
786
+ },
787
+ "32095": {
788
+ "content": "<extra_id_4>",
789
+ "lstrip": false,
790
+ "normalized": false,
791
+ "rstrip": false,
792
+ "single_word": false,
793
+ "special": true
794
+ },
795
+ "32096": {
796
+ "content": "<extra_id_3>",
797
+ "lstrip": false,
798
+ "normalized": false,
799
+ "rstrip": false,
800
+ "single_word": false,
801
+ "special": true
802
+ },
803
+ "32097": {
804
+ "content": "<extra_id_2>",
805
+ "lstrip": false,
806
+ "normalized": false,
807
+ "rstrip": false,
808
+ "single_word": false,
809
+ "special": true
810
+ },
811
+ "32098": {
812
+ "content": "<extra_id_1>",
813
+ "lstrip": false,
814
+ "normalized": false,
815
+ "rstrip": false,
816
+ "single_word": false,
817
+ "special": true
818
+ },
819
+ "32099": {
820
+ "content": "<extra_id_0>",
821
+ "lstrip": false,
822
+ "normalized": false,
823
+ "rstrip": false,
824
+ "single_word": false,
825
+ "special": true
826
+ }
827
+ },
828
+ "additional_special_tokens": [
829
+ "<extra_id_0>",
830
+ "<extra_id_1>",
831
+ "<extra_id_2>",
832
+ "<extra_id_3>",
833
+ "<extra_id_4>",
834
+ "<extra_id_5>",
835
+ "<extra_id_6>",
836
+ "<extra_id_7>",
837
+ "<extra_id_8>",
838
+ "<extra_id_9>",
839
+ "<extra_id_10>",
840
+ "<extra_id_11>",
841
+ "<extra_id_12>",
842
+ "<extra_id_13>",
843
+ "<extra_id_14>",
844
+ "<extra_id_15>",
845
+ "<extra_id_16>",
846
+ "<extra_id_17>",
847
+ "<extra_id_18>",
848
+ "<extra_id_19>",
849
+ "<extra_id_20>",
850
+ "<extra_id_21>",
851
+ "<extra_id_22>",
852
+ "<extra_id_23>",
853
+ "<extra_id_24>",
854
+ "<extra_id_25>",
855
+ "<extra_id_26>",
856
+ "<extra_id_27>",
857
+ "<extra_id_28>",
858
+ "<extra_id_29>",
859
+ "<extra_id_30>",
860
+ "<extra_id_31>",
861
+ "<extra_id_32>",
862
+ "<extra_id_33>",
863
+ "<extra_id_34>",
864
+ "<extra_id_35>",
865
+ "<extra_id_36>",
866
+ "<extra_id_37>",
867
+ "<extra_id_38>",
868
+ "<extra_id_39>",
869
+ "<extra_id_40>",
870
+ "<extra_id_41>",
871
+ "<extra_id_42>",
872
+ "<extra_id_43>",
873
+ "<extra_id_44>",
874
+ "<extra_id_45>",
875
+ "<extra_id_46>",
876
+ "<extra_id_47>",
877
+ "<extra_id_48>",
878
+ "<extra_id_49>",
879
+ "<extra_id_50>",
880
+ "<extra_id_51>",
881
+ "<extra_id_52>",
882
+ "<extra_id_53>",
883
+ "<extra_id_54>",
884
+ "<extra_id_55>",
885
+ "<extra_id_56>",
886
+ "<extra_id_57>",
887
+ "<extra_id_58>",
888
+ "<extra_id_59>",
889
+ "<extra_id_60>",
890
+ "<extra_id_61>",
891
+ "<extra_id_62>",
892
+ "<extra_id_63>",
893
+ "<extra_id_64>",
894
+ "<extra_id_65>",
895
+ "<extra_id_66>",
896
+ "<extra_id_67>",
897
+ "<extra_id_68>",
898
+ "<extra_id_69>",
899
+ "<extra_id_70>",
900
+ "<extra_id_71>",
901
+ "<extra_id_72>",
902
+ "<extra_id_73>",
903
+ "<extra_id_74>",
904
+ "<extra_id_75>",
905
+ "<extra_id_76>",
906
+ "<extra_id_77>",
907
+ "<extra_id_78>",
908
+ "<extra_id_79>",
909
+ "<extra_id_80>",
910
+ "<extra_id_81>",
911
+ "<extra_id_82>",
912
+ "<extra_id_83>",
913
+ "<extra_id_84>",
914
+ "<extra_id_85>",
915
+ "<extra_id_86>",
916
+ "<extra_id_87>",
917
+ "<extra_id_88>",
918
+ "<extra_id_89>",
919
+ "<extra_id_90>",
920
+ "<extra_id_91>",
921
+ "<extra_id_92>",
922
+ "<extra_id_93>",
923
+ "<extra_id_94>",
924
+ "<extra_id_95>",
925
+ "<extra_id_96>",
926
+ "<extra_id_97>",
927
+ "<extra_id_98>",
928
+ "<extra_id_99>"
929
+ ],
930
+ "clean_up_tokenization_spaces": true,
931
+ "eos_token": "</s>",
932
+ "extra_ids": 100,
933
+ "model_max_length": 512,
934
+ "pad_token": "<pad>",
935
+ "sp_model_kwargs": {},
936
+ "tokenizer_class": "T5Tokenizer",
937
+ "unk_token": "<unk>"
938
+ }