first checkpoint
Browse files- README.md +162 -0
- checkpoint-336/config.json +61 -0
- checkpoint-336/generation_config.json +6 -0
- checkpoint-336/optimizer.pt +3 -0
- checkpoint-336/pytorch_model.bin +3 -0
- checkpoint-336/rng_state.pth +3 -0
- checkpoint-336/scheduler.pt +3 -0
- checkpoint-336/trainer_state.json +120 -0
- checkpoint-336/training_args.bin +3 -0
- logs/1685298499.8234348/events.out.tfevents.1685298499.3341d909449e.6857.1 +3 -0
- logs/1685298535.790303/events.out.tfevents.1685298535.3341d909449e.6857.3 +3 -0
- logs/1685298643.1430945/events.out.tfevents.1685298643.3341d909449e.6857.5 +3 -0
- logs/1685298778.5712953/events.out.tfevents.1685298778.3341d909449e.8793.1 +3 -0
- logs/events.out.tfevents.1685298499.3341d909449e.6857.0 +3 -0
- logs/events.out.tfevents.1685298535.3341d909449e.6857.2 +3 -0
- logs/events.out.tfevents.1685298643.3341d909449e.6857.4 +3 -0
- logs/events.out.tfevents.1685298778.3341d909449e.8793.0 +3 -0
- logs/events.out.tfevents.1685302813.3341d909449e.8793.2 +3 -0
- special_tokens_map.json +107 -0
- spiece.model +3 -0
- tokenizer.json +0 -0
- tokenizer_config.json +112 -0
README.md
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: apache-2.0
|
| 3 |
+
tags:
|
| 4 |
+
- generated_from_trainer
|
| 5 |
+
metrics:
|
| 6 |
+
- rouge
|
| 7 |
+
model-index:
|
| 8 |
+
- name: flan-t5-base-productdomain_instructions
|
| 9 |
+
results: []
|
| 10 |
+
---
|
| 11 |
+
|
| 12 |
+
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
| 13 |
+
should probably proofread and complete it, then remove this comment. -->
|
| 14 |
+
|
| 15 |
+
# flan-t5-base-productdomain_instructions
|
| 16 |
+
|
| 17 |
+
This model is a fine-tuned version of [google/flan-t5-base](https://huggingface.co/google/flan-t5-base) on an unspecified dataset (no dataset name was recorded by the Trainer).
|
| 18 |
+
It achieves the following results on the evaluation set (reported for the best checkpoint by validation loss, step 336 / epoch 8, not for the final epoch):
|
| 19 |
+
- Loss: 1.7837
|
| 20 |
+
- Rouge1: 36.5991
|
| 21 |
+
- Rouge2: 15.4799
|
| 22 |
+
- RougeL: 34.4037
|
| 23 |
+
- RougeLsum: 35.4543
|
| 24 |
+
- Gen Len: 14.0723
|
| 25 |
+
|
| 26 |
+
## Model description
|
| 27 |
+
|
| 28 |
+
More information needed
|
| 29 |
+
|
| 30 |
+
## Intended uses & limitations
|
| 31 |
+
|
| 32 |
+
More information needed
|
| 33 |
+
|
| 34 |
+
## Training and evaluation data
|
| 35 |
+
|
| 36 |
+
More information needed
|
| 37 |
+
|
| 38 |
+
## Training procedure
|
| 39 |
+
|
| 40 |
+
### Training hyperparameters
|
| 41 |
+
|
| 42 |
+
The following hyperparameters were used during training:
|
| 43 |
+
- learning_rate: 5e-05
|
| 44 |
+
- train_batch_size: 16
|
| 45 |
+
- eval_batch_size: 8
|
| 46 |
+
- seed: 42
|
| 47 |
+
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
| 48 |
+
- lr_scheduler_type: linear
|
| 49 |
+
- num_epochs: 100
|
| 50 |
+
|
| 51 |
+
### Training results
|
| 52 |
+
|
| 53 |
+
| Training Loss | Epoch | Step | Validation Loss | Rouge1 | Rouge2 | RougeL | RougeLsum | Gen Len |
|
| 54 |
+
|:-------------:|:-----:|:----:|:---------------:|:-------:|:-------:|:-------:|:---------:|:-------:|
|
| 55 |
+
| No log | 1.0 | 42 | 2.0081 | 33.7683 | 13.3348 | 31.7464 | 32.3501 | 13.6988 |
|
| 56 |
+
| No log | 2.0 | 84 | 1.9122 | 35.8931 | 14.7035 | 33.5156 | 34.4285 | 14.2771 |
|
| 57 |
+
| No log | 3.0 | 126 | 1.8571 | 36.1469 | 14.7339 | 34.0229 | 34.8045 | 14.8554 |
|
| 58 |
+
| No log | 4.0 | 168 | 1.8285 | 34.6911 | 13.3403 | 32.4862 | 33.4576 | 14.8916 |
|
| 59 |
+
| No log | 5.0 | 210 | 1.8121 | 36.1848 | 14.3484 | 34.4051 | 35.0242 | 14.6627 |
|
| 60 |
+
| No log | 6.0 | 252 | 1.7985 | 35.8217 | 14.632 | 34.0805 | 34.7598 | 14.4217 |
|
| 61 |
+
| No log | 7.0 | 294 | 1.7926 | 36.7474 | 14.714 | 34.4996 | 35.471 | 14.4578 |
|
| 62 |
+
| No log | 8.0 | 336 | 1.7837 | 36.5991 | 15.4799 | 34.4037 | 35.4543 | 14.0723 |
|
| 63 |
+
| No log | 9.0 | 378 | 1.7868 | 38.5794 | 17.8009 | 36.3936 | 37.2854 | 14.0482 |
|
| 64 |
+
| No log | 10.0 | 420 | 1.7917 | 37.186 | 16.4516 | 34.5805 | 35.5496 | 14.6867 |
|
| 65 |
+
| No log | 11.0 | 462 | 1.7876 | 38.2387 | 16.8602 | 35.703 | 36.5374 | 14.3735 |
|
| 66 |
+
| 1.5613 | 12.0 | 504 | 1.7949 | 36.6609 | 17.1147 | 34.6186 | 35.1214 | 14.8554 |
|
| 67 |
+
| 1.5613 | 13.0 | 546 | 1.8045 | 38.9387 | 18.0734 | 36.7076 | 37.3858 | 14.8193 |
|
| 68 |
+
| 1.5613 | 14.0 | 588 | 1.8211 | 39.0697 | 16.5198 | 36.7938 | 37.3292 | 14.5663 |
|
| 69 |
+
| 1.5613 | 15.0 | 630 | 1.8214 | 38.2996 | 17.1678 | 36.4389 | 37.0512 | 14.8313 |
|
| 70 |
+
| 1.5613 | 16.0 | 672 | 1.8375 | 39.4345 | 18.0457 | 37.4487 | 38.0049 | 14.5422 |
|
| 71 |
+
| 1.5613 | 17.0 | 714 | 1.8668 | 36.9279 | 18.0742 | 34.8528 | 35.4754 | 14.8313 |
|
| 72 |
+
| 1.5613 | 18.0 | 756 | 1.8708 | 36.1653 | 17.1661 | 34.0035 | 34.7663 | 14.8795 |
|
| 73 |
+
| 1.5613 | 19.0 | 798 | 1.9029 | 36.9342 | 16.9662 | 34.7087 | 35.3687 | 14.7229 |
|
| 74 |
+
| 1.5613 | 20.0 | 840 | 1.9203 | 37.0405 | 16.51 | 34.708 | 35.4081 | 15.1446 |
|
| 75 |
+
| 1.5613 | 21.0 | 882 | 1.9241 | 40.1118 | 18.1251 | 37.7191 | 38.5263 | 14.7590 |
|
| 76 |
+
| 1.5613 | 22.0 | 924 | 1.9595 | 40.3279 | 17.7924 | 38.1031 | 38.6966 | 14.9036 |
|
| 77 |
+
| 1.5613 | 23.0 | 966 | 1.9486 | 38.5332 | 16.3386 | 36.3524 | 36.8476 | 15.1084 |
|
| 78 |
+
| 0.9347 | 24.0 | 1008 | 1.9651 | 39.0175 | 17.4398 | 36.7802 | 37.3691 | 14.6386 |
|
| 79 |
+
| 0.9347 | 25.0 | 1050 | 2.0215 | 37.3542 | 16.7397 | 35.1152 | 35.5948 | 15.1807 |
|
| 80 |
+
| 0.9347 | 26.0 | 1092 | 2.0136 | 36.1433 | 16.0566 | 33.5195 | 34.1703 | 15.3855 |
|
| 81 |
+
| 0.9347 | 27.0 | 1134 | 2.0317 | 37.365 | 17.3246 | 34.9103 | 35.5618 | 14.7229 |
|
| 82 |
+
| 0.9347 | 28.0 | 1176 | 2.0574 | 38.9994 | 18.9331 | 36.8122 | 37.2818 | 14.7590 |
|
| 83 |
+
| 0.9347 | 29.0 | 1218 | 2.0975 | 38.704 | 17.6156 | 36.4166 | 36.855 | 15.0843 |
|
| 84 |
+
| 0.9347 | 30.0 | 1260 | 2.1182 | 36.6657 | 17.2754 | 34.1387 | 34.5188 | 15.3735 |
|
| 85 |
+
| 0.9347 | 31.0 | 1302 | 2.1353 | 38.3665 | 17.6706 | 36.2971 | 36.9008 | 14.6386 |
|
| 86 |
+
| 0.9347 | 32.0 | 1344 | 2.1583 | 36.461 | 15.951 | 34.1126 | 34.7238 | 14.9639 |
|
| 87 |
+
| 0.9347 | 33.0 | 1386 | 2.1628 | 38.2005 | 17.8068 | 35.9379 | 36.4597 | 14.8554 |
|
| 88 |
+
| 0.9347 | 34.0 | 1428 | 2.1632 | 38.1226 | 17.8223 | 35.7166 | 36.3522 | 15.0964 |
|
| 89 |
+
| 0.9347 | 35.0 | 1470 | 2.1993 | 38.1793 | 16.4788 | 36.4238 | 36.8716 | 14.6145 |
|
| 90 |
+
| 0.6306 | 36.0 | 1512 | 2.2278 | 37.5943 | 18.0919 | 35.9979 | 36.0291 | 14.7108 |
|
| 91 |
+
| 0.6306 | 37.0 | 1554 | 2.2547 | 36.7207 | 16.675 | 34.9107 | 35.0798 | 14.7229 |
|
| 92 |
+
| 0.6306 | 38.0 | 1596 | 2.2688 | 36.9936 | 15.8314 | 35.4724 | 35.5883 | 14.4940 |
|
| 93 |
+
| 0.6306 | 39.0 | 1638 | 2.3119 | 37.5208 | 16.6074 | 35.8717 | 36.445 | 14.3253 |
|
| 94 |
+
| 0.6306 | 40.0 | 1680 | 2.3154 | 37.8128 | 16.9579 | 35.6907 | 36.1893 | 14.6867 |
|
| 95 |
+
| 0.6306 | 41.0 | 1722 | 2.3531 | 39.4845 | 17.5286 | 37.8577 | 38.3204 | 14.4458 |
|
| 96 |
+
| 0.6306 | 42.0 | 1764 | 2.3323 | 38.4761 | 17.295 | 36.6386 | 36.9557 | 14.6145 |
|
| 97 |
+
| 0.6306 | 43.0 | 1806 | 2.3743 | 38.7443 | 19.2581 | 37.1116 | 37.5985 | 14.5181 |
|
| 98 |
+
| 0.6306 | 44.0 | 1848 | 2.4311 | 40.3561 | 18.693 | 38.8656 | 39.3105 | 14.4337 |
|
| 99 |
+
| 0.6306 | 45.0 | 1890 | 2.3959 | 40.0522 | 19.397 | 38.6949 | 39.2113 | 14.3494 |
|
| 100 |
+
| 0.6306 | 46.0 | 1932 | 2.4536 | 38.2892 | 17.2512 | 36.3746 | 37.0066 | 14.3133 |
|
| 101 |
+
| 0.6306 | 47.0 | 1974 | 2.4263 | 40.1626 | 18.1146 | 38.2934 | 39.0442 | 14.5422 |
|
| 102 |
+
| 0.4555 | 48.0 | 2016 | 2.4762 | 38.6619 | 17.2921 | 36.7469 | 37.3807 | 14.3614 |
|
| 103 |
+
| 0.4555 | 49.0 | 2058 | 2.5072 | 38.2839 | 17.8954 | 36.532 | 36.9102 | 14.5181 |
|
| 104 |
+
| 0.4555 | 50.0 | 2100 | 2.5133 | 39.5629 | 18.1928 | 37.5546 | 38.2356 | 14.4578 |
|
| 105 |
+
| 0.4555 | 51.0 | 2142 | 2.5239 | 39.6734 | 17.4027 | 37.8029 | 38.0765 | 14.3253 |
|
| 106 |
+
| 0.4555 | 52.0 | 2184 | 2.5491 | 39.6165 | 18.1724 | 37.5788 | 38.5066 | 14.4578 |
|
| 107 |
+
| 0.4555 | 53.0 | 2226 | 2.5733 | 38.1501 | 18.2663 | 36.3533 | 37.0174 | 14.8554 |
|
| 108 |
+
| 0.4555 | 54.0 | 2268 | 2.5716 | 36.2353 | 16.133 | 34.1902 | 34.7408 | 14.7590 |
|
| 109 |
+
| 0.4555 | 55.0 | 2310 | 2.6192 | 37.8879 | 17.7186 | 35.9678 | 36.6746 | 14.9036 |
|
| 110 |
+
| 0.4555 | 56.0 | 2352 | 2.6474 | 37.1621 | 17.0886 | 35.4221 | 35.731 | 14.5181 |
|
| 111 |
+
| 0.4555 | 57.0 | 2394 | 2.6623 | 37.5523 | 16.7998 | 35.4469 | 36.0076 | 14.4699 |
|
| 112 |
+
| 0.4555 | 58.0 | 2436 | 2.6607 | 38.0032 | 17.0229 | 36.1551 | 36.5535 | 14.1807 |
|
| 113 |
+
| 0.4555 | 59.0 | 2478 | 2.7150 | 38.1025 | 17.4752 | 36.5283 | 36.7015 | 14.2289 |
|
| 114 |
+
| 0.3508 | 60.0 | 2520 | 2.6941 | 39.797 | 19.2379 | 38.1214 | 38.2261 | 14.3614 |
|
| 115 |
+
| 0.3508 | 61.0 | 2562 | 2.7107 | 38.8625 | 17.623 | 36.6963 | 37.0603 | 14.1325 |
|
| 116 |
+
| 0.3508 | 62.0 | 2604 | 2.6814 | 37.5211 | 16.4479 | 35.5462 | 35.8889 | 14.3494 |
|
| 117 |
+
| 0.3508 | 63.0 | 2646 | 2.7484 | 38.6866 | 17.6612 | 36.7428 | 37.1636 | 14.0723 |
|
| 118 |
+
| 0.3508 | 64.0 | 2688 | 2.7395 | 38.0483 | 17.6948 | 36.2878 | 36.697 | 14.1807 |
|
| 119 |
+
| 0.3508 | 65.0 | 2730 | 2.7365 | 37.6712 | 17.2705 | 35.8893 | 36.3441 | 14.4458 |
|
| 120 |
+
| 0.3508 | 66.0 | 2772 | 2.7555 | 37.9902 | 17.7247 | 36.0837 | 36.7237 | 14.3012 |
|
| 121 |
+
| 0.3508 | 67.0 | 2814 | 2.7494 | 36.6603 | 16.2134 | 34.6886 | 35.287 | 14.5783 |
|
| 122 |
+
| 0.3508 | 68.0 | 2856 | 2.7826 | 37.4075 | 16.5272 | 35.4471 | 35.8108 | 14.4458 |
|
| 123 |
+
| 0.3508 | 69.0 | 2898 | 2.7913 | 37.5132 | 16.5865 | 35.5267 | 35.8753 | 14.3133 |
|
| 124 |
+
| 0.3508 | 70.0 | 2940 | 2.8110 | 38.0779 | 17.5734 | 36.2356 | 36.4576 | 14.1687 |
|
| 125 |
+
| 0.3508 | 71.0 | 2982 | 2.8468 | 38.0068 | 17.1148 | 35.834 | 36.2888 | 14.2289 |
|
| 126 |
+
| 0.2859 | 72.0 | 3024 | 2.8722 | 37.0923 | 17.2183 | 35.4736 | 35.5467 | 14.2048 |
|
| 127 |
+
| 0.2859 | 73.0 | 3066 | 2.8532 | 37.3506 | 17.381 | 35.5293 | 35.7809 | 14.1928 |
|
| 128 |
+
| 0.2859 | 74.0 | 3108 | 2.8052 | 36.9958 | 16.5001 | 35.0384 | 35.4851 | 14.3735 |
|
| 129 |
+
| 0.2859 | 75.0 | 3150 | 2.8523 | 37.1479 | 15.9411 | 35.287 | 35.7899 | 14.3855 |
|
| 130 |
+
| 0.2859 | 76.0 | 3192 | 2.8778 | 36.8889 | 15.6829 | 34.905 | 35.3649 | 14.4337 |
|
| 131 |
+
| 0.2859 | 77.0 | 3234 | 2.9079 | 36.5824 | 15.5738 | 34.6425 | 35.1927 | 14.3614 |
|
| 132 |
+
| 0.2859 | 78.0 | 3276 | 2.8787 | 36.1728 | 15.938 | 34.4013 | 34.8261 | 14.4819 |
|
| 133 |
+
| 0.2859 | 79.0 | 3318 | 2.9080 | 35.9696 | 15.6976 | 34.2352 | 34.5983 | 14.6386 |
|
| 134 |
+
| 0.2859 | 80.0 | 3360 | 2.8772 | 37.0747 | 16.8528 | 35.1818 | 35.5885 | 14.4217 |
|
| 135 |
+
| 0.2859 | 81.0 | 3402 | 2.9020 | 36.3635 | 17.4462 | 34.3583 | 34.9417 | 14.4819 |
|
| 136 |
+
| 0.2859 | 82.0 | 3444 | 2.8993 | 37.4704 | 17.335 | 35.6702 | 36.1192 | 14.4217 |
|
| 137 |
+
| 0.2859 | 83.0 | 3486 | 2.8920 | 37.1973 | 17.3126 | 35.4618 | 35.8107 | 14.5542 |
|
| 138 |
+
| 0.2455 | 84.0 | 3528 | 2.9112 | 37.3907 | 17.2948 | 35.5391 | 35.9917 | 14.5783 |
|
| 139 |
+
| 0.2455 | 85.0 | 3570 | 2.9250 | 36.3332 | 16.2698 | 34.4579 | 34.7125 | 14.4337 |
|
| 140 |
+
| 0.2455 | 86.0 | 3612 | 2.9090 | 37.8226 | 17.3181 | 35.8265 | 36.4089 | 14.2048 |
|
| 141 |
+
| 0.2455 | 87.0 | 3654 | 2.9097 | 37.5181 | 17.2305 | 35.5447 | 35.9105 | 14.4940 |
|
| 142 |
+
| 0.2455 | 88.0 | 3696 | 2.9120 | 36.5995 | 16.6394 | 34.8092 | 35.1975 | 14.6867 |
|
| 143 |
+
| 0.2455 | 89.0 | 3738 | 2.9235 | 37.3048 | 16.939 | 35.3615 | 35.741 | 14.4578 |
|
| 144 |
+
| 0.2455 | 90.0 | 3780 | 2.9270 | 37.6118 | 17.4867 | 35.656 | 36.0439 | 14.6145 |
|
| 145 |
+
| 0.2455 | 91.0 | 3822 | 2.9260 | 37.6441 | 17.5091 | 35.7376 | 36.113 | 14.4578 |
|
| 146 |
+
| 0.2455 | 92.0 | 3864 | 2.9432 | 37.4994 | 17.3906 | 35.593 | 35.977 | 14.2651 |
|
| 147 |
+
| 0.2455 | 93.0 | 3906 | 2.9525 | 37.3703 | 17.3245 | 35.4908 | 35.9012 | 14.5663 |
|
| 148 |
+
| 0.2455 | 94.0 | 3948 | 2.9546 | 36.9876 | 17.1669 | 35.1814 | 35.5809 | 14.5542 |
|
| 149 |
+
| 0.2455 | 95.0 | 3990 | 2.9584 | 37.1337 | 17.1325 | 35.3505 | 35.6894 | 14.5542 |
|
| 150 |
+
| 0.2247 | 96.0 | 4032 | 2.9607 | 36.8183 | 16.9985 | 35.0273 | 35.3368 | 14.6024 |
|
| 151 |
+
| 0.2247 | 97.0 | 4074 | 2.9630 | 36.8418 | 17.027 | 35.0511 | 35.3509 | 14.6145 |
|
| 152 |
+
| 0.2247 | 98.0 | 4116 | 2.9610 | 36.8814 | 17.027 | 35.1067 | 35.4699 | 14.5663 |
|
| 153 |
+
| 0.2247 | 99.0 | 4158 | 2.9581 | 36.8814 | 17.027 | 35.1067 | 35.4699 | 14.5663 |
|
| 154 |
+
| 0.2247 | 100.0 | 4200 | 2.9576 | 36.8814 | 17.027 | 35.1067 | 35.4699 | 14.5663 |
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
### Framework versions
|
| 158 |
+
|
| 159 |
+
- Transformers 4.29.2
|
| 160 |
+
- Pytorch 2.0.1+cu118
|
| 161 |
+
- Datasets 2.12.0
|
| 162 |
+
- Tokenizers 0.13.3
|
checkpoint-336/config.json
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "google/flan-t5-base",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"T5ForConditionalGeneration"
|
| 5 |
+
],
|
| 6 |
+
"d_ff": 2048,
|
| 7 |
+
"d_kv": 64,
|
| 8 |
+
"d_model": 768,
|
| 9 |
+
"decoder_start_token_id": 0,
|
| 10 |
+
"dense_act_fn": "gelu_new",
|
| 11 |
+
"dropout_rate": 0.1,
|
| 12 |
+
"eos_token_id": 1,
|
| 13 |
+
"feed_forward_proj": "gated-gelu",
|
| 14 |
+
"initializer_factor": 1.0,
|
| 15 |
+
"is_encoder_decoder": true,
|
| 16 |
+
"is_gated_act": true,
|
| 17 |
+
"layer_norm_epsilon": 1e-06,
|
| 18 |
+
"model_type": "t5",
|
| 19 |
+
"n_positions": 512,
|
| 20 |
+
"num_decoder_layers": 12,
|
| 21 |
+
"num_heads": 12,
|
| 22 |
+
"num_layers": 12,
|
| 23 |
+
"output_past": true,
|
| 24 |
+
"pad_token_id": 0,
|
| 25 |
+
"relative_attention_max_distance": 128,
|
| 26 |
+
"relative_attention_num_buckets": 32,
|
| 27 |
+
"task_specific_params": {
|
| 28 |
+
"summarization": {
|
| 29 |
+
"early_stopping": true,
|
| 30 |
+
"length_penalty": 2.0,
|
| 31 |
+
"max_length": 200,
|
| 32 |
+
"min_length": 30,
|
| 33 |
+
"no_repeat_ngram_size": 3,
|
| 34 |
+
"num_beams": 4,
|
| 35 |
+
"prefix": "summarize: "
|
| 36 |
+
},
|
| 37 |
+
"translation_en_to_de": {
|
| 38 |
+
"early_stopping": true,
|
| 39 |
+
"max_length": 300,
|
| 40 |
+
"num_beams": 4,
|
| 41 |
+
"prefix": "translate English to German: "
|
| 42 |
+
},
|
| 43 |
+
"translation_en_to_fr": {
|
| 44 |
+
"early_stopping": true,
|
| 45 |
+
"max_length": 300,
|
| 46 |
+
"num_beams": 4,
|
| 47 |
+
"prefix": "translate English to French: "
|
| 48 |
+
},
|
| 49 |
+
"translation_en_to_ro": {
|
| 50 |
+
"early_stopping": true,
|
| 51 |
+
"max_length": 300,
|
| 52 |
+
"num_beams": 4,
|
| 53 |
+
"prefix": "translate English to Romanian: "
|
| 54 |
+
}
|
| 55 |
+
},
|
| 56 |
+
"tie_word_embeddings": false,
|
| 57 |
+
"torch_dtype": "float32",
|
| 58 |
+
"transformers_version": "4.29.2",
|
| 59 |
+
"use_cache": true,
|
| 60 |
+
"vocab_size": 32128
|
| 61 |
+
}
|
checkpoint-336/generation_config.json
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"decoder_start_token_id": 0,
|
| 3 |
+
"eos_token_id": 1,
|
| 4 |
+
"pad_token_id": 0,
|
| 5 |
+
"transformers_version": "4.29.2"
|
| 6 |
+
}
|
checkpoint-336/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:92ce928684bfac93713405689c7e252da4fd8fe9e123c95e9588114d93f83d2b
|
| 3 |
+
size 1980790149
|
checkpoint-336/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9a28d88ca6c5460f9c2167fd3b0b038865db1f55ed9c784c361020726b9dc634
|
| 3 |
+
size 990408885
|
checkpoint-336/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c8972d8c983225976e03f9922d82f4fd46ed9870a62c1761525ea3da5fe761e1
|
| 3 |
+
size 14575
|
checkpoint-336/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b51a0838752a12deef2cc4cd23c35ecf0561b08572ff7ad1874b7e3667ec793d
|
| 3 |
+
size 627
|
checkpoint-336/trainer_state.json
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": 1.783697247505188,
|
| 3 |
+
"best_model_checkpoint": "flan-t5-base-productdomain_instructions/checkpoint-336",
|
| 4 |
+
"epoch": 8.0,
|
| 5 |
+
"global_step": 336,
|
| 6 |
+
"is_hyper_param_search": false,
|
| 7 |
+
"is_local_process_zero": true,
|
| 8 |
+
"is_world_process_zero": true,
|
| 9 |
+
"log_history": [
|
| 10 |
+
{
|
| 11 |
+
"epoch": 1.0,
|
| 12 |
+
"eval_gen_len": 13.698795180722891,
|
| 13 |
+
"eval_loss": 2.008112668991089,
|
| 14 |
+
"eval_rouge1": 33.7683,
|
| 15 |
+
"eval_rouge2": 13.3348,
|
| 16 |
+
"eval_rougeL": 31.7464,
|
| 17 |
+
"eval_rougeLsum": 32.3501,
|
| 18 |
+
"eval_runtime": 7.8916,
|
| 19 |
+
"eval_samples_per_second": 10.518,
|
| 20 |
+
"eval_steps_per_second": 1.394,
|
| 21 |
+
"step": 42
|
| 22 |
+
},
|
| 23 |
+
{
|
| 24 |
+
"epoch": 2.0,
|
| 25 |
+
"eval_gen_len": 14.27710843373494,
|
| 26 |
+
"eval_loss": 1.9122490882873535,
|
| 27 |
+
"eval_rouge1": 35.8931,
|
| 28 |
+
"eval_rouge2": 14.7035,
|
| 29 |
+
"eval_rougeL": 33.5156,
|
| 30 |
+
"eval_rougeLsum": 34.4285,
|
| 31 |
+
"eval_runtime": 5.6097,
|
| 32 |
+
"eval_samples_per_second": 14.796,
|
| 33 |
+
"eval_steps_per_second": 1.961,
|
| 34 |
+
"step": 84
|
| 35 |
+
},
|
| 36 |
+
{
|
| 37 |
+
"epoch": 3.0,
|
| 38 |
+
"eval_gen_len": 14.855421686746988,
|
| 39 |
+
"eval_loss": 1.8570603132247925,
|
| 40 |
+
"eval_rouge1": 36.1469,
|
| 41 |
+
"eval_rouge2": 14.7339,
|
| 42 |
+
"eval_rougeL": 34.0229,
|
| 43 |
+
"eval_rougeLsum": 34.8045,
|
| 44 |
+
"eval_runtime": 5.636,
|
| 45 |
+
"eval_samples_per_second": 14.727,
|
| 46 |
+
"eval_steps_per_second": 1.952,
|
| 47 |
+
"step": 126
|
| 48 |
+
},
|
| 49 |
+
{
|
| 50 |
+
"epoch": 4.0,
|
| 51 |
+
"eval_gen_len": 14.891566265060241,
|
| 52 |
+
"eval_loss": 1.8284692764282227,
|
| 53 |
+
"eval_rouge1": 34.6911,
|
| 54 |
+
"eval_rouge2": 13.3403,
|
| 55 |
+
"eval_rougeL": 32.4862,
|
| 56 |
+
"eval_rougeLsum": 33.4576,
|
| 57 |
+
"eval_runtime": 5.6771,
|
| 58 |
+
"eval_samples_per_second": 14.62,
|
| 59 |
+
"eval_steps_per_second": 1.938,
|
| 60 |
+
"step": 168
|
| 61 |
+
},
|
| 62 |
+
{
|
| 63 |
+
"epoch": 5.0,
|
| 64 |
+
"eval_gen_len": 14.662650602409638,
|
| 65 |
+
"eval_loss": 1.8120635747909546,
|
| 66 |
+
"eval_rouge1": 36.1848,
|
| 67 |
+
"eval_rouge2": 14.3484,
|
| 68 |
+
"eval_rougeL": 34.4051,
|
| 69 |
+
"eval_rougeLsum": 35.0242,
|
| 70 |
+
"eval_runtime": 5.6293,
|
| 71 |
+
"eval_samples_per_second": 14.744,
|
| 72 |
+
"eval_steps_per_second": 1.954,
|
| 73 |
+
"step": 210
|
| 74 |
+
},
|
| 75 |
+
{
|
| 76 |
+
"epoch": 6.0,
|
| 77 |
+
"eval_gen_len": 14.421686746987952,
|
| 78 |
+
"eval_loss": 1.7984946966171265,
|
| 79 |
+
"eval_rouge1": 35.8217,
|
| 80 |
+
"eval_rouge2": 14.632,
|
| 81 |
+
"eval_rougeL": 34.0805,
|
| 82 |
+
"eval_rougeLsum": 34.7598,
|
| 83 |
+
"eval_runtime": 5.6725,
|
| 84 |
+
"eval_samples_per_second": 14.632,
|
| 85 |
+
"eval_steps_per_second": 1.939,
|
| 86 |
+
"step": 252
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 7.0,
|
| 90 |
+
"eval_gen_len": 14.457831325301205,
|
| 91 |
+
"eval_loss": 1.7926299571990967,
|
| 92 |
+
"eval_rouge1": 36.7474,
|
| 93 |
+
"eval_rouge2": 14.714,
|
| 94 |
+
"eval_rougeL": 34.4996,
|
| 95 |
+
"eval_rougeLsum": 35.471,
|
| 96 |
+
"eval_runtime": 5.6396,
|
| 97 |
+
"eval_samples_per_second": 14.717,
|
| 98 |
+
"eval_steps_per_second": 1.951,
|
| 99 |
+
"step": 294
|
| 100 |
+
},
|
| 101 |
+
{
|
| 102 |
+
"epoch": 8.0,
|
| 103 |
+
"eval_gen_len": 14.072289156626505,
|
| 104 |
+
"eval_loss": 1.783697247505188,
|
| 105 |
+
"eval_rouge1": 36.5991,
|
| 106 |
+
"eval_rouge2": 15.4799,
|
| 107 |
+
"eval_rougeL": 34.4037,
|
| 108 |
+
"eval_rougeLsum": 35.4543,
|
| 109 |
+
"eval_runtime": 5.6279,
|
| 110 |
+
"eval_samples_per_second": 14.748,
|
| 111 |
+
"eval_steps_per_second": 1.955,
|
| 112 |
+
"step": 336
|
| 113 |
+
}
|
| 114 |
+
],
|
| 115 |
+
"max_steps": 4200,
|
| 116 |
+
"num_train_epochs": 100,
|
| 117 |
+
"total_flos": 1929475029860352.0,
|
| 118 |
+
"trial_name": null,
|
| 119 |
+
"trial_params": null
|
| 120 |
+
}
|
checkpoint-336/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6eea69df86d3feda64172d19b640b2cfb2ff8c0f166f13fb179323d3e9cafc00
|
| 3 |
+
size 4155
|
logs/1685298499.8234348/events.out.tfevents.1685298499.3341d909449e.6857.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:71bf8147557686cd68c2e6d090a48b59db43882207dd3445199f195f31d32c4c
|
| 3 |
+
size 6273
|
logs/1685298535.790303/events.out.tfevents.1685298535.3341d909449e.6857.3
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f1c5dea6b8b3acf12c7047464035d10cb30013f5e90b0d8a183dffe4ad351e1d
|
| 3 |
+
size 6273
|
logs/1685298643.1430945/events.out.tfevents.1685298643.3341d909449e.6857.5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7c0d9d70d30b2433345caf8092468ca9efb305f76a21fa29c59a489b41f94f43
|
| 3 |
+
size 6273
|
logs/1685298778.5712953/events.out.tfevents.1685298778.3341d909449e.8793.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9b601aaa79b266e466f6074d6141662e5d20389adbefbd2127af22bf0c8573e0
|
| 3 |
+
size 6273
|
logs/events.out.tfevents.1685298499.3341d909449e.6857.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2f3f0803f7019e3afed6edd05368a371d2cb311eea6633a8fa46f904d047d16f
|
| 3 |
+
size 5102
|
logs/events.out.tfevents.1685298535.3341d909449e.6857.2
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cc21cfe9853f2417b247398723c1a35c2f31c32eaa5fccb3241c0cf568039dc1
|
| 3 |
+
size 5101
|
logs/events.out.tfevents.1685298643.3341d909449e.6857.4
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c314182f14af0515a6d6618fba7f2b5da00ab9581d941991585f6aeb37bd3231
|
| 3 |
+
size 4184
|
logs/events.out.tfevents.1685298778.3341d909449e.8793.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dcc5b954a8b0fc3047edc3c6bacfaaf412a3f901559a35c512eb771441b512b7
|
| 3 |
+
size 59182
|
logs/events.out.tfevents.1685302813.3341d909449e.8793.2
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:daef279c1b3e6f64dd63ebf5ce390b861644dfb2dc22190c120b08cd3147706c
|
| 3 |
+
size 1138
|
special_tokens_map.json
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"additional_special_tokens": [
|
| 3 |
+
"<extra_id_0>",
|
| 4 |
+
"<extra_id_1>",
|
| 5 |
+
"<extra_id_2>",
|
| 6 |
+
"<extra_id_3>",
|
| 7 |
+
"<extra_id_4>",
|
| 8 |
+
"<extra_id_5>",
|
| 9 |
+
"<extra_id_6>",
|
| 10 |
+
"<extra_id_7>",
|
| 11 |
+
"<extra_id_8>",
|
| 12 |
+
"<extra_id_9>",
|
| 13 |
+
"<extra_id_10>",
|
| 14 |
+
"<extra_id_11>",
|
| 15 |
+
"<extra_id_12>",
|
| 16 |
+
"<extra_id_13>",
|
| 17 |
+
"<extra_id_14>",
|
| 18 |
+
"<extra_id_15>",
|
| 19 |
+
"<extra_id_16>",
|
| 20 |
+
"<extra_id_17>",
|
| 21 |
+
"<extra_id_18>",
|
| 22 |
+
"<extra_id_19>",
|
| 23 |
+
"<extra_id_20>",
|
| 24 |
+
"<extra_id_21>",
|
| 25 |
+
"<extra_id_22>",
|
| 26 |
+
"<extra_id_23>",
|
| 27 |
+
"<extra_id_24>",
|
| 28 |
+
"<extra_id_25>",
|
| 29 |
+
"<extra_id_26>",
|
| 30 |
+
"<extra_id_27>",
|
| 31 |
+
"<extra_id_28>",
|
| 32 |
+
"<extra_id_29>",
|
| 33 |
+
"<extra_id_30>",
|
| 34 |
+
"<extra_id_31>",
|
| 35 |
+
"<extra_id_32>",
|
| 36 |
+
"<extra_id_33>",
|
| 37 |
+
"<extra_id_34>",
|
| 38 |
+
"<extra_id_35>",
|
| 39 |
+
"<extra_id_36>",
|
| 40 |
+
"<extra_id_37>",
|
| 41 |
+
"<extra_id_38>",
|
| 42 |
+
"<extra_id_39>",
|
| 43 |
+
"<extra_id_40>",
|
| 44 |
+
"<extra_id_41>",
|
| 45 |
+
"<extra_id_42>",
|
| 46 |
+
"<extra_id_43>",
|
| 47 |
+
"<extra_id_44>",
|
| 48 |
+
"<extra_id_45>",
|
| 49 |
+
"<extra_id_46>",
|
| 50 |
+
"<extra_id_47>",
|
| 51 |
+
"<extra_id_48>",
|
| 52 |
+
"<extra_id_49>",
|
| 53 |
+
"<extra_id_50>",
|
| 54 |
+
"<extra_id_51>",
|
| 55 |
+
"<extra_id_52>",
|
| 56 |
+
"<extra_id_53>",
|
| 57 |
+
"<extra_id_54>",
|
| 58 |
+
"<extra_id_55>",
|
| 59 |
+
"<extra_id_56>",
|
| 60 |
+
"<extra_id_57>",
|
| 61 |
+
"<extra_id_58>",
|
| 62 |
+
"<extra_id_59>",
|
| 63 |
+
"<extra_id_60>",
|
| 64 |
+
"<extra_id_61>",
|
| 65 |
+
"<extra_id_62>",
|
| 66 |
+
"<extra_id_63>",
|
| 67 |
+
"<extra_id_64>",
|
| 68 |
+
"<extra_id_65>",
|
| 69 |
+
"<extra_id_66>",
|
| 70 |
+
"<extra_id_67>",
|
| 71 |
+
"<extra_id_68>",
|
| 72 |
+
"<extra_id_69>",
|
| 73 |
+
"<extra_id_70>",
|
| 74 |
+
"<extra_id_71>",
|
| 75 |
+
"<extra_id_72>",
|
| 76 |
+
"<extra_id_73>",
|
| 77 |
+
"<extra_id_74>",
|
| 78 |
+
"<extra_id_75>",
|
| 79 |
+
"<extra_id_76>",
|
| 80 |
+
"<extra_id_77>",
|
| 81 |
+
"<extra_id_78>",
|
| 82 |
+
"<extra_id_79>",
|
| 83 |
+
"<extra_id_80>",
|
| 84 |
+
"<extra_id_81>",
|
| 85 |
+
"<extra_id_82>",
|
| 86 |
+
"<extra_id_83>",
|
| 87 |
+
"<extra_id_84>",
|
| 88 |
+
"<extra_id_85>",
|
| 89 |
+
"<extra_id_86>",
|
| 90 |
+
"<extra_id_87>",
|
| 91 |
+
"<extra_id_88>",
|
| 92 |
+
"<extra_id_89>",
|
| 93 |
+
"<extra_id_90>",
|
| 94 |
+
"<extra_id_91>",
|
| 95 |
+
"<extra_id_92>",
|
| 96 |
+
"<extra_id_93>",
|
| 97 |
+
"<extra_id_94>",
|
| 98 |
+
"<extra_id_95>",
|
| 99 |
+
"<extra_id_96>",
|
| 100 |
+
"<extra_id_97>",
|
| 101 |
+
"<extra_id_98>",
|
| 102 |
+
"<extra_id_99>"
|
| 103 |
+
],
|
| 104 |
+
"eos_token": "</s>",
|
| 105 |
+
"pad_token": "<pad>",
|
| 106 |
+
"unk_token": "<unk>"
|
| 107 |
+
}
|
spiece.model
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
|
| 3 |
+
size 791656
|
tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tokenizer_config.json
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"additional_special_tokens": [
|
| 3 |
+
"<extra_id_0>",
|
| 4 |
+
"<extra_id_1>",
|
| 5 |
+
"<extra_id_2>",
|
| 6 |
+
"<extra_id_3>",
|
| 7 |
+
"<extra_id_4>",
|
| 8 |
+
"<extra_id_5>",
|
| 9 |
+
"<extra_id_6>",
|
| 10 |
+
"<extra_id_7>",
|
| 11 |
+
"<extra_id_8>",
|
| 12 |
+
"<extra_id_9>",
|
| 13 |
+
"<extra_id_10>",
|
| 14 |
+
"<extra_id_11>",
|
| 15 |
+
"<extra_id_12>",
|
| 16 |
+
"<extra_id_13>",
|
| 17 |
+
"<extra_id_14>",
|
| 18 |
+
"<extra_id_15>",
|
| 19 |
+
"<extra_id_16>",
|
| 20 |
+
"<extra_id_17>",
|
| 21 |
+
"<extra_id_18>",
|
| 22 |
+
"<extra_id_19>",
|
| 23 |
+
"<extra_id_20>",
|
| 24 |
+
"<extra_id_21>",
|
| 25 |
+
"<extra_id_22>",
|
| 26 |
+
"<extra_id_23>",
|
| 27 |
+
"<extra_id_24>",
|
| 28 |
+
"<extra_id_25>",
|
| 29 |
+
"<extra_id_26>",
|
| 30 |
+
"<extra_id_27>",
|
| 31 |
+
"<extra_id_28>",
|
| 32 |
+
"<extra_id_29>",
|
| 33 |
+
"<extra_id_30>",
|
| 34 |
+
"<extra_id_31>",
|
| 35 |
+
"<extra_id_32>",
|
| 36 |
+
"<extra_id_33>",
|
| 37 |
+
"<extra_id_34>",
|
| 38 |
+
"<extra_id_35>",
|
| 39 |
+
"<extra_id_36>",
|
| 40 |
+
"<extra_id_37>",
|
| 41 |
+
"<extra_id_38>",
|
| 42 |
+
"<extra_id_39>",
|
| 43 |
+
"<extra_id_40>",
|
| 44 |
+
"<extra_id_41>",
|
| 45 |
+
"<extra_id_42>",
|
| 46 |
+
"<extra_id_43>",
|
| 47 |
+
"<extra_id_44>",
|
| 48 |
+
"<extra_id_45>",
|
| 49 |
+
"<extra_id_46>",
|
| 50 |
+
"<extra_id_47>",
|
| 51 |
+
"<extra_id_48>",
|
| 52 |
+
"<extra_id_49>",
|
| 53 |
+
"<extra_id_50>",
|
| 54 |
+
"<extra_id_51>",
|
| 55 |
+
"<extra_id_52>",
|
| 56 |
+
"<extra_id_53>",
|
| 57 |
+
"<extra_id_54>",
|
| 58 |
+
"<extra_id_55>",
|
| 59 |
+
"<extra_id_56>",
|
| 60 |
+
"<extra_id_57>",
|
| 61 |
+
"<extra_id_58>",
|
| 62 |
+
"<extra_id_59>",
|
| 63 |
+
"<extra_id_60>",
|
| 64 |
+
"<extra_id_61>",
|
| 65 |
+
"<extra_id_62>",
|
| 66 |
+
"<extra_id_63>",
|
| 67 |
+
"<extra_id_64>",
|
| 68 |
+
"<extra_id_65>",
|
| 69 |
+
"<extra_id_66>",
|
| 70 |
+
"<extra_id_67>",
|
| 71 |
+
"<extra_id_68>",
|
| 72 |
+
"<extra_id_69>",
|
| 73 |
+
"<extra_id_70>",
|
| 74 |
+
"<extra_id_71>",
|
| 75 |
+
"<extra_id_72>",
|
| 76 |
+
"<extra_id_73>",
|
| 77 |
+
"<extra_id_74>",
|
| 78 |
+
"<extra_id_75>",
|
| 79 |
+
"<extra_id_76>",
|
| 80 |
+
"<extra_id_77>",
|
| 81 |
+
"<extra_id_78>",
|
| 82 |
+
"<extra_id_79>",
|
| 83 |
+
"<extra_id_80>",
|
| 84 |
+
"<extra_id_81>",
|
| 85 |
+
"<extra_id_82>",
|
| 86 |
+
"<extra_id_83>",
|
| 87 |
+
"<extra_id_84>",
|
| 88 |
+
"<extra_id_85>",
|
| 89 |
+
"<extra_id_86>",
|
| 90 |
+
"<extra_id_87>",
|
| 91 |
+
"<extra_id_88>",
|
| 92 |
+
"<extra_id_89>",
|
| 93 |
+
"<extra_id_90>",
|
| 94 |
+
"<extra_id_91>",
|
| 95 |
+
"<extra_id_92>",
|
| 96 |
+
"<extra_id_93>",
|
| 97 |
+
"<extra_id_94>",
|
| 98 |
+
"<extra_id_95>",
|
| 99 |
+
"<extra_id_96>",
|
| 100 |
+
"<extra_id_97>",
|
| 101 |
+
"<extra_id_98>",
|
| 102 |
+
"<extra_id_99>"
|
| 103 |
+
],
|
| 104 |
+
"clean_up_tokenization_spaces": true,
|
| 105 |
+
"eos_token": "</s>",
|
| 106 |
+
"extra_ids": 100,
|
| 107 |
+
"model_max_length": 512,
|
| 108 |
+
"pad_token": "<pad>",
|
| 109 |
+
"sp_model_kwargs": {},
|
| 110 |
+
"tokenizer_class": "T5Tokenizer",
|
| 111 |
+
"unk_token": "<unk>"
|
| 112 |
+
}
|