colesimmons committed on
Commit
682b103
β€’
1 Parent(s): 4c7fb63

Upload tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +0 -28
  2. tokenizer.json +2 -19
  3. tokenizer_config.json +1 -5
special_tokens_map.json CHANGED
@@ -1,12 +1,5 @@
1
  {
2
  "additional_special_tokens": [
3
- {
4
- "content": "</s>",
5
- "lstrip": false,
6
- "normalized": false,
7
- "rstrip": false,
8
- "single_word": false
9
- },
10
  {
11
  "content": "<RULING>",
12
  "lstrip": false,
@@ -14,13 +7,6 @@
14
  "rstrip": false,
15
  "single_word": false
16
  },
17
- {
18
- "content": "<mask>",
19
- "lstrip": false,
20
- "normalized": false,
21
- "rstrip": false,
22
- "single_word": false
23
- },
24
  {
25
  "content": "<BLANK_SPACE>",
26
  "lstrip": false,
@@ -42,13 +28,6 @@
42
  "rstrip": false,
43
  "single_word": false
44
  },
45
- {
46
- "content": "<s>",
47
- "lstrip": false,
48
- "normalized": false,
49
- "rstrip": false,
50
- "single_word": false
51
- },
52
  {
53
  "content": "<COLUMN>",
54
  "lstrip": false,
@@ -69,13 +48,6 @@
69
  "normalized": false,
70
  "rstrip": false,
71
  "single_word": false
72
- },
73
- {
74
- "content": "<pad>",
75
- "lstrip": false,
76
- "normalized": false,
77
- "rstrip": false,
78
- "single_word": false
79
  }
80
  ],
81
  "bos_token": {
 
1
  {
2
  "additional_special_tokens": [
 
 
 
 
 
 
 
3
  {
4
  "content": "<RULING>",
5
  "lstrip": false,
 
7
  "rstrip": false,
8
  "single_word": false
9
  },
 
 
 
 
 
 
 
10
  {
11
  "content": "<BLANK_SPACE>",
12
  "lstrip": false,
 
28
  "rstrip": false,
29
  "single_word": false
30
  },
 
 
 
 
 
 
 
31
  {
32
  "content": "<COLUMN>",
33
  "lstrip": false,
 
48
  "normalized": false,
49
  "rstrip": false,
50
  "single_word": false
 
 
 
 
 
 
 
51
  }
52
  ],
53
  "bos_token": {
tokenizer.json CHANGED
@@ -765,25 +765,8 @@
765
  "π’”²": 628,
766
  "π’”Έ": 629,
767
  "𒕁": 630,
768
- "π’•‚": 631,
769
- "π’Œ¨π’€­": 632,
770
- "π’€­π’‚—": 633,
771
- "π’Šπ’‹‘": 634,
772
- "π’‹—π’ƒΈ": 635,
773
- "𒆠𒁀": 636,
774
- "π’€­π’‚—π’ͺ": 637,
775
- "π’€­π’Ž": 638,
776
- "π’…†π’Œ¨": 639
777
  },
778
- "merges": [
779
- "π’Œ¨ π’€­",
780
- "π’€­ π’‚—",
781
- "π’Š π’‹‘",
782
- "π’‹— π’ƒΈ",
783
- "π’†  𒁀",
784
- "π’€­π’‚— π’ͺ",
785
- "π’€­ π’Ž",
786
- "π’…† π’Œ¨"
787
- ]
788
  }
789
  }
 
765
  "π’”²": 628,
766
  "π’”Έ": 629,
767
  "𒕁": 630,
768
+ "π’•‚": 631
 
 
 
 
 
 
 
 
769
  },
770
+ "merges": []
 
 
 
 
 
 
 
 
 
771
  }
772
  }
tokenizer_config.json CHANGED
@@ -90,17 +90,13 @@
90
  }
91
  },
92
  "additional_special_tokens": [
93
- "</s>",
94
  "<RULING>",
95
- "<mask>",
96
  "<BLANK_SPACE>",
97
  "<unk>",
98
  "\n",
99
- "<s>",
100
  "<COLUMN>",
101
  "<SURFACE>",
102
- "...",
103
- "<pad>"
104
  ],
105
  "bos_token": "<s>",
106
  "clean_up_tokenization_spaces": true,
 
90
  }
91
  },
92
  "additional_special_tokens": [
 
93
  "<RULING>",
 
94
  "<BLANK_SPACE>",
95
  "<unk>",
96
  "\n",
 
97
  "<COLUMN>",
98
  "<SURFACE>",
99
+ "..."
 
100
  ],
101
  "bos_token": "<s>",
102
  "clean_up_tokenization_spaces": true,