radlab
/

polish-cross-encoder

Sentence Similarity

sentence-transformers

text-classification

feature-extraction

text-embeddings-inference

Inference Endpoints

Model card Files and versions Community

pkedzia committed on Jul 15

Commit

b1985a7

•

1 Parent(s): c2f7614

Upload tokenizer.json

Updated tokenizer.json

Files changed (1) hide show

tokenizer.json +13 -19

tokenizer.json CHANGED Viewed

@@ -1,59 +1,52 @@
 {
   "version": "1.0",
   "truncation": null,
-  "padding": {
-    "strategy": "BatchLongest",
-    "direction": "Right",
-    "pad_to_multiple_of": null,
-    "pad_id": 1,
-    "pad_type_id": 0,
-    "pad_token": "<pad>"
-  },
   "added_tokens": [
     {
       "id": 0,
       "content": "<s>",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
-      "normalized": false,
-      "special": true
     },
     {
       "id": 1,
       "content": "<pad>",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
-      "normalized": false,
-      "special": true
     },
     {
       "id": 2,
       "content": "</s>",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
-      "normalized": false,
-      "special": true
     },
     {
       "id": 3,
       "content": "<unk>",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
-      "normalized": false,
-      "special": true
     },
     {
       "id": 128000,
       "content": "<mask>",
       "single_word": false,
       "lstrip": true,
       "rstrip": false,
-      "normalized": false,
-      "special": true
     }
   ],
   "normalizer": {
@@ -62,7 +55,8 @@
   "pre_tokenizer": {
     "type": "Metaspace",
     "replacement": "▁",
-    "add_prefix_space": false
   },
   "post_processor": {
     "type": "RobertaProcessing",

 {
   "version": "1.0",
   "truncation": null,
+  "padding": null,
   "added_tokens": [
     {
       "id": 0,
+      "special": true,
       "content": "<s>",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
+      "normalized": false
     },
     {
       "id": 1,
+      "special": true,
       "content": "<pad>",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
+      "normalized": false
     },
     {
       "id": 2,
+      "special": true,
       "content": "</s>",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
+      "normalized": false
     },
     {
       "id": 3,
+      "special": true,
       "content": "<unk>",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
+      "normalized": false
     },
     {
       "id": 128000,
+      "special": true,
       "content": "<mask>",
       "single_word": false,
       "lstrip": true,
       "rstrip": false,
+      "normalized": false
     }
   ],
   "normalizer": {
   "pre_tokenizer": {
     "type": "Metaspace",
     "replacement": "▁",
+    "add_prefix_space": true,
+	"prepend_scheme": "never"
   },
   "post_processor": {
     "type": "RobertaProcessing",