redis
/

langcache-embed-v3-small

@@ -1,14 +1,13 @@
 {
   "version": "1.0",
   "truncation": {
     "max_length": 128,
     "strategy": "LongestFirst",
     "stride": 0
   },
   "padding": {
-    "strategy": {
-      "Fixed": 128
-    },
     "direction": "Right",
     "pad_to_multiple_of": null,
     "pad_id": 0,
@@ -18,48 +17,48 @@
   "added_tokens": [
     {
       "id": 0,
-      "special": true,
       "content": "[PAD]",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
-      "normalized": false
     },
     {
       "id": 100,
-      "special": true,
       "content": "[UNK]",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
-      "normalized": false
     },
     {
       "id": 101,
-      "special": true,
       "content": "[CLS]",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
-      "normalized": false
     },
     {
       "id": 102,
-      "special": true,
       "content": "[SEP]",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
-      "normalized": false
     },
     {
       "id": 103,
-      "special": true,
       "content": "[MASK]",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
-      "normalized": false
     }
   ],
   "normalizer": {
@@ -30682,4 +30681,4 @@
       "##～": 30521
     }
   }
-}

 {
   "version": "1.0",
   "truncation": {
+    "direction": "Right",
     "max_length": 128,
     "strategy": "LongestFirst",
     "stride": 0
   },
   "padding": {
+    "strategy": "BatchLongest",
     "direction": "Right",
     "pad_to_multiple_of": null,
     "pad_id": 0,
   "added_tokens": [
     {
       "id": 0,
       "content": "[PAD]",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
+      "normalized": false,
+      "special": true
     },
     {
       "id": 100,
       "content": "[UNK]",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
+      "normalized": false,
+      "special": true
     },
     {
       "id": 101,
       "content": "[CLS]",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
+      "normalized": false,
+      "special": true
     },
     {
       "id": 102,
       "content": "[SEP]",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
+      "normalized": false,
+      "special": true
     },
     {
       "id": 103,
       "content": "[MASK]",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
+      "normalized": false,
+      "special": true
     }
   ],
   "normalizer": {
       "##～": 30521
     }
   }
+}

tokenizer_config.json CHANGED Viewed

The diff for this file is too large to render. See raw diff