Upload 5 files

Browse files

Files changed (5) hide show

preprocessor_config.json +11 -0
special_tokens_map.json +1 -0
tokenizer.json +177 -0
tokenizer_config.json +1 -0
vocab.json +95 -0

preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,11 @@

+{
+    "crop_size": 30,
+    "do_center_crop": true,
+    "do_normalize": true,
+    "do_resize": true,
+    "feature_extractor_type": "CLIPFeatureExtractor",
+    "image_mean": [0.48145466, 0.4578275, 0.40821073],
+    "image_std": [0.26862954, 0.26130258, 0.27577711],
+    "resample": 3,
+    "size": 30
+}

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1 @@


+ {"bos_token": {"content": "<|startoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "eos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "pad_token": "<|endoftext|>"}

tokenizer.json ADDED Viewed

	@@ -0,0 +1,177 @@

+{
+    "version": "1.0",
+    "truncation": null,
+    "padding": null,
+    "added_tokens": [
+        {
+            "id": 0,
+            "special": true,
+            "content": "<|startoftext|>",
+            "single_word": false,
+            "lstrip": false,
+            "rstrip": false,
+            "normalized": true
+        },
+        {
+            "id": 1,
+            "special": true,
+            "content": "<|endoftext|>",
+            "single_word": false,
+            "lstrip": false,
+            "rstrip": false,
+            "normalized": false
+        }
+    ],
+    "normalizer": {
+        "type": "Sequence",
+        "normalizers": [
+            {
+                "type": "NFC"
+            },
+            {
+                "type": "Replace",
+                "pattern": {
+                    "Regex": "\\s+"
+                },
+                "content": " "
+            },
+            {
+                "type": "Lowercase"
+            }
+        ]
+    },
+    "pre_tokenizer": {
+        "type": "Sequence",
+        "pretokenizers": [
+            {
+                "type": "Split",
+                "pattern": {
+                    "Regex": "'s|'t|'re|'ve|'m|'ll|'d|[\\p{L}]+|[\\p{N}]|[^\\s\\p{L}\\p{N}]+"
+                },
+                "behavior": "Removed",
+                "invert": true
+            },
+            {
+                "type": "ByteLevel",
+                "add_prefix_space": false,
+                "trim_offsets": true
+            }
+        ]
+    },
+    "post_processor": {
+        "type": "RobertaProcessing",
+        "sep": ["<|endoftext|>", 1],
+        "cls": ["<|startoftext|>", 0],
+        "trim_offsets": false,
+        "add_prefix_space": false
+    },
+    "decoder": {
+        "type": "ByteLevel",
+        "add_prefix_space": true,
+        "trim_offsets": true
+    },
+    "model": {
+        "type": "BPE",
+        "dropout": null,
+        "unk_token": "<|endoftext|>",
+        "continuing_subword_prefix": "",
+        "end_of_word_suffix": "</w>",
+        "fuse_unk": false,
+        "vocab": {
+            "<|startoftext|>": 0,
+            "<|endoftext|>": 1,
+            "!": 2,
+            "\"": 3,
+            "#": 4,
+            "$": 5,
+            "%": 6,
+            "&": 7,
+            "'": 8,
+            "(": 9,
+            ")": 10,
+            "*": 11,
+            "+": 12,
+            ",": 13,
+            "-": 14,
+            ".": 15,
+            "/": 16,
+            "0": 17,
+            "1": 18,
+            "2": 19,
+            "3": 20,
+            "4": 21,
+            "5": 22,
+            "6": 23,
+            "7": 24,
+            "8": 25,
+            "9": 26,
+            ":": 27,
+            ";": 28,
+            "<": 29,
+            "=": 30,
+            ">": 31,
+            "?": 32,
+            "@": 33,
+            "A": 34,
+            "B": 35,
+            "C": 36,
+            "D": 37,
+            "E": 38,
+            "F": 39,
+            "G": 40,
+            "H": 41,
+            "I": 42,
+            "J": 43,
+            "K": 44,
+            "L": 45,
+            "M": 46,
+            "N": 47,
+            "O": 48,
+            "P": 49,
+            "Q": 50,
+            "R": 51,
+            "S": 52,
+            "T": 53,
+            "U": 54,
+            "V": 55,
+            "W": 56,
+            "X": 57,
+            "Y": 58,
+            "Z": 59,
+            "[": 60,
+            "\\": 61,
+            "]": 62,
+            "^": 63,
+            "_": 64,
+            "`": 65,
+            "a": 66,
+            "b": 67,
+            "c": 68,
+            "d": 69,
+            "e": 70,
+            "f": 71,
+            "g": 72,
+            "h": 73,
+            "i": 74,
+            "j": 75,
+            "k": 76,
+            "l": 77,
+            "m": 78,
+            "n": 79,
+            "o": 80,
+            "p": 81,
+            "q": 82,
+            "r": 83,
+            "s": 84,
+            "t": 85,
+            "u": 86,
+            "v": 87,
+            "w": 88,
+            "x": 89,
+            "y": 90,
+            "z": 91,
+            "|": 92
+        },
+        "merges": []
+    }
+}

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "bos_token": {"content": "<|startoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "pad_token": "<|endoftext|>", "add_prefix_space": false, "errors": "replace", "do_lower_case": true, "name_or_path": "hf-internal-testing/tiny-random-clip", "model_max_length": 77, "special_tokens_map_file": "/home/lysandre/.cache/huggingface/transformers/18a566598f286c9139f88160c99f84eec492a26bd22738fa9cb44d5b7e0a5c76.cce1206abbad28826f000510f22f354e53e66a97f7c23745a7dfe27609cc07f5", "from_slow": true, "tokenizer_class": "CLIPTokenizer"}

vocab.json ADDED Viewed

	@@ -0,0 +1,95 @@

+{
+    "<|startoftext|>": 0,
+    "<|endoftext|>": 1,
+    "!": 2,
+    "\"": 3,
+    "#": 4,
+    "$": 5,
+    "%": 6,
+    "&": 7,
+    "'": 8,
+    "(": 9,
+    ")": 10,
+    "*": 11,
+    "+": 12,
+    ",": 13,
+    "-": 14,
+    ".": 15,
+    "/": 16,
+    "0": 17,
+    "1": 18,
+    "2": 19,
+    "3": 20,
+    "4": 21,
+    "5": 22,
+    "6": 23,
+    "7": 24,
+    "8": 25,
+    "9": 26,
+    ":": 27,
+    ";": 28,
+    "<": 29,
+    "=": 30,
+    ">": 31,
+    "?": 32,
+    "@": 33,
+    "A": 34,
+    "B": 35,
+    "C": 36,
+    "D": 37,
+    "E": 38,
+    "F": 39,
+    "G": 40,
+    "H": 41,
+    "I": 42,
+    "J": 43,
+    "K": 44,
+    "L": 45,
+    "M": 46,
+    "N": 47,
+    "O": 48,
+    "P": 49,
+    "Q": 50,
+    "R": 51,
+    "S": 52,
+    "T": 53,
+    "U": 54,
+    "V": 55,
+    "W": 56,
+    "X": 57,
+    "Y": 58,
+    "Z": 59,
+    "[": 60,
+    "\\": 61,
+    "]": 62,
+    "^": 63,
+    "_": 64,
+    "`": 65,
+    "a": 66,
+    "b": 67,
+    "c": 68,
+    "d": 69,
+    "e": 70,
+    "f": 71,
+    "g": 72,
+    "h": 73,
+    "i": 74,
+    "j": 75,
+    "k": 76,
+    "l": 77,
+    "m": 78,
+    "n": 79,
+    "o": 80,
+    "p": 81,
+    "q": 82,
+    "r": 83,
+    "s": 84,
+    "t": 85,
+    "u": 86,
+    "v": 87,
+    "w": 88,
+    "x": 89,
+    "y": 90,
+    "z": 91,
+    "|": 92
+}