{
  "_name_or_path": "google/vit-base-patch16-224",
  "architectures": [
    "ViTForImageClassification"
  ],
  "attention_probs_dropout_prob": 0.0,
  "encoder_stride": 16,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "id2label": {
    "0": "apple",
    "1": "ball",
    "10": "cereal_box",
    "11": "coffee_mug",
    "12": "comb",
    "13": "dry_battery",
    "14": "flashlight",
    "15": "food_bag",
    "16": "food_box",
    "17": "food_can",
    "18": "food_cup",
    "19": "food_jar",
    "2": "banana",
    "20": "garlic",
    "21": "glue_stick",
    "22": "greens",
    "23": "hand_towel",
    "24": "instant_noodles",
    "25": "keyboard",
    "26": "kleenex",
    "27": "lemon",
    "28": "lightbulb",
    "29": "lime",
    "3": "bell_pepper",
    "30": "marker",
    "31": "mushroom",
    "32": "notebook",
    "33": "onion",
    "34": "orange",
    "35": "peach",
    "36": "pear",
    "37": "pitcher",
    "38": "plate",
    "39": "pliers",
    "4": "binder",
    "40": "potato",
    "41": "rubber_eraser",
    "42": "scissors",
    "43": "shampoo",
    "44": "soda_can",
    "45": "sponge",
    "46": "stapler",
    "47": "tomato",
    "48": "toothbrush",
    "49": "toothpaste",
    "5": "bowl",
    "50": "water_bottle",
    "6": "calculator",
    "7": "camera",
    "8": "cap",
    "9": "cell_phone"
  },
  "image_size": 224,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "apple": 0,
    "ball": 1,
    "banana": 2,
    "bell_pepper": 3,
    "binder": 4,
    "bowl": 5,
    "calculator": 6,
    "camera": 7,
    "cap": 8,
    "cell_phone": 9,
    "cereal_box": 10,
    "coffee_mug": 11,
    "comb": 12,
    "dry_battery": 13,
    "flashlight": 14,
    "food_bag": 15,
    "food_box": 16,
    "food_can": 17,
    "food_cup": 18,
    "food_jar": 19,
    "garlic": 20,
    "glue_stick": 21,
    "greens": 22,
    "hand_towel": 23,
    "instant_noodles": 24,
    "keyboard": 25,
    "kleenex": 26,
    "lemon": 27,
    "lightbulb": 28,
    "lime": 29,
    "marker": 30,
    "mushroom": 31,
    "notebook": 32,
    "onion": 33,
    "orange": 34,
    "peach": 35,
    "pear": 36,
    "pitcher": 37,
    "plate": 38,
    "pliers": 39,
    "potato": 40,
    "rubber_eraser": 41,
    "scissors": 42,
    "shampoo": 43,
    "soda_can": 44,
    "sponge": 45,
    "stapler": 46,
    "tomato": 47,
    "toothbrush": 48,
    "toothpaste": 49,
    "water_bottle": 50
  },
  "layer_norm_eps": 1e-12,
  "model_type": "vit",
  "num_attention_heads": 12,
  "num_channels": 3,
  "num_hidden_layers": 12,
  "patch_size": 16,
  "qkv_bias": true,
  "transformers_version": "4.31.0"
}