robin luo committed on
Commit d4559dc
1 Parent(s): b43e686
config.json CHANGED
@@ -1,38 +1,2026 @@
  {
- "architecture": "vit_small_patch16_224",
- "num_classes": 1000,
- "num_features": 384,
- "global_pool": "token",
- "pretrained_cfg": {
- "tag": "augreg_in21k_ft_in1k",
- "custom_load": true,
- "input_size": [
- 3,
- 224,
- 224
- ],
- "fixed_input_size": true,
- "interpolation": "bicubic",
- "crop_pct": 0.9,
- "crop_mode": "center",
-
- "mean": [
- 0.5,
- 0.5,
- 0.5
- ],
- "std": [
- 0.5,
- 0.5,
- 0.5
- ],
- "num_classes": 1000,
- "pool_size": null,
- "first_conv": "patch_embed.proj",
- "classifier": "head"
  },
- "_name_or_path": "magicslabnu/OutEffHop_vit_small_patch16_224",
- "auto_map":
- {"AutoModel": "vision_transformer.VisionTransformer"
- }
- }
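Note on the removed config: the old config.json was in timm format — a top-level "architecture" name plus a "pretrained_cfg" block carrying the eval transform (3x224x224 input, bicubic interpolation, 0.9 center crop, mean/std 0.5) — and its "auto_map" pointed at the custom vision_transformer.py deleted below. A minimal sketch of how a timm-format config like this is consumed, assuming the timm package is installed; the model tag mirrors the deleted "augreg_in21k_ft_in1k" value, and pretrained=True fetches timm's own weights, not this repo's:

import timm

model = timm.create_model('vit_small_patch16_224.augreg_in21k_ft_in1k', pretrained=True)
cfg = timm.data.resolve_data_config({}, model=model)  # resolves input_size, mean/std, crop_pct
transform = timm.data.create_transform(**cfg)         # eval transform matching pretrained_cfg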
1
  {
2
+ "architectures": [
3
+ "ViTForImageClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.0,
6
+ "encoder_stride": 16,
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.0,
9
+ "hidden_size": 384,
10
+ "id2label": {
11
+ "0": "n01440764",
12
+ "1": "n01443537",
13
+ "2": "n01484850",
14
+ "3": "n01491361",
15
+ "4": "n01494475",
16
+ "5": "n01496331",
17
+ "6": "n01498041",
18
+ "7": "n01514668",
19
+ "8": "n01514859",
20
+ "9": "n01518878",
21
+ "10": "n01530575",
22
+ "11": "n01531178",
23
+ "12": "n01532829",
24
+ "13": "n01534433",
25
+ "14": "n01537544",
26
+ "15": "n01558993",
27
+ "16": "n01560419",
28
+ "17": "n01580077",
29
+ "18": "n01582220",
30
+ "19": "n01592084",
31
+ "20": "n01601694",
32
+ "21": "n01608432",
33
+ "22": "n01614925",
34
+ "23": "n01616318",
35
+ "24": "n01622779",
36
+ "25": "n01629819",
37
+ "26": "n01630670",
38
+ "27": "n01631663",
39
+ "28": "n01632458",
40
+ "29": "n01632777",
41
+ "30": "n01641577",
42
+ "31": "n01644373",
43
+ "32": "n01644900",
44
+ "33": "n01664065",
45
+ "34": "n01665541",
46
+ "35": "n01667114",
47
+ "36": "n01667778",
48
+ "37": "n01669191",
49
+ "38": "n01675722",
50
+ "39": "n01677366",
51
+ "40": "n01682714",
52
+ "41": "n01685808",
53
+ "42": "n01687978",
54
+ "43": "n01688243",
55
+ "44": "n01689811",
56
+ "45": "n01692333",
57
+ "46": "n01693334",
58
+ "47": "n01694178",
59
+ "48": "n01695060",
60
+ "49": "n01697457",
61
+ "50": "n01698640",
62
+ "51": "n01704323",
63
+ "52": "n01728572",
64
+ "53": "n01728920",
65
+ "54": "n01729322",
66
+ "55": "n01729977",
67
+ "56": "n01734418",
68
+ "57": "n01735189",
69
+ "58": "n01737021",
70
+ "59": "n01739381",
71
+ "60": "n01740131",
72
+ "61": "n01742172",
73
+ "62": "n01744401",
74
+ "63": "n01748264",
75
+ "64": "n01749939",
76
+ "65": "n01751748",
77
+ "66": "n01753488",
78
+ "67": "n01755581",
79
+ "68": "n01756291",
80
+ "69": "n01768244",
81
+ "70": "n01770081",
82
+ "71": "n01770393",
83
+ "72": "n01773157",
84
+ "73": "n01773549",
85
+ "74": "n01773797",
86
+ "75": "n01774384",
87
+ "76": "n01774750",
88
+ "77": "n01775062",
89
+ "78": "n01776313",
90
+ "79": "n01784675",
91
+ "80": "n01795545",
92
+ "81": "n01796340",
93
+ "82": "n01797886",
94
+ "83": "n01798484",
95
+ "84": "n01806143",
96
+ "85": "n01806567",
97
+ "86": "n01807496",
98
+ "87": "n01817953",
99
+ "88": "n01818515",
100
+ "89": "n01819313",
101
+ "90": "n01820546",
102
+ "91": "n01824575",
103
+ "92": "n01828970",
104
+ "93": "n01829413",
105
+ "94": "n01833805",
106
+ "95": "n01843065",
107
+ "96": "n01843383",
108
+ "97": "n01847000",
109
+ "98": "n01855032",
110
+ "99": "n01855672",
111
+ "100": "n01860187",
112
+ "101": "n01871265",
113
+ "102": "n01872401",
114
+ "103": "n01873310",
115
+ "104": "n01877812",
116
+ "105": "n01882714",
117
+ "106": "n01883070",
118
+ "107": "n01910747",
119
+ "108": "n01914609",
120
+ "109": "n01917289",
121
+ "110": "n01924916",
122
+ "111": "n01930112",
123
+ "112": "n01943899",
124
+ "113": "n01944390",
125
+ "114": "n01945685",
126
+ "115": "n01950731",
127
+ "116": "n01955084",
128
+ "117": "n01968897",
129
+ "118": "n01978287",
130
+ "119": "n01978455",
131
+ "120": "n01980166",
132
+ "121": "n01981276",
133
+ "122": "n01983481",
134
+ "123": "n01984695",
135
+ "124": "n01985128",
136
+ "125": "n01986214",
137
+ "126": "n01990800",
138
+ "127": "n02002556",
139
+ "128": "n02002724",
140
+ "129": "n02006656",
141
+ "130": "n02007558",
142
+ "131": "n02009229",
143
+ "132": "n02009912",
144
+ "133": "n02011460",
145
+ "134": "n02012849",
146
+ "135": "n02013706",
147
+ "136": "n02017213",
148
+ "137": "n02018207",
149
+ "138": "n02018795",
150
+ "139": "n02025239",
151
+ "140": "n02027492",
152
+ "141": "n02028035",
153
+ "142": "n02033041",
154
+ "143": "n02037110",
155
+ "144": "n02051845",
156
+ "145": "n02056570",
157
+ "146": "n02058221",
158
+ "147": "n02066245",
159
+ "148": "n02071294",
160
+ "149": "n02074367",
161
+ "150": "n02077923",
162
+ "151": "n02085620",
163
+ "152": "n02085782",
164
+ "153": "n02085936",
165
+ "154": "n02086079",
166
+ "155": "n02086240",
167
+ "156": "n02086646",
168
+ "157": "n02086910",
169
+ "158": "n02087046",
170
+ "159": "n02087394",
171
+ "160": "n02088094",
172
+ "161": "n02088238",
173
+ "162": "n02088364",
174
+ "163": "n02088466",
175
+ "164": "n02088632",
176
+ "165": "n02089078",
177
+ "166": "n02089867",
178
+ "167": "n02089973",
179
+ "168": "n02090379",
180
+ "169": "n02090622",
181
+ "170": "n02090721",
182
+ "171": "n02091032",
183
+ "172": "n02091134",
184
+ "173": "n02091244",
185
+ "174": "n02091467",
186
+ "175": "n02091635",
187
+ "176": "n02091831",
188
+ "177": "n02092002",
189
+ "178": "n02092339",
190
+ "179": "n02093256",
191
+ "180": "n02093428",
192
+ "181": "n02093647",
193
+ "182": "n02093754",
194
+ "183": "n02093859",
195
+ "184": "n02093991",
196
+ "185": "n02094114",
197
+ "186": "n02094258",
198
+ "187": "n02094433",
199
+ "188": "n02095314",
200
+ "189": "n02095570",
201
+ "190": "n02095889",
202
+ "191": "n02096051",
203
+ "192": "n02096177",
204
+ "193": "n02096294",
205
+ "194": "n02096437",
206
+ "195": "n02096585",
207
+ "196": "n02097047",
208
+ "197": "n02097130",
209
+ "198": "n02097209",
210
+ "199": "n02097298",
211
+ "200": "n02097474",
212
+ "201": "n02097658",
213
+ "202": "n02098105",
214
+ "203": "n02098286",
215
+ "204": "n02098413",
216
+ "205": "n02099267",
217
+ "206": "n02099429",
218
+ "207": "n02099601",
219
+ "208": "n02099712",
220
+ "209": "n02099849",
221
+ "210": "n02100236",
222
+ "211": "n02100583",
223
+ "212": "n02100735",
224
+ "213": "n02100877",
225
+ "214": "n02101006",
226
+ "215": "n02101388",
227
+ "216": "n02101556",
228
+ "217": "n02102040",
229
+ "218": "n02102177",
230
+ "219": "n02102318",
231
+ "220": "n02102480",
232
+ "221": "n02102973",
233
+ "222": "n02104029",
234
+ "223": "n02104365",
235
+ "224": "n02105056",
236
+ "225": "n02105162",
237
+ "226": "n02105251",
238
+ "227": "n02105412",
239
+ "228": "n02105505",
240
+ "229": "n02105641",
241
+ "230": "n02105855",
242
+ "231": "n02106030",
243
+ "232": "n02106166",
244
+ "233": "n02106382",
245
+ "234": "n02106550",
246
+ "235": "n02106662",
247
+ "236": "n02107142",
248
+ "237": "n02107312",
249
+ "238": "n02107574",
250
+ "239": "n02107683",
251
+ "240": "n02107908",
252
+ "241": "n02108000",
253
+ "242": "n02108089",
254
+ "243": "n02108422",
255
+ "244": "n02108551",
256
+ "245": "n02108915",
257
+ "246": "n02109047",
258
+ "247": "n02109525",
259
+ "248": "n02109961",
260
+ "249": "n02110063",
261
+ "250": "n02110185",
262
+ "251": "n02110341",
263
+ "252": "n02110627",
264
+ "253": "n02110806",
265
+ "254": "n02110958",
266
+ "255": "n02111129",
267
+ "256": "n02111277",
268
+ "257": "n02111500",
269
+ "258": "n02111889",
270
+ "259": "n02112018",
271
+ "260": "n02112137",
272
+ "261": "n02112350",
273
+ "262": "n02112706",
274
+ "263": "n02113023",
275
+ "264": "n02113186",
276
+ "265": "n02113624",
277
+ "266": "n02113712",
278
+ "267": "n02113799",
279
+ "268": "n02113978",
280
+ "269": "n02114367",
281
+ "270": "n02114548",
282
+ "271": "n02114712",
283
+ "272": "n02114855",
284
+ "273": "n02115641",
285
+ "274": "n02115913",
286
+ "275": "n02116738",
287
+ "276": "n02117135",
288
+ "277": "n02119022",
289
+ "278": "n02119789",
290
+ "279": "n02120079",
291
+ "280": "n02120505",
292
+ "281": "n02123045",
293
+ "282": "n02123159",
294
+ "283": "n02123394",
295
+ "284": "n02123597",
296
+ "285": "n02124075",
297
+ "286": "n02125311",
298
+ "287": "n02127052",
299
+ "288": "n02128385",
300
+ "289": "n02128757",
301
+ "290": "n02128925",
302
+ "291": "n02129165",
303
+ "292": "n02129604",
304
+ "293": "n02130308",
305
+ "294": "n02132136",
306
+ "295": "n02133161",
307
+ "296": "n02134084",
308
+ "297": "n02134418",
309
+ "298": "n02137549",
310
+ "299": "n02138441",
311
+ "300": "n02165105",
312
+ "301": "n02165456",
313
+ "302": "n02167151",
314
+ "303": "n02168699",
315
+ "304": "n02169497",
316
+ "305": "n02172182",
317
+ "306": "n02174001",
318
+ "307": "n02177972",
319
+ "308": "n02190166",
320
+ "309": "n02206856",
321
+ "310": "n02219486",
322
+ "311": "n02226429",
323
+ "312": "n02229544",
324
+ "313": "n02231487",
325
+ "314": "n02233338",
326
+ "315": "n02236044",
327
+ "316": "n02256656",
328
+ "317": "n02259212",
329
+ "318": "n02264363",
330
+ "319": "n02268443",
331
+ "320": "n02268853",
332
+ "321": "n02276258",
333
+ "322": "n02277742",
334
+ "323": "n02279972",
335
+ "324": "n02280649",
336
+ "325": "n02281406",
337
+ "326": "n02281787",
338
+ "327": "n02317335",
339
+ "328": "n02319095",
340
+ "329": "n02321529",
341
+ "330": "n02325366",
342
+ "331": "n02326432",
343
+ "332": "n02328150",
344
+ "333": "n02342885",
345
+ "334": "n02346627",
346
+ "335": "n02356798",
347
+ "336": "n02361337",
348
+ "337": "n02363005",
349
+ "338": "n02364673",
350
+ "339": "n02389026",
351
+ "340": "n02391049",
352
+ "341": "n02395406",
353
+ "342": "n02396427",
354
+ "343": "n02397096",
355
+ "344": "n02398521",
356
+ "345": "n02403003",
357
+ "346": "n02408429",
358
+ "347": "n02410509",
359
+ "348": "n02412080",
360
+ "349": "n02415577",
361
+ "350": "n02417914",
362
+ "351": "n02422106",
363
+ "352": "n02422699",
364
+ "353": "n02423022",
365
+ "354": "n02437312",
366
+ "355": "n02437616",
367
+ "356": "n02441942",
368
+ "357": "n02442845",
369
+ "358": "n02443114",
370
+ "359": "n02443484",
371
+ "360": "n02444819",
372
+ "361": "n02445715",
373
+ "362": "n02447366",
374
+ "363": "n02454379",
375
+ "364": "n02457408",
376
+ "365": "n02480495",
377
+ "366": "n02480855",
378
+ "367": "n02481823",
379
+ "368": "n02483362",
380
+ "369": "n02483708",
381
+ "370": "n02484975",
382
+ "371": "n02486261",
383
+ "372": "n02486410",
384
+ "373": "n02487347",
385
+ "374": "n02488291",
386
+ "375": "n02488702",
387
+ "376": "n02489166",
388
+ "377": "n02490219",
389
+ "378": "n02492035",
390
+ "379": "n02492660",
391
+ "380": "n02493509",
392
+ "381": "n02493793",
393
+ "382": "n02494079",
394
+ "383": "n02497673",
395
+ "384": "n02500267",
396
+ "385": "n02504013",
397
+ "386": "n02504458",
398
+ "387": "n02509815",
399
+ "388": "n02510455",
400
+ "389": "n02514041",
401
+ "390": "n02526121",
402
+ "391": "n02536864",
403
+ "392": "n02606052",
404
+ "393": "n02607072",
405
+ "394": "n02640242",
406
+ "395": "n02641379",
407
+ "396": "n02643566",
408
+ "397": "n02655020",
409
+ "398": "n02666196",
410
+ "399": "n02667093",
411
+ "400": "n02669723",
412
+ "401": "n02672831",
413
+ "402": "n02676566",
414
+ "403": "n02687172",
415
+ "404": "n02690373",
416
+ "405": "n02692877",
417
+ "406": "n02699494",
418
+ "407": "n02701002",
419
+ "408": "n02704792",
420
+ "409": "n02708093",
421
+ "410": "n02727426",
422
+ "411": "n02730930",
423
+ "412": "n02747177",
424
+ "413": "n02749479",
425
+ "414": "n02769748",
426
+ "415": "n02776631",
427
+ "416": "n02777292",
428
+ "417": "n02782093",
429
+ "418": "n02783161",
430
+ "419": "n02786058",
431
+ "420": "n02787622",
432
+ "421": "n02788148",
433
+ "422": "n02790996",
434
+ "423": "n02791124",
435
+ "424": "n02791270",
436
+ "425": "n02793495",
437
+ "426": "n02794156",
438
+ "427": "n02795169",
439
+ "428": "n02797295",
440
+ "429": "n02799071",
441
+ "430": "n02802426",
442
+ "431": "n02804414",
443
+ "432": "n02804610",
444
+ "433": "n02807133",
445
+ "434": "n02808304",
446
+ "435": "n02808440",
447
+ "436": "n02814533",
448
+ "437": "n02814860",
449
+ "438": "n02815834",
450
+ "439": "n02817516",
451
+ "440": "n02823428",
452
+ "441": "n02823750",
453
+ "442": "n02825657",
454
+ "443": "n02834397",
455
+ "444": "n02835271",
456
+ "445": "n02837789",
457
+ "446": "n02840245",
458
+ "447": "n02841315",
459
+ "448": "n02843684",
460
+ "449": "n02859443",
461
+ "450": "n02860847",
462
+ "451": "n02865351",
463
+ "452": "n02869837",
464
+ "453": "n02870880",
465
+ "454": "n02871525",
466
+ "455": "n02877765",
467
+ "456": "n02879718",
468
+ "457": "n02883205",
469
+ "458": "n02892201",
470
+ "459": "n02892767",
471
+ "460": "n02894605",
472
+ "461": "n02895154",
473
+ "462": "n02906734",
474
+ "463": "n02909870",
475
+ "464": "n02910353",
476
+ "465": "n02916936",
477
+ "466": "n02917067",
478
+ "467": "n02927161",
479
+ "468": "n02930766",
480
+ "469": "n02939185",
481
+ "470": "n02948072",
482
+ "471": "n02950826",
483
+ "472": "n02951358",
484
+ "473": "n02951585",
485
+ "474": "n02963159",
486
+ "475": "n02965783",
487
+ "476": "n02966193",
488
+ "477": "n02966687",
489
+ "478": "n02971356",
490
+ "479": "n02974003",
491
+ "480": "n02977058",
492
+ "481": "n02978881",
493
+ "482": "n02979186",
494
+ "483": "n02980441",
495
+ "484": "n02981792",
496
+ "485": "n02988304",
497
+ "486": "n02992211",
498
+ "487": "n02992529",
499
+ "488": "n02999410",
500
+ "489": "n03000134",
501
+ "490": "n03000247",
502
+ "491": "n03000684",
503
+ "492": "n03014705",
504
+ "493": "n03016953",
505
+ "494": "n03017168",
506
+ "495": "n03018349",
507
+ "496": "n03026506",
508
+ "497": "n03028079",
509
+ "498": "n03032252",
510
+ "499": "n03041632",
511
+ "500": "n03042490",
512
+ "501": "n03045698",
513
+ "502": "n03047690",
514
+ "503": "n03062245",
515
+ "504": "n03063599",
516
+ "505": "n03063689",
517
+ "506": "n03065424",
518
+ "507": "n03075370",
519
+ "508": "n03085013",
520
+ "509": "n03089624",
521
+ "510": "n03095699",
522
+ "511": "n03100240",
523
+ "512": "n03109150",
524
+ "513": "n03110669",
525
+ "514": "n03124043",
526
+ "515": "n03124170",
527
+ "516": "n03125729",
528
+ "517": "n03126707",
529
+ "518": "n03127747",
530
+ "519": "n03127925",
531
+ "520": "n03131574",
532
+ "521": "n03133878",
533
+ "522": "n03134739",
534
+ "523": "n03141823",
535
+ "524": "n03146219",
536
+ "525": "n03160309",
537
+ "526": "n03179701",
538
+ "527": "n03180011",
539
+ "528": "n03187595",
540
+ "529": "n03188531",
541
+ "530": "n03196217",
542
+ "531": "n03197337",
543
+ "532": "n03201208",
544
+ "533": "n03207743",
545
+ "534": "n03207941",
546
+ "535": "n03208938",
547
+ "536": "n03216828",
548
+ "537": "n03218198",
549
+ "538": "n03220513",
550
+ "539": "n03223299",
551
+ "540": "n03240683",
552
+ "541": "n03249569",
553
+ "542": "n03250847",
554
+ "543": "n03255030",
555
+ "544": "n03259280",
556
+ "545": "n03271574",
557
+ "546": "n03272010",
558
+ "547": "n03272562",
559
+ "548": "n03290653",
560
+ "549": "n03291819",
561
+ "550": "n03297495",
562
+ "551": "n03314780",
563
+ "552": "n03325584",
564
+ "553": "n03337140",
565
+ "554": "n03344393",
566
+ "555": "n03345487",
567
+ "556": "n03347037",
568
+ "557": "n03355925",
569
+ "558": "n03372029",
570
+ "559": "n03376595",
571
+ "560": "n03379051",
572
+ "561": "n03384352",
573
+ "562": "n03388043",
574
+ "563": "n03388183",
575
+ "564": "n03388549",
576
+ "565": "n03393912",
577
+ "566": "n03394916",
578
+ "567": "n03400231",
579
+ "568": "n03404251",
580
+ "569": "n03417042",
581
+ "570": "n03424325",
582
+ "571": "n03425413",
583
+ "572": "n03443371",
584
+ "573": "n03444034",
585
+ "574": "n03445777",
586
+ "575": "n03445924",
587
+ "576": "n03447447",
588
+ "577": "n03447721",
589
+ "578": "n03450230",
590
+ "579": "n03452741",
591
+ "580": "n03457902",
592
+ "581": "n03459775",
593
+ "582": "n03461385",
594
+ "583": "n03467068",
595
+ "584": "n03476684",
596
+ "585": "n03476991",
597
+ "586": "n03478589",
598
+ "587": "n03481172",
599
+ "588": "n03482405",
600
+ "589": "n03483316",
601
+ "590": "n03485407",
602
+ "591": "n03485794",
603
+ "592": "n03492542",
604
+ "593": "n03494278",
605
+ "594": "n03495258",
606
+ "595": "n03496892",
607
+ "596": "n03498962",
608
+ "597": "n03527444",
609
+ "598": "n03529860",
610
+ "599": "n03530642",
611
+ "600": "n03532672",
612
+ "601": "n03534580",
613
+ "602": "n03535780",
614
+ "603": "n03538406",
615
+ "604": "n03544143",
616
+ "605": "n03584254",
617
+ "606": "n03584829",
618
+ "607": "n03590841",
619
+ "608": "n03594734",
620
+ "609": "n03594945",
621
+ "610": "n03595614",
622
+ "611": "n03598930",
623
+ "612": "n03599486",
624
+ "613": "n03602883",
625
+ "614": "n03617480",
626
+ "615": "n03623198",
627
+ "616": "n03627232",
628
+ "617": "n03630383",
629
+ "618": "n03633091",
630
+ "619": "n03637318",
631
+ "620": "n03642806",
632
+ "621": "n03649909",
633
+ "622": "n03657121",
634
+ "623": "n03658185",
635
+ "624": "n03661043",
636
+ "625": "n03662601",
637
+ "626": "n03666591",
638
+ "627": "n03670208",
639
+ "628": "n03673027",
640
+ "629": "n03676483",
641
+ "630": "n03680355",
642
+ "631": "n03690938",
643
+ "632": "n03691459",
644
+ "633": "n03692522",
645
+ "634": "n03697007",
646
+ "635": "n03706229",
647
+ "636": "n03709823",
648
+ "637": "n03710193",
649
+ "638": "n03710637",
650
+ "639": "n03710721",
651
+ "640": "n03717622",
652
+ "641": "n03720891",
653
+ "642": "n03721384",
654
+ "643": "n03724870",
655
+ "644": "n03729826",
656
+ "645": "n03733131",
657
+ "646": "n03733281",
658
+ "647": "n03733805",
659
+ "648": "n03742115",
660
+ "649": "n03743016",
661
+ "650": "n03759954",
662
+ "651": "n03761084",
663
+ "652": "n03763968",
664
+ "653": "n03764736",
665
+ "654": "n03769881",
666
+ "655": "n03770439",
667
+ "656": "n03770679",
668
+ "657": "n03773504",
669
+ "658": "n03775071",
670
+ "659": "n03775546",
671
+ "660": "n03776460",
672
+ "661": "n03777568",
673
+ "662": "n03777754",
674
+ "663": "n03781244",
675
+ "664": "n03782006",
676
+ "665": "n03785016",
677
+ "666": "n03786901",
678
+ "667": "n03787032",
679
+ "668": "n03788195",
680
+ "669": "n03788365",
681
+ "670": "n03791053",
682
+ "671": "n03792782",
683
+ "672": "n03792972",
684
+ "673": "n03793489",
685
+ "674": "n03794056",
686
+ "675": "n03796401",
687
+ "676": "n03803284",
688
+ "677": "n03804744",
689
+ "678": "n03814639",
690
+ "679": "n03814906",
691
+ "680": "n03825788",
692
+ "681": "n03832673",
693
+ "682": "n03837869",
694
+ "683": "n03838899",
695
+ "684": "n03840681",
696
+ "685": "n03841143",
697
+ "686": "n03843555",
698
+ "687": "n03854065",
699
+ "688": "n03857828",
700
+ "689": "n03866082",
701
+ "690": "n03868242",
702
+ "691": "n03868863",
703
+ "692": "n03871628",
704
+ "693": "n03873416",
705
+ "694": "n03874293",
706
+ "695": "n03874599",
707
+ "696": "n03876231",
708
+ "697": "n03877472",
709
+ "698": "n03877845",
710
+ "699": "n03884397",
711
+ "700": "n03887697",
712
+ "701": "n03888257",
713
+ "702": "n03888605",
714
+ "703": "n03891251",
715
+ "704": "n03891332",
716
+ "705": "n03895866",
717
+ "706": "n03899768",
718
+ "707": "n03902125",
719
+ "708": "n03903868",
720
+ "709": "n03908618",
721
+ "710": "n03908714",
722
+ "711": "n03916031",
723
+ "712": "n03920288",
724
+ "713": "n03924679",
725
+ "714": "n03929660",
726
+ "715": "n03929855",
727
+ "716": "n03930313",
728
+ "717": "n03930630",
729
+ "718": "n03933933",
730
+ "719": "n03935335",
731
+ "720": "n03937543",
732
+ "721": "n03938244",
733
+ "722": "n03942813",
734
+ "723": "n03944341",
735
+ "724": "n03947888",
736
+ "725": "n03950228",
737
+ "726": "n03954731",
738
+ "727": "n03956157",
739
+ "728": "n03958227",
740
+ "729": "n03961711",
741
+ "730": "n03967562",
742
+ "731": "n03970156",
743
+ "732": "n03976467",
744
+ "733": "n03976657",
745
+ "734": "n03977966",
746
+ "735": "n03980874",
747
+ "736": "n03982430",
748
+ "737": "n03983396",
749
+ "738": "n03991062",
750
+ "739": "n03992509",
751
+ "740": "n03995372",
752
+ "741": "n03998194",
753
+ "742": "n04004767",
754
+ "743": "n04005630",
755
+ "744": "n04008634",
756
+ "745": "n04009552",
757
+ "746": "n04019541",
758
+ "747": "n04023962",
759
+ "748": "n04026417",
760
+ "749": "n04033901",
761
+ "750": "n04033995",
762
+ "751": "n04037443",
763
+ "752": "n04039381",
764
+ "753": "n04040759",
765
+ "754": "n04041544",
766
+ "755": "n04044716",
767
+ "756": "n04049303",
768
+ "757": "n04065272",
769
+ "758": "n04067472",
770
+ "759": "n04069434",
771
+ "760": "n04070727",
772
+ "761": "n04074963",
773
+ "762": "n04081281",
774
+ "763": "n04086273",
775
+ "764": "n04090263",
776
+ "765": "n04099969",
777
+ "766": "n04111531",
778
+ "767": "n04116512",
779
+ "768": "n04118538",
780
+ "769": "n04118776",
781
+ "770": "n04120489",
782
+ "771": "n04125021",
783
+ "772": "n04127249",
784
+ "773": "n04131690",
785
+ "774": "n04133789",
786
+ "775": "n04136333",
787
+ "776": "n04141076",
788
+ "777": "n04141327",
789
+ "778": "n04141975",
790
+ "779": "n04146614",
791
+ "780": "n04147183",
792
+ "781": "n04149813",
793
+ "782": "n04152593",
794
+ "783": "n04153751",
795
+ "784": "n04154565",
796
+ "785": "n04162706",
797
+ "786": "n04179913",
798
+ "787": "n04192698",
799
+ "788": "n04200800",
800
+ "789": "n04201297",
801
+ "790": "n04204238",
802
+ "791": "n04204347",
803
+ "792": "n04208210",
804
+ "793": "n04209133",
805
+ "794": "n04209239",
806
+ "795": "n04228054",
807
+ "796": "n04229816",
808
+ "797": "n04235860",
809
+ "798": "n04238763",
810
+ "799": "n04239074",
811
+ "800": "n04243546",
812
+ "801": "n04251144",
813
+ "802": "n04252077",
814
+ "803": "n04252225",
815
+ "804": "n04254120",
816
+ "805": "n04254680",
817
+ "806": "n04254777",
818
+ "807": "n04258138",
819
+ "808": "n04259630",
820
+ "809": "n04263257",
821
+ "810": "n04264628",
822
+ "811": "n04265275",
823
+ "812": "n04266014",
824
+ "813": "n04270147",
825
+ "814": "n04273569",
826
+ "815": "n04275548",
827
+ "816": "n04277352",
828
+ "817": "n04285008",
829
+ "818": "n04286575",
830
+ "819": "n04296562",
831
+ "820": "n04310018",
832
+ "821": "n04311004",
833
+ "822": "n04311174",
834
+ "823": "n04317175",
835
+ "824": "n04325704",
836
+ "825": "n04326547",
837
+ "826": "n04328186",
838
+ "827": "n04330267",
839
+ "828": "n04332243",
840
+ "829": "n04335435",
841
+ "830": "n04336792",
842
+ "831": "n04344873",
843
+ "832": "n04346328",
844
+ "833": "n04347754",
845
+ "834": "n04350905",
846
+ "835": "n04355338",
847
+ "836": "n04355933",
848
+ "837": "n04356056",
849
+ "838": "n04357314",
850
+ "839": "n04366367",
851
+ "840": "n04367480",
852
+ "841": "n04370456",
853
+ "842": "n04371430",
854
+ "843": "n04371774",
855
+ "844": "n04372370",
856
+ "845": "n04376876",
857
+ "846": "n04380533",
858
+ "847": "n04389033",
859
+ "848": "n04392985",
860
+ "849": "n04398044",
861
+ "850": "n04399382",
862
+ "851": "n04404412",
863
+ "852": "n04409515",
864
+ "853": "n04417672",
865
+ "854": "n04418357",
866
+ "855": "n04423845",
867
+ "856": "n04428191",
868
+ "857": "n04429376",
869
+ "858": "n04435653",
870
+ "859": "n04442312",
871
+ "860": "n04443257",
872
+ "861": "n04447861",
873
+ "862": "n04456115",
874
+ "863": "n04458633",
875
+ "864": "n04461696",
876
+ "865": "n04462240",
877
+ "866": "n04465501",
878
+ "867": "n04467665",
879
+ "868": "n04476259",
880
+ "869": "n04479046",
881
+ "870": "n04482393",
882
+ "871": "n04483307",
883
+ "872": "n04485082",
884
+ "873": "n04486054",
885
+ "874": "n04487081",
886
+ "875": "n04487394",
887
+ "876": "n04493381",
888
+ "877": "n04501370",
889
+ "878": "n04505470",
890
+ "879": "n04507155",
891
+ "880": "n04509417",
892
+ "881": "n04515003",
893
+ "882": "n04517823",
894
+ "883": "n04522168",
895
+ "884": "n04523525",
896
+ "885": "n04525038",
897
+ "886": "n04525305",
898
+ "887": "n04532106",
899
+ "888": "n04532670",
900
+ "889": "n04536866",
901
+ "890": "n04540053",
902
+ "891": "n04542943",
903
+ "892": "n04548280",
904
+ "893": "n04548362",
905
+ "894": "n04550184",
906
+ "895": "n04552348",
907
+ "896": "n04553703",
908
+ "897": "n04554684",
909
+ "898": "n04557648",
910
+ "899": "n04560804",
911
+ "900": "n04562935",
912
+ "901": "n04579145",
913
+ "902": "n04579432",
914
+ "903": "n04584207",
915
+ "904": "n04589890",
916
+ "905": "n04590129",
917
+ "906": "n04591157",
918
+ "907": "n04591713",
919
+ "908": "n04592741",
920
+ "909": "n04596742",
921
+ "910": "n04597913",
922
+ "911": "n04599235",
923
+ "912": "n04604644",
924
+ "913": "n04606251",
925
+ "914": "n04612504",
926
+ "915": "n04613696",
927
+ "916": "n06359193",
928
+ "917": "n06596364",
929
+ "918": "n06785654",
930
+ "919": "n06794110",
931
+ "920": "n06874185",
932
+ "921": "n07248320",
933
+ "922": "n07565083",
934
+ "923": "n07579787",
935
+ "924": "n07583066",
936
+ "925": "n07584110",
937
+ "926": "n07590611",
938
+ "927": "n07613480",
939
+ "928": "n07614500",
940
+ "929": "n07615774",
941
+ "930": "n07684084",
942
+ "931": "n07693725",
943
+ "932": "n07695742",
944
+ "933": "n07697313",
945
+ "934": "n07697537",
946
+ "935": "n07711569",
947
+ "936": "n07714571",
948
+ "937": "n07714990",
949
+ "938": "n07715103",
950
+ "939": "n07716358",
951
+ "940": "n07716906",
952
+ "941": "n07717410",
953
+ "942": "n07717556",
954
+ "943": "n07718472",
955
+ "944": "n07718747",
956
+ "945": "n07720875",
957
+ "946": "n07730033",
958
+ "947": "n07734744",
959
+ "948": "n07742313",
960
+ "949": "n07745940",
961
+ "950": "n07747607",
962
+ "951": "n07749582",
963
+ "952": "n07753113",
964
+ "953": "n07753275",
965
+ "954": "n07753592",
966
+ "955": "n07754684",
967
+ "956": "n07760859",
968
+ "957": "n07768694",
969
+ "958": "n07802026",
970
+ "959": "n07831146",
971
+ "960": "n07836838",
972
+ "961": "n07860988",
973
+ "962": "n07871810",
974
+ "963": "n07873807",
975
+ "964": "n07875152",
976
+ "965": "n07880968",
977
+ "966": "n07892512",
978
+ "967": "n07920052",
979
+ "968": "n07930864",
980
+ "969": "n07932039",
981
+ "970": "n09193705",
982
+ "971": "n09229709",
983
+ "972": "n09246464",
984
+ "973": "n09256479",
985
+ "974": "n09288635",
986
+ "975": "n09332890",
987
+ "976": "n09399592",
988
+ "977": "n09421951",
989
+ "978": "n09428293",
990
+ "979": "n09468604",
991
+ "980": "n09472597",
992
+ "981": "n09835506",
993
+ "982": "n10148035",
994
+ "983": "n10565667",
995
+ "984": "n11879895",
996
+ "985": "n11939491",
997
+ "986": "n12057211",
998
+ "987": "n12144580",
999
+ "988": "n12267677",
1000
+ "989": "n12620546",
1001
+ "990": "n12768682",
1002
+ "991": "n12985857",
1003
+ "992": "n12998815",
1004
+ "993": "n13037406",
1005
+ "994": "n13040303",
1006
+ "995": "n13044778",
1007
+ "996": "n13052670",
1008
+ "997": "n13054560",
1009
+ "998": "n13133613",
1010
+ "999": "n15075141"
1011
  },
1012
+ "image_size": 224,
1013
+ "initializer_range": 0.02,
1014
+ "intermediate_size": 1536,
1015
+ "label2id": {
1016
+ "n01440764": 0,
1017
+ "n01443537": 1,
1018
+ "n01484850": 2,
1019
+ "n01491361": 3,
1020
+ "n01494475": 4,
1021
+ "n01496331": 5,
1022
+ "n01498041": 6,
1023
+ "n01514668": 7,
1024
+ "n01514859": 8,
1025
+ "n01518878": 9,
1026
+ "n01530575": 10,
1027
+ "n01531178": 11,
1028
+ "n01532829": 12,
1029
+ "n01534433": 13,
1030
+ "n01537544": 14,
1031
+ "n01558993": 15,
1032
+ "n01560419": 16,
1033
+ "n01580077": 17,
1034
+ "n01582220": 18,
1035
+ "n01592084": 19,
1036
+ "n01601694": 20,
1037
+ "n01608432": 21,
1038
+ "n01614925": 22,
1039
+ "n01616318": 23,
1040
+ "n01622779": 24,
1041
+ "n01629819": 25,
1042
+ "n01630670": 26,
1043
+ "n01631663": 27,
1044
+ "n01632458": 28,
1045
+ "n01632777": 29,
1046
+ "n01641577": 30,
1047
+ "n01644373": 31,
1048
+ "n01644900": 32,
1049
+ "n01664065": 33,
1050
+ "n01665541": 34,
1051
+ "n01667114": 35,
1052
+ "n01667778": 36,
1053
+ "n01669191": 37,
1054
+ "n01675722": 38,
1055
+ "n01677366": 39,
1056
+ "n01682714": 40,
1057
+ "n01685808": 41,
1058
+ "n01687978": 42,
1059
+ "n01688243": 43,
1060
+ "n01689811": 44,
1061
+ "n01692333": 45,
1062
+ "n01693334": 46,
1063
+ "n01694178": 47,
1064
+ "n01695060": 48,
1065
+ "n01697457": 49,
1066
+ "n01698640": 50,
1067
+ "n01704323": 51,
1068
+ "n01728572": 52,
1069
+ "n01728920": 53,
1070
+ "n01729322": 54,
1071
+ "n01729977": 55,
1072
+ "n01734418": 56,
1073
+ "n01735189": 57,
1074
+ "n01737021": 58,
1075
+ "n01739381": 59,
1076
+ "n01740131": 60,
1077
+ "n01742172": 61,
1078
+ "n01744401": 62,
1079
+ "n01748264": 63,
1080
+ "n01749939": 64,
1081
+ "n01751748": 65,
1082
+ "n01753488": 66,
1083
+ "n01755581": 67,
1084
+ "n01756291": 68,
1085
+ "n01768244": 69,
1086
+ "n01770081": 70,
1087
+ "n01770393": 71,
1088
+ "n01773157": 72,
1089
+ "n01773549": 73,
1090
+ "n01773797": 74,
1091
+ "n01774384": 75,
1092
+ "n01774750": 76,
1093
+ "n01775062": 77,
1094
+ "n01776313": 78,
1095
+ "n01784675": 79,
1096
+ "n01795545": 80,
1097
+ "n01796340": 81,
1098
+ "n01797886": 82,
1099
+ "n01798484": 83,
1100
+ "n01806143": 84,
1101
+ "n01806567": 85,
1102
+ "n01807496": 86,
1103
+ "n01817953": 87,
1104
+ "n01818515": 88,
1105
+ "n01819313": 89,
1106
+ "n01820546": 90,
1107
+ "n01824575": 91,
1108
+ "n01828970": 92,
1109
+ "n01829413": 93,
1110
+ "n01833805": 94,
1111
+ "n01843065": 95,
1112
+ "n01843383": 96,
1113
+ "n01847000": 97,
1114
+ "n01855032": 98,
1115
+ "n01855672": 99,
1116
+ "n01860187": 100,
1117
+ "n01871265": 101,
1118
+ "n01872401": 102,
1119
+ "n01873310": 103,
1120
+ "n01877812": 104,
1121
+ "n01882714": 105,
1122
+ "n01883070": 106,
1123
+ "n01910747": 107,
1124
+ "n01914609": 108,
1125
+ "n01917289": 109,
1126
+ "n01924916": 110,
1127
+ "n01930112": 111,
1128
+ "n01943899": 112,
1129
+ "n01944390": 113,
1130
+ "n01945685": 114,
1131
+ "n01950731": 115,
1132
+ "n01955084": 116,
1133
+ "n01968897": 117,
1134
+ "n01978287": 118,
1135
+ "n01978455": 119,
1136
+ "n01980166": 120,
1137
+ "n01981276": 121,
1138
+ "n01983481": 122,
1139
+ "n01984695": 123,
1140
+ "n01985128": 124,
1141
+ "n01986214": 125,
1142
+ "n01990800": 126,
1143
+ "n02002556": 127,
1144
+ "n02002724": 128,
1145
+ "n02006656": 129,
1146
+ "n02007558": 130,
1147
+ "n02009229": 131,
1148
+ "n02009912": 132,
1149
+ "n02011460": 133,
1150
+ "n02012849": 134,
1151
+ "n02013706": 135,
1152
+ "n02017213": 136,
1153
+ "n02018207": 137,
1154
+ "n02018795": 138,
1155
+ "n02025239": 139,
1156
+ "n02027492": 140,
1157
+ "n02028035": 141,
1158
+ "n02033041": 142,
1159
+ "n02037110": 143,
1160
+ "n02051845": 144,
1161
+ "n02056570": 145,
1162
+ "n02058221": 146,
1163
+ "n02066245": 147,
1164
+ "n02071294": 148,
1165
+ "n02074367": 149,
1166
+ "n02077923": 150,
1167
+ "n02085620": 151,
1168
+ "n02085782": 152,
1169
+ "n02085936": 153,
1170
+ "n02086079": 154,
1171
+ "n02086240": 155,
1172
+ "n02086646": 156,
1173
+ "n02086910": 157,
1174
+ "n02087046": 158,
1175
+ "n02087394": 159,
1176
+ "n02088094": 160,
1177
+ "n02088238": 161,
1178
+ "n02088364": 162,
1179
+ "n02088466": 163,
1180
+ "n02088632": 164,
1181
+ "n02089078": 165,
1182
+ "n02089867": 166,
1183
+ "n02089973": 167,
1184
+ "n02090379": 168,
1185
+ "n02090622": 169,
1186
+ "n02090721": 170,
1187
+ "n02091032": 171,
1188
+ "n02091134": 172,
1189
+ "n02091244": 173,
1190
+ "n02091467": 174,
1191
+ "n02091635": 175,
1192
+ "n02091831": 176,
1193
+ "n02092002": 177,
1194
+ "n02092339": 178,
1195
+ "n02093256": 179,
1196
+ "n02093428": 180,
1197
+ "n02093647": 181,
1198
+ "n02093754": 182,
1199
+ "n02093859": 183,
1200
+ "n02093991": 184,
1201
+ "n02094114": 185,
1202
+ "n02094258": 186,
1203
+ "n02094433": 187,
1204
+ "n02095314": 188,
1205
+ "n02095570": 189,
1206
+ "n02095889": 190,
1207
+ "n02096051": 191,
1208
+ "n02096177": 192,
1209
+ "n02096294": 193,
1210
+ "n02096437": 194,
1211
+ "n02096585": 195,
1212
+ "n02097047": 196,
1213
+ "n02097130": 197,
1214
+ "n02097209": 198,
1215
+ "n02097298": 199,
1216
+ "n02097474": 200,
1217
+ "n02097658": 201,
1218
+ "n02098105": 202,
1219
+ "n02098286": 203,
1220
+ "n02098413": 204,
1221
+ "n02099267": 205,
1222
+ "n02099429": 206,
1223
+ "n02099601": 207,
1224
+ "n02099712": 208,
1225
+ "n02099849": 209,
1226
+ "n02100236": 210,
1227
+ "n02100583": 211,
1228
+ "n02100735": 212,
1229
+ "n02100877": 213,
1230
+ "n02101006": 214,
1231
+ "n02101388": 215,
1232
+ "n02101556": 216,
1233
+ "n02102040": 217,
1234
+ "n02102177": 218,
1235
+ "n02102318": 219,
1236
+ "n02102480": 220,
1237
+ "n02102973": 221,
1238
+ "n02104029": 222,
1239
+ "n02104365": 223,
1240
+ "n02105056": 224,
1241
+ "n02105162": 225,
1242
+ "n02105251": 226,
1243
+ "n02105412": 227,
1244
+ "n02105505": 228,
1245
+ "n02105641": 229,
1246
+ "n02105855": 230,
1247
+ "n02106030": 231,
1248
+ "n02106166": 232,
1249
+ "n02106382": 233,
1250
+ "n02106550": 234,
1251
+ "n02106662": 235,
1252
+ "n02107142": 236,
1253
+ "n02107312": 237,
1254
+ "n02107574": 238,
1255
+ "n02107683": 239,
1256
+ "n02107908": 240,
1257
+ "n02108000": 241,
1258
+ "n02108089": 242,
1259
+ "n02108422": 243,
1260
+ "n02108551": 244,
1261
+ "n02108915": 245,
1262
+ "n02109047": 246,
1263
+ "n02109525": 247,
1264
+ "n02109961": 248,
1265
+ "n02110063": 249,
1266
+ "n02110185": 250,
1267
+ "n02110341": 251,
1268
+ "n02110627": 252,
1269
+ "n02110806": 253,
1270
+ "n02110958": 254,
1271
+ "n02111129": 255,
1272
+ "n02111277": 256,
1273
+ "n02111500": 257,
1274
+ "n02111889": 258,
1275
+ "n02112018": 259,
1276
+ "n02112137": 260,
1277
+ "n02112350": 261,
1278
+ "n02112706": 262,
1279
+ "n02113023": 263,
1280
+ "n02113186": 264,
1281
+ "n02113624": 265,
1282
+ "n02113712": 266,
1283
+ "n02113799": 267,
1284
+ "n02113978": 268,
1285
+ "n02114367": 269,
1286
+ "n02114548": 270,
1287
+ "n02114712": 271,
1288
+ "n02114855": 272,
1289
+ "n02115641": 273,
1290
+ "n02115913": 274,
1291
+ "n02116738": 275,
1292
+ "n02117135": 276,
1293
+ "n02119022": 277,
1294
+ "n02119789": 278,
1295
+ "n02120079": 279,
1296
+ "n02120505": 280,
1297
+ "n02123045": 281,
1298
+ "n02123159": 282,
1299
+ "n02123394": 283,
1300
+ "n02123597": 284,
1301
+ "n02124075": 285,
1302
+ "n02125311": 286,
1303
+ "n02127052": 287,
1304
+ "n02128385": 288,
1305
+ "n02128757": 289,
1306
+ "n02128925": 290,
1307
+ "n02129165": 291,
1308
+ "n02129604": 292,
1309
+ "n02130308": 293,
1310
+ "n02132136": 294,
1311
+ "n02133161": 295,
1312
+ "n02134084": 296,
1313
+ "n02134418": 297,
1314
+ "n02137549": 298,
1315
+ "n02138441": 299,
1316
+ "n02165105": 300,
1317
+ "n02165456": 301,
1318
+ "n02167151": 302,
1319
+ "n02168699": 303,
1320
+ "n02169497": 304,
1321
+ "n02172182": 305,
1322
+ "n02174001": 306,
1323
+ "n02177972": 307,
1324
+ "n02190166": 308,
1325
+ "n02206856": 309,
1326
+ "n02219486": 310,
1327
+ "n02226429": 311,
1328
+ "n02229544": 312,
1329
+ "n02231487": 313,
1330
+ "n02233338": 314,
1331
+ "n02236044": 315,
1332
+ "n02256656": 316,
1333
+ "n02259212": 317,
1334
+ "n02264363": 318,
1335
+ "n02268443": 319,
1336
+ "n02268853": 320,
1337
+ "n02276258": 321,
1338
+ "n02277742": 322,
1339
+ "n02279972": 323,
1340
+ "n02280649": 324,
1341
+ "n02281406": 325,
1342
+ "n02281787": 326,
1343
+ "n02317335": 327,
1344
+ "n02319095": 328,
1345
+ "n02321529": 329,
1346
+ "n02325366": 330,
1347
+ "n02326432": 331,
1348
+ "n02328150": 332,
1349
+ "n02342885": 333,
1350
+ "n02346627": 334,
1351
+ "n02356798": 335,
1352
+ "n02361337": 336,
1353
+ "n02363005": 337,
1354
+ "n02364673": 338,
1355
+ "n02389026": 339,
1356
+ "n02391049": 340,
1357
+ "n02395406": 341,
1358
+ "n02396427": 342,
1359
+ "n02397096": 343,
1360
+ "n02398521": 344,
1361
+ "n02403003": 345,
1362
+ "n02408429": 346,
1363
+ "n02410509": 347,
1364
+ "n02412080": 348,
1365
+ "n02415577": 349,
1366
+ "n02417914": 350,
1367
+ "n02422106": 351,
1368
+ "n02422699": 352,
1369
+ "n02423022": 353,
1370
+ "n02437312": 354,
1371
+ "n02437616": 355,
1372
+ "n02441942": 356,
1373
+ "n02442845": 357,
1374
+ "n02443114": 358,
1375
+ "n02443484": 359,
1376
+ "n02444819": 360,
1377
+ "n02445715": 361,
1378
+ "n02447366": 362,
1379
+ "n02454379": 363,
1380
+ "n02457408": 364,
1381
+ "n02480495": 365,
1382
+ "n02480855": 366,
1383
+ "n02481823": 367,
1384
+ "n02483362": 368,
1385
+ "n02483708": 369,
1386
+ "n02484975": 370,
1387
+ "n02486261": 371,
1388
+ "n02486410": 372,
1389
+ "n02487347": 373,
1390
+ "n02488291": 374,
1391
+ "n02488702": 375,
1392
+ "n02489166": 376,
1393
+ "n02490219": 377,
1394
+ "n02492035": 378,
1395
+ "n02492660": 379,
1396
+ "n02493509": 380,
1397
+ "n02493793": 381,
1398
+ "n02494079": 382,
1399
+ "n02497673": 383,
1400
+ "n02500267": 384,
1401
+ "n02504013": 385,
1402
+ "n02504458": 386,
1403
+ "n02509815": 387,
1404
+ "n02510455": 388,
1405
+ "n02514041": 389,
1406
+ "n02526121": 390,
1407
+ "n02536864": 391,
1408
+ "n02606052": 392,
1409
+ "n02607072": 393,
1410
+ "n02640242": 394,
1411
+ "n02641379": 395,
1412
+ "n02643566": 396,
1413
+ "n02655020": 397,
1414
+ "n02666196": 398,
1415
+ "n02667093": 399,
1416
+ "n02669723": 400,
1417
+ "n02672831": 401,
1418
+ "n02676566": 402,
1419
+ "n02687172": 403,
1420
+ "n02690373": 404,
1421
+ "n02692877": 405,
1422
+ "n02699494": 406,
1423
+ "n02701002": 407,
1424
+ "n02704792": 408,
1425
+ "n02708093": 409,
1426
+ "n02727426": 410,
1427
+ "n02730930": 411,
1428
+ "n02747177": 412,
1429
+ "n02749479": 413,
1430
+ "n02769748": 414,
1431
+ "n02776631": 415,
1432
+ "n02777292": 416,
1433
+ "n02782093": 417,
1434
+ "n02783161": 418,
1435
+ "n02786058": 419,
1436
+ "n02787622": 420,
1437
+ "n02788148": 421,
1438
+ "n02790996": 422,
1439
+ "n02791124": 423,
1440
+ "n02791270": 424,
1441
+ "n02793495": 425,
1442
+ "n02794156": 426,
1443
+ "n02795169": 427,
1444
+ "n02797295": 428,
1445
+ "n02799071": 429,
1446
+ "n02802426": 430,
1447
+ "n02804414": 431,
1448
+ "n02804610": 432,
1449
+ "n02807133": 433,
1450
+ "n02808304": 434,
1451
+ "n02808440": 435,
1452
+ "n02814533": 436,
1453
+ "n02814860": 437,
1454
+ "n02815834": 438,
1455
+ "n02817516": 439,
1456
+ "n02823428": 440,
1457
+ "n02823750": 441,
1458
+ "n02825657": 442,
1459
+ "n02834397": 443,
1460
+ "n02835271": 444,
1461
+ "n02837789": 445,
1462
+ "n02840245": 446,
1463
+ "n02841315": 447,
1464
+ "n02843684": 448,
1465
+ "n02859443": 449,
1466
+ "n02860847": 450,
1467
+ "n02865351": 451,
1468
+ "n02869837": 452,
1469
+ "n02870880": 453,
1470
+ "n02871525": 454,
1471
+ "n02877765": 455,
1472
+ "n02879718": 456,
1473
+ "n02883205": 457,
1474
+ "n02892201": 458,
1475
+ "n02892767": 459,
1476
+ "n02894605": 460,
1477
+ "n02895154": 461,
1478
+ "n02906734": 462,
1479
+ "n02909870": 463,
1480
+ "n02910353": 464,
1481
+ "n02916936": 465,
1482
+ "n02917067": 466,
1483
+ "n02927161": 467,
1484
+ "n02930766": 468,
1485
+ "n02939185": 469,
1486
+ "n02948072": 470,
1487
+ "n02950826": 471,
1488
+ "n02951358": 472,
1489
+ "n02951585": 473,
1490
+ "n02963159": 474,
1491
+ "n02965783": 475,
1492
+ "n02966193": 476,
1493
+ "n02966687": 477,
1494
+ "n02971356": 478,
1495
+ "n02974003": 479,
1496
+ "n02977058": 480,
1497
+ "n02978881": 481,
1498
+ "n02979186": 482,
1499
+ "n02980441": 483,
1500
+ "n02981792": 484,
1501
+ "n02988304": 485,
1502
+ "n02992211": 486,
1503
+ "n02992529": 487,
1504
+ "n02999410": 488,
1505
+ "n03000134": 489,
1506
+ "n03000247": 490,
1507
+ "n03000684": 491,
1508
+ "n03014705": 492,
1509
+ "n03016953": 493,
1510
+ "n03017168": 494,
1511
+ "n03018349": 495,
1512
+ "n03026506": 496,
1513
+ "n03028079": 497,
1514
+ "n03032252": 498,
1515
+ "n03041632": 499,
1516
+ "n03042490": 500,
1517
+ "n03045698": 501,
1518
+ "n03047690": 502,
1519
+ "n03062245": 503,
1520
+ "n03063599": 504,
1521
+ "n03063689": 505,
1522
+ "n03065424": 506,
1523
+ "n03075370": 507,
1524
+ "n03085013": 508,
1525
+ "n03089624": 509,
1526
+ "n03095699": 510,
1527
+ "n03100240": 511,
1528
+ "n03109150": 512,
1529
+ "n03110669": 513,
1530
+ "n03124043": 514,
1531
+ "n03124170": 515,
1532
+ "n03125729": 516,
1533
+ "n03126707": 517,
1534
+ "n03127747": 518,
1535
+ "n03127925": 519,
1536
+ "n03131574": 520,
1537
+ "n03133878": 521,
1538
+ "n03134739": 522,
1539
+ "n03141823": 523,
1540
+ "n03146219": 524,
1541
+ "n03160309": 525,
1542
+ "n03179701": 526,
1543
+ "n03180011": 527,
1544
+ "n03187595": 528,
1545
+ "n03188531": 529,
1546
+ "n03196217": 530,
1547
+ "n03197337": 531,
1548
+ "n03201208": 532,
1549
+ "n03207743": 533,
1550
+ "n03207941": 534,
1551
+ "n03208938": 535,
1552
+ "n03216828": 536,
1553
+ "n03218198": 537,
1554
+ "n03220513": 538,
1555
+ "n03223299": 539,
1556
+ "n03240683": 540,
1557
+ "n03249569": 541,
1558
+ "n03250847": 542,
1559
+ "n03255030": 543,
1560
+ "n03259280": 544,
1561
+ "n03271574": 545,
1562
+ "n03272010": 546,
1563
+ "n03272562": 547,
1564
+ "n03290653": 548,
1565
+ "n03291819": 549,
1566
+ "n03297495": 550,
1567
+ "n03314780": 551,
1568
+ "n03325584": 552,
1569
+ "n03337140": 553,
1570
+ "n03344393": 554,
1571
+ "n03345487": 555,
1572
+ "n03347037": 556,
1573
+ "n03355925": 557,
1574
+ "n03372029": 558,
1575
+ "n03376595": 559,
1576
+ "n03379051": 560,
1577
+ "n03384352": 561,
1578
+ "n03388043": 562,
1579
+ "n03388183": 563,
1580
+ "n03388549": 564,
1581
+ "n03393912": 565,
1582
+ "n03394916": 566,
1583
+ "n03400231": 567,
1584
+ "n03404251": 568,
1585
+ "n03417042": 569,
1586
+ "n03424325": 570,
1587
+ "n03425413": 571,
1588
+ "n03443371": 572,
1589
+ "n03444034": 573,
1590
+ "n03445777": 574,
1591
+ "n03445924": 575,
1592
+ "n03447447": 576,
1593
+ "n03447721": 577,
1594
+ "n03450230": 578,
1595
+ "n03452741": 579,
1596
+ "n03457902": 580,
1597
+ "n03459775": 581,
1598
+ "n03461385": 582,
1599
+ "n03467068": 583,
1600
+ "n03476684": 584,
1601
+ "n03476991": 585,
1602
+ "n03478589": 586,
1603
+ "n03481172": 587,
1604
+ "n03482405": 588,
1605
+ "n03483316": 589,
1606
+ "n03485407": 590,
1607
+ "n03485794": 591,
1608
+ "n03492542": 592,
1609
+ "n03494278": 593,
1610
+ "n03495258": 594,
1611
+ "n03496892": 595,
1612
+ "n03498962": 596,
1613
+ "n03527444": 597,
1614
+ "n03529860": 598,
1615
+ "n03530642": 599,
1616
+ "n03532672": 600,
1617
+ "n03534580": 601,
1618
+ "n03535780": 602,
1619
+ "n03538406": 603,
1620
+ "n03544143": 604,
1621
+ "n03584254": 605,
1622
+ "n03584829": 606,
1623
+ "n03590841": 607,
1624
+ "n03594734": 608,
1625
+ "n03594945": 609,
1626
+ "n03595614": 610,
1627
+ "n03598930": 611,
1628
+ "n03599486": 612,
1629
+ "n03602883": 613,
1630
+ "n03617480": 614,
1631
+ "n03623198": 615,
1632
+ "n03627232": 616,
1633
+ "n03630383": 617,
1634
+ "n03633091": 618,
1635
+ "n03637318": 619,
1636
+ "n03642806": 620,
1637
+ "n03649909": 621,
1638
+ "n03657121": 622,
1639
+ "n03658185": 623,
1640
+ "n03661043": 624,
1641
+ "n03662601": 625,
1642
+ "n03666591": 626,
1643
+ "n03670208": 627,
1644
+ "n03673027": 628,
1645
+ "n03676483": 629,
1646
+ "n03680355": 630,
1647
+ "n03690938": 631,
1648
+ "n03691459": 632,
1649
+ "n03692522": 633,
1650
+ "n03697007": 634,
1651
+ "n03706229": 635,
1652
+ "n03709823": 636,
1653
+ "n03710193": 637,
1654
+ "n03710637": 638,
1655
+ "n03710721": 639,
1656
+ "n03717622": 640,
1657
+ "n03720891": 641,
1658
+ "n03721384": 642,
1659
+ "n03724870": 643,
1660
+ "n03729826": 644,
1661
+ "n03733131": 645,
1662
+ "n03733281": 646,
1663
+ "n03733805": 647,
1664
+ "n03742115": 648,
1665
+ "n03743016": 649,
1666
+ "n03759954": 650,
1667
+ "n03761084": 651,
1668
+ "n03763968": 652,
1669
+ "n03764736": 653,
1670
+ "n03769881": 654,
1671
+ "n03770439": 655,
1672
+ "n03770679": 656,
1673
+ "n03773504": 657,
1674
+ "n03775071": 658,
1675
+ "n03775546": 659,
1676
+ "n03776460": 660,
1677
+ "n03777568": 661,
1678
+ "n03777754": 662,
1679
+ "n03781244": 663,
1680
+ "n03782006": 664,
1681
+ "n03785016": 665,
1682
+ "n03786901": 666,
1683
+ "n03787032": 667,
1684
+ "n03788195": 668,
1685
+ "n03788365": 669,
1686
+ "n03791053": 670,
1687
+ "n03792782": 671,
1688
+ "n03792972": 672,
1689
+ "n03793489": 673,
1690
+ "n03794056": 674,
1691
+ "n03796401": 675,
1692
+ "n03803284": 676,
1693
+ "n03804744": 677,
1694
+ "n03814639": 678,
1695
+ "n03814906": 679,
1696
+ "n03825788": 680,
1697
+ "n03832673": 681,
1698
+ "n03837869": 682,
1699
+ "n03838899": 683,
1700
+ "n03840681": 684,
1701
+ "n03841143": 685,
1702
+ "n03843555": 686,
1703
+ "n03854065": 687,
1704
+ "n03857828": 688,
1705
+ "n03866082": 689,
1706
+ "n03868242": 690,
1707
+ "n03868863": 691,
1708
+ "n03871628": 692,
1709
+ "n03873416": 693,
1710
+ "n03874293": 694,
1711
+ "n03874599": 695,
1712
+ "n03876231": 696,
1713
+ "n03877472": 697,
1714
+ "n03877845": 698,
1715
+ "n03884397": 699,
1716
+ "n03887697": 700,
1717
+ "n03888257": 701,
1718
+ "n03888605": 702,
1719
+ "n03891251": 703,
1720
+ "n03891332": 704,
1721
+ "n03895866": 705,
1722
+ "n03899768": 706,
1723
+ "n03902125": 707,
1724
+ "n03903868": 708,
1725
+ "n03908618": 709,
1726
+ "n03908714": 710,
1727
+ "n03916031": 711,
1728
+ "n03920288": 712,
1729
+ "n03924679": 713,
1730
+ "n03929660": 714,
1731
+ "n03929855": 715,
1732
+ "n03930313": 716,
1733
+ "n03930630": 717,
1734
+ "n03933933": 718,
1735
+ "n03935335": 719,
1736
+ "n03937543": 720,
1737
+ "n03938244": 721,
1738
+ "n03942813": 722,
1739
+ "n03944341": 723,
1740
+ "n03947888": 724,
1741
+ "n03950228": 725,
1742
+ "n03954731": 726,
1743
+ "n03956157": 727,
1744
+ "n03958227": 728,
1745
+ "n03961711": 729,
1746
+ "n03967562": 730,
1747
+ "n03970156": 731,
1748
+ "n03976467": 732,
1749
+ "n03976657": 733,
1750
+ "n03977966": 734,
1751
+ "n03980874": 735,
1752
+ "n03982430": 736,
1753
+ "n03983396": 737,
1754
+ "n03991062": 738,
1755
+ "n03992509": 739,
1756
+ "n03995372": 740,
1757
+ "n03998194": 741,
1758
+ "n04004767": 742,
1759
+ "n04005630": 743,
1760
+ "n04008634": 744,
1761
+ "n04009552": 745,
1762
+ "n04019541": 746,
1763
+ "n04023962": 747,
1764
+ "n04026417": 748,
1765
+ "n04033901": 749,
1766
+ "n04033995": 750,
1767
+ "n04037443": 751,
1768
+ "n04039381": 752,
1769
+ "n04040759": 753,
1770
+ "n04041544": 754,
1771
+ "n04044716": 755,
1772
+ "n04049303": 756,
1773
+ "n04065272": 757,
1774
+ "n04067472": 758,
1775
+ "n04069434": 759,
1776
+ "n04070727": 760,
1777
+ "n04074963": 761,
1778
+ "n04081281": 762,
1779
+ "n04086273": 763,
1780
+ "n04090263": 764,
1781
+ "n04099969": 765,
1782
+ "n04111531": 766,
1783
+ "n04116512": 767,
1784
+ "n04118538": 768,
1785
+ "n04118776": 769,
1786
+ "n04120489": 770,
1787
+ "n04125021": 771,
1788
+ "n04127249": 772,
1789
+ "n04131690": 773,
1790
+ "n04133789": 774,
1791
+ "n04136333": 775,
1792
+ "n04141076": 776,
1793
+ "n04141327": 777,
1794
+ "n04141975": 778,
1795
+ "n04146614": 779,
1796
+ "n04147183": 780,
1797
+ "n04149813": 781,
1798
+ "n04152593": 782,
1799
+ "n04153751": 783,
1800
+ "n04154565": 784,
1801
+ "n04162706": 785,
1802
+ "n04179913": 786,
1803
+ "n04192698": 787,
1804
+ "n04200800": 788,
1805
+ "n04201297": 789,
1806
+ "n04204238": 790,
1807
+ "n04204347": 791,
1808
+ "n04208210": 792,
1809
+ "n04209133": 793,
1810
+ "n04209239": 794,
1811
+ "n04228054": 795,
1812
+ "n04229816": 796,
1813
+ "n04235860": 797,
1814
+ "n04238763": 798,
1815
+ "n04239074": 799,
1816
+ "n04243546": 800,
1817
+ "n04251144": 801,
1818
+ "n04252077": 802,
1819
+ "n04252225": 803,
1820
+ "n04254120": 804,
1821
+ "n04254680": 805,
1822
+ "n04254777": 806,
1823
+ "n04258138": 807,
1824
+ "n04259630": 808,
1825
+ "n04263257": 809,
1826
+ "n04264628": 810,
1827
+ "n04265275": 811,
1828
+ "n04266014": 812,
1829
+ "n04270147": 813,
1830
+ "n04273569": 814,
1831
+ "n04275548": 815,
1832
+ "n04277352": 816,
1833
+ "n04285008": 817,
1834
+ "n04286575": 818,
1835
+ "n04296562": 819,
1836
+ "n04310018": 820,
1837
+ "n04311004": 821,
1838
+ "n04311174": 822,
1839
+ "n04317175": 823,
1840
+ "n04325704": 824,
1841
+ "n04326547": 825,
1842
+ "n04328186": 826,
1843
+ "n04330267": 827,
1844
+ "n04332243": 828,
1845
+ "n04335435": 829,
1846
+ "n04336792": 830,
1847
+ "n04344873": 831,
1848
+ "n04346328": 832,
1849
+ "n04347754": 833,
1850
+ "n04350905": 834,
1851
+ "n04355338": 835,
1852
+ "n04355933": 836,
1853
+ "n04356056": 837,
1854
+ "n04357314": 838,
1855
+ "n04366367": 839,
1856
+ "n04367480": 840,
1857
+ "n04370456": 841,
1858
+ "n04371430": 842,
1859
+ "n04371774": 843,
1860
+ "n04372370": 844,
1861
+ "n04376876": 845,
1862
+ "n04380533": 846,
1863
+ "n04389033": 847,
1864
+ "n04392985": 848,
1865
+ "n04398044": 849,
1866
+ "n04399382": 850,
1867
+ "n04404412": 851,
1868
+ "n04409515": 852,
1869
+ "n04417672": 853,
1870
+ "n04418357": 854,
1871
+ "n04423845": 855,
1872
+ "n04428191": 856,
1873
+ "n04429376": 857,
1874
+ "n04435653": 858,
1875
+ "n04442312": 859,
1876
+ "n04443257": 860,
1877
+ "n04447861": 861,
1878
+ "n04456115": 862,
1879
+ "n04458633": 863,
1880
+ "n04461696": 864,
1881
+ "n04462240": 865,
1882
+ "n04465501": 866,
1883
+ "n04467665": 867,
1884
+ "n04476259": 868,
1885
+ "n04479046": 869,
1886
+ "n04482393": 870,
1887
+ "n04483307": 871,
1888
+ "n04485082": 872,
1889
+ "n04486054": 873,
1890
+ "n04487081": 874,
1891
+ "n04487394": 875,
1892
+ "n04493381": 876,
1893
+ "n04501370": 877,
1894
+ "n04505470": 878,
1895
+ "n04507155": 879,
1896
+ "n04509417": 880,
1897
+ "n04515003": 881,
1898
+ "n04517823": 882,
1899
+ "n04522168": 883,
1900
+ "n04523525": 884,
1901
+ "n04525038": 885,
1902
+ "n04525305": 886,
1903
+ "n04532106": 887,
1904
+ "n04532670": 888,
1905
+ "n04536866": 889,
1906
+ "n04540053": 890,
1907
+ "n04542943": 891,
1908
+ "n04548280": 892,
1909
+ "n04548362": 893,
1910
+ "n04550184": 894,
1911
+ "n04552348": 895,
1912
+ "n04553703": 896,
1913
+ "n04554684": 897,
1914
+ "n04557648": 898,
1915
+ "n04560804": 899,
1916
+ "n04562935": 900,
1917
+ "n04579145": 901,
1918
+ "n04579432": 902,
1919
+ "n04584207": 903,
1920
+ "n04589890": 904,
1921
+ "n04590129": 905,
1922
+ "n04591157": 906,
1923
+ "n04591713": 907,
1924
+ "n04592741": 908,
1925
+ "n04596742": 909,
1926
+ "n04597913": 910,
1927
+ "n04599235": 911,
1928
+ "n04604644": 912,
1929
+ "n04606251": 913,
1930
+ "n04612504": 914,
1931
+ "n04613696": 915,
1932
+ "n06359193": 916,
1933
+ "n06596364": 917,
1934
+ "n06785654": 918,
1935
+ "n06794110": 919,
1936
+ "n06874185": 920,
1937
+ "n07248320": 921,
1938
+ "n07565083": 922,
1939
+ "n07579787": 923,
1940
+ "n07583066": 924,
1941
+ "n07584110": 925,
1942
+ "n07590611": 926,
1943
+ "n07613480": 927,
1944
+ "n07614500": 928,
1945
+ "n07615774": 929,
1946
+ "n07684084": 930,
1947
+ "n07693725": 931,
1948
+ "n07695742": 932,
1949
+ "n07697313": 933,
1950
+ "n07697537": 934,
1951
+ "n07711569": 935,
1952
+ "n07714571": 936,
1953
+ "n07714990": 937,
1954
+ "n07715103": 938,
1955
+ "n07716358": 939,
1956
+ "n07716906": 940,
1957
+ "n07717410": 941,
1958
+ "n07717556": 942,
1959
+ "n07718472": 943,
1960
+ "n07718747": 944,
1961
+ "n07720875": 945,
1962
+ "n07730033": 946,
1963
+ "n07734744": 947,
1964
+ "n07742313": 948,
1965
+ "n07745940": 949,
1966
+ "n07747607": 950,
1967
+ "n07749582": 951,
1968
+ "n07753113": 952,
1969
+ "n07753275": 953,
1970
+ "n07753592": 954,
1971
+ "n07754684": 955,
1972
+ "n07760859": 956,
1973
+ "n07768694": 957,
1974
+ "n07802026": 958,
1975
+ "n07831146": 959,
1976
+ "n07836838": 960,
1977
+ "n07860988": 961,
1978
+ "n07871810": 962,
1979
+ "n07873807": 963,
1980
+ "n07875152": 964,
1981
+ "n07880968": 965,
1982
+ "n07892512": 966,
1983
+ "n07920052": 967,
1984
+ "n07930864": 968,
1985
+ "n07932039": 969,
1986
+ "n09193705": 970,
1987
+ "n09229709": 971,
1988
+ "n09246464": 972,
1989
+ "n09256479": 973,
1990
+ "n09288635": 974,
1991
+ "n09332890": 975,
1992
+ "n09399592": 976,
1993
+ "n09421951": 977,
1994
+ "n09428293": 978,
1995
+ "n09468604": 979,
1996
+ "n09472597": 980,
1997
+ "n09835506": 981,
1998
+ "n10148035": 982,
1999
+ "n10565667": 983,
2000
+ "n11879895": 984,
2001
+ "n11939491": 985,
2002
+ "n12057211": 986,
2003
+ "n12144580": 987,
2004
+ "n12267677": 988,
2005
+ "n12620546": 989,
2006
+ "n12768682": 990,
2007
+ "n12985857": 991,
2008
+ "n12998815": 992,
2009
+ "n13037406": 993,
2010
+ "n13040303": 994,
2011
+ "n13044778": 995,
2012
+ "n13052670": 996,
2013
+ "n13054560": 997,
2014
+ "n13133613": 998,
2015
+ "n15075141": 999
2016
+ },
2017
+ "layer_norm_eps": 1e-12,
2018
+ "model_type": "vit",
2019
+ "num_attention_heads": 6,
2020
+ "num_channels": 3,
2021
+ "num_hidden_layers": 12,
2022
+ "patch_size": 16,
2023
+ "qkv_bias": true,
2024
+ "torch_dtype": "float32",
2025
+ "transformers_version": "4.41.2"
2026
+ }
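Note on the replacement config: this is a stock Hugging Face transformers ViT config ("model_type": "vit"; hidden_size 384, 6 attention heads, 12 layers, patch 16 — i.e. ViT-Small/16 at 224px), with the 1000 ImageNet-1k WordNet synset ids inlined as id2label/label2id. With "auto_map" gone and vision_transformer.py deleted below, the checkpoint should load through the built-in ViT classes with no trust_remote_code. A minimal sketch; the repo id here is taken from the deleted config's "_name_or_path" and may not match this repository exactly:

from transformers import ViTForImageClassification

model = ViTForImageClassification.from_pretrained("magicslabnu/OutEffHop_vit_small_patch16_224")
assert model.config.hidden_size == 384 and model.config.num_attention_heads == 6
print(model.config.id2label[0])  # "n01440764" (WordNet synset id for class 0, tench)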
pytorch_model.bin → model.safetensors RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:8a99d3d0eab5d260edfa1246d753fa4f7dabf308f32b856401f0968d7bd6d013
- size 88204542

  version https://git-lfs.github.com/spec/v1
+ oid sha256:4372789ccc964b99b87bb8b4b39a37d57bce55360183a3208d7d37b48b40c309
+ size 88225584
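Note: only the Git LFS pointer changes here — the weights move from a pickled pytorch_model.bin to model.safetensors (the small size difference is plausibly just the change in serialization overhead). A sketch of reading the new file directly, assuming the safetensors package and a local copy of the file:

from safetensors.torch import load_file

state_dict = load_file("model.safetensors")  # zero-copy load, no pickle code execution
print(len(state_dict), "tensors")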
preprocessor_config.json ADDED
@@ -0,0 +1,36 @@
1
+ {
2
+ "_valid_processor_keys": [
3
+ "images",
4
+ "do_resize",
5
+ "size",
6
+ "resample",
7
+ "do_rescale",
8
+ "rescale_factor",
9
+ "do_normalize",
10
+ "image_mean",
11
+ "image_std",
12
+ "return_tensors",
13
+ "data_format",
14
+ "input_data_format"
15
+ ],
16
+ "do_normalize": true,
17
+ "do_rescale": true,
18
+ "do_resize": true,
19
+ "image_mean": [
20
+ 0.5,
21
+ 0.5,
22
+ 0.5
23
+ ],
24
+ "image_processor_type": "ViTImageProcessor",
25
+ "image_std": [
26
+ 0.5,
27
+ 0.5,
28
+ 0.5
29
+ ],
30
+ "resample": 2,
31
+ "rescale_factor": 0.00392156862745098,
32
+ "size": {
33
+ "height": 224,
34
+ "width": 224
35
+ }
36
+ }
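The added preprocessor pins inference-time preprocessing to a 224x224 resize with PIL bilinear filtering (resample=2), a rescale by 1/255 (0.00392156862745098), and per-channel normalization with mean and std 0.5. A minimal usage sketch; the image path is a placeholder:

# Minimal sketch of the preprocessing defined above.
from PIL import Image
from transformers import ViTImageProcessor

processor = ViTImageProcessor(
    size={"height": 224, "width": 224},
    resample=2,                  # PIL.Image.BILINEAR
    rescale_factor=1 / 255,      # == 0.00392156862745098
    image_mean=[0.5, 0.5, 0.5],
    image_std=[0.5, 0.5, 0.5],
)
inputs = processor(images=Image.open("example.jpg"), return_tensors="pt")
print(inputs["pixel_values"].shape)  # torch.Size([1, 3, 224, 224])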
vision_transformer.py DELETED
@@ -1,1853 +0,0 @@
1
- """ Vision Transformer (ViT) in PyTorch
2
-
3
- A PyTorch implementation of Vision Transformers, as described in:
4
-
5
- 'An Image Is Worth 16 x 16 Words: Transformers for Image Recognition at Scale'
6
- - https://arxiv.org/abs/2010.11929
7
-
8
- `How to train your ViT? Data, Augmentation, and Regularization in Vision Transformers`
9
- - https://arxiv.org/abs/2106.10270
10
-
11
- `FlexiViT: One Model for All Patch Sizes`
12
- - https://arxiv.org/abs/2212.08013
13
-
14
- The official jax code is released and available at
15
- * https://github.com/google-research/vision_transformer
16
- * https://github.com/google-research/big_vision
17
-
18
- Acknowledgments:
19
- * The paper authors for releasing code and weights, thanks!
20
- * I fixed my class token impl based on Phil Wang's https://github.com/lucidrains/vit-pytorch
21
- * Simple transformer style inspired by Andrej Karpathy's https://github.com/karpathy/minGPT
22
- * Bert reference code checks against Huggingface Transformers and Tensorflow Bert
23
-
24
- Hacked together by / Copyright 2020, Ross Wightman
25
- """
26
- import logging
27
- import math
28
- from collections import OrderedDict
29
- from functools import partial
30
- from typing import Optional, List, Tuple
31
-
32
- import torch
33
- import torch.nn as nn
34
- import torch.nn.functional as F
35
- import torch.utils.checkpoint
36
-
37
- from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD, IMAGENET_INCEPTION_MEAN, IMAGENET_INCEPTION_STD, \
38
- OPENAI_CLIP_MEAN, OPENAI_CLIP_STD
39
- from timm.layers import PatchEmbed, Mlp, DropPath, trunc_normal_, lecun_normal_, resample_patch_embed, \
40
- resample_abs_pos_embed
41
- from timm.models._builder import build_model_with_cfg
42
- from timm.models._manipulate import named_apply, checkpoint_seq, adapt_input_conv
43
- from timm.models._pretrained import generate_default_cfgs
44
- from timm.models._registry import register_model
48
- import argparse
49
- import json
51
- import os
52
-
53
- import numpy as np
60
- from torch.jit import Final
61
- from quantization.utils import BaseEnumOptions
62
- from transformers_language.models.softmax import clipped_softmax, clipped_softmax1
63
-
64
- __all__ = ['VisionTransformer'] # model_registry will add each entrypoint fn to this
65
-
66
-
67
- _logger = logging.getLogger(__name__)
68
-
69
-
71
- # Set to True if exporting a model with Same padding via ONNX
72
- _EXPORTABLE = False
73
-
74
- # Set to True if wanting to use torch.jit.script on a model
75
- _SCRIPTABLE = False
76
-
77
-
78
- # use torch.scaled_dot_product_attention where possible
79
- _HAS_FUSED_ATTN = hasattr(torch.nn.functional, 'scaled_dot_product_attention')
80
- if 'TIMM_FUSED_ATTN' in os.environ:
81
- _USE_FUSED_ATTN = int(os.environ['TIMM_FUSED_ATTN'])
82
- else:
83
- _USE_FUSED_ATTN = 1 # 0 == off, 1 == on (for tested use), 2 == on (for experimental use)
84
-
85
- def logit(p, eps=1e-16):
86
- p = np.clip(p, eps, 1 - eps)
87
- return -np.log(1 / p - 1)
88
-
89
-
90
- class AttentionGateType(BaseEnumOptions):
91
- none = 0
92
- unconditional_per_head = 1
93
- conditional_per_head = 2
94
- conditional_per_token = 3
95
-
96
- def use_fused_attn(experimental: bool = False) -> bool:
97
- # NOTE: ONNX export cannot handle F.scaled_dot_product_attention as of pytorch 2.0
98
- if not _HAS_FUSED_ATTN or _EXPORTABLE:
99
- return False
100
- if experimental:
101
- return _USE_FUSED_ATTN > 1
102
- return _USE_FUSED_ATTN > 0
103
-
104
- def scaled_dot_product_attention(query, key, value, softmax_fn, attn_mask=None, dropout_p=0.0, is_causal=False, scale=None) -> torch.Tensor:
105
-     # Reference (non-fused) implementation equivalent to
106
-     # F.scaled_dot_product_attention, generalized to take a pluggable softmax_fn
107
- L, S = query.size(-2), key.size(-2)
108
- scale_factor = 1 / math.sqrt(query.size(-1)) if scale is None else scale
109
- attn_bias = torch.zeros(L, S, dtype=query.dtype, device=query.device)
110
- if is_causal:
111
- assert attn_mask is None
112
- temp_mask = torch.ones(L, S, dtype=torch.bool).tril(diagonal=0)
113
- attn_bias.masked_fill_(temp_mask.logical_not(), float("-inf"))
114
-         attn_bias = attn_bias.to(query.dtype)  # .to() is not in-place; keep the cast
115
-
116
- if attn_mask is not None:
117
- if attn_mask.dtype == torch.bool:
118
- attn_mask.masked_fill_(attn_mask.logical_not(), float("-inf"))
119
- else:
120
- attn_bias += attn_mask
121
- attn_weight = query @ key.transpose(-2, -1) * scale_factor
122
- attn_weight += attn_bias
123
- attn_weight = softmax_fn(attn_weight, dim=-1)
124
-     attn_weight = torch.dropout(attn_weight, dropout_p, train=True)  # callers pass dropout_p=0. when not training
125
- return attn_weight @ value
126
-
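The function above is the reference (non-fused) scaled dot-product attention, generalized so the softmax can be swapped out. A tiny usage sketch; softmax1 here is an illustrative stand-in for the clipped_softmax variants imported at the top of this file (it adds an implicit zero logit to the softmax denominator), not the repo's actual implementation:

# Usage sketch for the manual scaled_dot_product_attention above.
import torch
import torch.nn.functional as F

def softmax1(x, dim=-1):
    # exp(x_i) / (1 + sum_j exp(x_j)), computed with a max-shift for stability
    m = x.amax(dim=dim, keepdim=True)
    ex = torch.exp(x - m)
    return ex / (ex.sum(dim=dim, keepdim=True) + torch.exp(-m))

q = k = v = torch.randn(2, 6, 197, 64)  # (batch, heads, tokens, head_dim)
out = scaled_dot_product_attention(q, k, v, softmax_fn=F.softmax)
alt = scaled_dot_product_attention(q, k, v, softmax_fn=softmax1)
print(out.shape, alt.shape)  # torch.Size([2, 6, 197, 64]) twice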
127
- class Attention(nn.Module):
128
- fused_attn: Final[bool]
129
-
130
- def __init__(
131
- self,
132
- dim: int,
133
- num_heads: int = 8,
134
- qkv_bias: bool = False,
135
- qk_norm: bool = False,
136
- attn_drop: float = 0.,
137
- proj_drop: float = 0.,
138
- norm_layer: nn.Module = nn.LayerNorm,
139
- softmax_fn=torch.nn.functional.softmax,
140
- gamma=None,
141
- ssm_eps=None,
142
- tau=None,
143
- skip_attn=False,
144
- attn_gate_type=AttentionGateType.none,
145
- attn_gate_init=None,
146
- attn_gate_mlp=False,
147
- attn_gate_mlp2=False,
148
- attn_gate_linear_all_features=False,
149
- fine_tuning=False,
150
- max_seq_length=None,
151
-
152
- ) -> None:
153
- super().__init__()
154
- assert dim % num_heads == 0, 'dim should be divisible by num_heads'
155
- self.num_attention_heads = num_heads
156
- self.attention_head_size = dim // num_heads
157
- self.scale = self.attention_head_size ** -0.5
158
- self.fused_attn = use_fused_attn()
159
- self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
160
- self.q_norm = norm_layer(self.attention_head_size) if qk_norm else nn.Identity()
161
- self.k_norm = norm_layer(self.attention_head_size) if qk_norm else nn.Identity()
162
- self.attn_drop = nn.Dropout(attn_drop)
163
- self.proj = nn.Linear(dim, dim)
164
- self.proj_drop = nn.Dropout(proj_drop)
165
-
166
- self.attn_scores = nn.Identity() # before attention mask
167
- self.attn_probs_before_dropout = nn.Identity()
168
- self.attn_probs_after_dropout = nn.Identity()
169
-
170
- self.gamma = gamma
171
- self.ssm_eps = ssm_eps
172
- self.tau = tau
173
- self.max_seq_length = max_seq_length
174
-
175
- # define softmax function
176
-
177
- self.softmax_fn = softmax_fn
178
-
179
- self.skip_attn = skip_attn
180
-
181
- # attention gating
182
- self.last_gate_avg_prob = None
183
- self.last_gate_all_probs = None
184
-
185
- self.attn_gate_type = attn_gate_type
186
- self.attn_gate_init = attn_gate_init
187
- self.attn_gate_mlp = attn_gate_mlp
188
- self.attn_gate_mlp2 = attn_gate_mlp2
189
- self.attn_gate_linear_all_features = attn_gate_linear_all_features
190
-
191
- self.alpha = None
192
- self.gate_fn = torch.sigmoid
193
-         self.pooling_fn = partial(torch.mean, dim=1, keepdim=True)  # torch spells it keepdim, not numpy's keepdims
194
-
195
- self.fine_tuning = fine_tuning
196
-
197
- # gate scaling factor
198
- self.gate_scaling_factor = 1.0
199
- if self.fine_tuning and self.attn_gate_init is not None:
200
- self.gate_scaling_factor = 1.0 / self.attn_gate_init
201
-
202
- # define gate
203
- if self.attn_gate_type == AttentionGateType.unconditional_per_head:
204
- init_alpha = torch.zeros(size=(self.num_attention_heads,))
205
- self.alpha = nn.Parameter(init_alpha, requires_grad=True)
206
-
207
- elif self.attn_gate_type in (
208
- AttentionGateType.conditional_per_head,
209
- AttentionGateType.conditional_per_token,
210
- ):
211
- if self.attn_gate_linear_all_features:
212
-             self.alpha = nn.Linear(self.num_attention_heads * self.attention_head_size, self.num_attention_heads, bias=True)  # input is all head features concatenated
213
-
214
- else: # separate predictors for each head
215
- module_list = []
216
- for _ in range(self.num_attention_heads):
217
- if self.attn_gate_mlp:
218
- fc = nn.Sequential(
219
- nn.Linear(
220
- self.attention_head_size, self.attention_head_size // 4, bias=True
221
- ),
222
- nn.ReLU(),
223
- nn.Linear(self.attention_head_size // 4, 1, bias=True),
224
- )
225
- elif self.attn_gate_mlp2:
226
- fc = nn.Sequential(
227
- nn.Linear(
228
- self.attention_head_size, self.attention_head_size, bias=True
229
- ),
230
- nn.ReLU(),
231
- nn.Linear(self.attention_head_size, 1, bias=True),
232
- )
233
- else:
234
- fc = nn.Linear(self.attention_head_size, 1, bias=True)
235
-
236
- if self.attn_gate_init is not None:
237
- init_bias = logit(self.attn_gate_init)
238
- torch.nn.init.constant_(fc.bias, init_bias)
239
-
240
- if self.fine_tuning:
241
-                     # init to very small values
242
- torch.nn.init.normal_(fc.weight, mean=0.0, std=0.01)
243
-
244
- module_list.append(fc)
245
- self.alpha = nn.ModuleList(module_list)
246
-
247
- def transpose_for_scores(self, x: torch.Tensor) -> torch.Tensor:
248
- new_x_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size)
249
- x = x.view(new_x_shape)
250
- return x.permute(0, 2, 1, 3)
251
-
252
- def forward(self, x: torch.Tensor) -> torch.Tensor:
253
- hidden_states = x
254
- B, N, C = x.shape
255
- qkv = self.qkv(x).reshape(B, N, 3, self.num_attention_heads, self.attention_head_size).permute(2, 0, 3, 1, 4)
256
- q, k, v = qkv.unbind(0)
257
- q, k = self.q_norm(q), self.k_norm(k)
258
-
259
- if self.fused_attn:
260
- context_layer = scaled_dot_product_attention(
261
- q, k, v, self.softmax_fn,
262
- dropout_p=self.attn_drop.p if self.training else 0.,
263
- )
264
- else:
265
- q = q * self.scale
266
- attn = q @ k.transpose(-2, -1)
267
-
268
- attn = self.softmax_fn(attn, dim=-1)
269
- attn = self.attn_probs_before_dropout(attn)
270
- attn = self.attn_drop(attn)
271
- attn = self.attn_probs_after_dropout(attn)
272
- context_layer = attn @ v
273
-
274
-
275
- # *** Gating ***
276
- if self.attn_gate_type == AttentionGateType.unconditional_per_head:
277
- gate = self.gate_fn(self.alpha) # (H,)
278
- context_layer *= gate.view(-1, 1, 1) # (B, H, T, d_head)
279
-
280
- self.last_gate_avg_prob = gate.view(-1)
281
-
282
- elif self.attn_gate_type in (
283
- AttentionGateType.conditional_per_head,
284
- AttentionGateType.conditional_per_token,
285
- ):
286
-
287
- x = hidden_states
288
-
289
- if self.attn_gate_linear_all_features: # assume per_token
290
- alpha = self.alpha(x) # (B, T, H)
291
- gate = self.gate_fn(alpha)
292
- gate = gate.permute(0, 2, 1).contiguous() # (B, H, T)
293
- gate = gate.unsqueeze(3) # (B, H, T, 1)
294
-
295
- else:
296
- x = self.transpose_for_scores(x) # (B, H, T, d_head)
297
-
298
- alpha = []
299
- for head_idx in range(self.num_attention_heads):
300
- x_head = x[:, head_idx, ...] # (B, T, d_head)
301
- fc_head = self.alpha[head_idx]
302
- alpha_head = fc_head(x_head) # (B, T, 1)
303
- if self.attn_gate_type == AttentionGateType.conditional_per_head:
304
- alpha_head = self.pooling_fn(alpha_head) # (B, 1, 1)
305
- alpha.append(alpha_head)
306
- alpha = torch.stack(alpha, dim=1) # (B, H, *, 1)
307
- gate = self.gate_fn(alpha)
308
-
309
- context_layer *= gate * self.gate_scaling_factor
310
-
311
- self.last_gate_all_probs = gate # all gates to see the distributions
312
- avg_gate = gate.mean(dim=0)
313
- self.last_gate_avg_prob = avg_gate.view(self.num_attention_heads, -1).mean(dim=1)
314
-
315
-
316
- x = context_layer.transpose(1, 2).reshape(B, N, C)
317
- x = self.proj(x)
318
- x = self.proj_drop(x)
319
- return x
320
-
321
-
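This Attention module augments the standard timm block with a per-head output gate (a sigmoid of a learned alpha, optionally conditioned on the token features). A hedged instantiation sketch with ViT-Small shapes, assuming the module's own imports resolve:

# Hedged sketch: Attention with unconditional per-head gating.
import torch

attn = Attention(
    dim=384,               # ViT-Small embed dim
    num_heads=6,
    qkv_bias=True,
    attn_gate_type=AttentionGateType.unconditional_per_head,
)
tokens = torch.randn(2, 197, 384)  # (batch, 196 patches + cls token, dim)
out = attn(tokens)
print(out.shape, attn.last_gate_avg_prob)  # gates start at sigmoid(0) = 0.5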
322
- class LayerScale(nn.Module):
323
- def __init__(self, dim, init_values=1e-5, inplace=False):
324
- super().__init__()
325
- self.inplace = inplace
326
- self.gamma = nn.Parameter(init_values * torch.ones(dim))
327
-
328
- def forward(self, x):
329
- return x.mul_(self.gamma) if self.inplace else x * self.gamma
330
-
331
-
332
- class Block(nn.Module):
333
-
334
- def __init__(
335
- self,
336
- dim,
337
- num_heads,
338
- mlp_ratio=4.,
339
- qkv_bias=False,
340
- drop=0.,
341
- attn_drop=0.,
342
- init_values=None,
343
- drop_path=0.,
344
- act_layer=nn.GELU,
345
- norm_layer=nn.LayerNorm
346
- ):
347
- super().__init__()
348
- self.norm1 = norm_layer(dim)
349
- self.attn = Attention(dim, num_heads=num_heads, qkv_bias=qkv_bias, attn_drop=attn_drop, proj_drop=drop)
350
- self.ls1 = LayerScale(dim, init_values=init_values) if init_values else nn.Identity()
351
- # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
352
- self.drop_path1 = DropPath(drop_path) if drop_path > 0. else nn.Identity()
353
-
354
- self.norm2 = norm_layer(dim)
355
- self.mlp = Mlp(in_features=dim, hidden_features=int(dim * mlp_ratio), act_layer=act_layer, drop=drop)
356
- self.ls2 = LayerScale(dim, init_values=init_values) if init_values else nn.Identity()
357
- self.drop_path2 = DropPath(drop_path) if drop_path > 0. else nn.Identity()
358
-
359
- def forward(self, x):
360
- x = x + self.drop_path1(self.ls1(self.attn(self.norm1(x))))
361
- x = x + self.drop_path2(self.ls2(self.mlp(self.norm2(x))))
362
- return x
363
-
364
-
365
- class ResPostBlock(nn.Module):
366
-
367
- def __init__(
368
- self,
369
- dim,
370
- num_heads,
371
- mlp_ratio=4.,
372
- qkv_bias=False,
373
- drop=0.,
374
- attn_drop=0.,
375
- init_values=None,
376
- drop_path=0.,
377
- act_layer=nn.GELU,
378
- norm_layer=nn.LayerNorm
379
- ):
380
- super().__init__()
381
- self.init_values = init_values
382
-
383
- self.attn = Attention(dim, num_heads=num_heads, qkv_bias=qkv_bias, attn_drop=attn_drop, proj_drop=drop)
384
- self.norm1 = norm_layer(dim)
385
- self.drop_path1 = DropPath(drop_path) if drop_path > 0. else nn.Identity()
386
-
387
- self.mlp = Mlp(in_features=dim, hidden_features=int(dim * mlp_ratio), act_layer=act_layer, drop=drop)
388
- self.norm2 = norm_layer(dim)
389
- self.drop_path2 = DropPath(drop_path) if drop_path > 0. else nn.Identity()
390
-
391
- self.init_weights()
392
-
393
- def init_weights(self):
394
- # NOTE this init overrides that base model init with specific changes for the block type
395
- if self.init_values is not None:
396
- nn.init.constant_(self.norm1.weight, self.init_values)
397
- nn.init.constant_(self.norm2.weight, self.init_values)
398
-
399
- def forward(self, x):
400
- x = x + self.drop_path1(self.norm1(self.attn(x)))
401
- x = x + self.drop_path2(self.norm2(self.mlp(x)))
402
- return x
403
-
404
-
405
- class ParallelBlock(nn.Module):
406
-
407
- def __init__(
408
- self,
409
- dim,
410
- num_heads,
411
- num_parallel=2,
412
- mlp_ratio=4.,
413
- qkv_bias=False,
414
- init_values=None,
415
- drop=0.,
416
- attn_drop=0.,
417
- drop_path=0.,
418
- act_layer=nn.GELU,
419
- norm_layer=nn.LayerNorm
420
- ):
421
- super().__init__()
422
- self.num_parallel = num_parallel
423
- self.attns = nn.ModuleList()
424
- self.ffns = nn.ModuleList()
425
- for _ in range(num_parallel):
426
- self.attns.append(nn.Sequential(OrderedDict([
427
- ('norm', norm_layer(dim)),
428
- ('attn', Attention(dim, num_heads=num_heads, qkv_bias=qkv_bias, attn_drop=attn_drop, proj_drop=drop)),
429
- ('ls', LayerScale(dim, init_values=init_values) if init_values else nn.Identity()),
430
- ('drop_path', DropPath(drop_path) if drop_path > 0. else nn.Identity())
431
- ])))
432
- self.ffns.append(nn.Sequential(OrderedDict([
433
- ('norm', norm_layer(dim)),
434
- ('mlp', Mlp(dim, hidden_features=int(dim * mlp_ratio), act_layer=act_layer, drop=drop)),
435
- ('ls', LayerScale(dim, init_values=init_values) if init_values else nn.Identity()),
436
- ('drop_path', DropPath(drop_path) if drop_path > 0. else nn.Identity())
437
- ])))
438
-
439
- def _forward_jit(self, x):
440
- x = x + torch.stack([attn(x) for attn in self.attns]).sum(dim=0)
441
- x = x + torch.stack([ffn(x) for ffn in self.ffns]).sum(dim=0)
442
- return x
443
-
444
- @torch.jit.ignore
445
- def _forward(self, x):
446
- x = x + sum(attn(x) for attn in self.attns)
447
- x = x + sum(ffn(x) for ffn in self.ffns)
448
- return x
449
-
450
- def forward(self, x):
451
- if torch.jit.is_scripting() or torch.jit.is_tracing():
452
- return self._forward_jit(x)
453
- else:
454
- return self._forward(x)
455
-
456
-
457
- class VisionTransformer(nn.Module):
458
- """ Vision Transformer
459
-
460
-     A PyTorch impl of: `An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale`
461
- - https://arxiv.org/abs/2010.11929
462
- """
463
-
464
- def __init__(
465
- self,
466
- img_size=224,
467
- patch_size=16,
468
- in_chans=3,
469
- num_classes=1000,
470
- global_pool='token',
471
- embed_dim=768,
472
- depth=12,
473
- num_heads=12,
474
- mlp_ratio=4.,
475
- qkv_bias=True,
476
- init_values=None,
477
- class_token=True,
478
- no_embed_class=False,
479
- pre_norm=False,
480
- fc_norm=None,
481
- drop_rate=0.,
482
- attn_drop_rate=0.,
483
- drop_path_rate=0.,
484
- weight_init='',
485
- embed_layer=PatchEmbed,
486
- norm_layer=None,
487
- act_layer=None,
488
- block_fn=Block,
489
- ):
490
- """
491
- Args:
492
- img_size (int, tuple): input image size
493
- patch_size (int, tuple): patch size
494
- in_chans (int): number of input channels
495
- num_classes (int): number of classes for classification head
496
- global_pool (str): type of global pooling for final sequence (default: 'token')
497
- embed_dim (int): embedding dimension
498
- depth (int): depth of transformer
499
- num_heads (int): number of attention heads
500
-             mlp_ratio (float): ratio of mlp hidden dim to embedding dim
501
- qkv_bias (bool): enable bias for qkv if True
502
- init_values: (float): layer-scale init values
503
- class_token (bool): use class token
504
-             fc_norm (Optional[bool]): apply a pre-classifier norm after pooling; if None, enabled when global_pool == 'avg' (default: None)
505
- drop_rate (float): dropout rate
506
- attn_drop_rate (float): attention dropout rate
507
- drop_path_rate (float): stochastic depth rate
508
- weight_init (str): weight init scheme
509
- embed_layer (nn.Module): patch embedding layer
510
- norm_layer: (nn.Module): normalization layer
511
- act_layer: (nn.Module): MLP activation layer
512
- """
513
- super().__init__()
514
- assert global_pool in ('', 'avg', 'token')
515
- assert class_token or global_pool != 'token'
516
- use_fc_norm = global_pool == 'avg' if fc_norm is None else fc_norm
517
- norm_layer = norm_layer or partial(nn.LayerNorm, eps=1e-6)
518
- act_layer = act_layer or nn.GELU
519
-
520
- self.num_classes = num_classes
521
- self.global_pool = global_pool
522
- self.num_features = self.embed_dim = embed_dim # num_features for consistency with other models
523
- self.num_prefix_tokens = 1 if class_token else 0
524
- self.no_embed_class = no_embed_class
525
- self.grad_checkpointing = False
526
-
527
- self.patch_embed = embed_layer(
528
- img_size=img_size,
529
- patch_size=patch_size,
530
- in_chans=in_chans,
531
- embed_dim=embed_dim,
532
- bias=not pre_norm, # disable bias if pre-norm is used (e.g. CLIP)
533
- )
534
- num_patches = self.patch_embed.num_patches
535
-
536
- self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim)) if class_token else None
537
- embed_len = num_patches if no_embed_class else num_patches + self.num_prefix_tokens
538
- self.pos_embed = nn.Parameter(torch.randn(1, embed_len, embed_dim) * .02)
539
- self.pos_drop = nn.Dropout(p=drop_rate)
540
- self.norm_pre = norm_layer(embed_dim) if pre_norm else nn.Identity()
541
-
542
- dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)] # stochastic depth decay rule
543
- self.blocks = nn.Sequential(*[
544
- block_fn(
545
- dim=embed_dim,
546
- num_heads=num_heads,
547
- mlp_ratio=mlp_ratio,
548
- qkv_bias=qkv_bias,
549
- init_values=init_values,
550
- drop=drop_rate,
551
- attn_drop=attn_drop_rate,
552
- drop_path=dpr[i],
553
- norm_layer=norm_layer,
554
- act_layer=act_layer
555
- )
556
- for i in range(depth)])
557
- self.norm = norm_layer(embed_dim) if not use_fc_norm else nn.Identity()
558
-
559
- # Classifier Head
560
- self.fc_norm = norm_layer(embed_dim) if use_fc_norm else nn.Identity()
561
- self.head = nn.Linear(self.embed_dim, num_classes) if num_classes > 0 else nn.Identity()
562
-
563
- if weight_init != 'skip':
564
- self.init_weights(weight_init)
565
-
566
- def init_weights(self, mode=''):
567
- assert mode in ('jax', 'jax_nlhb', 'moco', '')
568
- head_bias = -math.log(self.num_classes) if 'nlhb' in mode else 0.
569
- trunc_normal_(self.pos_embed, std=.02)
570
- if self.cls_token is not None:
571
- nn.init.normal_(self.cls_token, std=1e-6)
572
- named_apply(get_init_weights_vit(mode, head_bias), self)
573
-
574
- def _init_weights(self, m):
575
- # this fn left here for compat with downstream users
576
- init_weights_vit_timm(m)
577
-
578
- @torch.jit.ignore()
579
- def load_pretrained(self, checkpoint_path, prefix=''):
580
- _load_weights(self, checkpoint_path, prefix)
581
-
582
- @torch.jit.ignore
583
- def no_weight_decay(self):
584
- return {'pos_embed', 'cls_token', 'dist_token'}
585
-
586
- @torch.jit.ignore
587
- def group_matcher(self, coarse=False):
588
- return dict(
589
- stem=r'^cls_token|pos_embed|patch_embed', # stem and embed
590
- blocks=[(r'^blocks\.(\d+)', None), (r'^norm', (99999,))]
591
- )
592
-
593
- @torch.jit.ignore
594
- def set_grad_checkpointing(self, enable=True):
595
- self.grad_checkpointing = enable
596
-
597
- @torch.jit.ignore
598
- def get_classifier(self):
599
- return self.head
600
-
601
- def reset_classifier(self, num_classes: int, global_pool=None):
602
- self.num_classes = num_classes
603
- if global_pool is not None:
604
- assert global_pool in ('', 'avg', 'token')
605
- self.global_pool = global_pool
606
- self.head = nn.Linear(self.embed_dim, num_classes) if num_classes > 0 else nn.Identity()
607
-
608
- def _pos_embed(self, x):
609
- if self.no_embed_class:
610
- # deit-3, updated JAX (big vision)
611
- # position embedding does not overlap with class token, add then concat
612
- x = x + self.pos_embed
613
- if self.cls_token is not None:
614
- x = torch.cat((self.cls_token.expand(x.shape[0], -1, -1), x), dim=1)
615
- else:
616
- # original timm, JAX, and deit vit impl
617
- # pos_embed has entry for class token, concat then add
618
- if self.cls_token is not None:
619
- x = torch.cat((self.cls_token.expand(x.shape[0], -1, -1), x), dim=1)
620
- x = x + self.pos_embed
621
- return self.pos_drop(x)
622
-
623
- def forward_features(self, x):
624
- x = self.patch_embed(x)
625
- x = self._pos_embed(x)
626
- x = self.norm_pre(x)
627
- if self.grad_checkpointing and not torch.jit.is_scripting():
628
- x = checkpoint_seq(self.blocks, x)
629
- else:
630
- x = self.blocks(x)
631
- x = self.norm(x)
632
- return x
633
-
634
- def forward_head(self, x, pre_logits: bool = False):
635
- if self.global_pool:
636
- x = x[:, self.num_prefix_tokens:].mean(dim=1) if self.global_pool == 'avg' else x[:, 0]
637
- x = self.fc_norm(x)
638
- return x if pre_logits else self.head(x)
639
-
640
- def forward(self, x):
641
- x = self.forward_features(x)
642
- x = self.forward_head(x)
643
- return x
644
-
645
-
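Putting the pieces together, a hedged end-to-end sketch of the class above with the ViT-S/16 hyperparameters this repo's config describes (384-dim, 6 heads, 12 layers):

# Hedged sketch: constructing and running the VisionTransformer above.
import torch

model = VisionTransformer(
    img_size=224, patch_size=16, embed_dim=384,
    depth=12, num_heads=6, num_classes=1000,
)
x = torch.randn(1, 3, 224, 224)
print(model(x).shape)                   # torch.Size([1, 1000])
print(model.forward_features(x).shape)  # torch.Size([1, 197, 384])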
646
- def init_weights_vit_timm(module: nn.Module, name: str = ''):
647
- """ ViT weight initialization, original timm impl (for reproducibility) """
648
- if isinstance(module, nn.Linear):
649
- trunc_normal_(module.weight, std=.02)
650
- if module.bias is not None:
651
- nn.init.zeros_(module.bias)
652
- elif hasattr(module, 'init_weights'):
653
- module.init_weights()
654
-
655
-
656
- def init_weights_vit_jax(module: nn.Module, name: str = '', head_bias: float = 0.):
657
- """ ViT weight initialization, matching JAX (Flax) impl """
658
- if isinstance(module, nn.Linear):
659
- if name.startswith('head'):
660
- nn.init.zeros_(module.weight)
661
- nn.init.constant_(module.bias, head_bias)
662
- else:
663
- nn.init.xavier_uniform_(module.weight)
664
- if module.bias is not None:
665
- nn.init.normal_(module.bias, std=1e-6) if 'mlp' in name else nn.init.zeros_(module.bias)
666
- elif isinstance(module, nn.Conv2d):
667
- lecun_normal_(module.weight)
668
- if module.bias is not None:
669
- nn.init.zeros_(module.bias)
670
- elif hasattr(module, 'init_weights'):
671
- module.init_weights()
672
-
673
-
674
- def init_weights_vit_moco(module: nn.Module, name: str = ''):
675
- """ ViT weight initialization, matching moco-v3 impl minus fixed PatchEmbed """
676
- if isinstance(module, nn.Linear):
677
- if 'qkv' in name:
678
- # treat the weights of Q, K, V separately
679
- val = math.sqrt(6. / float(module.weight.shape[0] // 3 + module.weight.shape[1]))
680
- nn.init.uniform_(module.weight, -val, val)
681
- else:
682
- nn.init.xavier_uniform_(module.weight)
683
- if module.bias is not None:
684
- nn.init.zeros_(module.bias)
685
- elif hasattr(module, 'init_weights'):
686
- module.init_weights()
687
-
688
-
689
- def get_init_weights_vit(mode='jax', head_bias: float = 0.):
690
- if 'jax' in mode:
691
- return partial(init_weights_vit_jax, head_bias=head_bias)
692
- elif 'moco' in mode:
693
- return init_weights_vit_moco
694
- else:
695
- return init_weights_vit_timm
696
-
697
-
698
- def resize_pos_embed(
699
- posemb,
700
- posemb_new,
701
- num_prefix_tokens=1,
702
- gs_new=(),
703
- interpolation='bicubic',
704
- antialias=False,
705
- ):
706
- """ Rescale the grid of position embeddings when loading from state_dict.
707
-
708
- *DEPRECATED* This function is being deprecated in favour of resample_abs_pos_embed
709
-
710
- Adapted from:
711
- https://github.com/google-research/vision_transformer/blob/00883dd691c63a6830751563748663526e811cee/vit_jax/checkpoint.py#L224
712
- """
713
- ntok_new = posemb_new.shape[1]
714
- if num_prefix_tokens:
715
- posemb_prefix, posemb_grid = posemb[:, :num_prefix_tokens], posemb[0, num_prefix_tokens:]
716
- ntok_new -= num_prefix_tokens
717
- else:
718
- posemb_prefix, posemb_grid = posemb[:, :0], posemb[0]
719
- gs_old = int(math.sqrt(len(posemb_grid)))
720
- if not len(gs_new): # backwards compatibility
721
- gs_new = [int(math.sqrt(ntok_new))] * 2
722
- assert len(gs_new) >= 2
723
- _logger.info(f'Resized position embedding: {posemb.shape} ({[gs_old, gs_old]}) to {posemb_new.shape} ({gs_new}).')
724
- posemb_grid = posemb_grid.reshape(1, gs_old, gs_old, -1).permute(0, 3, 1, 2)
725
- posemb_grid = F.interpolate(posemb_grid, size=gs_new, mode=interpolation, antialias=antialias, align_corners=False)
726
- posemb_grid = posemb_grid.permute(0, 2, 3, 1).reshape(1, gs_new[0] * gs_new[1], -1)
727
- posemb = torch.cat([posemb_prefix, posemb_grid], dim=1)
728
- return posemb
729
-
730
-
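The deprecated helper above interpolates the 2D grid of position embeddings while carrying the class-token entry through unchanged. An illustrative sketch, resizing a 14x14 grid (224px / patch 16) to 24x24 (384px / patch 16):

# Illustrative sketch of resize_pos_embed's grid interpolation.
import torch

old = torch.randn(1, 1 + 14 * 14, 384)     # cls token + 196 positions
target = torch.zeros(1, 1 + 24 * 24, 384)  # only the target shape is used
resized = resize_pos_embed(old, target, num_prefix_tokens=1, gs_new=(24, 24))
print(resized.shape)  # torch.Size([1, 577, 384])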
731
- @torch.no_grad()
732
- def _load_weights(model: VisionTransformer, checkpoint_path: str, prefix: str = ''):
733
- """ Load weights from .npz checkpoints for official Google Brain Flax implementation
734
- """
735
- import numpy as np
736
-
737
- def _n2p(w, t=True):
738
- if w.ndim == 4 and w.shape[0] == w.shape[1] == w.shape[2] == 1:
739
- w = w.flatten()
740
- if t:
741
- if w.ndim == 4:
742
- w = w.transpose([3, 2, 0, 1])
743
- elif w.ndim == 3:
744
- w = w.transpose([2, 0, 1])
745
- elif w.ndim == 2:
746
- w = w.transpose([1, 0])
747
- return torch.from_numpy(w)
748
-
749
- w = np.load(checkpoint_path)
750
- interpolation = 'bilinear'
751
- antialias = False
752
- big_vision = False
753
- if not prefix:
754
- if 'opt/target/embedding/kernel' in w:
755
- prefix = 'opt/target/'
756
- elif 'params/embedding/kernel' in w:
757
- prefix = 'params/'
758
- big_vision = True
759
-
760
- if hasattr(model.patch_embed, 'backbone'):
761
- # hybrid
762
- backbone = model.patch_embed.backbone
763
- stem_only = not hasattr(backbone, 'stem')
764
- stem = backbone if stem_only else backbone.stem
765
- stem.conv.weight.copy_(adapt_input_conv(stem.conv.weight.shape[1], _n2p(w[f'{prefix}conv_root/kernel'])))
766
- stem.norm.weight.copy_(_n2p(w[f'{prefix}gn_root/scale']))
767
- stem.norm.bias.copy_(_n2p(w[f'{prefix}gn_root/bias']))
768
- if not stem_only:
769
- for i, stage in enumerate(backbone.stages):
770
- for j, block in enumerate(stage.blocks):
771
- bp = f'{prefix}block{i + 1}/unit{j + 1}/'
772
- for r in range(3):
773
- getattr(block, f'conv{r + 1}').weight.copy_(_n2p(w[f'{bp}conv{r + 1}/kernel']))
774
- getattr(block, f'norm{r + 1}').weight.copy_(_n2p(w[f'{bp}gn{r + 1}/scale']))
775
- getattr(block, f'norm{r + 1}').bias.copy_(_n2p(w[f'{bp}gn{r + 1}/bias']))
776
- if block.downsample is not None:
777
- block.downsample.conv.weight.copy_(_n2p(w[f'{bp}conv_proj/kernel']))
778
- block.downsample.norm.weight.copy_(_n2p(w[f'{bp}gn_proj/scale']))
779
- block.downsample.norm.bias.copy_(_n2p(w[f'{bp}gn_proj/bias']))
780
- embed_conv_w = _n2p(w[f'{prefix}embedding/kernel'])
781
- else:
782
- embed_conv_w = adapt_input_conv(
783
- model.patch_embed.proj.weight.shape[1], _n2p(w[f'{prefix}embedding/kernel']))
784
- if embed_conv_w.shape[-2:] != model.patch_embed.proj.weight.shape[-2:]:
785
- embed_conv_w = resample_patch_embed(
786
- embed_conv_w,
787
- model.patch_embed.proj.weight.shape[-2:],
788
- interpolation=interpolation,
789
- antialias=antialias,
790
- verbose=True,
791
- )
792
-
793
- model.patch_embed.proj.weight.copy_(embed_conv_w)
794
- model.patch_embed.proj.bias.copy_(_n2p(w[f'{prefix}embedding/bias']))
795
- if model.cls_token is not None:
796
- model.cls_token.copy_(_n2p(w[f'{prefix}cls'], t=False))
797
- if big_vision:
798
- pos_embed_w = _n2p(w[f'{prefix}pos_embedding'], t=False)
799
- else:
800
- pos_embed_w = _n2p(w[f'{prefix}Transformer/posembed_input/pos_embedding'], t=False)
801
- if pos_embed_w.shape != model.pos_embed.shape:
802
- old_shape = pos_embed_w.shape
803
- num_prefix_tokens = 0 if getattr(model, 'no_embed_class', False) else getattr(model, 'num_prefix_tokens', 1)
804
- pos_embed_w = resample_abs_pos_embed( # resize pos embedding when different size from pretrained weights
805
- pos_embed_w,
806
- new_size=model.patch_embed.grid_size,
807
- num_prefix_tokens=num_prefix_tokens,
808
- interpolation=interpolation,
809
- antialias=antialias,
810
- verbose=True,
811
- )
812
- model.pos_embed.copy_(pos_embed_w)
813
- model.norm.weight.copy_(_n2p(w[f'{prefix}Transformer/encoder_norm/scale']))
814
- model.norm.bias.copy_(_n2p(w[f'{prefix}Transformer/encoder_norm/bias']))
815
- if isinstance(model.head, nn.Linear) and model.head.bias.shape[0] == w[f'{prefix}head/bias'].shape[-1]:
816
- model.head.weight.copy_(_n2p(w[f'{prefix}head/kernel']))
817
- model.head.bias.copy_(_n2p(w[f'{prefix}head/bias']))
818
- # NOTE representation layer has been removed, not used in latest 21k/1k pretrained weights
819
- # if isinstance(getattr(model.pre_logits, 'fc', None), nn.Linear) and f'{prefix}pre_logits/bias' in w:
820
- # model.pre_logits.fc.weight.copy_(_n2p(w[f'{prefix}pre_logits/kernel']))
821
- # model.pre_logits.fc.bias.copy_(_n2p(w[f'{prefix}pre_logits/bias']))
822
- mha_sub, b_sub, ln1_sub = (0, 0, 1) if big_vision else (1, 3, 2)
823
- for i, block in enumerate(model.blocks.children()):
824
- block_prefix = f'{prefix}Transformer/encoderblock_{i}/'
825
- mha_prefix = block_prefix + f'MultiHeadDotProductAttention_{mha_sub}/'
826
- block.norm1.weight.copy_(_n2p(w[f'{block_prefix}LayerNorm_0/scale']))
827
- block.norm1.bias.copy_(_n2p(w[f'{block_prefix}LayerNorm_0/bias']))
828
- block.attn.qkv.weight.copy_(torch.cat([
829
- _n2p(w[f'{mha_prefix}{n}/kernel'], t=False).flatten(1).T for n in ('query', 'key', 'value')]))
830
- block.attn.qkv.bias.copy_(torch.cat([
831
- _n2p(w[f'{mha_prefix}{n}/bias'], t=False).reshape(-1) for n in ('query', 'key', 'value')]))
832
- block.attn.proj.weight.copy_(_n2p(w[f'{mha_prefix}out/kernel']).flatten(1))
833
- block.attn.proj.bias.copy_(_n2p(w[f'{mha_prefix}out/bias']))
834
- for r in range(2):
835
- getattr(block.mlp, f'fc{r + 1}').weight.copy_(_n2p(w[f'{block_prefix}MlpBlock_{b_sub}/Dense_{r}/kernel']))
836
- getattr(block.mlp, f'fc{r + 1}').bias.copy_(_n2p(w[f'{block_prefix}MlpBlock_{b_sub}/Dense_{r}/bias']))
837
- block.norm2.weight.copy_(_n2p(w[f'{block_prefix}LayerNorm_{ln1_sub}/scale']))
838
- block.norm2.bias.copy_(_n2p(w[f'{block_prefix}LayerNorm_{ln1_sub}/bias']))
839
-
840
-
841
- def _convert_openai_clip(state_dict, model):
842
- out_dict = {}
843
- swaps = [
844
- ('visual.', ''), ('conv1', 'patch_embed.proj'), ('positional_embedding', 'pos_embed'),
845
- ('transformer.resblocks.', 'blocks.'), ('ln_pre', 'norm_pre'), ('ln_post', 'norm'), ('ln_', 'norm'),
846
- ('in_proj_', 'qkv.'), ('out_proj', 'proj'), ('mlp.c_fc', 'mlp.fc1'), ('mlp.c_proj', 'mlp.fc2'),
847
- ]
848
- for k, v in state_dict.items():
849
- if not k.startswith('visual.'):
850
- continue
851
- for sp in swaps:
852
- k = k.replace(sp[0], sp[1])
853
-
854
- if k == 'proj':
855
- k = 'head.weight'
856
- v = v.transpose(0, 1)
857
- out_dict['head.bias'] = torch.zeros(v.shape[0])
858
- elif k == 'class_embedding':
859
- k = 'cls_token'
860
- v = v.unsqueeze(0).unsqueeze(1)
861
- elif k == 'pos_embed':
862
- v = v.unsqueeze(0)
863
- if v.shape[1] != model.pos_embed.shape[1]:
864
- # To resize pos embedding when using model at different size from pretrained weights
865
- v = resize_pos_embed(
866
- v,
867
- model.pos_embed,
868
- 0 if getattr(model, 'no_embed_class') else getattr(model, 'num_prefix_tokens', 1),
869
- model.patch_embed.grid_size
870
- )
871
- out_dict[k] = v
872
- return out_dict
873
-
874
-
875
- def checkpoint_filter_fn(
876
- state_dict,
877
- model,
878
- adapt_layer_scale=False,
879
- interpolation='bicubic',
880
- antialias=True,
881
- ):
882
- """ convert patch embedding weight from manual patchify + linear proj to conv"""
883
- import re
884
- out_dict = {}
885
- if 'model' in state_dict:
886
- # For deit models
887
- state_dict = state_dict['model']
888
-
889
- if 'visual.class_embedding' in state_dict:
890
- return _convert_openai_clip(state_dict, model)
891
-
892
- for k, v in state_dict.items():
893
- if 'patch_embed.proj.weight' in k:
894
- O, I, H, W = model.patch_embed.proj.weight.shape
895
- if len(v.shape) < 4:
896
- # For old models that I trained prior to conv based patchification
897
-                 # (O, I, H, W already unpacked above)
898
- v = v.reshape(O, -1, H, W)
899
- if v.shape[-1] != W or v.shape[-2] != H:
900
- v = resample_patch_embed(
901
- v,
902
- (H, W),
903
- interpolation=interpolation,
904
- antialias=antialias,
905
- verbose=True,
906
- )
907
- elif k == 'pos_embed' and v.shape[1] != model.pos_embed.shape[1]:
908
- # To resize pos embedding when using model at different size from pretrained weights
909
- num_prefix_tokens = 0 if getattr(model, 'no_embed_class', False) else getattr(model, 'num_prefix_tokens', 1)
910
- v = resample_abs_pos_embed(
911
- v,
912
- new_size=model.patch_embed.grid_size,
913
- num_prefix_tokens=num_prefix_tokens,
914
- interpolation=interpolation,
915
- antialias=antialias,
916
- verbose=True,
917
- )
918
- elif adapt_layer_scale and 'gamma_' in k:
919
- # remap layer-scale gamma into sub-module (deit3 models)
920
- k = re.sub(r'gamma_([0-9])', r'ls\1.gamma', k)
921
- elif 'pre_logits' in k:
922
- # NOTE representation layer removed as not used in latest 21k/1k pretrained weights
923
- continue
924
- out_dict[k] = v
925
- return out_dict
926
-
927
-
928
- def _cfg(url='', **kwargs):
929
- return {
930
- 'url': url,
931
- 'num_classes': 1000, 'input_size': (3, 224, 224), 'pool_size': None,
932
- 'crop_pct': .9, 'interpolation': 'bicubic', 'fixed_input_size': True,
933
- 'mean': IMAGENET_INCEPTION_MEAN, 'std': IMAGENET_INCEPTION_STD,
934
- 'first_conv': 'patch_embed.proj', 'classifier': 'head',
935
- **kwargs
936
- }
937
-
938
-
939
- default_cfgs = generate_default_cfgs({
940
-
941
- # re-finetuned augreg 21k FT on in1k weights
942
- 'vit_base_patch16_224.augreg2_in21k_ft_in1k': _cfg(
943
- hf_hub_id='timm/'),
944
- 'vit_base_patch16_384.augreg2_in21k_ft_in1k': _cfg(),
945
- 'vit_base_patch8_224.augreg2_in21k_ft_in1k': _cfg(
946
- hf_hub_id='timm/'),
947
-
948
- # How to train your ViT (augreg) weights, pretrained on 21k FT on in1k
949
- 'vit_tiny_patch16_224.augreg_in21k_ft_in1k': _cfg(
950
- url='https://storage.googleapis.com/vit_models/augreg/Ti_16-i21k-300ep-lr_0.001-aug_none-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_224.npz',
951
- hf_hub_id='timm/',
952
- custom_load=True),
953
- 'vit_tiny_patch16_384.augreg_in21k_ft_in1k': _cfg(
954
- url='https://storage.googleapis.com/vit_models/augreg/Ti_16-i21k-300ep-lr_0.001-aug_none-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_384.npz',
955
- hf_hub_id='timm/',
956
- custom_load=True, input_size=(3, 384, 384), crop_pct=1.0),
957
- 'vit_small_patch32_224.augreg_in21k_ft_in1k': _cfg(
958
- url='https://storage.googleapis.com/vit_models/augreg/S_32-i21k-300ep-lr_0.001-aug_light1-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_224.npz',
959
- hf_hub_id='timm/',
960
- custom_load=True),
961
- 'vit_small_patch32_384.augreg_in21k_ft_in1k': _cfg(
962
- url='https://storage.googleapis.com/vit_models/augreg/S_32-i21k-300ep-lr_0.001-aug_light1-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_384.npz',
963
- hf_hub_id='timm/',
964
- custom_load=True, input_size=(3, 384, 384), crop_pct=1.0),
965
- 'vit_small_patch16_224.augreg_in21k_ft_in1k': _cfg(
966
- url='https://storage.googleapis.com/vit_models/augreg/S_16-i21k-300ep-lr_0.001-aug_light1-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_224.npz',
967
- hf_hub_id='timm/',
968
- custom_load=True),
969
- 'vit_small_patch16_384.augreg_in21k_ft_in1k': _cfg(
970
- url='https://storage.googleapis.com/vit_models/augreg/S_16-i21k-300ep-lr_0.001-aug_light1-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_384.npz',
971
- hf_hub_id='timm/',
972
- custom_load=True, input_size=(3, 384, 384), crop_pct=1.0),
973
- 'vit_base_patch32_224.augreg_in21k_ft_in1k': _cfg(
974
- url='https://storage.googleapis.com/vit_models/augreg/B_32-i21k-300ep-lr_0.001-aug_medium1-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_224.npz',
975
- hf_hub_id='timm/',
976
- custom_load=True),
977
- 'vit_base_patch32_384.augreg_in21k_ft_in1k': _cfg(
978
- url='https://storage.googleapis.com/vit_models/augreg/B_32-i21k-300ep-lr_0.001-aug_light1-wd_0.1-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_384.npz',
979
- hf_hub_id='timm/',
980
- custom_load=True, input_size=(3, 384, 384), crop_pct=1.0),
981
- 'vit_base_patch16_224.augreg_in21k_ft_in1k': _cfg(
982
- url='https://storage.googleapis.com/vit_models/augreg/B_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.01-res_224.npz',
983
- hf_hub_id='timm/',
984
- custom_load=True),
985
- 'vit_base_patch16_384.augreg_in21k_ft_in1k': _cfg(
986
- url='https://storage.googleapis.com/vit_models/augreg/B_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.01-res_384.npz',
987
- hf_hub_id='timm/',
988
- custom_load=True, input_size=(3, 384, 384), crop_pct=1.0),
989
- 'vit_base_patch8_224.augreg_in21k_ft_in1k': _cfg(
990
- url='https://storage.googleapis.com/vit_models/augreg/B_8-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.01-res_224.npz',
991
- hf_hub_id='timm/',
992
- custom_load=True),
993
- 'vit_large_patch16_224.augreg_in21k_ft_in1k': _cfg(
994
- url='https://storage.googleapis.com/vit_models/augreg/L_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_224.npz',
995
- hf_hub_id='timm/',
996
- custom_load=True),
997
- 'vit_large_patch16_384.augreg_in21k_ft_in1k': _cfg(
998
- url='https://storage.googleapis.com/vit_models/augreg/L_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_384.npz',
999
- hf_hub_id='timm/',
1000
- custom_load=True, input_size=(3, 384, 384), crop_pct=1.0),
1001
-
1002
- # patch models (weights from official Google JAX impl) pretrained on in21k FT on in1k
1003
- 'vit_base_patch16_224.orig_in21k_ft_in1k': _cfg(
1004
- url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_p16_224-80ecf9dd.pth',
1005
- hf_hub_id='timm/'),
1006
- 'vit_base_patch16_384.orig_in21k_ft_in1k': _cfg(
1007
- url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_p16_384-83fb41ba.pth',
1008
- hf_hub_id='timm/',
1009
- input_size=(3, 384, 384), crop_pct=1.0),
1010
- 'vit_large_patch32_384.orig_in21k_ft_in1k': _cfg(
1011
- url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_large_p32_384-9b920ba8.pth',
1012
- hf_hub_id='timm/',
1013
- input_size=(3, 384, 384), crop_pct=1.0),
1014
-
1015
- # How to train your ViT (augreg) weights trained on in1k only
1016
- 'vit_small_patch16_224.augreg_in1k': _cfg(
1017
- url='https://storage.googleapis.com/vit_models/augreg/S_16-i1k-300ep-lr_0.001-aug_medium2-wd_0.1-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.01-res_224.npz',
1018
- hf_hub_id='timm/',
1019
- custom_load=True),
1020
- 'vit_small_patch16_384.augreg_in1k': _cfg(
1021
- url='https://storage.googleapis.com/vit_models/augreg/S_16-i1k-300ep-lr_0.001-aug_medium2-wd_0.1-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.01-res_384.npz',
1022
- hf_hub_id='timm/',
1023
- custom_load=True, input_size=(3, 384, 384), crop_pct=1.0),
1024
- 'vit_base_patch32_224.augreg_in1k': _cfg(
1025
- url='https://storage.googleapis.com/vit_models/augreg/B_32-i1k-300ep-lr_0.001-aug_medium2-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_224.npz',
1026
- hf_hub_id='timm/',
1027
- custom_load=True),
1028
- 'vit_base_patch32_384.augreg_in1k': _cfg(
1029
- url='https://storage.googleapis.com/vit_models/augreg/B_32-i1k-300ep-lr_0.001-aug_medium2-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_384.npz',
1030
- hf_hub_id='timm/',
1031
- custom_load=True, input_size=(3, 384, 384), crop_pct=1.0),
1032
- 'vit_base_patch16_224.augreg_in1k': _cfg(
1033
- url='https://storage.googleapis.com/vit_models/augreg/B_16-i1k-300ep-lr_0.001-aug_strong2-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_224.npz',
1034
- hf_hub_id='timm/',
1035
- custom_load=True),
1036
- 'vit_base_patch16_384.augreg_in1k': _cfg(
1037
- url='https://storage.googleapis.com/vit_models/augreg/B_16-i1k-300ep-lr_0.001-aug_strong2-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_384.npz',
1038
- hf_hub_id='timm/',
1039
- custom_load=True, input_size=(3, 384, 384), crop_pct=1.0),
1040
-
1041
- 'vit_large_patch14_224.untrained': _cfg(url=''),
1042
- 'vit_huge_patch14_224.untrained': _cfg(url=''),
1043
- 'vit_giant_patch14_224.untrained': _cfg(url=''),
1044
- 'vit_gigantic_patch14_224.untrained': _cfg(url=''),
1045
-
1046
- # patch models, imagenet21k (weights from official Google JAX impl)
1047
- 'vit_large_patch32_224.orig_in21k': _cfg(
1048
- url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_large_patch32_224_in21k-9046d2e7.pth',
1049
- hf_hub_id='timm/',
1050
- num_classes=21843),
1051
- 'vit_huge_patch14_224.orig_in21k': _cfg(
1052
- url='https://storage.googleapis.com/vit_models/imagenet21k/ViT-H_14.npz',
1053
- hf_hub_id='timm/',
1054
- custom_load=True, num_classes=21843),
1055
-
1056
- # How to train your ViT (augreg) weights, pretrained on in21k
1057
- 'vit_tiny_patch16_224.augreg_in21k': _cfg(
1058
- url='https://storage.googleapis.com/vit_models/augreg/Ti_16-i21k-300ep-lr_0.001-aug_none-wd_0.03-do_0.0-sd_0.0.npz',
1059
- hf_hub_id='timm/',
1060
- custom_load=True, num_classes=21843),
1061
- 'vit_small_patch32_224.augreg_in21k': _cfg(
1062
- url='https://storage.googleapis.com/vit_models/augreg/S_32-i21k-300ep-lr_0.001-aug_light1-wd_0.03-do_0.0-sd_0.0.npz',
1063
- hf_hub_id='timm/',
1064
- custom_load=True, num_classes=21843),
1065
- 'vit_small_patch16_224.augreg_in21k': _cfg(
1066
- url='https://storage.googleapis.com/vit_models/augreg/S_16-i21k-300ep-lr_0.001-aug_light1-wd_0.03-do_0.0-sd_0.0.npz',
1067
- hf_hub_id='timm/',
1068
- custom_load=True, num_classes=21843),
1069
- 'vit_base_patch32_224.augreg_in21k': _cfg(
1070
- url='https://storage.googleapis.com/vit_models/augreg/B_32-i21k-300ep-lr_0.001-aug_medium1-wd_0.03-do_0.0-sd_0.0.npz',
1071
- hf_hub_id='timm/',
1072
- custom_load=True, num_classes=21843),
1073
- 'vit_base_patch16_224.augreg_in21k': _cfg(
1074
- url='https://storage.googleapis.com/vit_models/augreg/B_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.0-sd_0.0.npz',
1075
- hf_hub_id='timm/',
1076
- custom_load=True, num_classes=21843),
1077
- 'vit_base_patch8_224.augreg_in21k': _cfg(
1078
- url='https://storage.googleapis.com/vit_models/augreg/B_8-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.0-sd_0.0.npz',
1079
- hf_hub_id='timm/',
1080
- custom_load=True, num_classes=21843),
1081
- 'vit_large_patch16_224.augreg_in21k': _cfg(
1082
- url='https://storage.googleapis.com/vit_models/augreg/L_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.1-sd_0.1.npz',
1083
- hf_hub_id='timm/',
1084
- custom_load=True, num_classes=21843),
1085
-
1086
- # SAM trained models (https://arxiv.org/abs/2106.01548)
1087
- 'vit_base_patch32_224.sam': _cfg(
1088
- url='https://storage.googleapis.com/vit_models/sam/ViT-B_32.npz', custom_load=True,
1089
- hf_hub_id='timm/'),
1090
- 'vit_base_patch16_224.sam': _cfg(
1091
- url='https://storage.googleapis.com/vit_models/sam/ViT-B_16.npz', custom_load=True,
1092
- hf_hub_id='timm/'),
1093
-
1094
- # DINO pretrained - https://arxiv.org/abs/2104.14294 (no classifier head, for fine-tune only)
1095
- 'vit_small_patch16_224.dino': _cfg(
1096
- url='https://dl.fbaipublicfiles.com/dino/dino_deitsmall16_pretrain/dino_deitsmall16_pretrain.pth',
1097
- hf_hub_id='timm/',
1098
- mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD, num_classes=0),
1099
- 'vit_small_patch8_224.dino': _cfg(
1100
- url='https://dl.fbaipublicfiles.com/dino/dino_deitsmall8_pretrain/dino_deitsmall8_pretrain.pth',
1101
- hf_hub_id='timm/',
1102
- mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD, num_classes=0),
1103
- 'vit_base_patch16_224.dino': _cfg(
1104
- url='https://dl.fbaipublicfiles.com/dino/dino_vitbase16_pretrain/dino_vitbase16_pretrain.pth',
1105
- hf_hub_id='timm/',
1106
- mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD, num_classes=0),
1107
- 'vit_base_patch8_224.dino': _cfg(
1108
- url='https://dl.fbaipublicfiles.com/dino/dino_vitbase8_pretrain/dino_vitbase8_pretrain.pth',
1109
- hf_hub_id='timm/',
1110
- mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD, num_classes=0),
1111
-
1112
- # ViT ImageNet-21K-P pretraining by MILL
1113
- 'vit_base_patch16_224_miil.in21k': _cfg(
1114
- url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-tresnet/vit_base_patch16_224_in21k_miil-887286df.pth',
1115
- hf_hub_id='timm/',
1116
- mean=(0., 0., 0.), std=(1., 1., 1.), crop_pct=0.875, interpolation='bilinear', num_classes=11221),
1117
- 'vit_base_patch16_224_miil.in21k_ft_in1k': _cfg(
1118
- url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-tresnet/vit_base_patch16_224_1k_miil_84_4-2deb18e3.pth',
1119
- hf_hub_id='timm/',
1120
- mean=(0., 0., 0.), std=(1., 1., 1.), crop_pct=0.875, interpolation='bilinear'),
1121
-
1122
- # Custom timm variants
1123
- 'vit_base_patch16_rpn_224.in1k': _cfg(
1124
- url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-tpu-weights/vit_base_patch16_rpn_224-sw-3b07e89d.pth',
1125
- hf_hub_id='timm/'),
1126
- 'vit_medium_patch16_gap_240.in12k': _cfg(
1127
- hf_hub_id='timm/',
1128
- input_size=(3, 240, 240), crop_pct=0.95, num_classes=11821),
1129
- 'vit_medium_patch16_gap_256.in12k_ft_in1k': _cfg(
1130
- hf_hub_id='timm/',
1131
- input_size=(3, 256, 256), crop_pct=0.95),
1132
- 'vit_medium_patch16_gap_384.in12k_ft_in1k': _cfg(
1133
- hf_hub_id='timm/',
1134
- input_size=(3, 384, 384), crop_pct=0.95, crop_mode='squash'),
1135
- 'vit_base_patch16_gap_224': _cfg(),
1136
-
1137
- # CLIP pretrained image tower and related fine-tuned weights
1138
- 'vit_base_patch32_clip_224.laion2b_ft_in12k_in1k': _cfg(
1139
- hf_hub_id='timm/',
1140
- mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD),
1141
- 'vit_base_patch32_clip_384.laion2b_ft_in12k_in1k': _cfg(
1142
- hf_hub_id='timm/',
1143
- mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, crop_pct=1.0, input_size=(3, 384, 384)),
1144
- 'vit_base_patch32_clip_448.laion2b_ft_in12k_in1k': _cfg(
1145
- hf_hub_id='timm/',
1146
- mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, crop_pct=1.0, input_size=(3, 448, 448)),
1147
- 'vit_base_patch16_clip_224.laion2b_ft_in12k_in1k': _cfg(
1148
- hf_hub_id='timm/',
1149
- mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, crop_pct=0.95),
1150
- 'vit_base_patch16_clip_384.laion2b_ft_in12k_in1k': _cfg(
1151
- hf_hub_id='timm/',
1152
- mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
1153
- crop_pct=1.0, input_size=(3, 384, 384), crop_mode='squash'),
1154
- 'vit_large_patch14_clip_224.laion2b_ft_in12k_in1k': _cfg(
1155
- hf_hub_id='timm/',
1156
- mean=IMAGENET_INCEPTION_MEAN, std=IMAGENET_INCEPTION_STD, crop_pct=1.0),
1157
- 'vit_large_patch14_clip_336.laion2b_ft_in12k_in1k': _cfg(
1158
- hf_hub_id='timm/',
1159
- mean=IMAGENET_INCEPTION_MEAN, std=IMAGENET_INCEPTION_STD,
1160
- crop_pct=1.0, input_size=(3, 336, 336), crop_mode='squash'),
1161
- 'vit_huge_patch14_clip_224.laion2b_ft_in12k_in1k': _cfg(
1162
- hf_hub_id='timm/',
1163
- mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, crop_pct=1.0),
1164
- 'vit_huge_patch14_clip_336.laion2b_ft_in12k_in1k': _cfg(
1165
- hf_hub_id='timm/',
1166
- mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
1167
- crop_pct=1.0, input_size=(3, 336, 336), crop_mode='squash'),
1168
-
1169
- 'vit_base_patch32_clip_224.openai_ft_in12k_in1k': _cfg(
1170
- # hf_hub_id='timm/vit_base_patch32_clip_224.openai_ft_in12k_in1k',
1171
- mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD),
1172
- 'vit_base_patch32_clip_384.openai_ft_in12k_in1k': _cfg(
1173
- hf_hub_id='timm/',
1174
- mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
1175
- crop_pct=0.95, input_size=(3, 384, 384), crop_mode='squash'),
1176
- 'vit_base_patch16_clip_224.openai_ft_in12k_in1k': _cfg(
1177
- hf_hub_id='timm/',
1178
- mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, crop_pct=0.95),
1179
- 'vit_base_patch16_clip_384.openai_ft_in12k_in1k': _cfg(
1180
- hf_hub_id='timm/',
1181
- mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
1182
- crop_pct=0.95, input_size=(3, 384, 384), crop_mode='squash'),
1183
- 'vit_large_patch14_clip_224.openai_ft_in12k_in1k': _cfg(
1184
- hf_hub_id='timm/',
1185
- mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, crop_pct=1.0),
1186
- 'vit_large_patch14_clip_336.openai_ft_in12k_in1k': _cfg(
1187
- hf_hub_id='timm/',
1188
- mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
1189
- crop_pct=1.0, input_size=(3, 336, 336), crop_mode='squash'),
1190
-
1191
- 'vit_base_patch32_clip_224.laion2b_ft_in1k': _cfg(
1192
- hf_hub_id='timm/',
1193
- mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD),
1194
- 'vit_base_patch16_clip_224.laion2b_ft_in1k': _cfg(
1195
- hf_hub_id='timm/',
1196
- mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, crop_pct=1.0),
1197
- 'vit_base_patch16_clip_384.laion2b_ft_in1k': _cfg(
1198
- hf_hub_id='timm/',
1199
- mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
1200
- crop_pct=1.0, input_size=(3, 384, 384), crop_mode='squash'),
1201
- 'vit_large_patch14_clip_224.laion2b_ft_in1k': _cfg(
1202
- hf_hub_id='timm/',
1203
- mean=IMAGENET_INCEPTION_MEAN, std=IMAGENET_INCEPTION_STD, crop_pct=1.0),
1204
- 'vit_large_patch14_clip_336.laion2b_ft_in1k': _cfg(
1205
- hf_hub_id='timm/',
1206
- mean=IMAGENET_INCEPTION_MEAN, std=IMAGENET_INCEPTION_STD,
1207
- crop_pct=1.0, input_size=(3, 336, 336), crop_mode='squash'),
1208
- 'vit_huge_patch14_clip_224.laion2b_ft_in1k': _cfg(
1209
- hf_hub_id='timm/',
1210
- mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, crop_pct=1.0),
1211
- 'vit_huge_patch14_clip_336.laion2b_ft_in1k': _cfg(
1212
- hf_hub_id='',
1213
- mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
1214
- crop_pct=1.0, input_size=(3, 336, 336), crop_mode='squash'),
1215
-
1216
- 'vit_base_patch32_clip_224.openai_ft_in1k': _cfg(
1217
- hf_hub_id='timm/',
1218
- mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD),
1219
- 'vit_base_patch16_clip_224.openai_ft_in1k': _cfg(
1220
- hf_hub_id='timm/',
1221
- mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD),
1222
- 'vit_base_patch16_clip_384.openai_ft_in1k': _cfg(
1223
- hf_hub_id='timm/',
1224
- mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
1225
- crop_pct=1.0, input_size=(3, 384, 384), crop_mode='squash'),
1226
- 'vit_large_patch14_clip_224.openai_ft_in1k': _cfg(
1227
- hf_hub_id='timm/',
1228
- mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, crop_pct=1.0),
1229
-
1230
- 'vit_base_patch32_clip_224.laion2b_ft_in12k': _cfg(
1231
- #hf_hub_id='timm/vit_base_patch32_clip_224.laion2b_ft_in12k',
1232
- mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, num_classes=11821),
1233
- 'vit_base_patch16_clip_224.laion2b_ft_in12k': _cfg(
1234
- hf_hub_id='timm/',
1235
- mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, num_classes=11821),
1236
- 'vit_large_patch14_clip_224.laion2b_ft_in12k': _cfg(
1237
- hf_hub_id='timm/',
1238
-         mean=IMAGENET_INCEPTION_MEAN, std=IMAGENET_INCEPTION_STD, crop_pct=1.0, num_classes=11821),
-     'vit_huge_patch14_clip_224.laion2b_ft_in12k': _cfg(
-         hf_hub_id='timm/',
-         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, crop_pct=1.0, num_classes=11821),
-
-     'vit_base_patch32_clip_224.openai_ft_in12k': _cfg(
-         # hf_hub_id='timm/vit_base_patch32_clip_224.openai_ft_in12k',
-         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, num_classes=11821),
-     'vit_base_patch16_clip_224.openai_ft_in12k': _cfg(
-         hf_hub_id='timm/',
-         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, num_classes=11821),
-     'vit_large_patch14_clip_224.openai_ft_in12k': _cfg(
-         hf_hub_id='timm/',
-         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, crop_pct=1.0, num_classes=11821),
-
-     'vit_base_patch32_clip_224.laion2b': _cfg(
-         hf_hub_id='laion/CLIP-ViT-B-32-laion2B-s34B-b79K',
-         hf_hub_filename='open_clip_pytorch_model.bin',
-         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, num_classes=512),
-     'vit_base_patch16_clip_224.laion2b': _cfg(
-         # hf_hub_id='laion/CLIP-ViT-B-16-laion2B-s34B-b88K',
-         hf_hub_filename='open_clip_pytorch_model.bin',
-         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, crop_pct=1.0, num_classes=512),
-     'vit_large_patch14_clip_224.laion2b': _cfg(
-         hf_hub_id='laion/CLIP-ViT-L-14-laion2B-s32B-b82K',
-         hf_hub_filename='open_clip_pytorch_model.bin',
-         mean=IMAGENET_INCEPTION_MEAN, std=IMAGENET_INCEPTION_STD, crop_pct=1.0, num_classes=768),
-     'vit_huge_patch14_clip_224.laion2b': _cfg(
-         hf_hub_id='laion/CLIP-ViT-H-14-laion2B-s32B-b79K',
-         hf_hub_filename='open_clip_pytorch_model.bin',
-         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, crop_pct=1.0, num_classes=1024),
-     'vit_giant_patch14_clip_224.laion2b': _cfg(
-         hf_hub_id='laion/CLIP-ViT-g-14-laion2B-s12B-b42K',
-         hf_hub_filename='open_clip_pytorch_model.bin',
-         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, crop_pct=1.0, num_classes=1024),
-
-     'vit_base_patch32_clip_224.openai': _cfg(
-         hf_hub_id='timm/',
-         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, num_classes=512),
-     'vit_base_patch16_clip_224.openai': _cfg(
-         hf_hub_id='timm/',
-         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, num_classes=512),
-     'vit_large_patch14_clip_224.openai': _cfg(
-         hf_hub_id='timm/',
-         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, crop_pct=1.0, num_classes=768),
-
-     # experimental (may be removed)
-     'vit_base_patch32_plus_256': _cfg(url='', input_size=(3, 256, 256), crop_pct=0.95),
-     'vit_base_patch16_plus_240': _cfg(url='', input_size=(3, 240, 240), crop_pct=0.95),
-     'vit_small_patch16_36x1_224': _cfg(url=''),
-     'vit_small_patch16_18x2_224': _cfg(url=''),
-     'vit_base_patch16_18x2_224': _cfg(url=''),
-
-     # EVA fine-tuned weights from MAE style MIM - EVA-CLIP target pretrain
-     # https://github.com/baaivision/EVA/blob/7ecf2c0a370d97967e86d047d7af9188f78d2df3/eva/README.md#eva-l-learning-better-mim-representations-from-eva-clip
-     'eva_large_patch14_196.in22k_ft_in22k_in1k': _cfg(
-         # hf_hub_id='BAAI/EVA', hf_hub_filename='eva_l_psz14_196px_21k_to_1k_ft_88p6.pt',
-         hf_hub_id='timm/',
-         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
-         input_size=(3, 196, 196), crop_pct=1.0),
-     'eva_large_patch14_336.in22k_ft_in22k_in1k': _cfg(
-         # hf_hub_id='BAAI/EVA', hf_hub_filename='eva_l_psz14_336px_21k_to_1k_ft_89p2.pt',
-         hf_hub_id='timm/',
-         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
-         input_size=(3, 336, 336), crop_pct=1.0, crop_mode='squash'),
-     'eva_large_patch14_196.in22k_ft_in1k': _cfg(
-         # hf_hub_id='BAAI/EVA', hf_hub_filename='eva_l_psz14_196px_1k_ft_88p0.pt',
-         hf_hub_id='timm/',
-         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
-         input_size=(3, 196, 196), crop_pct=1.0),
-     'eva_large_patch14_336.in22k_ft_in1k': _cfg(
-         # hf_hub_id='BAAI/EVA', hf_hub_filename='eva_l_psz14_336px_1k_ft_88p65.pt',
-         hf_hub_id='timm/',
-         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
-         input_size=(3, 336, 336), crop_pct=1.0, crop_mode='squash'),
-
-     'flexivit_small.1200ep_in1k': _cfg(
-         url='https://storage.googleapis.com/big_vision/flexivit/flexivit_s_i1k.npz', custom_load=True,
-         hf_hub_id='timm/',
-         input_size=(3, 240, 240), crop_pct=0.95),
-     'flexivit_small.600ep_in1k': _cfg(
-         url='https://storage.googleapis.com/big_vision/flexivit/flexivit_s_i1k_600ep.npz', custom_load=True,
-         hf_hub_id='timm/',
-         input_size=(3, 240, 240), crop_pct=0.95),
-     'flexivit_small.300ep_in1k': _cfg(
-         url='https://storage.googleapis.com/big_vision/flexivit/flexivit_s_i1k_300ep.npz', custom_load=True,
-         hf_hub_id='timm/',
-         input_size=(3, 240, 240), crop_pct=0.95),
-
-     'flexivit_base.1200ep_in1k': _cfg(
-         url='https://storage.googleapis.com/big_vision/flexivit/flexivit_b_i1k.npz', custom_load=True,
-         hf_hub_id='timm/',
-         input_size=(3, 240, 240), crop_pct=0.95),
-     'flexivit_base.600ep_in1k': _cfg(
-         url='https://storage.googleapis.com/big_vision/flexivit/flexivit_b_i1k_600ep.npz', custom_load=True,
-         hf_hub_id='timm/',
-         input_size=(3, 240, 240), crop_pct=0.95),
-     'flexivit_base.300ep_in1k': _cfg(
-         url='https://storage.googleapis.com/big_vision/flexivit/flexivit_b_i1k_300ep.npz', custom_load=True,
-         hf_hub_id='timm/',
-         input_size=(3, 240, 240), crop_pct=0.95),
-     'flexivit_base.1000ep_in21k': _cfg(
-         url='https://storage.googleapis.com/big_vision/flexivit/flexivit_b_i21k_1000ep.npz', custom_load=True,
-         hf_hub_id='timm/',
-         input_size=(3, 240, 240), crop_pct=0.95, num_classes=21843),
-     'flexivit_base.300ep_in21k': _cfg(
-         url='https://storage.googleapis.com/big_vision/flexivit/flexivit_b_i21k_300ep.npz', custom_load=True,
-         hf_hub_id='timm/',
-         input_size=(3, 240, 240), crop_pct=0.95, num_classes=21843),
-
-     'flexivit_large.1200ep_in1k': _cfg(
-         url='https://storage.googleapis.com/big_vision/flexivit/flexivit_l_i1k.npz', custom_load=True,
-         hf_hub_id='timm/',
-         input_size=(3, 240, 240), crop_pct=0.95),
-     'flexivit_large.600ep_in1k': _cfg(
-         url='https://storage.googleapis.com/big_vision/flexivit/flexivit_l_i1k_600ep.npz', custom_load=True,
-         hf_hub_id='timm/',
-         input_size=(3, 240, 240), crop_pct=0.95),
-     'flexivit_large.300ep_in1k': _cfg(
-         url='https://storage.googleapis.com/big_vision/flexivit/flexivit_l_i1k_300ep.npz', custom_load=True,
-         hf_hub_id='timm/',
-         input_size=(3, 240, 240), crop_pct=0.95),
-
-     'flexivit_base.patch16_in21k': _cfg(
-         url='https://storage.googleapis.com/big_vision/flexivit/vit_b16_i21k_300ep.npz', custom_load=True,
-         hf_hub_id='timm/',
-         input_size=(3, 240, 240), crop_pct=0.95, num_classes=21843),
-     'flexivit_base.patch30_in21k': _cfg(
-         url='https://storage.googleapis.com/big_vision/flexivit/vit_b30_i21k_300ep.npz', custom_load=True,
-         hf_hub_id='timm/',
-         input_size=(3, 240, 240), crop_pct=0.95, num_classes=21843),
- })
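For orientation: the mean/std/input_size/crop values carried by these _cfg entries drive eval-time preprocessing. A minimal sketch of how they are consumed, assuming a recent timm release that provides resolve_model_data_config and create_transform:

import timm
from timm.data import resolve_model_data_config, create_transform

# a CLIP-normalized variant from the cfgs above; pretrained=False keeps this offline
model = timm.create_model('vit_base_patch16_clip_224.openai', pretrained=False)
data_cfg = resolve_model_data_config(model)          # mean/std/input_size/crop_pct from the cfg
transform = create_transform(**data_cfg, is_training=False)
print(data_cfg['mean'], data_cfg['std'])             # the OPENAI_CLIP_MEAN / OPENAI_CLIP_STD values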
-
-
- def _create_vision_transformer(variant, pretrained=False, **kwargs):
-     if kwargs.get('features_only', None):
-         raise RuntimeError('features_only not implemented for Vision Transformer models.')
-
-     if 'flexi' in variant:
-         # FIXME Google FlexiViT pretrained models have a strong preference for bilinear patch / embed
-         # interpolation; other pretrained models resize better w/ anti-aliased bicubic interpolation.
-         _filter_fn = partial(checkpoint_filter_fn, interpolation='bilinear', antialias=False)
-     else:
-         _filter_fn = checkpoint_filter_fn
-
-     return build_model_with_cfg(
-         VisionTransformer, variant, pretrained,
-         pretrained_filter_fn=_filter_fn,
-         **kwargs,
-     )
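This factory is the funnel for every @register_model entrypoint that follows: timm.create_model resolves the variant name in the registry, and build_model_with_cfg attaches the matching default_cfgs entry and optionally loads weights. A hedged usage sketch:

import timm

# resolves to the registered vit_small_patch16_224 entrypoint below, which calls
# _create_vision_transformer -> build_model_with_cfg under the hood
model = timm.create_model('vit_small_patch16_224', pretrained=False, num_classes=10)
print(type(model).__name__)  # VisionTransformer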
-
-
- @register_model
- def vit_tiny_patch16_224(pretrained=False, **kwargs):
-     """ ViT-Tiny (ViT-Ti/16)
-     """
-     model_kwargs = dict(patch_size=16, embed_dim=192, depth=12, num_heads=3)
-     model = _create_vision_transformer('vit_tiny_patch16_224', pretrained=pretrained, **dict(model_kwargs, **kwargs))
-     return model
-
-
- @register_model
- def vit_tiny_patch16_384(pretrained=False, **kwargs):
-     """ ViT-Tiny (ViT-Ti/16) @ 384x384.
-     """
-     model_kwargs = dict(patch_size=16, embed_dim=192, depth=12, num_heads=3)
-     model = _create_vision_transformer('vit_tiny_patch16_384', pretrained=pretrained, **dict(model_kwargs, **kwargs))
-     return model
-
-
- @register_model
- def vit_small_patch32_224(pretrained=False, **kwargs):
-     """ ViT-Small (ViT-S/32)
-     """
-     model_kwargs = dict(patch_size=32, embed_dim=384, depth=12, num_heads=6)
-     model = _create_vision_transformer('vit_small_patch32_224', pretrained=pretrained, **dict(model_kwargs, **kwargs))
-     return model
-
-
- @register_model
- def vit_small_patch32_384(pretrained=False, **kwargs):
-     """ ViT-Small (ViT-S/32) @ 384x384.
-     """
-     model_kwargs = dict(patch_size=32, embed_dim=384, depth=12, num_heads=6)
-     model = _create_vision_transformer('vit_small_patch32_384', pretrained=pretrained, **dict(model_kwargs, **kwargs))
-     return model
-
-
- @register_model
- def vit_small_patch16_224(pretrained=False, **kwargs):
-     """ ViT-Small (ViT-S/16)
-     """
-     model_kwargs = dict(patch_size=16, embed_dim=384, depth=12, num_heads=6)
-     model = _create_vision_transformer('vit_small_patch16_224', pretrained=pretrained, **dict(model_kwargs, **kwargs))
-     return model
-
-
- @register_model
- def vit_small_patch16_384(pretrained=False, **kwargs):
-     """ ViT-Small (ViT-S/16) @ 384x384.
-     """
-     model_kwargs = dict(patch_size=16, embed_dim=384, depth=12, num_heads=6)
-     model = _create_vision_transformer('vit_small_patch16_384', pretrained=pretrained, **dict(model_kwargs, **kwargs))
-     return model
-
-
- @register_model
- def vit_small_patch8_224(pretrained=False, **kwargs):
-     """ ViT-Small (ViT-S/8)
-     """
-     model_kwargs = dict(patch_size=8, embed_dim=384, depth=12, num_heads=6)
-     model = _create_vision_transformer('vit_small_patch8_224', pretrained=pretrained, **dict(model_kwargs, **kwargs))
-     return model
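Each entrypoint merges its defaults with caller kwargs via dict(model_kwargs, **kwargs), so caller arguments win. A quick forward-pass sanity check for one of the small variants (shapes follow from the configs above):

import torch
import timm

model = timm.create_model('vit_small_patch16_224', pretrained=False).eval()
with torch.no_grad():
    logits = model(torch.randn(1, 3, 224, 224))
print(logits.shape)  # torch.Size([1, 1000]) with the default 1000-class head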
-
-
- @register_model
- def vit_base_patch32_224(pretrained=False, **kwargs):
-     """ ViT-Base (ViT-B/32) from original paper (https://arxiv.org/abs/2010.11929).
-     ImageNet-1k weights fine-tuned from in21k, source https://github.com/google-research/vision_transformer.
-     """
-     model_kwargs = dict(patch_size=32, embed_dim=768, depth=12, num_heads=12)
-     model = _create_vision_transformer('vit_base_patch32_224', pretrained=pretrained, **dict(model_kwargs, **kwargs))
-     return model
-
-
- @register_model
- def vit_base_patch32_384(pretrained=False, **kwargs):
-     """ ViT-Base model (ViT-B/32) from original paper (https://arxiv.org/abs/2010.11929).
-     ImageNet-1k weights fine-tuned from in21k @ 384x384, source https://github.com/google-research/vision_transformer.
-     """
-     model_kwargs = dict(patch_size=32, embed_dim=768, depth=12, num_heads=12)
-     model = _create_vision_transformer('vit_base_patch32_384', pretrained=pretrained, **dict(model_kwargs, **kwargs))
-     return model
-
-
- @register_model
- def vit_base_patch16_224(pretrained=False, **kwargs):
-     """ ViT-Base (ViT-B/16) from original paper (https://arxiv.org/abs/2010.11929).
-     ImageNet-1k weights fine-tuned from in21k @ 224x224, source https://github.com/google-research/vision_transformer.
-     """
-     model_kwargs = dict(patch_size=16, embed_dim=768, depth=12, num_heads=12)
-     model = _create_vision_transformer('vit_base_patch16_224', pretrained=pretrained, **dict(model_kwargs, **kwargs))
-     return model
-
-
- @register_model
- def vit_base_patch16_384(pretrained=False, **kwargs):
-     """ ViT-Base model (ViT-B/16) from original paper (https://arxiv.org/abs/2010.11929).
-     ImageNet-1k weights fine-tuned from in21k @ 384x384, source https://github.com/google-research/vision_transformer.
-     """
-     model_kwargs = dict(patch_size=16, embed_dim=768, depth=12, num_heads=12)
-     model = _create_vision_transformer('vit_base_patch16_384', pretrained=pretrained, **dict(model_kwargs, **kwargs))
-     return model
-
-
- @register_model
- def vit_base_patch8_224(pretrained=False, **kwargs):
-     """ ViT-Base (ViT-B/8) from original paper (https://arxiv.org/abs/2010.11929).
-     ImageNet-1k weights fine-tuned from in21k @ 224x224, source https://github.com/google-research/vision_transformer.
-     """
-     model_kwargs = dict(patch_size=8, embed_dim=768, depth=12, num_heads=12)
-     model = _create_vision_transformer('vit_base_patch8_224', pretrained=pretrained, **dict(model_kwargs, **kwargs))
-     return model
-
-
- @register_model
- def vit_large_patch32_224(pretrained=False, **kwargs):
-     """ ViT-Large model (ViT-L/32) from original paper (https://arxiv.org/abs/2010.11929). No pretrained weights.
-     """
-     model_kwargs = dict(patch_size=32, embed_dim=1024, depth=24, num_heads=16)
-     model = _create_vision_transformer('vit_large_patch32_224', pretrained=pretrained, **dict(model_kwargs, **kwargs))
-     return model
-
-
- @register_model
- def vit_large_patch32_384(pretrained=False, **kwargs):
-     """ ViT-Large model (ViT-L/32) from original paper (https://arxiv.org/abs/2010.11929).
-     ImageNet-1k weights fine-tuned from in21k @ 384x384, source https://github.com/google-research/vision_transformer.
-     """
-     model_kwargs = dict(patch_size=32, embed_dim=1024, depth=24, num_heads=16)
-     model = _create_vision_transformer('vit_large_patch32_384', pretrained=pretrained, **dict(model_kwargs, **kwargs))
-     return model
-
-
- @register_model
- def vit_large_patch16_224(pretrained=False, **kwargs):
-     """ ViT-Large model (ViT-L/16) from original paper (https://arxiv.org/abs/2010.11929).
-     ImageNet-1k weights fine-tuned from in21k @ 224x224, source https://github.com/google-research/vision_transformer.
-     """
-     model_kwargs = dict(patch_size=16, embed_dim=1024, depth=24, num_heads=16)
-     model = _create_vision_transformer('vit_large_patch16_224', pretrained=pretrained, **dict(model_kwargs, **kwargs))
-     return model
-
-
- @register_model
- def vit_large_patch16_384(pretrained=False, **kwargs):
-     """ ViT-Large model (ViT-L/16) from original paper (https://arxiv.org/abs/2010.11929).
-     ImageNet-1k weights fine-tuned from in21k @ 384x384, source https://github.com/google-research/vision_transformer.
-     """
-     model_kwargs = dict(patch_size=16, embed_dim=1024, depth=24, num_heads=16)
-     model = _create_vision_transformer('vit_large_patch16_384', pretrained=pretrained, **dict(model_kwargs, **kwargs))
-     return model
-
-
- @register_model
- def vit_large_patch14_224(pretrained=False, **kwargs):
-     """ ViT-Large model (ViT-L/14)
-     """
-     model_kwargs = dict(patch_size=14, embed_dim=1024, depth=24, num_heads=16)
-     model = _create_vision_transformer('vit_large_patch14_224', pretrained=pretrained, **dict(model_kwargs, **kwargs))
-     return model
-
-
- @register_model
- def vit_huge_patch14_224(pretrained=False, **kwargs):
-     """ ViT-Huge model (ViT-H/14) from original paper (https://arxiv.org/abs/2010.11929).
-     """
-     model_kwargs = dict(patch_size=14, embed_dim=1280, depth=32, num_heads=16)
-     model = _create_vision_transformer('vit_huge_patch14_224', pretrained=pretrained, **dict(model_kwargs, **kwargs))
-     return model
-
-
- @register_model
- def vit_giant_patch14_224(pretrained=False, **kwargs):
-     """ ViT-Giant (little-g) model (ViT-g/14) from `Scaling Vision Transformers` - https://arxiv.org/abs/2106.04560
-     """
-     model_kwargs = dict(patch_size=14, embed_dim=1408, mlp_ratio=48/11, depth=40, num_heads=16)
-     model = _create_vision_transformer('vit_giant_patch14_224', pretrained=pretrained, **dict(model_kwargs, **kwargs))
-     return model
-
-
- @register_model
- def vit_gigantic_patch14_224(pretrained=False, **kwargs):
-     """ ViT-Gigantic (big-G) model (ViT-G/14) from `Scaling Vision Transformers` - https://arxiv.org/abs/2106.04560
-     """
-     model_kwargs = dict(patch_size=14, embed_dim=1664, mlp_ratio=64/13, depth=48, num_heads=16)
-     model = _create_vision_transformer(
-         'vit_gigantic_patch14_224', pretrained=pretrained, **dict(model_kwargs, **kwargs))
-     return model
-
-
- @register_model
- def vit_base_patch16_224_miil(pretrained=False, **kwargs):
-     """ ViT-Base (ViT-B/16) from original paper (https://arxiv.org/abs/2010.11929).
-     Weights taken from: https://github.com/Alibaba-MIIL/ImageNet21K
-     """
-     model_kwargs = dict(patch_size=16, embed_dim=768, depth=12, num_heads=12, qkv_bias=False)
-     model = _create_vision_transformer(
-         'vit_base_patch16_224_miil', pretrained=pretrained, **dict(model_kwargs, **kwargs))
-     return model
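The fractional mlp_ratio values for the g/14 and G/14 variants are not arbitrary: they make the MLP hidden width land on a clean size. A plain arithmetic check, no timm calls involved:

# ViT-g/14: embed_dim * mlp_ratio = 1408 * 48/11 = 6144 hidden units
# ViT-G/14: embed_dim * mlp_ratio = 1664 * 64/13 = 8192 hidden units
assert int(1408 * 48 / 11) == 6144
assert int(1664 * 64 / 13) == 8192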
-
-
- @register_model
- def vit_medium_patch16_gap_240(pretrained=False, **kwargs):
-     """ ViT-Medium (ViT-M/16) w/o class token, w/ avg-pool @ 240x240
-     """
-     model_kwargs = dict(
-         patch_size=16, embed_dim=512, depth=12, num_heads=8, class_token=False,
-         global_pool='avg', qkv_bias=False, init_values=1e-6, fc_norm=False)
-     model = _create_vision_transformer(
-         'vit_medium_patch16_gap_240', pretrained=pretrained, **dict(model_kwargs, **kwargs))
-     return model
-
-
- @register_model
- def vit_medium_patch16_gap_256(pretrained=False, **kwargs):
-     """ ViT-Medium (ViT-M/16) w/o class token, w/ avg-pool @ 256x256
-     """
-     model_kwargs = dict(
-         patch_size=16, embed_dim=512, depth=12, num_heads=8, class_token=False,
-         global_pool='avg', qkv_bias=False, init_values=1e-6, fc_norm=False)
-     model = _create_vision_transformer(
-         'vit_medium_patch16_gap_256', pretrained=pretrained, **dict(model_kwargs, **kwargs))
-     return model
-
-
- @register_model
- def vit_medium_patch16_gap_384(pretrained=False, **kwargs):
-     """ ViT-Medium (ViT-M/16) w/o class token, w/ avg-pool @ 384x384
-     """
-     model_kwargs = dict(
-         patch_size=16, embed_dim=512, depth=12, num_heads=8, class_token=False,
-         global_pool='avg', qkv_bias=False, init_values=1e-6, fc_norm=False)
-     model = _create_vision_transformer(
-         'vit_medium_patch16_gap_384', pretrained=pretrained, **dict(model_kwargs, **kwargs))
-     return model
-
-
- @register_model
- def vit_base_patch16_gap_224(pretrained=False, **kwargs):
-     """ ViT-Base (ViT-B/16) w/o class token, w/ avg-pool @ 224x224
-     """
-     model_kwargs = dict(
-         patch_size=16, embed_dim=768, depth=12, num_heads=16, class_token=False, global_pool='avg', fc_norm=False)
-     model = _create_vision_transformer(
-         'vit_base_patch16_gap_224', pretrained=pretrained, **dict(model_kwargs, **kwargs))
-     return model
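The GAP variants drop the class token (class_token=False) and average-pool patch tokens (global_pool='avg'), so forward_features should return patch tokens only. A sketch, assuming a 256x256 input for the gap_256 variant:

import torch
import timm

model = timm.create_model('vit_medium_patch16_gap_256', pretrained=False).eval()
feats = model.forward_features(torch.randn(1, 3, 256, 256))
print(feats.shape)  # expected torch.Size([1, 256, 512]): (256/16)**2 tokens, no class token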
-
-
- @register_model
- def vit_base_patch32_clip_224(pretrained=False, **kwargs):
-     """ ViT-B/32 CLIP image tower @ 224x224
-     """
-     model_kwargs = dict(
-         patch_size=32, embed_dim=768, depth=12, num_heads=12, pre_norm=True, norm_layer=nn.LayerNorm)
-     model = _create_vision_transformer(
-         'vit_base_patch32_clip_224', pretrained=pretrained, **dict(model_kwargs, **kwargs))
-     return model
-
-
- @register_model
- def vit_base_patch32_clip_384(pretrained=False, **kwargs):
-     """ ViT-B/32 CLIP image tower @ 384x384
-     """
-     model_kwargs = dict(
-         patch_size=32, embed_dim=768, depth=12, num_heads=12, pre_norm=True, norm_layer=nn.LayerNorm)
-     model = _create_vision_transformer(
-         'vit_base_patch32_clip_384', pretrained=pretrained, **dict(model_kwargs, **kwargs))
-     return model
-
-
- @register_model
- def vit_base_patch32_clip_448(pretrained=False, **kwargs):
-     """ ViT-B/32 CLIP image tower @ 448x448
-     """
-     model_kwargs = dict(
-         patch_size=32, embed_dim=768, depth=12, num_heads=12, pre_norm=True, norm_layer=nn.LayerNorm)
-     model = _create_vision_transformer(
-         'vit_base_patch32_clip_448', pretrained=pretrained, **dict(model_kwargs, **kwargs))
-     return model
-
-
- @register_model
- def vit_base_patch16_clip_224(pretrained=False, **kwargs):
-     """ ViT-B/16 CLIP image tower
-     """
-     model_kwargs = dict(patch_size=16, embed_dim=768, depth=12, num_heads=12, pre_norm=True, norm_layer=nn.LayerNorm)
-     model = _create_vision_transformer(
-         'vit_base_patch16_clip_224', pretrained=pretrained, **dict(model_kwargs, **kwargs))
-     return model
-
-
- @register_model
- def vit_base_patch16_clip_384(pretrained=False, **kwargs):
-     """ ViT-B/16 CLIP image tower @ 384x384
-     """
-     model_kwargs = dict(patch_size=16, embed_dim=768, depth=12, num_heads=12, pre_norm=True, norm_layer=nn.LayerNorm)
-     model = _create_vision_transformer(
-         'vit_base_patch16_clip_384', pretrained=pretrained, **dict(model_kwargs, **kwargs))
-     return model
-
-
- @register_model
- def vit_large_patch14_clip_224(pretrained=False, **kwargs):
-     """ ViT-Large model (ViT-L/14) CLIP image tower
-     """
-     model_kwargs = dict(patch_size=14, embed_dim=1024, depth=24, num_heads=16, pre_norm=True, norm_layer=nn.LayerNorm)
-     model = _create_vision_transformer(
-         'vit_large_patch14_clip_224', pretrained=pretrained, **dict(model_kwargs, **kwargs))
-     return model
-
-
- @register_model
- def vit_large_patch14_clip_336(pretrained=False, **kwargs):
-     """ ViT-Large model (ViT-L/14) CLIP image tower @ 336x336
-     """
-     model_kwargs = dict(patch_size=14, embed_dim=1024, depth=24, num_heads=16, pre_norm=True, norm_layer=nn.LayerNorm)
-     model = _create_vision_transformer(
-         'vit_large_patch14_clip_336', pretrained=pretrained, **dict(model_kwargs, **kwargs))
-     return model
-
-
- @register_model
- def vit_huge_patch14_clip_224(pretrained=False, **kwargs):
-     """ ViT-Huge model (ViT-H/14) CLIP image tower.
-     """
-     model_kwargs = dict(patch_size=14, embed_dim=1280, depth=32, num_heads=16, pre_norm=True, norm_layer=nn.LayerNorm)
-     model = _create_vision_transformer(
-         'vit_huge_patch14_clip_224', pretrained=pretrained, **dict(model_kwargs, **kwargs))
-     return model
-
-
- @register_model
- def vit_huge_patch14_clip_336(pretrained=False, **kwargs):
-     """ ViT-Huge model (ViT-H/14) CLIP image tower @ 336x336
-     """
-     model_kwargs = dict(patch_size=14, embed_dim=1280, depth=32, num_heads=16, pre_norm=True, norm_layer=nn.LayerNorm)
-     model = _create_vision_transformer(
-         'vit_huge_patch14_clip_336', pretrained=pretrained, **dict(model_kwargs, **kwargs))
-     return model
-
-
- @register_model
- def vit_giant_patch14_clip_224(pretrained=False, **kwargs):
-     """ ViT-Giant (little-g) model (ViT-g/14) from `Scaling Vision Transformers` - https://arxiv.org/abs/2106.04560
-     Pretrained weights from CLIP image tower.
-     """
-     model_kwargs = dict(
-         patch_size=14, embed_dim=1408, mlp_ratio=48/11, depth=40, num_heads=16, pre_norm=True, norm_layer=nn.LayerNorm)
-     model = _create_vision_transformer(
-         'vit_giant_patch14_clip_224', pretrained=pretrained, **dict(model_kwargs, **kwargs))
-     return model
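For the CLIP towers, the num_classes values in the cfgs earlier (512/768/1024) are the CLIP projection width, not the transformer width; passing num_classes=0 at creation strips that head and exposes pooled backbone features instead. A hedged sketch:

import timm

# With the projection head removed, the ViT-B/16 tower exposes its 768-dim embed width.
tower = timm.create_model('vit_base_patch16_clip_224.openai', pretrained=False, num_classes=0)
print(tower.num_features)  # 768, independent of the 512-dim CLIP projection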
-
-
- # Experimental models below
-
- @register_model
- def vit_base_patch32_plus_256(pretrained=False, **kwargs):
-     """ ViT-Base (ViT-B/32+)
-     """
-     model_kwargs = dict(patch_size=32, embed_dim=896, depth=12, num_heads=14, init_values=1e-5)
-     model = _create_vision_transformer(
-         'vit_base_patch32_plus_256', pretrained=pretrained, **dict(model_kwargs, **kwargs))
-     return model
-
-
- @register_model
- def vit_base_patch16_plus_240(pretrained=False, **kwargs):
-     """ ViT-Base (ViT-B/16+)
-     """
-     model_kwargs = dict(patch_size=16, embed_dim=896, depth=12, num_heads=14, init_values=1e-5)
-     model = _create_vision_transformer(
-         'vit_base_patch16_plus_240', pretrained=pretrained, **dict(model_kwargs, **kwargs))
-     return model
-
-
- @register_model
- def vit_base_patch16_rpn_224(pretrained=False, **kwargs):
-     """ ViT-Base (ViT-B/16) w/ residual post-norm
-     """
-     model_kwargs = dict(
-         patch_size=16, embed_dim=768, depth=12, num_heads=12, qkv_bias=False, init_values=1e-5,
-         class_token=False, block_fn=ResPostBlock, global_pool='avg')
-     model = _create_vision_transformer(
-         'vit_base_patch16_rpn_224', pretrained=pretrained, **dict(model_kwargs, **kwargs))
-     return model
-
-
- @register_model
- def vit_small_patch16_36x1_224(pretrained=False, **kwargs):
-     """ ViT-Small w/ LayerScale + 36 x 1 (36 block serial) config. Experimental, may remove.
-     Based on `Three things everyone should know about Vision Transformers` - https://arxiv.org/abs/2203.09795
-     Paper focuses on 24x2 + 48x1 for 'Small' width but those are extremely slow.
-     """
-     model_kwargs = dict(patch_size=16, embed_dim=384, depth=36, num_heads=6, init_values=1e-5)
-     model = _create_vision_transformer(
-         'vit_small_patch16_36x1_224', pretrained=pretrained, **dict(model_kwargs, **kwargs))
-     return model
-
-
- @register_model
- def vit_small_patch16_18x2_224(pretrained=False, **kwargs):
-     """ ViT-Small w/ LayerScale + 18 x 2 (36 block parallel) config. Experimental, may remove.
-     Based on `Three things everyone should know about Vision Transformers` - https://arxiv.org/abs/2203.09795
-     Paper focuses on 24x2 + 48x1 for 'Small' width but those are extremely slow.
-     """
-     model_kwargs = dict(
-         patch_size=16, embed_dim=384, depth=18, num_heads=6, init_values=1e-5, block_fn=ParallelBlock)
-     model = _create_vision_transformer(
-         'vit_small_patch16_18x2_224', pretrained=pretrained, **dict(model_kwargs, **kwargs))
-     return model
-
-
- @register_model
- def vit_base_patch16_18x2_224(pretrained=False, **kwargs):
-     """ ViT-Base w/ LayerScale + 18 x 2 (36 block parallel) config. Experimental, may remove.
-     Based on `Three things everyone should know about Vision Transformers` - https://arxiv.org/abs/2203.09795
-     """
-     model_kwargs = dict(patch_size=16, embed_dim=768, depth=18, num_heads=12, init_values=1e-5, block_fn=ParallelBlock)
-     model = _create_vision_transformer(
-         'vit_base_patch16_18x2_224', pretrained=pretrained, **dict(model_kwargs, **kwargs))
-     return model
-
-
- @register_model
- def eva_large_patch14_196(pretrained=False, **kwargs):
-     """ EVA-large model https://arxiv.org/abs/2211.07636 via MAE MIM pretrain"""
-     model_kwargs = dict(patch_size=14, embed_dim=1024, depth=24, num_heads=16, global_pool='avg')
-     model = _create_vision_transformer(
-         'eva_large_patch14_196', pretrained=pretrained, **dict(model_kwargs, **kwargs))
-     return model
-
-
- @register_model
- def eva_large_patch14_336(pretrained=False, **kwargs):
-     """ EVA-large model https://arxiv.org/abs/2211.07636 via MAE MIM pretrain"""
-     model_kwargs = dict(patch_size=14, embed_dim=1024, depth=24, num_heads=16, global_pool='avg')
-     model = _create_vision_transformer('eva_large_patch14_336', pretrained=pretrained, **dict(model_kwargs, **kwargs))
-     return model
-
-
- @register_model
- def flexivit_small(pretrained=False, **kwargs):
-     """ FlexiViT-Small
-     """
-     model_kwargs = dict(patch_size=16, embed_dim=384, depth=12, num_heads=6, no_embed_class=True)
-     model = _create_vision_transformer('flexivit_small', pretrained=pretrained, **dict(model_kwargs, **kwargs))
-     return model
-
-
- @register_model
- def flexivit_base(pretrained=False, **kwargs):
-     """ FlexiViT-Base
-     """
-     model_kwargs = dict(patch_size=16, embed_dim=768, depth=12, num_heads=12, no_embed_class=True)
-     model = _create_vision_transformer('flexivit_base', pretrained=pretrained, **dict(model_kwargs, **kwargs))
-     return model
-
-
- @register_model
- def flexivit_large(pretrained=False, **kwargs):
-     """ FlexiViT-Large
-     """
-     model_kwargs = dict(patch_size=16, embed_dim=1024, depth=24, num_heads=16, no_embed_class=True)
-     model = _create_vision_transformer('flexivit_large', pretrained=pretrained, **dict(model_kwargs, **kwargs))
-     return model
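FlexiViT's premise is patch-size flexibility: the kwargs merge in each entrypoint lets patch_size be overridden at creation time, and the FIXME in _create_vision_transformer notes that pretrained patch embeddings get resized with bilinear interpolation. A sketch of the override under that assumption, with pretrained weight resizing left untested here:

import timm

# hypothetical override of the default 16x16 patches; 240/30 gives a clean 8x8 grid
model = timm.create_model('flexivit_base', pretrained=False, patch_size=30)
print(model.patch_embed.proj.kernel_size)  # (30, 30)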