Hila committed on
Commit
7754b29
1 Parent(s): 9f7f854

init commit

This view is limited to 50 files because it contains too many changes. See raw diff.
Files changed (50)
  1. .gitignore +1 -0
  2. CLS2IDX.py +1000 -0
  3. README.md +124 -13
  4. RobustViT.ipynb +0 -0
  5. SegmentationTest/data/Imagenet.py +74 -0
  6. SegmentationTest/data/VOC.py +372 -0
  7. SegmentationTest/data/__init__.py +0 -0
  8. SegmentationTest/data/imagenet_utils.py +1002 -0
  9. SegmentationTest/data/transforms.py +442 -0
  10. SegmentationTest/imagenet_seg_eval.py +319 -0
  11. SegmentationTest/utils/__init__.py +0 -0
  12. SegmentationTest/utils/confusionmatrix.py +88 -0
  13. SegmentationTest/utils/iou.py +93 -0
  14. SegmentationTest/utils/metric.py +12 -0
  15. SegmentationTest/utils/metrices.py +208 -0
  16. SegmentationTest/utils/parallel.py +260 -0
  17. SegmentationTest/utils/render.py +266 -0
  18. SegmentationTest/utils/saver.py +34 -0
  19. SegmentationTest/utils/summaries.py +11 -0
  20. ViT/ViT.py +308 -0
  21. ViT_new.py → ViT/ViT_new.py +0 -0
  22. ViT/__init__.py +0 -0
  23. ViT/explainer.py +71 -0
  24. ViT/helpers.py +295 -0
  25. ViT/layer_helpers.py +21 -0
  26. ViT/weight_init.py +60 -0
  27. imagenet_ablation_gt.py +590 -0
  28. imagenet_classes.json +1002 -0
  29. imagenet_eval_robustness.py +337 -0
  30. imagenet_eval_robustness_per_class.py +343 -0
  31. imagenet_finetune.py +567 -0
  32. imagenet_finetune_gradmask.py +586 -0
  33. imagenet_finetune_rrr.py +570 -0
  34. imagenet_finetune_tokencut.py +577 -0
  35. label_str_to_imagenet_classes.py +133 -0
  36. objectnet_dataset.py +117 -0
  37. robustness_dataset.py +66 -0
  38. robustness_dataset_per_class.py +65 -0
  39. samples/augreg_base/1_in.png +0 -0
  40. samples/augreg_base/2_in.png +0 -0
  41. samples/augreg_base/3_in.png +0 -0
  42. samples/augreg_base/a.png +0 -0
  43. samples/augreg_base/a_2.png +0 -0
  44. samples/augreg_base/a_3.png +0 -0
  45. samples/catdog.png +0 -0
  46. samples/deit_base/1_in.png +0 -0
  47. samples/deit_base/2_in.png +0 -0
  48. samples/deit_base/3_in.png +0 -0
  49. samples/deit_base/a.png +0 -0
  50. samples/deit_base/a_2.png +0 -0
.gitignore ADDED
@@ -0,0 +1 @@
+ .idea
CLS2IDX.py ADDED
@@ -0,0 +1,1000 @@
1
+ CLS2IDX = {0: 'tench, Tinca tinca',
2
+ 1: 'goldfish, Carassius auratus',
3
+ 2: 'great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias',
4
+ 3: 'tiger shark, Galeocerdo cuvieri',
5
+ 4: 'hammerhead, hammerhead shark',
6
+ 5: 'electric ray, crampfish, numbfish, torpedo',
7
+ 6: 'stingray',
8
+ 7: 'cock',
9
+ 8: 'hen',
10
+ 9: 'ostrich, Struthio camelus',
11
+ 10: 'brambling, Fringilla montifringilla',
12
+ 11: 'goldfinch, Carduelis carduelis',
13
+ 12: 'house finch, linnet, Carpodacus mexicanus',
14
+ 13: 'junco, snowbird',
15
+ 14: 'indigo bunting, indigo finch, indigo bird, Passerina cyanea',
16
+ 15: 'robin, American robin, Turdus migratorius',
17
+ 16: 'bulbul',
18
+ 17: 'jay',
19
+ 18: 'magpie',
20
+ 19: 'chickadee',
21
+ 20: 'water ouzel, dipper',
22
+ 21: 'kite',
23
+ 22: 'bald eagle, American eagle, Haliaeetus leucocephalus',
24
+ 23: 'vulture',
25
+ 24: 'great grey owl, great gray owl, Strix nebulosa',
26
+ 25: 'European fire salamander, Salamandra salamandra',
27
+ 26: 'common newt, Triturus vulgaris',
28
+ 27: 'eft',
29
+ 28: 'spotted salamander, Ambystoma maculatum',
30
+ 29: 'axolotl, mud puppy, Ambystoma mexicanum',
31
+ 30: 'bullfrog, Rana catesbeiana',
32
+ 31: 'tree frog, tree-frog',
33
+ 32: 'tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui',
34
+ 33: 'loggerhead, loggerhead turtle, Caretta caretta',
35
+ 34: 'leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea',
36
+ 35: 'mud turtle',
37
+ 36: 'terrapin',
38
+ 37: 'box turtle, box tortoise',
39
+ 38: 'banded gecko',
40
+ 39: 'common iguana, iguana, Iguana iguana',
41
+ 40: 'American chameleon, anole, Anolis carolinensis',
42
+ 41: 'whiptail, whiptail lizard',
43
+ 42: 'agama',
44
+ 43: 'frilled lizard, Chlamydosaurus kingi',
45
+ 44: 'alligator lizard',
46
+ 45: 'Gila monster, Heloderma suspectum',
47
+ 46: 'green lizard, Lacerta viridis',
48
+ 47: 'African chameleon, Chamaeleo chamaeleon',
49
+ 48: 'Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis',
50
+ 49: 'African crocodile, Nile crocodile, Crocodylus niloticus',
51
+ 50: 'American alligator, Alligator mississipiensis',
52
+ 51: 'triceratops',
53
+ 52: 'thunder snake, worm snake, Carphophis amoenus',
54
+ 53: 'ringneck snake, ring-necked snake, ring snake',
55
+ 54: 'hognose snake, puff adder, sand viper',
56
+ 55: 'green snake, grass snake',
57
+ 56: 'king snake, kingsnake',
58
+ 57: 'garter snake, grass snake',
59
+ 58: 'water snake',
60
+ 59: 'vine snake',
61
+ 60: 'night snake, Hypsiglena torquata',
62
+ 61: 'boa constrictor, Constrictor constrictor',
63
+ 62: 'rock python, rock snake, Python sebae',
64
+ 63: 'Indian cobra, Naja naja',
65
+ 64: 'green mamba',
66
+ 65: 'sea snake',
67
+ 66: 'horned viper, cerastes, sand viper, horned asp, Cerastes cornutus',
68
+ 67: 'diamondback, diamondback rattlesnake, Crotalus adamanteus',
69
+ 68: 'sidewinder, horned rattlesnake, Crotalus cerastes',
70
+ 69: 'trilobite',
71
+ 70: 'harvestman, daddy longlegs, Phalangium opilio',
72
+ 71: 'scorpion',
73
+ 72: 'black and gold garden spider, Argiope aurantia',
74
+ 73: 'barn spider, Araneus cavaticus',
75
+ 74: 'garden spider, Aranea diademata',
76
+ 75: 'black widow, Latrodectus mactans',
77
+ 76: 'tarantula',
78
+ 77: 'wolf spider, hunting spider',
79
+ 78: 'tick',
80
+ 79: 'centipede',
81
+ 80: 'black grouse',
82
+ 81: 'ptarmigan',
83
+ 82: 'ruffed grouse, partridge, Bonasa umbellus',
84
+ 83: 'prairie chicken, prairie grouse, prairie fowl',
85
+ 84: 'peacock',
86
+ 85: 'quail',
87
+ 86: 'partridge',
88
+ 87: 'African grey, African gray, Psittacus erithacus',
89
+ 88: 'macaw',
90
+ 89: 'sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita',
91
+ 90: 'lorikeet',
92
+ 91: 'coucal',
93
+ 92: 'bee eater',
94
+ 93: 'hornbill',
95
+ 94: 'hummingbird',
96
+ 95: 'jacamar',
97
+ 96: 'toucan',
98
+ 97: 'drake',
99
+ 98: 'red-breasted merganser, Mergus serrator',
100
+ 99: 'goose',
101
+ 100: 'black swan, Cygnus atratus',
102
+ 101: 'tusker',
103
+ 102: 'echidna, spiny anteater, anteater',
104
+ 103: 'platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus',
105
+ 104: 'wallaby, brush kangaroo',
106
+ 105: 'koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus',
107
+ 106: 'wombat',
108
+ 107: 'jellyfish',
109
+ 108: 'sea anemone, anemone',
110
+ 109: 'brain coral',
111
+ 110: 'flatworm, platyhelminth',
112
+ 111: 'nematode, nematode worm, roundworm',
113
+ 112: 'conch',
114
+ 113: 'snail',
115
+ 114: 'slug',
116
+ 115: 'sea slug, nudibranch',
117
+ 116: 'chiton, coat-of-mail shell, sea cradle, polyplacophore',
118
+ 117: 'chambered nautilus, pearly nautilus, nautilus',
119
+ 118: 'Dungeness crab, Cancer magister',
120
+ 119: 'rock crab, Cancer irroratus',
121
+ 120: 'fiddler crab',
122
+ 121: 'king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica',
123
+ 122: 'American lobster, Northern lobster, Maine lobster, Homarus americanus',
124
+ 123: 'spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish',
125
+ 124: 'crayfish, crawfish, crawdad, crawdaddy',
126
+ 125: 'hermit crab',
127
+ 126: 'isopod',
128
+ 127: 'white stork, Ciconia ciconia',
129
+ 128: 'black stork, Ciconia nigra',
130
+ 129: 'spoonbill',
131
+ 130: 'flamingo',
132
+ 131: 'little blue heron, Egretta caerulea',
133
+ 132: 'American egret, great white heron, Egretta albus',
134
+ 133: 'bittern',
135
+ 134: 'crane',
136
+ 135: 'limpkin, Aramus pictus',
137
+ 136: 'European gallinule, Porphyrio porphyrio',
138
+ 137: 'American coot, marsh hen, mud hen, water hen, Fulica americana',
139
+ 138: 'bustard',
140
+ 139: 'ruddy turnstone, Arenaria interpres',
141
+ 140: 'red-backed sandpiper, dunlin, Erolia alpina',
142
+ 141: 'redshank, Tringa totanus',
143
+ 142: 'dowitcher',
144
+ 143: 'oystercatcher, oyster catcher',
145
+ 144: 'pelican',
146
+ 145: 'king penguin, Aptenodytes patagonica',
147
+ 146: 'albatross, mollymawk',
148
+ 147: 'grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus',
149
+ 148: 'killer whale, killer, orca, grampus, sea wolf, Orcinus orca',
150
+ 149: 'dugong, Dugong dugon',
151
+ 150: 'sea lion',
152
+ 151: 'Chihuahua',
153
+ 152: 'Japanese spaniel',
154
+ 153: 'Maltese dog, Maltese terrier, Maltese',
155
+ 154: 'Pekinese, Pekingese, Peke',
156
+ 155: 'Shih-Tzu',
157
+ 156: 'Blenheim spaniel',
158
+ 157: 'papillon',
159
+ 158: 'toy terrier',
160
+ 159: 'Rhodesian ridgeback',
161
+ 160: 'Afghan hound, Afghan',
162
+ 161: 'basset, basset hound',
163
+ 162: 'beagle',
164
+ 163: 'bloodhound, sleuthhound',
165
+ 164: 'bluetick',
166
+ 165: 'black-and-tan coonhound',
167
+ 166: 'Walker hound, Walker foxhound',
168
+ 167: 'English foxhound',
169
+ 168: 'redbone',
170
+ 169: 'borzoi, Russian wolfhound',
171
+ 170: 'Irish wolfhound',
172
+ 171: 'Italian greyhound',
173
+ 172: 'whippet',
174
+ 173: 'Ibizan hound, Ibizan Podenco',
175
+ 174: 'Norwegian elkhound, elkhound',
176
+ 175: 'otterhound, otter hound',
177
+ 176: 'Saluki, gazelle hound',
178
+ 177: 'Scottish deerhound, deerhound',
179
+ 178: 'Weimaraner',
180
+ 179: 'Staffordshire bullterrier, Staffordshire bull terrier',
181
+ 180: 'American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier',
182
+ 181: 'Bedlington terrier',
183
+ 182: 'Border terrier',
184
+ 183: 'Kerry blue terrier',
185
+ 184: 'Irish terrier',
186
+ 185: 'Norfolk terrier',
187
+ 186: 'Norwich terrier',
188
+ 187: 'Yorkshire terrier',
189
+ 188: 'wire-haired fox terrier',
190
+ 189: 'Lakeland terrier',
191
+ 190: 'Sealyham terrier, Sealyham',
192
+ 191: 'Airedale, Airedale terrier',
193
+ 192: 'cairn, cairn terrier',
194
+ 193: 'Australian terrier',
195
+ 194: 'Dandie Dinmont, Dandie Dinmont terrier',
196
+ 195: 'Boston bull, Boston terrier',
197
+ 196: 'miniature schnauzer',
198
+ 197: 'giant schnauzer',
199
+ 198: 'standard schnauzer',
200
+ 199: 'Scotch terrier, Scottish terrier, Scottie',
201
+ 200: 'Tibetan terrier, chrysanthemum dog',
202
+ 201: 'silky terrier, Sydney silky',
203
+ 202: 'soft-coated wheaten terrier',
204
+ 203: 'West Highland white terrier',
205
+ 204: 'Lhasa, Lhasa apso',
206
+ 205: 'flat-coated retriever',
207
+ 206: 'curly-coated retriever',
208
+ 207: 'golden retriever',
209
+ 208: 'Labrador retriever',
210
+ 209: 'Chesapeake Bay retriever',
211
+ 210: 'German short-haired pointer',
212
+ 211: 'vizsla, Hungarian pointer',
213
+ 212: 'English setter',
214
+ 213: 'Irish setter, red setter',
215
+ 214: 'Gordon setter',
216
+ 215: 'Brittany spaniel',
217
+ 216: 'clumber, clumber spaniel',
218
+ 217: 'English springer, English springer spaniel',
219
+ 218: 'Welsh springer spaniel',
220
+ 219: 'cocker spaniel, English cocker spaniel, cocker',
221
+ 220: 'Sussex spaniel',
222
+ 221: 'Irish water spaniel',
223
+ 222: 'kuvasz',
224
+ 223: 'schipperke',
225
+ 224: 'groenendael',
226
+ 225: 'malinois',
227
+ 226: 'briard',
228
+ 227: 'kelpie',
229
+ 228: 'komondor',
230
+ 229: 'Old English sheepdog, bobtail',
231
+ 230: 'Shetland sheepdog, Shetland sheep dog, Shetland',
232
+ 231: 'collie',
233
+ 232: 'Border collie',
234
+ 233: 'Bouvier des Flandres, Bouviers des Flandres',
235
+ 234: 'Rottweiler',
236
+ 235: 'German shepherd, German shepherd dog, German police dog, alsatian',
237
+ 236: 'Doberman, Doberman pinscher',
238
+ 237: 'miniature pinscher',
239
+ 238: 'Greater Swiss Mountain dog',
240
+ 239: 'Bernese mountain dog',
241
+ 240: 'Appenzeller',
242
+ 241: 'EntleBucher',
243
+ 242: 'boxer',
244
+ 243: 'bull mastiff',
245
+ 244: 'Tibetan mastiff',
246
+ 245: 'French bulldog',
247
+ 246: 'Great Dane',
248
+ 247: 'Saint Bernard, St Bernard',
249
+ 248: 'Eskimo dog, husky',
250
+ 249: 'malamute, malemute, Alaskan malamute',
251
+ 250: 'Siberian husky',
252
+ 251: 'dalmatian, coach dog, carriage dog',
253
+ 252: 'affenpinscher, monkey pinscher, monkey dog',
254
+ 253: 'basenji',
255
+ 254: 'pug, pug-dog',
256
+ 255: 'Leonberg',
257
+ 256: 'Newfoundland, Newfoundland dog',
258
+ 257: 'Great Pyrenees',
259
+ 258: 'Samoyed, Samoyede',
260
+ 259: 'Pomeranian',
261
+ 260: 'chow, chow chow',
262
+ 261: 'keeshond',
263
+ 262: 'Brabancon griffon',
264
+ 263: 'Pembroke, Pembroke Welsh corgi',
265
+ 264: 'Cardigan, Cardigan Welsh corgi',
266
+ 265: 'toy poodle',
267
+ 266: 'miniature poodle',
268
+ 267: 'standard poodle',
269
+ 268: 'Mexican hairless',
270
+ 269: 'timber wolf, grey wolf, gray wolf, Canis lupus',
271
+ 270: 'white wolf, Arctic wolf, Canis lupus tundrarum',
272
+ 271: 'red wolf, maned wolf, Canis rufus, Canis niger',
273
+ 272: 'coyote, prairie wolf, brush wolf, Canis latrans',
274
+ 273: 'dingo, warrigal, warragal, Canis dingo',
275
+ 274: 'dhole, Cuon alpinus',
276
+ 275: 'African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus',
277
+ 276: 'hyena, hyaena',
278
+ 277: 'red fox, Vulpes vulpes',
279
+ 278: 'kit fox, Vulpes macrotis',
280
+ 279: 'Arctic fox, white fox, Alopex lagopus',
281
+ 280: 'grey fox, gray fox, Urocyon cinereoargenteus',
282
+ 281: 'tabby, tabby cat',
283
+ 282: 'tiger cat',
284
+ 283: 'Persian cat',
285
+ 284: 'Siamese cat, Siamese',
286
+ 285: 'Egyptian cat',
287
+ 286: 'cougar, puma, catamount, mountain lion, painter, panther, Felis concolor',
288
+ 287: 'lynx, catamount',
289
+ 288: 'leopard, Panthera pardus',
290
+ 289: 'snow leopard, ounce, Panthera uncia',
291
+ 290: 'jaguar, panther, Panthera onca, Felis onca',
292
+ 291: 'lion, king of beasts, Panthera leo',
293
+ 292: 'tiger, Panthera tigris',
294
+ 293: 'cheetah, chetah, Acinonyx jubatus',
295
+ 294: 'brown bear, bruin, Ursus arctos',
296
+ 295: 'American black bear, black bear, Ursus americanus, Euarctos americanus',
297
+ 296: 'ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus',
298
+ 297: 'sloth bear, Melursus ursinus, Ursus ursinus',
299
+ 298: 'mongoose',
300
+ 299: 'meerkat, mierkat',
301
+ 300: 'tiger beetle',
302
+ 301: 'ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle',
303
+ 302: 'ground beetle, carabid beetle',
304
+ 303: 'long-horned beetle, longicorn, longicorn beetle',
305
+ 304: 'leaf beetle, chrysomelid',
306
+ 305: 'dung beetle',
307
+ 306: 'rhinoceros beetle',
308
+ 307: 'weevil',
309
+ 308: 'fly',
310
+ 309: 'bee',
311
+ 310: 'ant, emmet, pismire',
312
+ 311: 'grasshopper, hopper',
313
+ 312: 'cricket',
314
+ 313: 'walking stick, walkingstick, stick insect',
315
+ 314: 'cockroach, roach',
316
+ 315: 'mantis, mantid',
317
+ 316: 'cicada, cicala',
318
+ 317: 'leafhopper',
319
+ 318: 'lacewing, lacewing fly',
320
+ 319: "dragonfly, darning needle, devil's darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk",
321
+ 320: 'damselfly',
322
+ 321: 'admiral',
323
+ 322: 'ringlet, ringlet butterfly',
324
+ 323: 'monarch, monarch butterfly, milkweed butterfly, Danaus plexippus',
325
+ 324: 'cabbage butterfly',
326
+ 325: 'sulphur butterfly, sulfur butterfly',
327
+ 326: 'lycaenid, lycaenid butterfly',
328
+ 327: 'starfish, sea star',
329
+ 328: 'sea urchin',
330
+ 329: 'sea cucumber, holothurian',
331
+ 330: 'wood rabbit, cottontail, cottontail rabbit',
332
+ 331: 'hare',
333
+ 332: 'Angora, Angora rabbit',
334
+ 333: 'hamster',
335
+ 334: 'porcupine, hedgehog',
336
+ 335: 'fox squirrel, eastern fox squirrel, Sciurus niger',
337
+ 336: 'marmot',
338
+ 337: 'beaver',
339
+ 338: 'guinea pig, Cavia cobaya',
340
+ 339: 'sorrel',
341
+ 340: 'zebra',
342
+ 341: 'hog, pig, grunter, squealer, Sus scrofa',
343
+ 342: 'wild boar, boar, Sus scrofa',
344
+ 343: 'warthog',
345
+ 344: 'hippopotamus, hippo, river horse, Hippopotamus amphibius',
346
+ 345: 'ox',
347
+ 346: 'water buffalo, water ox, Asiatic buffalo, Bubalus bubalis',
348
+ 347: 'bison',
349
+ 348: 'ram, tup',
350
+ 349: 'bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis',
351
+ 350: 'ibex, Capra ibex',
352
+ 351: 'hartebeest',
353
+ 352: 'impala, Aepyceros melampus',
354
+ 353: 'gazelle',
355
+ 354: 'Arabian camel, dromedary, Camelus dromedarius',
356
+ 355: 'llama',
357
+ 356: 'weasel',
358
+ 357: 'mink',
359
+ 358: 'polecat, fitch, foulmart, foumart, Mustela putorius',
360
+ 359: 'black-footed ferret, ferret, Mustela nigripes',
361
+ 360: 'otter',
362
+ 361: 'skunk, polecat, wood pussy',
363
+ 362: 'badger',
364
+ 363: 'armadillo',
365
+ 364: 'three-toed sloth, ai, Bradypus tridactylus',
366
+ 365: 'orangutan, orang, orangutang, Pongo pygmaeus',
367
+ 366: 'gorilla, Gorilla gorilla',
368
+ 367: 'chimpanzee, chimp, Pan troglodytes',
369
+ 368: 'gibbon, Hylobates lar',
370
+ 369: 'siamang, Hylobates syndactylus, Symphalangus syndactylus',
371
+ 370: 'guenon, guenon monkey',
372
+ 371: 'patas, hussar monkey, Erythrocebus patas',
373
+ 372: 'baboon',
374
+ 373: 'macaque',
375
+ 374: 'langur',
376
+ 375: 'colobus, colobus monkey',
377
+ 376: 'proboscis monkey, Nasalis larvatus',
378
+ 377: 'marmoset',
379
+ 378: 'capuchin, ringtail, Cebus capucinus',
380
+ 379: 'howler monkey, howler',
381
+ 380: 'titi, titi monkey',
382
+ 381: 'spider monkey, Ateles geoffroyi',
383
+ 382: 'squirrel monkey, Saimiri sciureus',
384
+ 383: 'Madagascar cat, ring-tailed lemur, Lemur catta',
385
+ 384: 'indri, indris, Indri indri, Indri brevicaudatus',
386
+ 385: 'Indian elephant, Elephas maximus',
387
+ 386: 'African elephant, Loxodonta africana',
388
+ 387: 'lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens',
389
+ 388: 'giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca',
390
+ 389: 'barracouta, snoek',
391
+ 390: 'eel',
392
+ 391: 'coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch',
393
+ 392: 'rock beauty, Holocanthus tricolor',
394
+ 393: 'anemone fish',
395
+ 394: 'sturgeon',
396
+ 395: 'gar, garfish, garpike, billfish, Lepisosteus osseus',
397
+ 396: 'lionfish',
398
+ 397: 'puffer, pufferfish, blowfish, globefish',
399
+ 398: 'abacus',
400
+ 399: 'abaya',
401
+ 400: "academic gown, academic robe, judge's robe",
402
+ 401: 'accordion, piano accordion, squeeze box',
403
+ 402: 'acoustic guitar',
404
+ 403: 'aircraft carrier, carrier, flattop, attack aircraft carrier',
405
+ 404: 'airliner',
406
+ 405: 'airship, dirigible',
407
+ 406: 'altar',
408
+ 407: 'ambulance',
409
+ 408: 'amphibian, amphibious vehicle',
410
+ 409: 'analog clock',
411
+ 410: 'apiary, bee house',
412
+ 411: 'apron',
413
+ 412: 'ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin',
414
+ 413: 'assault rifle, assault gun',
415
+ 414: 'backpack, back pack, knapsack, packsack, rucksack, haversack',
416
+ 415: 'bakery, bakeshop, bakehouse',
417
+ 416: 'balance beam, beam',
418
+ 417: 'balloon',
419
+ 418: 'ballpoint, ballpoint pen, ballpen, Biro',
420
+ 419: 'Band Aid',
421
+ 420: 'banjo',
422
+ 421: 'bannister, banister, balustrade, balusters, handrail',
423
+ 422: 'barbell',
424
+ 423: 'barber chair',
425
+ 424: 'barbershop',
426
+ 425: 'barn',
427
+ 426: 'barometer',
428
+ 427: 'barrel, cask',
429
+ 428: 'barrow, garden cart, lawn cart, wheelbarrow',
430
+ 429: 'baseball',
431
+ 430: 'basketball',
432
+ 431: 'bassinet',
433
+ 432: 'bassoon',
434
+ 433: 'bathing cap, swimming cap',
435
+ 434: 'bath towel',
436
+ 435: 'bathtub, bathing tub, bath, tub',
437
+ 436: 'beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon',
438
+ 437: 'beacon, lighthouse, beacon light, pharos',
439
+ 438: 'beaker',
440
+ 439: 'bearskin, busby, shako',
441
+ 440: 'beer bottle',
442
+ 441: 'beer glass',
443
+ 442: 'bell cote, bell cot',
444
+ 443: 'bib',
445
+ 444: 'bicycle-built-for-two, tandem bicycle, tandem',
446
+ 445: 'bikini, two-piece',
447
+ 446: 'binder, ring-binder',
448
+ 447: 'binoculars, field glasses, opera glasses',
449
+ 448: 'birdhouse',
450
+ 449: 'boathouse',
451
+ 450: 'bobsled, bobsleigh, bob',
452
+ 451: 'bolo tie, bolo, bola tie, bola',
453
+ 452: 'bonnet, poke bonnet',
454
+ 453: 'bookcase',
455
+ 454: 'bookshop, bookstore, bookstall',
456
+ 455: 'bottlecap',
457
+ 456: 'bow',
458
+ 457: 'bow tie, bow-tie, bowtie',
459
+ 458: 'brass, memorial tablet, plaque',
460
+ 459: 'brassiere, bra, bandeau',
461
+ 460: 'breakwater, groin, groyne, mole, bulwark, seawall, jetty',
462
+ 461: 'breastplate, aegis, egis',
463
+ 462: 'broom',
464
+ 463: 'bucket, pail',
465
+ 464: 'buckle',
466
+ 465: 'bulletproof vest',
467
+ 466: 'bullet train, bullet',
468
+ 467: 'butcher shop, meat market',
469
+ 468: 'cab, hack, taxi, taxicab',
470
+ 469: 'caldron, cauldron',
471
+ 470: 'candle, taper, wax light',
472
+ 471: 'cannon',
473
+ 472: 'canoe',
474
+ 473: 'can opener, tin opener',
475
+ 474: 'cardigan',
476
+ 475: 'car mirror',
477
+ 476: 'carousel, carrousel, merry-go-round, roundabout, whirligig',
478
+ 477: "carpenter's kit, tool kit",
479
+ 478: 'carton',
480
+ 479: 'car wheel',
481
+ 480: 'cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM',
482
+ 481: 'cassette',
483
+ 482: 'cassette player',
484
+ 483: 'castle',
485
+ 484: 'catamaran',
486
+ 485: 'CD player',
487
+ 486: 'cello, violoncello',
488
+ 487: 'cellular telephone, cellular phone, cellphone, cell, mobile phone',
489
+ 488: 'chain',
490
+ 489: 'chainlink fence',
491
+ 490: 'chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour',
492
+ 491: 'chain saw, chainsaw',
493
+ 492: 'chest',
494
+ 493: 'chiffonier, commode',
495
+ 494: 'chime, bell, gong',
496
+ 495: 'china cabinet, china closet',
497
+ 496: 'Christmas stocking',
498
+ 497: 'church, church building',
499
+ 498: 'cinema, movie theater, movie theatre, movie house, picture palace',
500
+ 499: 'cleaver, meat cleaver, chopper',
501
+ 500: 'cliff dwelling',
502
+ 501: 'cloak',
503
+ 502: 'clog, geta, patten, sabot',
504
+ 503: 'cocktail shaker',
505
+ 504: 'coffee mug',
506
+ 505: 'coffeepot',
507
+ 506: 'coil, spiral, volute, whorl, helix',
508
+ 507: 'combination lock',
509
+ 508: 'computer keyboard, keypad',
510
+ 509: 'confectionery, confectionary, candy store',
511
+ 510: 'container ship, containership, container vessel',
512
+ 511: 'convertible',
513
+ 512: 'corkscrew, bottle screw',
514
+ 513: 'cornet, horn, trumpet, trump',
515
+ 514: 'cowboy boot',
516
+ 515: 'cowboy hat, ten-gallon hat',
517
+ 516: 'cradle',
518
+ 517: 'crane',
519
+ 518: 'crash helmet',
520
+ 519: 'crate',
521
+ 520: 'crib, cot',
522
+ 521: 'Crock Pot',
523
+ 522: 'croquet ball',
524
+ 523: 'crutch',
525
+ 524: 'cuirass',
526
+ 525: 'dam, dike, dyke',
527
+ 526: 'desk',
528
+ 527: 'desktop computer',
529
+ 528: 'dial telephone, dial phone',
530
+ 529: 'diaper, nappy, napkin',
531
+ 530: 'digital clock',
532
+ 531: 'digital watch',
533
+ 532: 'dining table, board',
534
+ 533: 'dishrag, dishcloth',
535
+ 534: 'dishwasher, dish washer, dishwashing machine',
536
+ 535: 'disk brake, disc brake',
537
+ 536: 'dock, dockage, docking facility',
538
+ 537: 'dogsled, dog sled, dog sleigh',
539
+ 538: 'dome',
540
+ 539: 'doormat, welcome mat',
541
+ 540: 'drilling platform, offshore rig',
542
+ 541: 'drum, membranophone, tympan',
543
+ 542: 'drumstick',
544
+ 543: 'dumbbell',
545
+ 544: 'Dutch oven',
546
+ 545: 'electric fan, blower',
547
+ 546: 'electric guitar',
548
+ 547: 'electric locomotive',
549
+ 548: 'entertainment center',
550
+ 549: 'envelope',
551
+ 550: 'espresso maker',
552
+ 551: 'face powder',
553
+ 552: 'feather boa, boa',
554
+ 553: 'file, file cabinet, filing cabinet',
555
+ 554: 'fireboat',
556
+ 555: 'fire engine, fire truck',
557
+ 556: 'fire screen, fireguard',
558
+ 557: 'flagpole, flagstaff',
559
+ 558: 'flute, transverse flute',
560
+ 559: 'folding chair',
561
+ 560: 'football helmet',
562
+ 561: 'forklift',
563
+ 562: 'fountain',
564
+ 563: 'fountain pen',
565
+ 564: 'four-poster',
566
+ 565: 'freight car',
567
+ 566: 'French horn, horn',
568
+ 567: 'frying pan, frypan, skillet',
569
+ 568: 'fur coat',
570
+ 569: 'garbage truck, dustcart',
571
+ 570: 'gasmask, respirator, gas helmet',
572
+ 571: 'gas pump, gasoline pump, petrol pump, island dispenser',
573
+ 572: 'goblet',
574
+ 573: 'go-kart',
575
+ 574: 'golf ball',
576
+ 575: 'golfcart, golf cart',
577
+ 576: 'gondola',
578
+ 577: 'gong, tam-tam',
579
+ 578: 'gown',
580
+ 579: 'grand piano, grand',
581
+ 580: 'greenhouse, nursery, glasshouse',
582
+ 581: 'grille, radiator grille',
583
+ 582: 'grocery store, grocery, food market, market',
584
+ 583: 'guillotine',
585
+ 584: 'hair slide',
586
+ 585: 'hair spray',
587
+ 586: 'half track',
588
+ 587: 'hammer',
589
+ 588: 'hamper',
590
+ 589: 'hand blower, blow dryer, blow drier, hair dryer, hair drier',
591
+ 590: 'hand-held computer, hand-held microcomputer',
592
+ 591: 'handkerchief, hankie, hanky, hankey',
593
+ 592: 'hard disc, hard disk, fixed disk',
594
+ 593: 'harmonica, mouth organ, harp, mouth harp',
595
+ 594: 'harp',
596
+ 595: 'harvester, reaper',
597
+ 596: 'hatchet',
598
+ 597: 'holster',
599
+ 598: 'home theater, home theatre',
600
+ 599: 'honeycomb',
601
+ 600: 'hook, claw',
602
+ 601: 'hoopskirt, crinoline',
603
+ 602: 'horizontal bar, high bar',
604
+ 603: 'horse cart, horse-cart',
605
+ 604: 'hourglass',
606
+ 605: 'iPod',
607
+ 606: 'iron, smoothing iron',
608
+ 607: "jack-o'-lantern",
609
+ 608: 'jean, blue jean, denim',
610
+ 609: 'jeep, landrover',
611
+ 610: 'jersey, T-shirt, tee shirt',
612
+ 611: 'jigsaw puzzle',
613
+ 612: 'jinrikisha, ricksha, rickshaw',
614
+ 613: 'joystick',
615
+ 614: 'kimono',
616
+ 615: 'knee pad',
617
+ 616: 'knot',
618
+ 617: 'lab coat, laboratory coat',
619
+ 618: 'ladle',
620
+ 619: 'lampshade, lamp shade',
621
+ 620: 'laptop, laptop computer',
622
+ 621: 'lawn mower, mower',
623
+ 622: 'lens cap, lens cover',
624
+ 623: 'letter opener, paper knife, paperknife',
625
+ 624: 'library',
626
+ 625: 'lifeboat',
627
+ 626: 'lighter, light, igniter, ignitor',
628
+ 627: 'limousine, limo',
629
+ 628: 'liner, ocean liner',
630
+ 629: 'lipstick, lip rouge',
631
+ 630: 'Loafer',
632
+ 631: 'lotion',
633
+ 632: 'loudspeaker, speaker, speaker unit, loudspeaker system, speaker system',
634
+ 633: "loupe, jeweler's loupe",
635
+ 634: 'lumbermill, sawmill',
636
+ 635: 'magnetic compass',
637
+ 636: 'mailbag, postbag',
638
+ 637: 'mailbox, letter box',
639
+ 638: 'maillot',
640
+ 639: 'maillot, tank suit',
641
+ 640: 'manhole cover',
642
+ 641: 'maraca',
643
+ 642: 'marimba, xylophone',
644
+ 643: 'mask',
645
+ 644: 'matchstick',
646
+ 645: 'maypole',
647
+ 646: 'maze, labyrinth',
648
+ 647: 'measuring cup',
649
+ 648: 'medicine chest, medicine cabinet',
650
+ 649: 'megalith, megalithic structure',
651
+ 650: 'microphone, mike',
652
+ 651: 'microwave, microwave oven',
653
+ 652: 'military uniform',
654
+ 653: 'milk can',
655
+ 654: 'minibus',
656
+ 655: 'miniskirt, mini',
657
+ 656: 'minivan',
658
+ 657: 'missile',
659
+ 658: 'mitten',
660
+ 659: 'mixing bowl',
661
+ 660: 'mobile home, manufactured home',
662
+ 661: 'Model T',
663
+ 662: 'modem',
664
+ 663: 'monastery',
665
+ 664: 'monitor',
666
+ 665: 'moped',
667
+ 666: 'mortar',
668
+ 667: 'mortarboard',
669
+ 668: 'mosque',
670
+ 669: 'mosquito net',
671
+ 670: 'motor scooter, scooter',
672
+ 671: 'mountain bike, all-terrain bike, off-roader',
673
+ 672: 'mountain tent',
674
+ 673: 'mouse, computer mouse',
675
+ 674: 'mousetrap',
676
+ 675: 'moving van',
677
+ 676: 'muzzle',
678
+ 677: 'nail',
679
+ 678: 'neck brace',
680
+ 679: 'necklace',
681
+ 680: 'nipple',
682
+ 681: 'notebook, notebook computer',
683
+ 682: 'obelisk',
684
+ 683: 'oboe, hautboy, hautbois',
685
+ 684: 'ocarina, sweet potato',
686
+ 685: 'odometer, hodometer, mileometer, milometer',
687
+ 686: 'oil filter',
688
+ 687: 'organ, pipe organ',
689
+ 688: 'oscilloscope, scope, cathode-ray oscilloscope, CRO',
690
+ 689: 'overskirt',
691
+ 690: 'oxcart',
692
+ 691: 'oxygen mask',
693
+ 692: 'packet',
694
+ 693: 'paddle, boat paddle',
695
+ 694: 'paddlewheel, paddle wheel',
696
+ 695: 'padlock',
697
+ 696: 'paintbrush',
698
+ 697: "pajama, pyjama, pj's, jammies",
699
+ 698: 'palace',
700
+ 699: 'panpipe, pandean pipe, syrinx',
701
+ 700: 'paper towel',
702
+ 701: 'parachute, chute',
703
+ 702: 'parallel bars, bars',
704
+ 703: 'park bench',
705
+ 704: 'parking meter',
706
+ 705: 'passenger car, coach, carriage',
707
+ 706: 'patio, terrace',
708
+ 707: 'pay-phone, pay-station',
709
+ 708: 'pedestal, plinth, footstall',
710
+ 709: 'pencil box, pencil case',
711
+ 710: 'pencil sharpener',
712
+ 711: 'perfume, essence',
713
+ 712: 'Petri dish',
714
+ 713: 'photocopier',
715
+ 714: 'pick, plectrum, plectron',
716
+ 715: 'pickelhaube',
717
+ 716: 'picket fence, paling',
718
+ 717: 'pickup, pickup truck',
719
+ 718: 'pier',
720
+ 719: 'piggy bank, penny bank',
721
+ 720: 'pill bottle',
722
+ 721: 'pillow',
723
+ 722: 'ping-pong ball',
724
+ 723: 'pinwheel',
725
+ 724: 'pirate, pirate ship',
726
+ 725: 'pitcher, ewer',
727
+ 726: "plane, carpenter's plane, woodworking plane",
728
+ 727: 'planetarium',
729
+ 728: 'plastic bag',
730
+ 729: 'plate rack',
731
+ 730: 'plow, plough',
732
+ 731: "plunger, plumber's helper",
733
+ 732: 'Polaroid camera, Polaroid Land camera',
734
+ 733: 'pole',
735
+ 734: 'police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria',
736
+ 735: 'poncho',
737
+ 736: 'pool table, billiard table, snooker table',
738
+ 737: 'pop bottle, soda bottle',
739
+ 738: 'pot, flowerpot',
740
+ 739: "potter's wheel",
741
+ 740: 'power drill',
742
+ 741: 'prayer rug, prayer mat',
743
+ 742: 'printer',
744
+ 743: 'prison, prison house',
745
+ 744: 'projectile, missile',
746
+ 745: 'projector',
747
+ 746: 'puck, hockey puck',
748
+ 747: 'punching bag, punch bag, punching ball, punchball',
749
+ 748: 'purse',
750
+ 749: 'quill, quill pen',
751
+ 750: 'quilt, comforter, comfort, puff',
752
+ 751: 'racer, race car, racing car',
753
+ 752: 'racket, racquet',
754
+ 753: 'radiator',
755
+ 754: 'radio, wireless',
756
+ 755: 'radio telescope, radio reflector',
757
+ 756: 'rain barrel',
758
+ 757: 'recreational vehicle, RV, R.V.',
759
+ 758: 'reel',
760
+ 759: 'reflex camera',
761
+ 760: 'refrigerator, icebox',
762
+ 761: 'remote control, remote',
763
+ 762: 'restaurant, eating house, eating place, eatery',
764
+ 763: 'revolver, six-gun, six-shooter',
765
+ 764: 'rifle',
766
+ 765: 'rocking chair, rocker',
767
+ 766: 'rotisserie',
768
+ 767: 'rubber eraser, rubber, pencil eraser',
769
+ 768: 'rugby ball',
770
+ 769: 'rule, ruler',
771
+ 770: 'running shoe',
772
+ 771: 'safe',
773
+ 772: 'safety pin',
774
+ 773: 'saltshaker, salt shaker',
775
+ 774: 'sandal',
776
+ 775: 'sarong',
777
+ 776: 'sax, saxophone',
778
+ 777: 'scabbard',
779
+ 778: 'scale, weighing machine',
780
+ 779: 'school bus',
781
+ 780: 'schooner',
782
+ 781: 'scoreboard',
783
+ 782: 'screen, CRT screen',
784
+ 783: 'screw',
785
+ 784: 'screwdriver',
786
+ 785: 'seat belt, seatbelt',
787
+ 786: 'sewing machine',
788
+ 787: 'shield, buckler',
789
+ 788: 'shoe shop, shoe-shop, shoe store',
790
+ 789: 'shoji',
791
+ 790: 'shopping basket',
792
+ 791: 'shopping cart',
793
+ 792: 'shovel',
794
+ 793: 'shower cap',
795
+ 794: 'shower curtain',
796
+ 795: 'ski',
797
+ 796: 'ski mask',
798
+ 797: 'sleeping bag',
799
+ 798: 'slide rule, slipstick',
800
+ 799: 'sliding door',
801
+ 800: 'slot, one-armed bandit',
802
+ 801: 'snorkel',
803
+ 802: 'snowmobile',
804
+ 803: 'snowplow, snowplough',
805
+ 804: 'soap dispenser',
806
+ 805: 'soccer ball',
807
+ 806: 'sock',
808
+ 807: 'solar dish, solar collector, solar furnace',
809
+ 808: 'sombrero',
810
+ 809: 'soup bowl',
811
+ 810: 'space bar',
812
+ 811: 'space heater',
813
+ 812: 'space shuttle',
814
+ 813: 'spatula',
815
+ 814: 'speedboat',
816
+ 815: "spider web, spider's web",
817
+ 816: 'spindle',
818
+ 817: 'sports car, sport car',
819
+ 818: 'spotlight, spot',
820
+ 819: 'stage',
821
+ 820: 'steam locomotive',
822
+ 821: 'steel arch bridge',
823
+ 822: 'steel drum',
824
+ 823: 'stethoscope',
825
+ 824: 'stole',
826
+ 825: 'stone wall',
827
+ 826: 'stopwatch, stop watch',
828
+ 827: 'stove',
829
+ 828: 'strainer',
830
+ 829: 'streetcar, tram, tramcar, trolley, trolley car',
831
+ 830: 'stretcher',
832
+ 831: 'studio couch, day bed',
833
+ 832: 'stupa, tope',
834
+ 833: 'submarine, pigboat, sub, U-boat',
835
+ 834: 'suit, suit of clothes',
836
+ 835: 'sundial',
837
+ 836: 'sunglass',
838
+ 837: 'sunglasses, dark glasses, shades',
839
+ 838: 'sunscreen, sunblock, sun blocker',
840
+ 839: 'suspension bridge',
841
+ 840: 'swab, swob, mop',
842
+ 841: 'sweatshirt',
843
+ 842: 'swimming trunks, bathing trunks',
844
+ 843: 'swing',
845
+ 844: 'switch, electric switch, electrical switch',
846
+ 845: 'syringe',
847
+ 846: 'table lamp',
848
+ 847: 'tank, army tank, armored combat vehicle, armoured combat vehicle',
849
+ 848: 'tape player',
850
+ 849: 'teapot',
851
+ 850: 'teddy, teddy bear',
852
+ 851: 'television, television system',
853
+ 852: 'tennis ball',
854
+ 853: 'thatch, thatched roof',
855
+ 854: 'theater curtain, theatre curtain',
856
+ 855: 'thimble',
857
+ 856: 'thresher, thrasher, threshing machine',
858
+ 857: 'throne',
859
+ 858: 'tile roof',
860
+ 859: 'toaster',
861
+ 860: 'tobacco shop, tobacconist shop, tobacconist',
862
+ 861: 'toilet seat',
863
+ 862: 'torch',
864
+ 863: 'totem pole',
865
+ 864: 'tow truck, tow car, wrecker',
866
+ 865: 'toyshop',
867
+ 866: 'tractor',
868
+ 867: 'trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi',
869
+ 868: 'tray',
870
+ 869: 'trench coat',
871
+ 870: 'tricycle, trike, velocipede',
872
+ 871: 'trimaran',
873
+ 872: 'tripod',
874
+ 873: 'triumphal arch',
875
+ 874: 'trolleybus, trolley coach, trackless trolley',
876
+ 875: 'trombone',
877
+ 876: 'tub, vat',
878
+ 877: 'turnstile',
879
+ 878: 'typewriter keyboard',
880
+ 879: 'umbrella',
881
+ 880: 'unicycle, monocycle',
882
+ 881: 'upright, upright piano',
883
+ 882: 'vacuum, vacuum cleaner',
884
+ 883: 'vase',
885
+ 884: 'vault',
886
+ 885: 'velvet',
887
+ 886: 'vending machine',
888
+ 887: 'vestment',
889
+ 888: 'viaduct',
890
+ 889: 'violin, fiddle',
891
+ 890: 'volleyball',
892
+ 891: 'waffle iron',
893
+ 892: 'wall clock',
894
+ 893: 'wallet, billfold, notecase, pocketbook',
895
+ 894: 'wardrobe, closet, press',
896
+ 895: 'warplane, military plane',
897
+ 896: 'washbasin, handbasin, washbowl, lavabo, wash-hand basin',
898
+ 897: 'washer, automatic washer, washing machine',
899
+ 898: 'water bottle',
900
+ 899: 'water jug',
901
+ 900: 'water tower',
902
+ 901: 'whiskey jug',
903
+ 902: 'whistle',
904
+ 903: 'wig',
905
+ 904: 'window screen',
906
+ 905: 'window shade',
907
+ 906: 'Windsor tie',
908
+ 907: 'wine bottle',
909
+ 908: 'wing',
910
+ 909: 'wok',
911
+ 910: 'wooden spoon',
912
+ 911: 'wool, woolen, woollen',
913
+ 912: 'worm fence, snake fence, snake-rail fence, Virginia fence',
914
+ 913: 'wreck',
915
+ 914: 'yawl',
916
+ 915: 'yurt',
917
+ 916: 'web site, website, internet site, site',
918
+ 917: 'comic book',
919
+ 918: 'crossword puzzle, crossword',
920
+ 919: 'street sign',
921
+ 920: 'traffic light, traffic signal, stoplight',
922
+ 921: 'book jacket, dust cover, dust jacket, dust wrapper',
923
+ 922: 'menu',
924
+ 923: 'plate',
925
+ 924: 'guacamole',
926
+ 925: 'consomme',
927
+ 926: 'hot pot, hotpot',
928
+ 927: 'trifle',
929
+ 928: 'ice cream, icecream',
930
+ 929: 'ice lolly, lolly, lollipop, popsicle',
931
+ 930: 'French loaf',
932
+ 931: 'bagel, beigel',
933
+ 932: 'pretzel',
934
+ 933: 'cheeseburger',
935
+ 934: 'hotdog, hot dog, red hot',
936
+ 935: 'mashed potato',
937
+ 936: 'head cabbage',
938
+ 937: 'broccoli',
939
+ 938: 'cauliflower',
940
+ 939: 'zucchini, courgette',
941
+ 940: 'spaghetti squash',
942
+ 941: 'acorn squash',
943
+ 942: 'butternut squash',
944
+ 943: 'cucumber, cuke',
945
+ 944: 'artichoke, globe artichoke',
946
+ 945: 'bell pepper',
947
+ 946: 'cardoon',
948
+ 947: 'mushroom',
949
+ 948: 'Granny Smith',
950
+ 949: 'strawberry',
951
+ 950: 'orange',
952
+ 951: 'lemon',
953
+ 952: 'fig',
954
+ 953: 'pineapple, ananas',
955
+ 954: 'banana',
956
+ 955: 'jackfruit, jak, jack',
957
+ 956: 'custard apple',
958
+ 957: 'pomegranate',
959
+ 958: 'hay',
960
+ 959: 'carbonara',
961
+ 960: 'chocolate sauce, chocolate syrup',
962
+ 961: 'dough',
963
+ 962: 'meat loaf, meatloaf',
964
+ 963: 'pizza, pizza pie',
965
+ 964: 'potpie',
966
+ 965: 'burrito',
967
+ 966: 'red wine',
968
+ 967: 'espresso',
969
+ 968: 'cup',
970
+ 969: 'eggnog',
971
+ 970: 'alp',
972
+ 971: 'bubble',
973
+ 972: 'cliff, drop, drop-off',
974
+ 973: 'coral reef',
975
+ 974: 'geyser',
976
+ 975: 'lakeside, lakeshore',
977
+ 976: 'promontory, headland, head, foreland',
978
+ 977: 'sandbar, sand bar',
979
+ 978: 'seashore, coast, seacoast, sea-coast',
980
+ 979: 'valley, vale',
981
+ 980: 'volcano',
982
+ 981: 'ballplayer, baseball player',
983
+ 982: 'groom, bridegroom',
984
+ 983: 'scuba diver',
985
+ 984: 'rapeseed',
986
+ 985: 'daisy',
987
+ 986: "yellow lady's slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum",
988
+ 987: 'corn',
989
+ 988: 'acorn',
990
+ 989: 'hip, rose hip, rosehip',
991
+ 990: 'buckeye, horse chestnut, conker',
992
+ 991: 'coral fungus',
993
+ 992: 'agaric',
994
+ 993: 'gyromitra',
995
+ 994: 'stinkhorn, carrion fungus',
996
+ 995: 'earthstar',
997
+ 996: 'hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa',
998
+ 997: 'bolete',
999
+ 998: 'ear, spike, capitulum',
1000
+ 999: 'toilet tissue, toilet paper, bathroom tissue'}
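As a quick usage note (not part of the commit), the `CLS2IDX` mapping above can turn a model's predicted class index into a human-readable label; a minimal sketch, assuming `CLS2IDX.py` is importable from the repository root:

```python
# Minimal sketch: map a predicted ImageNet class index to its label.
from CLS2IDX import CLS2IDX

pred_idx = 207                    # e.g. the argmax of a classifier's logits
print(CLS2IDX[pred_idx])          # -> 'golden retriever'
```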
README.md CHANGED
@@ -1,13 +1,124 @@
- ---
- title: RobustViT
- emoji:
- colorFrom: red
- colorTo: indigo
- sdk: gradio
- sdk_version: 3.0.11
- app_file: app.py
- pinned: false
- license: mit
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces#reference
+ # RobustViT
+
+ [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/hila-chefer/RobustViT/blob/master/RobustViT.ipynb)
+
+ Official PyTorch implementation of **Optimizing Relevance Maps of Vision Transformers Improves Robustness**. This code allows finetuning the explainability maps of Vision Transformers to enhance robustness.
+
+ The method applies loss functions directly to the explainability maps to ensure that the model focuses mostly on the foreground of the image:
+ <p align="center">
+ <img width="500" height="400" src="teaser.png">
+ </p>
+ Using a short finetuning process with only 3 labeled examples from 500 classes, our method improves the robustness of ViT models across different model sizes and training techniques, even when data augmentation/regularization is applied.
+
+ ## Producing Segmentation Data
+ ### Using ImageNet-S
+ To use the ImageNet-S labeled data, [download the `ImageNetS919` dataset](https://github.com/UnsupervisedSemanticSegmentation/ImageNet-S).
+
+ ### Using TokenCut for unsupervised segmentation
+ 1. Clone the TokenCut project:
+ ```
+ git clone https://github.com/YangtaoWANG95/TokenCut.git
+ ```
+ 2. Install the dependencies: Python 3.7, PyTorch 1.7.1, and CUDA 11.2. Please refer to the official installation instructions. If CUDA 10.2 has been properly installed:
+ ```
+ pip install torch==1.7.1 torchvision==0.8.2
+ ```
+ followed by
+ ```
+ pip install -r TokenCut/requirements.txt
+ ```
+ 3. Use the following command to extract the segmentation maps:
+ ```
+ python tokencut_generate_segmentation.py --img_path <PATH_TO_IMAGE> --out_dir <PATH_TO_OUTPUT_DIRECTORY>
+ ```
+
+
+ ## Finetuning ViT models
+
+ To finetune a pretrained ViT model, use the `imagenet_finetune.py` script. Be sure to uncomment the import line containing the pretrained model you wish to finetune.
+
+ Usage example:
+
+ ```bash
+ python imagenet_finetune.py --seg_data <PATH_TO_SEGMENTATION_DATA> --data <PATH_TO_IMAGENET> --gpu 0 --lr <LR> --lambda_seg <SEG> --lambda_acc <ACC> --lambda_background <BACK> --lambda_foreground <FORE>
+ ```
+
+ Notes (a sketch of how these weights might combine follows the list):
+
+ * For all models we use:
+     * `lambda_seg=0.8`
+     * `lambda_acc=0.2`
+     * `lambda_background=2`
+     * `lambda_foreground=0.3`
+ * For **DeiT** models, a temperature is required as follows:
+     * `temperature=0.65` for DeiT-B
+     * `temperature=0.55` for DeiT-S
+ * The learning rates per model are:
+     * ViT-B: 3e-6
+     * ViT-L: 9e-7
+     * AR-S: 2e-6
+     * AR-B: 6e-7
+     * AR-L: 9e-7
+     * DeiT-S: 1e-6
+     * DeiT-B: 8e-7
+
+ ## Baseline methods
69
+ Notice to uncomment the import line containing the pretrained model you wish to finetune in the code.
70
+
71
+ ### GradMask
72
+ Run the following command:
73
+ ```bash
74
+ python imagenet_finetune_gradmask.py --seg_data <PATH_TO_SEGMENTATION_DATA> --data <PATH_TO_IMAGENET> --gpu 0 --lr <LR> --lambda_seg <SEG> --lambda_acc <ACC>
75
+ ```
76
+ All hyperparameters for the different models can be found in section D of the supplementary material.
77
+
78
+ ### Right for the Right Reasons
79
+ Run the following command:
80
+ ```bash
81
+ python imagenet_finetune_rrr.py --seg_data <PATH_TO_SEGMENTATION_DATA> --data <PATH_TO_IMAGENET> --gpu 0 --lr <LR> --lambda_seg <SEG> --lambda_acc <ACC>
82
+ ```
83
+ All hyperparameters for the different models can be found in section D of the supplementary material.
84
+
85
+ ## Evaluation
86
+
87
+ ### Robustness Evaluation
88
+
89
+ 1. Download the evaluation datasets:
90
+ * [INet-A](https://github.com/hendrycks/natural-adv-examples)
91
+ * [INet-R](https://github.com/hendrycks/imagenet-r)
92
+ * [INet-v2](https://github.com/modestyachts/ImageNetV2)
93
+ * [ObjectNet](https://objectnet.dev/)
94
+ * [SI-Score](https://github.com/google-research/si-score)
95
+
96
+ 2. Run the following script to evaluate:
97
+
98
+ ```bash
99
+ python imagenet_eval_robustness.py --data <PATH_TO_ROBUSTNESS_DATASET> --batch-size <BATCH_SIZE> --evaluate --checkpoint <PATH_TO_FINETUNED_CHECKPOINT>
100
+ ```
101
+ * Notice to uncomment the import line containing the pretrained model you wish to evaluate in the code.
102
+ * To evaluate the original model simply omit the `checkpoint` parameter.
103
+ * For the INet-v2 dataset add `--isV2`.
104
+ * For the ObjectNet dataset add `--isObjectNet`.
105
+ * For the SI datasets add `--isSI`.
106
+
107
+ ### Segmentation Evaluation
108
+ Our segmentation tests are based on the test in the official implementation of [Transformer Interpretability Beyond Attention Visualization](https://github.com/hila-chefer/Transformer-Explainability).
109
+ 1. [Download the ImageNet segmentation test set](https://github.com/hila-chefer/Transformer-Explainability#section-a-segmentation-results).
110
+ 2. Run the following script to evaluate:
111
+
112
+ ```bash
113
+ PYTHONPATH=./:$PYTHONPATH python SegmentationTest/imagenet_seg_eval.py --imagenet-seg-path <PATH_TO_gtsegs_ijcv.mat>
114
+ ```
115
+ * Notice to uncomment the import line containing the pretrained model you wish to evaluate in the code.
116
+
117
+ ### Credits
118
+ * The TokenCut code is built on top of [LOST](https://github.com/valeoai/LOST), [DINO](https://github.com/facebookresearch/dino), [Segswap](https://github.com/XiSHEN0220/SegSwap), and [Bilateral_Sovlver](https://github.com/poolio/bilateral_solver).
119
+ * Our ViT code is based on the [pytorch-image-models](https://github.com/rwightman/pytorch-image-models) repository.
120
+ * Our ImageNet finetuning code is based on [code from the official PyTorch repo](https://github.com/pytorch/examples/blob/main/imagenet/main.py).
121
+ * The code to convert ObjectNet classes to ImageNet classes was taken from [the torchprune repo](https://github.com/lucaslie/torchprune/blob/b753745b773c3ed259bf819d193ce8573d89efbb/src/torchprune/torchprune/util/datasets/objectnet.py).
122
+ * The code to convert SI-Score classes to ImageNet classes was taken from [the official implementation](https://github.com/google-research/si-score).
123
+
124
+ We would like to sincerely thank the authors for their great works.
RobustViT.ipynb ADDED
The diff for this file is too large to render. See raw diff
SegmentationTest/data/Imagenet.py ADDED
@@ -0,0 +1,74 @@
+ import os
+ import torch
+ import torch.utils.data as data
+ import numpy as np
+
+ from PIL import Image
+ import h5py
+
+ __all__ = ['ImagenetResults']
+
+
+ class Imagenet_Segmentation(data.Dataset):
+     CLASSES = 2
+
+     def __init__(self,
+                  path,
+                  transform=None,
+                  target_transform=None):
+         self.path = path
+         self.transform = transform
+         self.target_transform = target_transform
+         self.h5py = None
+         tmp = h5py.File(path, 'r')
+         self.data_length = len(tmp['/value/img'])
+         tmp.close()
+         del tmp
+
+     def __getitem__(self, index):
+
+         if self.h5py is None:
+             self.h5py = h5py.File(self.path, 'r')
+
+         img = np.array(self.h5py[self.h5py['/value/img'][index, 0]]).transpose((2, 1, 0))
+         target = np.array(self.h5py[self.h5py[self.h5py['/value/gt'][index, 0]][0, 0]]).transpose((1, 0))
+
+         img = Image.fromarray(img).convert('RGB')
+         target = Image.fromarray(target)
+
+         if self.transform is not None:
+             img = self.transform(img)
+
+         if self.target_transform is not None:
+             target = np.array(self.target_transform(target)).astype('int32')
+             target = torch.from_numpy(target).long()
+
+         return img, target
+
+     def __len__(self):
+         return self.data_length
+
+
+ class ImagenetResults(data.Dataset):
+     def __init__(self, path):
+         super(ImagenetResults, self).__init__()
+
+         self.path = os.path.join(path, 'results.hdf5')
+         self.data = None
+
+         print('Reading dataset length...')
+         with h5py.File(self.path, 'r') as f:
+             self.data_length = len(f['/image'])
+
+     def __len__(self):
+         return self.data_length
+
+     def __getitem__(self, item):
+         if self.data is None:
+             self.data = h5py.File(self.path, 'r')
+
+         image = torch.tensor(self.data['image'][item])
+         vis = torch.tensor(self.data['vis'][item])
+         target = torch.tensor(self.data['target'][item]).long()
+
+         return image, vis, target
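As a usage note (not part of the commit), `Imagenet_Segmentation` above reads the HDF5-based `gtsegs_ijcv.mat` file referenced in the README's segmentation evaluation section; a minimal sketch of wiring it up, assuming that file has been downloaded and the repository root is on `PYTHONPATH` (the transforms here are placeholders, not necessarily what `SegmentationTest/imagenet_seg_eval.py` uses):

```python
# Minimal sketch with assumed transforms; the evaluation script may differ.
import torchvision.transforms as transforms
from PIL import Image
from SegmentationTest.data.Imagenet import Imagenet_Segmentation

img_tf = transforms.Compose([transforms.Resize((224, 224)),
                             transforms.ToTensor()])
lbl_tf = transforms.Resize((224, 224), Image.NEAREST)  # keep labels discrete

ds = Imagenet_Segmentation('gtsegs_ijcv.mat',
                           transform=img_tf, target_transform=lbl_tf)
img, target = ds[0]   # image tensor and binary foreground/background mask
print(img.shape, target.shape)
```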
SegmentationTest/data/VOC.py ADDED
@@ -0,0 +1,372 @@
1
+ import os
2
+ import tarfile
3
+ import torch
4
+ import torch.utils.data as data
5
+ import numpy as np
6
+ import h5py
7
+
8
+ from PIL import Image
9
+ from scipy import io
10
+ from torchvision.datasets.utils import download_url
11
+
12
+ DATASET_YEAR_DICT = {
13
+ '2012': {
14
+ 'url': 'http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar',
15
+ 'filename': 'VOCtrainval_11-May-2012.tar',
16
+ 'md5': '6cd6e144f989b92b3379bac3b3de84fd',
17
+ 'base_dir': 'VOCdevkit/VOC2012'
18
+ },
19
+ '2011': {
20
+ 'url': 'http://host.robots.ox.ac.uk/pascal/VOC/voc2011/VOCtrainval_25-May-2011.tar',
21
+ 'filename': 'VOCtrainval_25-May-2011.tar',
22
+ 'md5': '6c3384ef61512963050cb5d687e5bf1e',
23
+ 'base_dir': 'TrainVal/VOCdevkit/VOC2011'
24
+ },
25
+ '2010': {
26
+ 'url': 'http://host.robots.ox.ac.uk/pascal/VOC/voc2010/VOCtrainval_03-May-2010.tar',
27
+ 'filename': 'VOCtrainval_03-May-2010.tar',
28
+ 'md5': 'da459979d0c395079b5c75ee67908abb',
29
+ 'base_dir': 'VOCdevkit/VOC2010'
30
+ },
31
+ '2009': {
32
+ 'url': 'http://host.robots.ox.ac.uk/pascal/VOC/voc2009/VOCtrainval_11-May-2009.tar',
33
+ 'filename': 'VOCtrainval_11-May-2009.tar',
34
+ 'md5': '59065e4b188729180974ef6572f6a212',
35
+ 'base_dir': 'VOCdevkit/VOC2009'
36
+ },
37
+ '2008': {
38
+ 'url': 'http://host.robots.ox.ac.uk/pascal/VOC/voc2008/VOCtrainval_14-Jul-2008.tar',
39
+ 'filename': 'VOCtrainval_11-May-2012.tar',
40
+ 'md5': '2629fa636546599198acfcfbfcf1904a',
41
+ 'base_dir': 'VOCdevkit/VOC2008'
42
+ },
43
+ '2007': {
44
+ 'url': 'http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar',
45
+ 'filename': 'VOCtrainval_06-Nov-2007.tar',
46
+ 'md5': 'c52e279531787c972589f7e41ab4ae64',
47
+ 'base_dir': 'VOCdevkit/VOC2007'
48
+ }
49
+ }
50
+
51
+
52
+ class VOCSegmentation(data.Dataset):
53
+ """`Pascal VOC <http://host.robots.ox.ac.uk/pascal/VOC/>`_ Segmentation Dataset.
54
+
55
+ Args:
56
+ root (string): Root directory of the VOC Dataset.
57
+ year (string, optional): The dataset year, supports years 2007 to 2012.
58
+ image_set (string, optional): Select the image_set to use, ``train``, ``trainval`` or ``val``
59
+ download (bool, optional): If true, downloads the dataset from the internet and
60
+ puts it in root directory. If dataset is already downloaded, it is not
61
+ downloaded again.
62
+ transform (callable, optional): A function/transform that takes in an PIL image
63
+ and returns a transformed version. E.g, ``transforms.RandomCrop``
64
+ target_transform (callable, optional): A function/transform that takes in the
65
+ target and transforms it.
66
+ """
67
+
68
+ CLASSES = 20
69
+ CLASSES_NAMES = [
70
+ 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
71
+ 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
72
+ 'motorbike', 'person', 'potted-plant', 'sheep', 'sofa', 'train',
73
+ 'tvmonitor', 'ambigious'
74
+ ]
75
+
76
+ def __init__(self,
77
+ root,
78
+ year='2012',
79
+ image_set='train',
80
+ download=False,
81
+ transform=None,
82
+ target_transform=None):
83
+ self.root = os.path.expanduser(root)
84
+ self.year = year
85
+ self.url = DATASET_YEAR_DICT[year]['url']
86
+ self.filename = DATASET_YEAR_DICT[year]['filename']
87
+ self.md5 = DATASET_YEAR_DICT[year]['md5']
88
+ self.transform = transform
89
+ self.target_transform = target_transform
90
+ self.image_set = image_set
91
+ base_dir = DATASET_YEAR_DICT[year]['base_dir']
92
+ voc_root = os.path.join(self.root, base_dir)
93
+ image_dir = os.path.join(voc_root, 'JPEGImages')
94
+ mask_dir = os.path.join(voc_root, 'SegmentationClass')
95
+
96
+ if download:
97
+ download_extract(self.url, self.root, self.filename, self.md5)
98
+
99
+ if not os.path.isdir(voc_root):
100
+ raise RuntimeError('Dataset not found or corrupted.' +
101
+ ' You can use download=True to download it')
102
+
103
+ splits_dir = os.path.join(voc_root, 'ImageSets/Segmentation')
104
+
105
+ split_f = os.path.join(splits_dir, image_set.rstrip('\n') + '.txt')
106
+
107
+ if not os.path.exists(split_f):
108
+ raise ValueError(
109
+ 'Wrong image_set entered! Please use image_set="train" '
110
+ 'or image_set="trainval" or image_set="val"')
111
+
112
+ with open(os.path.join(split_f), "r") as f:
113
+ file_names = [x.strip() for x in f.readlines()]
114
+
115
+ self.images = [os.path.join(image_dir, x + ".jpg") for x in file_names]
116
+ self.masks = [os.path.join(mask_dir, x + ".png") for x in file_names]
117
+ assert (len(self.images) == len(self.masks))
118
+
119
+ def __getitem__(self, index):
120
+ """
121
+ Args:
122
+ index (int): Index
123
+
124
+ Returns:
125
+ tuple: (image, target) where target is the image segmentation.
126
+ """
127
+ img = Image.open(self.images[index]).convert('RGB')
128
+ target = Image.open(self.masks[index])
129
+
130
+ if self.transform is not None:
131
+ img = self.transform(img)
132
+
133
+ if self.target_transform is not None:
134
+ target = np.array(self.target_transform(target)).astype('int32')
135
+ target[target == 255] = -1
136
+ target = torch.from_numpy(target).long()
137
+
138
+ return img, target
139
+
140
+ @staticmethod
141
+ def _mask_transform(mask):
142
+ target = np.array(mask).astype('int32')
143
+ target[target == 255] = -1
144
+ return torch.from_numpy(target).long()
145
+
146
+ def __len__(self):
147
+ return len(self.images)
148
+
149
+ @property
150
+ def pred_offset(self):
151
+ return 0
152
+
153
+
154
+ class VOCClassification(data.Dataset):
155
+ """`Pascal VOC <http://host.robots.ox.ac.uk/pascal/VOC/>`_ Segmentation Dataset.
156
+
157
+ Args:
158
+ root (string): Root directory of the VOC Dataset.
159
+ year (string, optional): The dataset year, supports years 2007 to 2012.
160
+ image_set (string, optional): Select the image_set to use, ``train``, ``trainval`` or ``val``
161
+ download (bool, optional): If true, downloads the dataset from the internet and
162
+ puts it in root directory. If dataset is already downloaded, it is not
163
+ downloaded again.
164
+ transform (callable, optional): A function/transform that takes in an PIL image
165
+ and returns a transformed version. E.g, ``transforms.RandomCrop``
166
+ """
167
+ CLASSES = 20
168
+
169
+ def __init__(self,
170
+ root,
171
+ year='2012',
172
+ image_set='train',
173
+ download=False,
174
+ transform=None):
175
+ self.root = os.path.expanduser(root)
176
+ self.year = year
177
+ self.url = DATASET_YEAR_DICT[year]['url']
178
+ self.filename = DATASET_YEAR_DICT[year]['filename']
179
+ self.md5 = DATASET_YEAR_DICT[year]['md5']
180
+ self.transform = transform
181
+ self.image_set = image_set
182
+ base_dir = DATASET_YEAR_DICT[year]['base_dir']
183
+ voc_root = os.path.join(self.root, base_dir)
184
+ image_dir = os.path.join(voc_root, 'JPEGImages')
185
+ mask_dir = os.path.join(voc_root, 'SegmentationClass')
186
+
187
+ if download:
188
+ download_extract(self.url, self.root, self.filename, self.md5)
189
+
190
+ if not os.path.isdir(voc_root):
191
+ raise RuntimeError('Dataset not found or corrupted.' +
192
+ ' You can use download=True to download it')
193
+
194
+ splits_dir = os.path.join(voc_root, 'ImageSets/Segmentation')
195
+
196
+ split_f = os.path.join(splits_dir, image_set.rstrip('\n') + '.txt')
197
+
198
+ if not os.path.exists(split_f):
199
+ raise ValueError(
200
+ 'Wrong image_set entered! Please use image_set="train" '
201
+ 'or image_set="trainval" or image_set="val"')
202
+
203
+ with open(os.path.join(split_f), "r") as f:
204
+ file_names = [x.strip() for x in f.readlines()]
205
+
206
+ self.images = [os.path.join(image_dir, x + ".jpg") for x in file_names]
207
+ self.masks = [os.path.join(mask_dir, x + ".png") for x in file_names]
208
+ assert (len(self.images) == len(self.masks))
209
+
210
+ def __getitem__(self, index):
211
+ """
212
+ Args:
213
+ index (int): Index
214
+
215
+ Returns:
216
+ tuple: (image, labels) where labels is a multi-hot vector over the 20 classes present in the segmentation mask.
217
+ """
218
+ img = Image.open(self.images[index]).convert('RGB')
219
+ target = Image.open(self.masks[index])
220
+
221
+ # if self.transform is not None:
222
+ # img = self.transform(img)
223
+ if self.transform is not None:
224
+ img, target = self.transform(img, target)
225
+
226
+ visible_classes = np.unique(target)
227
+ labels = torch.zeros(self.CLASSES)
228
+ for id in visible_classes:
229
+ if id not in (0, 255):
230
+ labels[id - 1].fill_(1)
231
+
232
+ return img, labels
233
+
234
+ def __len__(self):
235
+ return len(self.images)
236
+
237
+
238
+ class VOCSBDClassification(data.Dataset):
239
+ """`Pascal VOC <http://host.robots.ox.ac.uk/pascal/VOC/>`_ Segmentation Dataset.
240
+
241
+ Args:
242
+ root (string): Root directory of the VOC Dataset.
243
+ year (string, optional): The dataset year, supports years 2007 to 2012.
244
+ image_set (string, optional): Select the image_set to use, ``train``, ``trainval`` or ``val``
245
+ download (bool, optional): If true, downloads the dataset from the internet and
246
+ puts it in root directory. If dataset is already downloaded, it is not
247
+ downloaded again.
248
+ transform (callable, optional): A function/transform that takes in a PIL image
249
+ and returns a transformed version. E.g., ``transforms.RandomCrop``
250
+ """
251
+ CLASSES = 20
252
+
253
+ def __init__(self,
254
+ root,
255
+ sbd_root,
256
+ year='2012',
257
+ image_set='train',
258
+ download=False,
259
+ transform=None):
260
+ self.root = os.path.expanduser(root)
261
+ self.sbd_root = os.path.expanduser(sbd_root)
262
+ self.year = year
263
+ self.url = DATASET_YEAR_DICT[year]['url']
264
+ self.filename = DATASET_YEAR_DICT[year]['filename']
265
+ self.md5 = DATASET_YEAR_DICT[year]['md5']
266
+ self.transform = transform
267
+ self.image_set = image_set
268
+ base_dir = DATASET_YEAR_DICT[year]['base_dir']
269
+ voc_root = os.path.join(self.root, base_dir)
270
+ image_dir = os.path.join(voc_root, 'JPEGImages')
271
+ mask_dir = os.path.join(voc_root, 'SegmentationClass')
272
+ sbd_image_dir = os.path.join(sbd_root, 'img')
273
+ sbd_mask_dir = os.path.join(sbd_root, 'cls')
274
+
275
+ if download:
276
+ download_extract(self.url, self.root, self.filename, self.md5)
277
+
278
+ if not os.path.isdir(voc_root):
279
+ raise RuntimeError('Dataset not found or corrupted.' +
280
+ ' You can use download=True to download it')
281
+
282
+ splits_dir = os.path.join(voc_root, 'ImageSets/Segmentation')
283
+
284
+ split_f = os.path.join(splits_dir, image_set.rstrip('\n') + '.txt')
285
+ sbd_split = os.path.join(sbd_root, 'train.txt')
286
+
287
+ if not os.path.exists(split_f):
288
+ raise ValueError(
289
+ 'Wrong image_set entered! Please use image_set="train" '
290
+ 'or image_set="trainval" or image_set="val"')
291
+
292
+ with open(os.path.join(split_f), "r") as f:
293
+ voc_file_names = [x.strip() for x in f.readlines()]
294
+
295
+ with open(os.path.join(sbd_split), "r") as f:
296
+ sbd_file_names = [x.strip() for x in f.readlines()]
297
+
298
+ self.images = [os.path.join(image_dir, x + ".jpg") for x in voc_file_names]
299
+ self.images += [os.path.join(sbd_image_dir, x + ".jpg") for x in sbd_file_names]
300
+ self.masks = [os.path.join(mask_dir, x + ".png") for x in voc_file_names]
301
+ self.masks += [os.path.join(sbd_mask_dir, x + ".mat") for x in sbd_file_names]
302
+ assert (len(self.images) == len(self.masks))
303
+
304
+ def __getitem__(self, index):
305
+ """
306
+ Args:
307
+ index (int): Index
308
+
309
+ Returns:
310
+ tuple: (image, labels) where labels is a multi-hot vector over the 20 classes present in the segmentation mask.
311
+ """
312
+ img = Image.open(self.images[index]).convert('RGB')
313
+ mask_path = self.masks[index]
314
+ if mask_path[-3:] == 'mat':
315
+ target = io.loadmat(mask_path, struct_as_record=False, squeeze_me=True)['GTcls'].Segmentation
316
+ target = Image.fromarray(target, mode='P')
317
+ else:
318
+ target = Image.open(self.masks[index])
319
+
320
+ if self.transform is not None:
321
+ img, target = self.transform(img, target)
322
+
323
+ visible_classes = np.unique(target)
324
+ labels = torch.zeros(self.CLASSES)
325
+ for id in visible_classes:
326
+ if id not in (0, 255):
327
+ labels[id - 1].fill_(1)
328
+
329
+ return img, labels
330
+
331
+ def __len__(self):
332
+ return len(self.images)
333
+
334
+
335
+ def download_extract(url, root, filename, md5):
336
+ download_url(url, root, filename, md5)
337
+ with tarfile.open(os.path.join(root, filename), "r") as tar:
338
+ tar.extractall(path=root)
339
+
340
+
341
+ class VOCResults(data.Dataset):
342
+ CLASSES = 20
343
+ CLASSES_NAMES = [
344
+ 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
345
+ 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
346
+ 'motorbike', 'person', 'potted-plant', 'sheep', 'sofa', 'train',
347
+ 'tvmonitor', 'ambigious'
348
+ ]
349
+
350
+ def __init__(self, path):
351
+ super(VOCResults, self).__init__()
352
+
353
+ self.path = os.path.join(path, 'results.hdf5')
354
+ self.data = None
355
+
356
+ print('Reading dataset length...')
357
+ with h5py.File(self.path, 'r') as f:
358
+ self.data_length = len(f['/image'])
359
+
360
+ def __len__(self):
361
+ return self.data_length
362
+
363
+ def __getitem__(self, item):
364
+ if self.data is None:
365
+ self.data = h5py.File(self.path, 'r')
366
+
367
+ image = torch.tensor(self.data['image'][item])
368
+ vis = torch.tensor(self.data['vis'][item])
369
+ target = torch.tensor(self.data['target'][item])
370
+ class_pred = torch.tensor(self.data['class_pred'][item])
371
+
372
+ return image, vis, target, class_pred
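A minimal usage sketch for the VOCResults reader above, assuming a results.hdf5 file (containing the 'image', 'vis', 'target' and 'class_pred' datasets read by __getitem__) has already been produced and that the repository root is importable; the directory path is illustrative only:

    # Hypothetical path; ./output/run1/results.hdf5 must already exist.
    from torch.utils.data import DataLoader
    from SegmentationTest.data.VOC import VOCResults

    results = VOCResults('./output/run1')             # opens ./output/run1/results.hdf5 lazily
    loader = DataLoader(results, batch_size=1, num_workers=0)
    for image, vis, target, class_pred in loader:
        pass  # e.g. compare the visualization map `vis` against the ground-truth `target`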
SegmentationTest/data/__init__.py ADDED
File without changes
SegmentationTest/data/imagenet_utils.py ADDED
@@ -0,0 +1,1002 @@
1
+ CLS2IDX = {
2
+ 0: 'tench, Tinca tinca',
3
+ 1: 'goldfish, Carassius auratus',
4
+ 2: 'great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias',
5
+ 3: 'tiger shark, Galeocerdo cuvieri',
6
+ 4: 'hammerhead, hammerhead shark',
7
+ 5: 'electric ray, crampfish, numbfish, torpedo',
8
+ 6: 'stingray',
9
+ 7: 'cock',
10
+ 8: 'hen',
11
+ 9: 'ostrich, Struthio camelus',
12
+ 10: 'brambling, Fringilla montifringilla',
13
+ 11: 'goldfinch, Carduelis carduelis',
14
+ 12: 'house finch, linnet, Carpodacus mexicanus',
15
+ 13: 'junco, snowbird',
16
+ 14: 'indigo bunting, indigo finch, indigo bird, Passerina cyanea',
17
+ 15: 'robin, American robin, Turdus migratorius',
18
+ 16: 'bulbul',
19
+ 17: 'jay',
20
+ 18: 'magpie',
21
+ 19: 'chickadee',
22
+ 20: 'water ouzel, dipper',
23
+ 21: 'kite',
24
+ 22: 'bald eagle, American eagle, Haliaeetus leucocephalus',
25
+ 23: 'vulture',
26
+ 24: 'great grey owl, great gray owl, Strix nebulosa',
27
+ 25: 'European fire salamander, Salamandra salamandra',
28
+ 26: 'common newt, Triturus vulgaris',
29
+ 27: 'eft',
30
+ 28: 'spotted salamander, Ambystoma maculatum',
31
+ 29: 'axolotl, mud puppy, Ambystoma mexicanum',
32
+ 30: 'bullfrog, Rana catesbeiana',
33
+ 31: 'tree frog, tree-frog',
34
+ 32: 'tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui',
35
+ 33: 'loggerhead, loggerhead turtle, Caretta caretta',
36
+ 34: 'leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea',
37
+ 35: 'mud turtle',
38
+ 36: 'terrapin',
39
+ 37: 'box turtle, box tortoise',
40
+ 38: 'banded gecko',
41
+ 39: 'common iguana, iguana, Iguana iguana',
42
+ 40: 'American chameleon, anole, Anolis carolinensis',
43
+ 41: 'whiptail, whiptail lizard',
44
+ 42: 'agama',
45
+ 43: 'frilled lizard, Chlamydosaurus kingi',
46
+ 44: 'alligator lizard',
47
+ 45: 'Gila monster, Heloderma suspectum',
48
+ 46: 'green lizard, Lacerta viridis',
49
+ 47: 'African chameleon, Chamaeleo chamaeleon',
50
+ 48: 'Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis',
51
+ 49: 'African crocodile, Nile crocodile, Crocodylus niloticus',
52
+ 50: 'American alligator, Alligator mississipiensis',
53
+ 51: 'triceratops',
54
+ 52: 'thunder snake, worm snake, Carphophis amoenus',
55
+ 53: 'ringneck snake, ring-necked snake, ring snake',
56
+ 54: 'hognose snake, puff adder, sand viper',
57
+ 55: 'green snake, grass snake',
58
+ 56: 'king snake, kingsnake',
59
+ 57: 'garter snake, grass snake',
60
+ 58: 'water snake',
61
+ 59: 'vine snake',
62
+ 60: 'night snake, Hypsiglena torquata',
63
+ 61: 'boa constrictor, Constrictor constrictor',
64
+ 62: 'rock python, rock snake, Python sebae',
65
+ 63: 'Indian cobra, Naja naja',
66
+ 64: 'green mamba',
67
+ 65: 'sea snake',
68
+ 66: 'horned viper, cerastes, sand viper, horned asp, Cerastes cornutus',
69
+ 67: 'diamondback, diamondback rattlesnake, Crotalus adamanteus',
70
+ 68: 'sidewinder, horned rattlesnake, Crotalus cerastes',
71
+ 69: 'trilobite',
72
+ 70: 'harvestman, daddy longlegs, Phalangium opilio',
73
+ 71: 'scorpion',
74
+ 72: 'black and gold garden spider, Argiope aurantia',
75
+ 73: 'barn spider, Araneus cavaticus',
76
+ 74: 'garden spider, Aranea diademata',
77
+ 75: 'black widow, Latrodectus mactans',
78
+ 76: 'tarantula',
79
+ 77: 'wolf spider, hunting spider',
80
+ 78: 'tick',
81
+ 79: 'centipede',
82
+ 80: 'black grouse',
83
+ 81: 'ptarmigan',
84
+ 82: 'ruffed grouse, partridge, Bonasa umbellus',
85
+ 83: 'prairie chicken, prairie grouse, prairie fowl',
86
+ 84: 'peacock',
87
+ 85: 'quail',
88
+ 86: 'partridge',
89
+ 87: 'African grey, African gray, Psittacus erithacus',
90
+ 88: 'macaw',
91
+ 89: 'sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita',
92
+ 90: 'lorikeet',
93
+ 91: 'coucal',
94
+ 92: 'bee eater',
95
+ 93: 'hornbill',
96
+ 94: 'hummingbird',
97
+ 95: 'jacamar',
98
+ 96: 'toucan',
99
+ 97: 'drake',
100
+ 98: 'red-breasted merganser, Mergus serrator',
101
+ 99: 'goose',
102
+ 100: 'black swan, Cygnus atratus',
103
+ 101: 'tusker',
104
+ 102: 'echidna, spiny anteater, anteater',
105
+ 103: 'platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus',
106
+ 104: 'wallaby, brush kangaroo',
107
+ 105: 'koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus',
108
+ 106: 'wombat',
109
+ 107: 'jellyfish',
110
+ 108: 'sea anemone, anemone',
111
+ 109: 'brain coral',
112
+ 110: 'flatworm, platyhelminth',
113
+ 111: 'nematode, nematode worm, roundworm',
114
+ 112: 'conch',
115
+ 113: 'snail',
116
+ 114: 'slug',
117
+ 115: 'sea slug, nudibranch',
118
+ 116: 'chiton, coat-of-mail shell, sea cradle, polyplacophore',
119
+ 117: 'chambered nautilus, pearly nautilus, nautilus',
120
+ 118: 'Dungeness crab, Cancer magister',
121
+ 119: 'rock crab, Cancer irroratus',
122
+ 120: 'fiddler crab',
123
+ 121: 'king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica',
124
+ 122: 'American lobster, Northern lobster, Maine lobster, Homarus americanus',
125
+ 123: 'spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish',
126
+ 124: 'crayfish, crawfish, crawdad, crawdaddy',
127
+ 125: 'hermit crab',
128
+ 126: 'isopod',
129
+ 127: 'white stork, Ciconia ciconia',
130
+ 128: 'black stork, Ciconia nigra',
131
+ 129: 'spoonbill',
132
+ 130: 'flamingo',
133
+ 131: 'little blue heron, Egretta caerulea',
134
+ 132: 'American egret, great white heron, Egretta albus',
135
+ 133: 'bittern',
136
+ 134: 'crane',
137
+ 135: 'limpkin, Aramus pictus',
138
+ 136: 'European gallinule, Porphyrio porphyrio',
139
+ 137: 'American coot, marsh hen, mud hen, water hen, Fulica americana',
140
+ 138: 'bustard',
141
+ 139: 'ruddy turnstone, Arenaria interpres',
142
+ 140: 'red-backed sandpiper, dunlin, Erolia alpina',
143
+ 141: 'redshank, Tringa totanus',
144
+ 142: 'dowitcher',
145
+ 143: 'oystercatcher, oyster catcher',
146
+ 144: 'pelican',
147
+ 145: 'king penguin, Aptenodytes patagonica',
148
+ 146: 'albatross, mollymawk',
149
+ 147: 'grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus',
150
+ 148: 'killer whale, killer, orca, grampus, sea wolf, Orcinus orca',
151
+ 149: 'dugong, Dugong dugon',
152
+ 150: 'sea lion',
153
+ 151: 'Chihuahua',
154
+ 152: 'Japanese spaniel',
155
+ 153: 'Maltese dog, Maltese terrier, Maltese',
156
+ 154: 'Pekinese, Pekingese, Peke',
157
+ 155: 'Shih-Tzu',
158
+ 156: 'Blenheim spaniel',
159
+ 157: 'papillon',
160
+ 158: 'toy terrier',
161
+ 159: 'Rhodesian ridgeback',
162
+ 160: 'Afghan hound, Afghan',
163
+ 161: 'basset, basset hound',
164
+ 162: 'beagle',
165
+ 163: 'bloodhound, sleuthhound',
166
+ 164: 'bluetick',
167
+ 165: 'black-and-tan coonhound',
168
+ 166: 'Walker hound, Walker foxhound',
169
+ 167: 'English foxhound',
170
+ 168: 'redbone',
171
+ 169: 'borzoi, Russian wolfhound',
172
+ 170: 'Irish wolfhound',
173
+ 171: 'Italian greyhound',
174
+ 172: 'whippet',
175
+ 173: 'Ibizan hound, Ibizan Podenco',
176
+ 174: 'Norwegian elkhound, elkhound',
177
+ 175: 'otterhound, otter hound',
178
+ 176: 'Saluki, gazelle hound',
179
+ 177: 'Scottish deerhound, deerhound',
180
+ 178: 'Weimaraner',
181
+ 179: 'Staffordshire bullterrier, Staffordshire bull terrier',
182
+ 180: 'American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier',
183
+ 181: 'Bedlington terrier',
184
+ 182: 'Border terrier',
185
+ 183: 'Kerry blue terrier',
186
+ 184: 'Irish terrier',
187
+ 185: 'Norfolk terrier',
188
+ 186: 'Norwich terrier',
189
+ 187: 'Yorkshire terrier',
190
+ 188: 'wire-haired fox terrier',
191
+ 189: 'Lakeland terrier',
192
+ 190: 'Sealyham terrier, Sealyham',
193
+ 191: 'Airedale, Airedale terrier',
194
+ 192: 'cairn, cairn terrier',
195
+ 193: 'Australian terrier',
196
+ 194: 'Dandie Dinmont, Dandie Dinmont terrier',
197
+ 195: 'Boston bull, Boston terrier',
198
+ 196: 'miniature schnauzer',
199
+ 197: 'giant schnauzer',
200
+ 198: 'standard schnauzer',
201
+ 199: 'Scotch terrier, Scottish terrier, Scottie',
202
+ 200: 'Tibetan terrier, chrysanthemum dog',
203
+ 201: 'silky terrier, Sydney silky',
204
+ 202: 'soft-coated wheaten terrier',
205
+ 203: 'West Highland white terrier',
206
+ 204: 'Lhasa, Lhasa apso',
207
+ 205: 'flat-coated retriever',
208
+ 206: 'curly-coated retriever',
209
+ 207: 'golden retriever',
210
+ 208: 'Labrador retriever',
211
+ 209: 'Chesapeake Bay retriever',
212
+ 210: 'German short-haired pointer',
213
+ 211: 'vizsla, Hungarian pointer',
214
+ 212: 'English setter',
215
+ 213: 'Irish setter, red setter',
216
+ 214: 'Gordon setter',
217
+ 215: 'Brittany spaniel',
218
+ 216: 'clumber, clumber spaniel',
219
+ 217: 'English springer, English springer spaniel',
220
+ 218: 'Welsh springer spaniel',
221
+ 219: 'cocker spaniel, English cocker spaniel, cocker',
222
+ 220: 'Sussex spaniel',
223
+ 221: 'Irish water spaniel',
224
+ 222: 'kuvasz',
225
+ 223: 'schipperke',
226
+ 224: 'groenendael',
227
+ 225: 'malinois',
228
+ 226: 'briard',
229
+ 227: 'kelpie',
230
+ 228: 'komondor',
231
+ 229: 'Old English sheepdog, bobtail',
232
+ 230: 'Shetland sheepdog, Shetland sheep dog, Shetland',
233
+ 231: 'collie',
234
+ 232: 'Border collie',
235
+ 233: 'Bouvier des Flandres, Bouviers des Flandres',
236
+ 234: 'Rottweiler',
237
+ 235: 'German shepherd, German shepherd dog, German police dog, alsatian',
238
+ 236: 'Doberman, Doberman pinscher',
239
+ 237: 'miniature pinscher',
240
+ 238: 'Greater Swiss Mountain dog',
241
+ 239: 'Bernese mountain dog',
242
+ 240: 'Appenzeller',
243
+ 241: 'EntleBucher',
244
+ 242: 'boxer',
245
+ 243: 'bull mastiff',
246
+ 244: 'Tibetan mastiff',
247
+ 245: 'French bulldog',
248
+ 246: 'Great Dane',
249
+ 247: 'Saint Bernard, St Bernard',
250
+ 248: 'Eskimo dog, husky',
251
+ 249: 'malamute, malemute, Alaskan malamute',
252
+ 250: 'Siberian husky',
253
+ 251: 'dalmatian, coach dog, carriage dog',
254
+ 252: 'affenpinscher, monkey pinscher, monkey dog',
255
+ 253: 'basenji',
256
+ 254: 'pug, pug-dog',
257
+ 255: 'Leonberg',
258
+ 256: 'Newfoundland, Newfoundland dog',
259
+ 257: 'Great Pyrenees',
260
+ 258: 'Samoyed, Samoyede',
261
+ 259: 'Pomeranian',
262
+ 260: 'chow, chow chow',
263
+ 261: 'keeshond',
264
+ 262: 'Brabancon griffon',
265
+ 263: 'Pembroke, Pembroke Welsh corgi',
266
+ 264: 'Cardigan, Cardigan Welsh corgi',
267
+ 265: 'toy poodle',
268
+ 266: 'miniature poodle',
269
+ 267: 'standard poodle',
270
+ 268: 'Mexican hairless',
271
+ 269: 'timber wolf, grey wolf, gray wolf, Canis lupus',
272
+ 270: 'white wolf, Arctic wolf, Canis lupus tundrarum',
273
+ 271: 'red wolf, maned wolf, Canis rufus, Canis niger',
274
+ 272: 'coyote, prairie wolf, brush wolf, Canis latrans',
275
+ 273: 'dingo, warrigal, warragal, Canis dingo',
276
+ 274: 'dhole, Cuon alpinus',
277
+ 275: 'African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus',
278
+ 276: 'hyena, hyaena',
279
+ 277: 'red fox, Vulpes vulpes',
280
+ 278: 'kit fox, Vulpes macrotis',
281
+ 279: 'Arctic fox, white fox, Alopex lagopus',
282
+ 280: 'grey fox, gray fox, Urocyon cinereoargenteus',
283
+ 281: 'tabby, tabby cat',
284
+ 282: 'tiger cat',
285
+ 283: 'Persian cat',
286
+ 284: 'Siamese cat, Siamese',
287
+ 285: 'Egyptian cat',
288
+ 286: 'cougar, puma, catamount, mountain lion, painter, panther, Felis concolor',
289
+ 287: 'lynx, catamount',
290
+ 288: 'leopard, Panthera pardus',
291
+ 289: 'snow leopard, ounce, Panthera uncia',
292
+ 290: 'jaguar, panther, Panthera onca, Felis onca',
293
+ 291: 'lion, king of beasts, Panthera leo',
294
+ 292: 'tiger, Panthera tigris',
295
+ 293: 'cheetah, chetah, Acinonyx jubatus',
296
+ 294: 'brown bear, bruin, Ursus arctos',
297
+ 295: 'American black bear, black bear, Ursus americanus, Euarctos americanus',
298
+ 296: 'ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus',
299
+ 297: 'sloth bear, Melursus ursinus, Ursus ursinus',
300
+ 298: 'mongoose',
301
+ 299: 'meerkat, mierkat',
302
+ 300: 'tiger beetle',
303
+ 301: 'ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle',
304
+ 302: 'ground beetle, carabid beetle',
305
+ 303: 'long-horned beetle, longicorn, longicorn beetle',
306
+ 304: 'leaf beetle, chrysomelid',
307
+ 305: 'dung beetle',
308
+ 306: 'rhinoceros beetle',
309
+ 307: 'weevil',
310
+ 308: 'fly',
311
+ 309: 'bee',
312
+ 310: 'ant, emmet, pismire',
313
+ 311: 'grasshopper, hopper',
314
+ 312: 'cricket',
315
+ 313: 'walking stick, walkingstick, stick insect',
316
+ 314: 'cockroach, roach',
317
+ 315: 'mantis, mantid',
318
+ 316: 'cicada, cicala',
319
+ 317: 'leafhopper',
320
+ 318: 'lacewing, lacewing fly',
321
+ 319: "dragonfly, darning needle, devil's darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk",
322
+ 320: 'damselfly',
323
+ 321: 'admiral',
324
+ 322: 'ringlet, ringlet butterfly',
325
+ 323: 'monarch, monarch butterfly, milkweed butterfly, Danaus plexippus',
326
+ 324: 'cabbage butterfly',
327
+ 325: 'sulphur butterfly, sulfur butterfly',
328
+ 326: 'lycaenid, lycaenid butterfly',
329
+ 327: 'starfish, sea star',
330
+ 328: 'sea urchin',
331
+ 329: 'sea cucumber, holothurian',
332
+ 330: 'wood rabbit, cottontail, cottontail rabbit',
333
+ 331: 'hare',
334
+ 332: 'Angora, Angora rabbit',
335
+ 333: 'hamster',
336
+ 334: 'porcupine, hedgehog',
337
+ 335: 'fox squirrel, eastern fox squirrel, Sciurus niger',
338
+ 336: 'marmot',
339
+ 337: 'beaver',
340
+ 338: 'guinea pig, Cavia cobaya',
341
+ 339: 'sorrel',
342
+ 340: 'zebra',
343
+ 341: 'hog, pig, grunter, squealer, Sus scrofa',
344
+ 342: 'wild boar, boar, Sus scrofa',
345
+ 343: 'warthog',
346
+ 344: 'hippopotamus, hippo, river horse, Hippopotamus amphibius',
347
+ 345: 'ox',
348
+ 346: 'water buffalo, water ox, Asiatic buffalo, Bubalus bubalis',
349
+ 347: 'bison',
350
+ 348: 'ram, tup',
351
+ 349: 'bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis',
352
+ 350: 'ibex, Capra ibex',
353
+ 351: 'hartebeest',
354
+ 352: 'impala, Aepyceros melampus',
355
+ 353: 'gazelle',
356
+ 354: 'Arabian camel, dromedary, Camelus dromedarius',
357
+ 355: 'llama',
358
+ 356: 'weasel',
359
+ 357: 'mink',
360
+ 358: 'polecat, fitch, foulmart, foumart, Mustela putorius',
361
+ 359: 'black-footed ferret, ferret, Mustela nigripes',
362
+ 360: 'otter',
363
+ 361: 'skunk, polecat, wood pussy',
364
+ 362: 'badger',
365
+ 363: 'armadillo',
366
+ 364: 'three-toed sloth, ai, Bradypus tridactylus',
367
+ 365: 'orangutan, orang, orangutang, Pongo pygmaeus',
368
+ 366: 'gorilla, Gorilla gorilla',
369
+ 367: 'chimpanzee, chimp, Pan troglodytes',
370
+ 368: 'gibbon, Hylobates lar',
371
+ 369: 'siamang, Hylobates syndactylus, Symphalangus syndactylus',
372
+ 370: 'guenon, guenon monkey',
373
+ 371: 'patas, hussar monkey, Erythrocebus patas',
374
+ 372: 'baboon',
375
+ 373: 'macaque',
376
+ 374: 'langur',
377
+ 375: 'colobus, colobus monkey',
378
+ 376: 'proboscis monkey, Nasalis larvatus',
379
+ 377: 'marmoset',
380
+ 378: 'capuchin, ringtail, Cebus capucinus',
381
+ 379: 'howler monkey, howler',
382
+ 380: 'titi, titi monkey',
383
+ 381: 'spider monkey, Ateles geoffroyi',
384
+ 382: 'squirrel monkey, Saimiri sciureus',
385
+ 383: 'Madagascar cat, ring-tailed lemur, Lemur catta',
386
+ 384: 'indri, indris, Indri indri, Indri brevicaudatus',
387
+ 385: 'Indian elephant, Elephas maximus',
388
+ 386: 'African elephant, Loxodonta africana',
389
+ 387: 'lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens',
390
+ 388: 'giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca',
391
+ 389: 'barracouta, snoek',
392
+ 390: 'eel',
393
+ 391: 'coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch',
394
+ 392: 'rock beauty, Holocanthus tricolor',
395
+ 393: 'anemone fish',
396
+ 394: 'sturgeon',
397
+ 395: 'gar, garfish, garpike, billfish, Lepisosteus osseus',
398
+ 396: 'lionfish',
399
+ 397: 'puffer, pufferfish, blowfish, globefish',
400
+ 398: 'abacus',
401
+ 399: 'abaya',
402
+ 400: "academic gown, academic robe, judge's robe",
403
+ 401: 'accordion, piano accordion, squeeze box',
404
+ 402: 'acoustic guitar',
405
+ 403: 'aircraft carrier, carrier, flattop, attack aircraft carrier',
406
+ 404: 'airliner',
407
+ 405: 'airship, dirigible',
408
+ 406: 'altar',
409
+ 407: 'ambulance',
410
+ 408: 'amphibian, amphibious vehicle',
411
+ 409: 'analog clock',
412
+ 410: 'apiary, bee house',
413
+ 411: 'apron',
414
+ 412: 'ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin',
415
+ 413: 'assault rifle, assault gun',
416
+ 414: 'backpack, back pack, knapsack, packsack, rucksack, haversack',
417
+ 415: 'bakery, bakeshop, bakehouse',
418
+ 416: 'balance beam, beam',
419
+ 417: 'balloon',
420
+ 418: 'ballpoint, ballpoint pen, ballpen, Biro',
421
+ 419: 'Band Aid',
422
+ 420: 'banjo',
423
+ 421: 'bannister, banister, balustrade, balusters, handrail',
424
+ 422: 'barbell',
425
+ 423: 'barber chair',
426
+ 424: 'barbershop',
427
+ 425: 'barn',
428
+ 426: 'barometer',
429
+ 427: 'barrel, cask',
430
+ 428: 'barrow, garden cart, lawn cart, wheelbarrow',
431
+ 429: 'baseball',
432
+ 430: 'basketball',
433
+ 431: 'bassinet',
434
+ 432: 'bassoon',
435
+ 433: 'bathing cap, swimming cap',
436
+ 434: 'bath towel',
437
+ 435: 'bathtub, bathing tub, bath, tub',
438
+ 436: 'beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon',
439
+ 437: 'beacon, lighthouse, beacon light, pharos',
440
+ 438: 'beaker',
441
+ 439: 'bearskin, busby, shako',
442
+ 440: 'beer bottle',
443
+ 441: 'beer glass',
444
+ 442: 'bell cote, bell cot',
445
+ 443: 'bib',
446
+ 444: 'bicycle-built-for-two, tandem bicycle, tandem',
447
+ 445: 'bikini, two-piece',
448
+ 446: 'binder, ring-binder',
449
+ 447: 'binoculars, field glasses, opera glasses',
450
+ 448: 'birdhouse',
451
+ 449: 'boathouse',
452
+ 450: 'bobsled, bobsleigh, bob',
453
+ 451: 'bolo tie, bolo, bola tie, bola',
454
+ 452: 'bonnet, poke bonnet',
455
+ 453: 'bookcase',
456
+ 454: 'bookshop, bookstore, bookstall',
457
+ 455: 'bottlecap',
458
+ 456: 'bow',
459
+ 457: 'bow tie, bow-tie, bowtie',
460
+ 458: 'brass, memorial tablet, plaque',
461
+ 459: 'brassiere, bra, bandeau',
462
+ 460: 'breakwater, groin, groyne, mole, bulwark, seawall, jetty',
463
+ 461: 'breastplate, aegis, egis',
464
+ 462: 'broom',
465
+ 463: 'bucket, pail',
466
+ 464: 'buckle',
467
+ 465: 'bulletproof vest',
468
+ 466: 'bullet train, bullet',
469
+ 467: 'butcher shop, meat market',
470
+ 468: 'cab, hack, taxi, taxicab',
471
+ 469: 'caldron, cauldron',
472
+ 470: 'candle, taper, wax light',
473
+ 471: 'cannon',
474
+ 472: 'canoe',
475
+ 473: 'can opener, tin opener',
476
+ 474: 'cardigan',
477
+ 475: 'car mirror',
478
+ 476: 'carousel, carrousel, merry-go-round, roundabout, whirligig',
479
+ 477: "carpenter's kit, tool kit",
480
+ 478: 'carton',
481
+ 479: 'car wheel',
482
+ 480: 'cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM',
483
+ 481: 'cassette',
484
+ 482: 'cassette player',
485
+ 483: 'castle',
486
+ 484: 'catamaran',
487
+ 485: 'CD player',
488
+ 486: 'cello, violoncello',
489
+ 487: 'cellular telephone, cellular phone, cellphone, cell, mobile phone',
490
+ 488: 'chain',
491
+ 489: 'chainlink fence',
492
+ 490: 'chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour',
493
+ 491: 'chain saw, chainsaw',
494
+ 492: 'chest',
495
+ 493: 'chiffonier, commode',
496
+ 494: 'chime, bell, gong',
497
+ 495: 'china cabinet, china closet',
498
+ 496: 'Christmas stocking',
499
+ 497: 'church, church building',
500
+ 498: 'cinema, movie theater, movie theatre, movie house, picture palace',
501
+ 499: 'cleaver, meat cleaver, chopper',
502
+ 500: 'cliff dwelling',
503
+ 501: 'cloak',
504
+ 502: 'clog, geta, patten, sabot',
505
+ 503: 'cocktail shaker',
506
+ 504: 'coffee mug',
507
+ 505: 'coffeepot',
508
+ 506: 'coil, spiral, volute, whorl, helix',
509
+ 507: 'combination lock',
510
+ 508: 'computer keyboard, keypad',
511
+ 509: 'confectionery, confectionary, candy store',
512
+ 510: 'container ship, containership, container vessel',
513
+ 511: 'convertible',
514
+ 512: 'corkscrew, bottle screw',
515
+ 513: 'cornet, horn, trumpet, trump',
516
+ 514: 'cowboy boot',
517
+ 515: 'cowboy hat, ten-gallon hat',
518
+ 516: 'cradle',
519
+ 517: 'crane',
520
+ 518: 'crash helmet',
521
+ 519: 'crate',
522
+ 520: 'crib, cot',
523
+ 521: 'Crock Pot',
524
+ 522: 'croquet ball',
525
+ 523: 'crutch',
526
+ 524: 'cuirass',
527
+ 525: 'dam, dike, dyke',
528
+ 526: 'desk',
529
+ 527: 'desktop computer',
530
+ 528: 'dial telephone, dial phone',
531
+ 529: 'diaper, nappy, napkin',
532
+ 530: 'digital clock',
533
+ 531: 'digital watch',
534
+ 532: 'dining table, board',
535
+ 533: 'dishrag, dishcloth',
536
+ 534: 'dishwasher, dish washer, dishwashing machine',
537
+ 535: 'disk brake, disc brake',
538
+ 536: 'dock, dockage, docking facility',
539
+ 537: 'dogsled, dog sled, dog sleigh',
540
+ 538: 'dome',
541
+ 539: 'doormat, welcome mat',
542
+ 540: 'drilling platform, offshore rig',
543
+ 541: 'drum, membranophone, tympan',
544
+ 542: 'drumstick',
545
+ 543: 'dumbbell',
546
+ 544: 'Dutch oven',
547
+ 545: 'electric fan, blower',
548
+ 546: 'electric guitar',
549
+ 547: 'electric locomotive',
550
+ 548: 'entertainment center',
551
+ 549: 'envelope',
552
+ 550: 'espresso maker',
553
+ 551: 'face powder',
554
+ 552: 'feather boa, boa',
555
+ 553: 'file, file cabinet, filing cabinet',
556
+ 554: 'fireboat',
557
+ 555: 'fire engine, fire truck',
558
+ 556: 'fire screen, fireguard',
559
+ 557: 'flagpole, flagstaff',
560
+ 558: 'flute, transverse flute',
561
+ 559: 'folding chair',
562
+ 560: 'football helmet',
563
+ 561: 'forklift',
564
+ 562: 'fountain',
565
+ 563: 'fountain pen',
566
+ 564: 'four-poster',
567
+ 565: 'freight car',
568
+ 566: 'French horn, horn',
569
+ 567: 'frying pan, frypan, skillet',
570
+ 568: 'fur coat',
571
+ 569: 'garbage truck, dustcart',
572
+ 570: 'gasmask, respirator, gas helmet',
573
+ 571: 'gas pump, gasoline pump, petrol pump, island dispenser',
574
+ 572: 'goblet',
575
+ 573: 'go-kart',
576
+ 574: 'golf ball',
577
+ 575: 'golfcart, golf cart',
578
+ 576: 'gondola',
579
+ 577: 'gong, tam-tam',
580
+ 578: 'gown',
581
+ 579: 'grand piano, grand',
582
+ 580: 'greenhouse, nursery, glasshouse',
583
+ 581: 'grille, radiator grille',
584
+ 582: 'grocery store, grocery, food market, market',
585
+ 583: 'guillotine',
586
+ 584: 'hair slide',
587
+ 585: 'hair spray',
588
+ 586: 'half track',
589
+ 587: 'hammer',
590
+ 588: 'hamper',
591
+ 589: 'hand blower, blow dryer, blow drier, hair dryer, hair drier',
592
+ 590: 'hand-held computer, hand-held microcomputer',
593
+ 591: 'handkerchief, hankie, hanky, hankey',
594
+ 592: 'hard disc, hard disk, fixed disk',
595
+ 593: 'harmonica, mouth organ, harp, mouth harp',
596
+ 594: 'harp',
597
+ 595: 'harvester, reaper',
598
+ 596: 'hatchet',
599
+ 597: 'holster',
600
+ 598: 'home theater, home theatre',
601
+ 599: 'honeycomb',
602
+ 600: 'hook, claw',
603
+ 601: 'hoopskirt, crinoline',
604
+ 602: 'horizontal bar, high bar',
605
+ 603: 'horse cart, horse-cart',
606
+ 604: 'hourglass',
607
+ 605: 'iPod',
608
+ 606: 'iron, smoothing iron',
609
+ 607: "jack-o'-lantern",
610
+ 608: 'jean, blue jean, denim',
611
+ 609: 'jeep, landrover',
612
+ 610: 'jersey, T-shirt, tee shirt',
613
+ 611: 'jigsaw puzzle',
614
+ 612: 'jinrikisha, ricksha, rickshaw',
615
+ 613: 'joystick',
616
+ 614: 'kimono',
617
+ 615: 'knee pad',
618
+ 616: 'knot',
619
+ 617: 'lab coat, laboratory coat',
620
+ 618: 'ladle',
621
+ 619: 'lampshade, lamp shade',
622
+ 620: 'laptop, laptop computer',
623
+ 621: 'lawn mower, mower',
624
+ 622: 'lens cap, lens cover',
625
+ 623: 'letter opener, paper knife, paperknife',
626
+ 624: 'library',
627
+ 625: 'lifeboat',
628
+ 626: 'lighter, light, igniter, ignitor',
629
+ 627: 'limousine, limo',
630
+ 628: 'liner, ocean liner',
631
+ 629: 'lipstick, lip rouge',
632
+ 630: 'Loafer',
633
+ 631: 'lotion',
634
+ 632: 'loudspeaker, speaker, speaker unit, loudspeaker system, speaker system',
635
+ 633: "loupe, jeweler's loupe",
636
+ 634: 'lumbermill, sawmill',
637
+ 635: 'magnetic compass',
638
+ 636: 'mailbag, postbag',
639
+ 637: 'mailbox, letter box',
640
+ 638: 'maillot',
641
+ 639: 'maillot, tank suit',
642
+ 640: 'manhole cover',
643
+ 641: 'maraca',
644
+ 642: 'marimba, xylophone',
645
+ 643: 'mask',
646
+ 644: 'matchstick',
647
+ 645: 'maypole',
648
+ 646: 'maze, labyrinth',
649
+ 647: 'measuring cup',
650
+ 648: 'medicine chest, medicine cabinet',
651
+ 649: 'megalith, megalithic structure',
652
+ 650: 'microphone, mike',
653
+ 651: 'microwave, microwave oven',
654
+ 652: 'military uniform',
655
+ 653: 'milk can',
656
+ 654: 'minibus',
657
+ 655: 'miniskirt, mini',
658
+ 656: 'minivan',
659
+ 657: 'missile',
660
+ 658: 'mitten',
661
+ 659: 'mixing bowl',
662
+ 660: 'mobile home, manufactured home',
663
+ 661: 'Model T',
664
+ 662: 'modem',
665
+ 663: 'monastery',
666
+ 664: 'monitor',
667
+ 665: 'moped',
668
+ 666: 'mortar',
669
+ 667: 'mortarboard',
670
+ 668: 'mosque',
671
+ 669: 'mosquito net',
672
+ 670: 'motor scooter, scooter',
673
+ 671: 'mountain bike, all-terrain bike, off-roader',
674
+ 672: 'mountain tent',
675
+ 673: 'mouse, computer mouse',
676
+ 674: 'mousetrap',
677
+ 675: 'moving van',
678
+ 676: 'muzzle',
679
+ 677: 'nail',
680
+ 678: 'neck brace',
681
+ 679: 'necklace',
682
+ 680: 'nipple',
683
+ 681: 'notebook, notebook computer',
684
+ 682: 'obelisk',
685
+ 683: 'oboe, hautboy, hautbois',
686
+ 684: 'ocarina, sweet potato',
687
+ 685: 'odometer, hodometer, mileometer, milometer',
688
+ 686: 'oil filter',
689
+ 687: 'organ, pipe organ',
690
+ 688: 'oscilloscope, scope, cathode-ray oscilloscope, CRO',
691
+ 689: 'overskirt',
692
+ 690: 'oxcart',
693
+ 691: 'oxygen mask',
694
+ 692: 'packet',
695
+ 693: 'paddle, boat paddle',
696
+ 694: 'paddlewheel, paddle wheel',
697
+ 695: 'padlock',
698
+ 696: 'paintbrush',
699
+ 697: "pajama, pyjama, pj's, jammies",
700
+ 698: 'palace',
701
+ 699: 'panpipe, pandean pipe, syrinx',
702
+ 700: 'paper towel',
703
+ 701: 'parachute, chute',
704
+ 702: 'parallel bars, bars',
705
+ 703: 'park bench',
706
+ 704: 'parking meter',
707
+ 705: 'passenger car, coach, carriage',
708
+ 706: 'patio, terrace',
709
+ 707: 'pay-phone, pay-station',
710
+ 708: 'pedestal, plinth, footstall',
711
+ 709: 'pencil box, pencil case',
712
+ 710: 'pencil sharpener',
713
+ 711: 'perfume, essence',
714
+ 712: 'Petri dish',
715
+ 713: 'photocopier',
716
+ 714: 'pick, plectrum, plectron',
717
+ 715: 'pickelhaube',
718
+ 716: 'picket fence, paling',
719
+ 717: 'pickup, pickup truck',
720
+ 718: 'pier',
721
+ 719: 'piggy bank, penny bank',
722
+ 720: 'pill bottle',
723
+ 721: 'pillow',
724
+ 722: 'ping-pong ball',
725
+ 723: 'pinwheel',
726
+ 724: 'pirate, pirate ship',
727
+ 725: 'pitcher, ewer',
728
+ 726: "plane, carpenter's plane, woodworking plane",
729
+ 727: 'planetarium',
730
+ 728: 'plastic bag',
731
+ 729: 'plate rack',
732
+ 730: 'plow, plough',
733
+ 731: "plunger, plumber's helper",
734
+ 732: 'Polaroid camera, Polaroid Land camera',
735
+ 733: 'pole',
736
+ 734: 'police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria',
737
+ 735: 'poncho',
738
+ 736: 'pool table, billiard table, snooker table',
739
+ 737: 'pop bottle, soda bottle',
740
+ 738: 'pot, flowerpot',
741
+ 739: "potter's wheel",
742
+ 740: 'power drill',
743
+ 741: 'prayer rug, prayer mat',
744
+ 742: 'printer',
745
+ 743: 'prison, prison house',
746
+ 744: 'projectile, missile',
747
+ 745: 'projector',
748
+ 746: 'puck, hockey puck',
749
+ 747: 'punching bag, punch bag, punching ball, punchball',
750
+ 748: 'purse',
751
+ 749: 'quill, quill pen',
752
+ 750: 'quilt, comforter, comfort, puff',
753
+ 751: 'racer, race car, racing car',
754
+ 752: 'racket, racquet',
755
+ 753: 'radiator',
756
+ 754: 'radio, wireless',
757
+ 755: 'radio telescope, radio reflector',
758
+ 756: 'rain barrel',
759
+ 757: 'recreational vehicle, RV, R.V.',
760
+ 758: 'reel',
761
+ 759: 'reflex camera',
762
+ 760: 'refrigerator, icebox',
763
+ 761: 'remote control, remote',
764
+ 762: 'restaurant, eating house, eating place, eatery',
765
+ 763: 'revolver, six-gun, six-shooter',
766
+ 764: 'rifle',
767
+ 765: 'rocking chair, rocker',
768
+ 766: 'rotisserie',
769
+ 767: 'rubber eraser, rubber, pencil eraser',
770
+ 768: 'rugby ball',
771
+ 769: 'rule, ruler',
772
+ 770: 'running shoe',
773
+ 771: 'safe',
774
+ 772: 'safety pin',
775
+ 773: 'saltshaker, salt shaker',
776
+ 774: 'sandal',
777
+ 775: 'sarong',
778
+ 776: 'sax, saxophone',
779
+ 777: 'scabbard',
780
+ 778: 'scale, weighing machine',
781
+ 779: 'school bus',
782
+ 780: 'schooner',
783
+ 781: 'scoreboard',
784
+ 782: 'screen, CRT screen',
785
+ 783: 'screw',
786
+ 784: 'screwdriver',
787
+ 785: 'seat belt, seatbelt',
788
+ 786: 'sewing machine',
789
+ 787: 'shield, buckler',
790
+ 788: 'shoe shop, shoe-shop, shoe store',
791
+ 789: 'shoji',
792
+ 790: 'shopping basket',
793
+ 791: 'shopping cart',
794
+ 792: 'shovel',
795
+ 793: 'shower cap',
796
+ 794: 'shower curtain',
797
+ 795: 'ski',
798
+ 796: 'ski mask',
799
+ 797: 'sleeping bag',
800
+ 798: 'slide rule, slipstick',
801
+ 799: 'sliding door',
802
+ 800: 'slot, one-armed bandit',
803
+ 801: 'snorkel',
804
+ 802: 'snowmobile',
805
+ 803: 'snowplow, snowplough',
806
+ 804: 'soap dispenser',
807
+ 805: 'soccer ball',
808
+ 806: 'sock',
809
+ 807: 'solar dish, solar collector, solar furnace',
810
+ 808: 'sombrero',
811
+ 809: 'soup bowl',
812
+ 810: 'space bar',
813
+ 811: 'space heater',
814
+ 812: 'space shuttle',
815
+ 813: 'spatula',
816
+ 814: 'speedboat',
817
+ 815: "spider web, spider's web",
818
+ 816: 'spindle',
819
+ 817: 'sports car, sport car',
820
+ 818: 'spotlight, spot',
821
+ 819: 'stage',
822
+ 820: 'steam locomotive',
823
+ 821: 'steel arch bridge',
824
+ 822: 'steel drum',
825
+ 823: 'stethoscope',
826
+ 824: 'stole',
827
+ 825: 'stone wall',
828
+ 826: 'stopwatch, stop watch',
829
+ 827: 'stove',
830
+ 828: 'strainer',
831
+ 829: 'streetcar, tram, tramcar, trolley, trolley car',
832
+ 830: 'stretcher',
833
+ 831: 'studio couch, day bed',
834
+ 832: 'stupa, tope',
835
+ 833: 'submarine, pigboat, sub, U-boat',
836
+ 834: 'suit, suit of clothes',
837
+ 835: 'sundial',
838
+ 836: 'sunglass',
839
+ 837: 'sunglasses, dark glasses, shades',
840
+ 838: 'sunscreen, sunblock, sun blocker',
841
+ 839: 'suspension bridge',
842
+ 840: 'swab, swob, mop',
843
+ 841: 'sweatshirt',
844
+ 842: 'swimming trunks, bathing trunks',
845
+ 843: 'swing',
846
+ 844: 'switch, electric switch, electrical switch',
847
+ 845: 'syringe',
848
+ 846: 'table lamp',
849
+ 847: 'tank, army tank, armored combat vehicle, armoured combat vehicle',
850
+ 848: 'tape player',
851
+ 849: 'teapot',
852
+ 850: 'teddy, teddy bear',
853
+ 851: 'television, television system',
854
+ 852: 'tennis ball',
855
+ 853: 'thatch, thatched roof',
856
+ 854: 'theater curtain, theatre curtain',
857
+ 855: 'thimble',
858
+ 856: 'thresher, thrasher, threshing machine',
859
+ 857: 'throne',
860
+ 858: 'tile roof',
861
+ 859: 'toaster',
862
+ 860: 'tobacco shop, tobacconist shop, tobacconist',
863
+ 861: 'toilet seat',
864
+ 862: 'torch',
865
+ 863: 'totem pole',
866
+ 864: 'tow truck, tow car, wrecker',
867
+ 865: 'toyshop',
868
+ 866: 'tractor',
869
+ 867: 'trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi',
870
+ 868: 'tray',
871
+ 869: 'trench coat',
872
+ 870: 'tricycle, trike, velocipede',
873
+ 871: 'trimaran',
874
+ 872: 'tripod',
875
+ 873: 'triumphal arch',
876
+ 874: 'trolleybus, trolley coach, trackless trolley',
877
+ 875: 'trombone',
878
+ 876: 'tub, vat',
879
+ 877: 'turnstile',
880
+ 878: 'typewriter keyboard',
881
+ 879: 'umbrella',
882
+ 880: 'unicycle, monocycle',
883
+ 881: 'upright, upright piano',
884
+ 882: 'vacuum, vacuum cleaner',
885
+ 883: 'vase',
886
+ 884: 'vault',
887
+ 885: 'velvet',
888
+ 886: 'vending machine',
889
+ 887: 'vestment',
890
+ 888: 'viaduct',
891
+ 889: 'violin, fiddle',
892
+ 890: 'volleyball',
893
+ 891: 'waffle iron',
894
+ 892: 'wall clock',
895
+ 893: 'wallet, billfold, notecase, pocketbook',
896
+ 894: 'wardrobe, closet, press',
897
+ 895: 'warplane, military plane',
898
+ 896: 'washbasin, handbasin, washbowl, lavabo, wash-hand basin',
899
+ 897: 'washer, automatic washer, washing machine',
900
+ 898: 'water bottle',
901
+ 899: 'water jug',
902
+ 900: 'water tower',
903
+ 901: 'whiskey jug',
904
+ 902: 'whistle',
905
+ 903: 'wig',
906
+ 904: 'window screen',
907
+ 905: 'window shade',
908
+ 906: 'Windsor tie',
909
+ 907: 'wine bottle',
910
+ 908: 'wing',
911
+ 909: 'wok',
912
+ 910: 'wooden spoon',
913
+ 911: 'wool, woolen, woollen',
914
+ 912: 'worm fence, snake fence, snake-rail fence, Virginia fence',
915
+ 913: 'wreck',
916
+ 914: 'yawl',
917
+ 915: 'yurt',
918
+ 916: 'web site, website, internet site, site',
919
+ 917: 'comic book',
920
+ 918: 'crossword puzzle, crossword',
921
+ 919: 'street sign',
922
+ 920: 'traffic light, traffic signal, stoplight',
923
+ 921: 'book jacket, dust cover, dust jacket, dust wrapper',
924
+ 922: 'menu',
925
+ 923: 'plate',
926
+ 924: 'guacamole',
927
+ 925: 'consomme',
928
+ 926: 'hot pot, hotpot',
929
+ 927: 'trifle',
930
+ 928: 'ice cream, icecream',
931
+ 929: 'ice lolly, lolly, lollipop, popsicle',
932
+ 930: 'French loaf',
933
+ 931: 'bagel, beigel',
934
+ 932: 'pretzel',
935
+ 933: 'cheeseburger',
936
+ 934: 'hotdog, hot dog, red hot',
937
+ 935: 'mashed potato',
938
+ 936: 'head cabbage',
939
+ 937: 'broccoli',
940
+ 938: 'cauliflower',
941
+ 939: 'zucchini, courgette',
942
+ 940: 'spaghetti squash',
943
+ 941: 'acorn squash',
944
+ 942: 'butternut squash',
945
+ 943: 'cucumber, cuke',
946
+ 944: 'artichoke, globe artichoke',
947
+ 945: 'bell pepper',
948
+ 946: 'cardoon',
949
+ 947: 'mushroom',
950
+ 948: 'Granny Smith',
951
+ 949: 'strawberry',
952
+ 950: 'orange',
953
+ 951: 'lemon',
954
+ 952: 'fig',
955
+ 953: 'pineapple, ananas',
956
+ 954: 'banana',
957
+ 955: 'jackfruit, jak, jack',
958
+ 956: 'custard apple',
959
+ 957: 'pomegranate',
960
+ 958: 'hay',
961
+ 959: 'carbonara',
962
+ 960: 'chocolate sauce, chocolate syrup',
963
+ 961: 'dough',
964
+ 962: 'meat loaf, meatloaf',
965
+ 963: 'pizza, pizza pie',
966
+ 964: 'potpie',
967
+ 965: 'burrito',
968
+ 966: 'red wine',
969
+ 967: 'espresso',
970
+ 968: 'cup',
971
+ 969: 'eggnog',
972
+ 970: 'alp',
973
+ 971: 'bubble',
974
+ 972: 'cliff, drop, drop-off',
975
+ 973: 'coral reef',
976
+ 974: 'geyser',
977
+ 975: 'lakeside, lakeshore',
978
+ 976: 'promontory, headland, head, foreland',
979
+ 977: 'sandbar, sand bar',
980
+ 978: 'seashore, coast, seacoast, sea-coast',
981
+ 979: 'valley, vale',
982
+ 980: 'volcano',
983
+ 981: 'ballplayer, baseball player',
984
+ 982: 'groom, bridegroom',
985
+ 983: 'scuba diver',
986
+ 984: 'rapeseed',
987
+ 985: 'daisy',
988
+ 986: "yellow lady's slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum",
989
+ 987: 'corn',
990
+ 988: 'acorn',
991
+ 989: 'hip, rose hip, rosehip',
992
+ 990: 'buckeye, horse chestnut, conker',
993
+ 991: 'coral fungus',
994
+ 992: 'agaric',
995
+ 993: 'gyromitra',
996
+ 994: 'stinkhorn, carrion fungus',
997
+ 995: 'earthstar',
998
+ 996: 'hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa',
999
+ 997: 'bolete',
1000
+ 998: 'ear, spike, capitulum',
1001
+ 999: 'toilet tissue, toilet paper, bathroom tissue'
1002
+ }
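A small sketch of how this mapping is typically used, assuming a classifier that outputs 1000 ImageNet logits (the random tensor below is a stand-in for a real model call); note that despite its name, CLS2IDX maps class index to label string:

    import torch
    from SegmentationTest.data.imagenet_utils import CLS2IDX

    logits = torch.randn(1, 1000)          # stand-in for model(image)
    top1 = int(logits.argmax(dim=-1))      # predicted class index
    print(top1, '->', CLS2IDX[top1])       # human-readable ImageNet label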
SegmentationTest/data/transforms.py ADDED
@@ -0,0 +1,442 @@
1
+ from __future__ import division
2
+ import sys
3
+ import random
4
+ from PIL import Image
5
+
6
+ try:
7
+ import accimage
8
+ except ImportError:
9
+ accimage = None
10
+ import numbers
11
+ import collections
12
+
13
+ from torchvision.transforms import functional as F
14
+
15
+ if sys.version_info < (3, 3):
16
+ Sequence = collections.Sequence
17
+ Iterable = collections.Iterable
18
+ else:
19
+ Sequence = collections.abc.Sequence
20
+ Iterable = collections.abc.Iterable
21
+
22
+ _pil_interpolation_to_str = {
23
+ Image.NEAREST: 'PIL.Image.NEAREST',
24
+ Image.BILINEAR: 'PIL.Image.BILINEAR',
25
+ Image.BICUBIC: 'PIL.Image.BICUBIC',
26
+ Image.LANCZOS: 'PIL.Image.LANCZOS',
27
+ Image.HAMMING: 'PIL.Image.HAMMING',
28
+ Image.BOX: 'PIL.Image.BOX',
29
+ }
30
+
31
+
32
+ class Compose(object):
33
+ """Composes several transforms together.
34
+
35
+ Args:
36
+ transforms (list of ``Transform`` objects): list of transforms to compose.
37
+
38
+ Example:
39
+ >>> transforms.Compose([
40
+ >>> transforms.CenterCrop(10),
41
+ >>> transforms.ToTensor(),
42
+ >>> ])
43
+ """
44
+
45
+ def __init__(self, transforms):
46
+ self.transforms = transforms
47
+
48
+ def __call__(self, img, tgt):
49
+ for t in self.transforms:
50
+ img, tgt = t(img, tgt)
51
+ return img, tgt
52
+
53
+ def __repr__(self):
54
+ format_string = self.__class__.__name__ + '('
55
+ for t in self.transforms:
56
+ format_string += '\n'
57
+ format_string += ' {0}'.format(t)
58
+ format_string += '\n)'
59
+ return format_string
60
+
61
+
62
+ class Resize(object):
63
+ """Resize the input PIL Image to the given size.
64
+
65
+ Args:
66
+ size (sequence or int): Desired output size. If size is a sequence like
67
+ (h, w), output size will be matched to this. If size is an int,
68
+ smaller edge of the image will be matched to this number.
69
+ i.e., if height > width, then the image will be rescaled to
70
+ (size * height / width, size)
71
+ interpolation (int, optional): Desired interpolation. Default is
72
+ ``PIL.Image.BILINEAR``
73
+ """
74
+
75
+ def __init__(self, size, interpolation=Image.BILINEAR):
76
+ assert isinstance(size, int) or (isinstance(size, Iterable) and len(size) == 2)
77
+ self.size = size
78
+ self.interpolation = interpolation
79
+
80
+ def __call__(self, img, tgt):
81
+ """
82
+ Args:
83
+ img (PIL Image): Image to be scaled.
84
+
85
+ Returns:
86
+ PIL Image: Rescaled image.
87
+ """
88
+ return F.resize(img, self.size, self.interpolation), F.resize(tgt, self.size, Image.NEAREST)
89
+
90
+ def __repr__(self):
91
+ interpolate_str = _pil_interpolation_to_str[self.interpolation]
92
+ return self.__class__.__name__ + '(size={0}, interpolation={1})'.format(self.size, interpolate_str)
93
+
94
+
95
+ class CenterCrop(object):
96
+ """Crops the given PIL Image at the center.
97
+
98
+ Args:
99
+ size (sequence or int): Desired output size of the crop. If size is an
100
+ int instead of sequence like (h, w), a square crop (size, size) is
101
+ made.
102
+ """
103
+
104
+ def __init__(self, size):
105
+ if isinstance(size, numbers.Number):
106
+ self.size = (int(size), int(size))
107
+ else:
108
+ self.size = size
109
+
110
+ def __call__(self, img, tgt):
111
+ """
112
+ Args:
113
+ img (PIL Image): Image to be cropped.
114
+
115
+ Returns:
116
+ PIL Image: Cropped image.
117
+ """
118
+ return F.center_crop(img, self.size), F.center_crop(tgt, self.size)
119
+
120
+ def __repr__(self):
121
+ return self.__class__.__name__ + '(size={0})'.format(self.size)
122
+
123
+
124
+ class RandomCrop(object):
125
+ """Crop the given PIL Image at a random location.
126
+
127
+ Args:
128
+ size (sequence or int): Desired output size of the crop. If size is an
129
+ int instead of sequence like (h, w), a square crop (size, size) is
130
+ made.
131
+ padding (int or sequence, optional): Optional padding on each border
132
+ of the image. Default is None, i.e no padding. If a sequence of length
133
+ 4 is provided, it is used to pad left, top, right, bottom borders
134
+ respectively. If a sequence of length 2 is provided, it is used to
135
+ pad left/right, top/bottom borders, respectively.
136
+ pad_if_needed (boolean): It will pad the image if smaller than the
137
+ desired size to avoid raising an exception.
138
+ fill: Pixel fill value for constant fill. Default is 0. If a tuple of
139
+ length 3, it is used to fill R, G, B channels respectively.
140
+ This value is only used when the padding_mode is constant
141
+ padding_mode: Type of padding. Should be: constant, edge, reflect or symmetric. Default is constant.
142
+
143
+ - constant: pads with a constant value, this value is specified with fill
144
+
145
+ - edge: pads with the last value on the edge of the image
146
+
147
+ - reflect: pads with reflection of image (without repeating the last value on the edge)
148
+
149
+ padding [1, 2, 3, 4] with 2 elements on both sides in reflect mode
150
+ will result in [3, 2, 1, 2, 3, 4, 3, 2]
151
+
152
+ - symmetric: pads with reflection of image (repeating the last value on the edge)
153
+
154
+ padding [1, 2, 3, 4] with 2 elements on both sides in symmetric mode
155
+ will result in [2, 1, 1, 2, 3, 4, 4, 3]
156
+
157
+ """
158
+
159
+ def __init__(self, size, padding=None, pad_if_needed=False, fill=0, padding_mode='constant'):
160
+ if isinstance(size, numbers.Number):
161
+ self.size = (int(size), int(size))
162
+ else:
163
+ self.size = size
164
+ self.padding = padding
165
+ self.pad_if_needed = pad_if_needed
166
+ self.fill = fill
167
+ self.padding_mode = padding_mode
168
+
169
+ @staticmethod
170
+ def get_params(img, output_size):
171
+ """Get parameters for ``crop`` for a random crop.
172
+
173
+ Args:
174
+ img (PIL Image): Image to be cropped.
175
+ output_size (tuple): Expected output size of the crop.
176
+
177
+ Returns:
178
+ tuple: params (i, j, h, w) to be passed to ``crop`` for random crop.
179
+ """
180
+ w, h = img.size
181
+ th, tw = output_size
182
+ if w == tw and h == th:
183
+ return 0, 0, h, w
184
+
185
+ i = random.randint(0, h - th)
186
+ j = random.randint(0, w - tw)
187
+ return i, j, th, tw
188
+
189
+ def __call__(self, img, tgt):
190
+ """
191
+ Args:
192
+ img (PIL Image): Image to be cropped.
193
+
194
+ Returns:
195
+ PIL Image: Cropped image.
196
+ """
197
+ if self.padding is not None:
198
+ img = F.pad(img, self.padding, self.fill, self.padding_mode)
199
+ tgt = F.pad(tgt, self.padding, self.fill, self.padding_mode)
200
+
201
+ # pad the width if needed
202
+ if self.pad_if_needed and img.size[0] < self.size[1]:
203
+ img = F.pad(img, (self.size[1] - img.size[0], 0), self.fill, self.padding_mode)
204
+ tgt = F.pad(tgt, (self.size[1] - img.size[0], 0), self.fill, self.padding_mode)
205
+ # pad the height if needed
206
+ if self.pad_if_needed and img.size[1] < self.size[0]:
207
+ img = F.pad(img, (0, self.size[0] - img.size[1]), self.fill, self.padding_mode)
208
+ tgt = F.pad(tgt, (0, self.size[0] - img.size[1]), self.fill, self.padding_mode)
209
+
210
+ i, j, h, w = self.get_params(img, self.size)
211
+
212
+ return F.crop(img, i, j, h, w), F.crop(tgt, i, j, h, w)
213
+
214
+ def __repr__(self):
215
+ return self.__class__.__name__ + '(size={0}, padding={1})'.format(self.size, self.padding)
216
+
217
+
218
+ class RandomHorizontalFlip(object):
219
+ """Horizontally flip the given PIL Image randomly with a given probability.
220
+
221
+ Args:
222
+ p (float): probability of the image being flipped. Default value is 0.5
223
+ """
224
+
225
+ def __init__(self, p=0.5):
226
+ self.p = p
227
+
228
+ def __call__(self, img, tgt):
229
+ """
230
+ Args:
231
+ img (PIL Image): Image to be flipped.
232
+
233
+ Returns:
234
+ PIL Image: Randomly flipped image.
235
+ """
236
+ if random.random() < self.p:
237
+ return F.hflip(img), F.hflip(tgt)
238
+
239
+ return img, tgt
240
+
241
+ def __repr__(self):
242
+ return self.__class__.__name__ + '(p={})'.format(self.p)
243
+
244
+
245
+ class RandomVerticalFlip(object):
246
+ """Vertically flip the given PIL Image randomly with a given probability.
247
+
248
+ Args:
249
+ p (float): probability of the image being flipped. Default value is 0.5
250
+ """
251
+
252
+ def __init__(self, p=0.5):
253
+ self.p = p
254
+
255
+ def __call__(self, img, tgt):
256
+ """
257
+ Args:
258
+ img (PIL Image): Image to be flipped.
259
+
260
+ Returns:
261
+ PIL Image: Randomly flipped image.
262
+ """
263
+ if random.random() < self.p:
264
+ return F.vflip(img), F.vflip(tgt)
265
+ return img, tgt
266
+
267
+ def __repr__(self):
268
+ return self.__class__.__name__ + '(p={})'.format(self.p)
269
+
270
+
271
+ class Lambda(object):
272
+ """Apply a user-defined lambda as a transform.
273
+
274
+ Args:
275
+ lambd (function): Lambda/function to be used for transform.
276
+ """
277
+
278
+ def __init__(self, lambd):
279
+ assert callable(lambd), repr(type(lambd).__name__) + " object is not callable"
280
+ self.lambd = lambd
281
+
282
+ def __call__(self, img, tgt):
283
+ return self.lambd(img, tgt)
284
+
285
+ def __repr__(self):
286
+ return self.__class__.__name__ + '()'
287
+
288
+
289
+ class ColorJitter(object):
290
+ """Randomly change the brightness, contrast and saturation of an image.
291
+
292
+ Args:
293
+ brightness (float or tuple of float (min, max)): How much to jitter brightness.
294
+ brightness_factor is chosen uniformly from [max(0, 1 - brightness), 1 + brightness]
295
+ or the given [min, max]. Should be non-negative numbers.
296
+ contrast (float or tuple of float (min, max)): How much to jitter contrast.
297
+ contrast_factor is chosen uniformly from [max(0, 1 - contrast), 1 + contrast]
298
+ or the given [min, max]. Should be non-negative numbers.
299
+ saturation (float or tuple of float (min, max)): How much to jitter saturation.
300
+ saturation_factor is chosen uniformly from [max(0, 1 - saturation), 1 + saturation]
301
+ or the given [min, max]. Should be non-negative numbers.
302
+ hue (float or tuple of float (min, max)): How much to jitter hue.
303
+ hue_factor is chosen uniformly from [-hue, hue] or the given [min, max].
304
+ Should have 0 <= hue <= 0.5 or -0.5 <= min <= max <= 0.5.
305
+ """
306
+ def __init__(self, brightness=0, contrast=0, saturation=0, hue=0):
307
+ self.brightness = self._check_input(brightness, 'brightness')
308
+ self.contrast = self._check_input(contrast, 'contrast')
309
+ self.saturation = self._check_input(saturation, 'saturation')
310
+ self.hue = self._check_input(hue, 'hue', center=0, bound=(-0.5, 0.5),
311
+ clip_first_on_zero=False)
312
+
313
+ def _check_input(self, value, name, center=1, bound=(0, float('inf')), clip_first_on_zero=True):
314
+ if isinstance(value, numbers.Number):
315
+ if value < 0:
316
+ raise ValueError("If {} is a single number, it must be non negative.".format(name))
317
+ value = [center - value, center + value]
318
+ if clip_first_on_zero:
319
+ value[0] = max(value[0], 0)
320
+ elif isinstance(value, (tuple, list)) and len(value) == 2:
321
+ if not bound[0] <= value[0] <= value[1] <= bound[1]:
322
+ raise ValueError("{} values should be between {}".format(name, bound))
323
+ else:
324
+ raise TypeError("{} should be a single number or a list/tuple with lenght 2.".format(name))
325
+
326
+ # if value is 0 or (1., 1.) for brightness/contrast/saturation
327
+ # or (0., 0.) for hue, do nothing
328
+ if value[0] == value[1] == center:
329
+ value = None
330
+ return value
331
+
332
+ @staticmethod
333
+ def get_params(brightness, contrast, saturation, hue):
334
+ """Get a randomized transform to be applied on image.
335
+
336
+ Arguments are same as that of __init__.
337
+
338
+ Returns:
339
+ Transform which randomly adjusts brightness, contrast, saturation
340
+ and hue in a random order.
341
+ """
342
+ transforms = []
343
+
344
+ if brightness is not None:
345
+ brightness_factor = random.uniform(brightness[0], brightness[1])
346
+ transforms.append(Lambda(lambda img, tgt: (F.adjust_brightness(img, brightness_factor), tgt)))
347
+
348
+ if contrast is not None:
349
+ contrast_factor = random.uniform(contrast[0], contrast[1])
350
+ transforms.append(Lambda(lambda img, tgt: (F.adjust_contrast(img, contrast_factor), tgt)))
351
+
352
+ if saturation is not None:
353
+ saturation_factor = random.uniform(saturation[0], saturation[1])
354
+ transforms.append(Lambda(lambda img, tgt: (F.adjust_saturation(img, saturation_factor), tgt)))
355
+
356
+ if hue is not None:
357
+ hue_factor = random.uniform(hue[0], hue[1])
358
+ transforms.append(Lambda(lambda img, tgt: (F.adjust_hue(img, hue_factor), tgt)))
359
+
360
+ random.shuffle(transforms)
361
+ transform = Compose(transforms)
362
+
363
+ return transform
364
+
365
+ def __call__(self, img, tgt):
366
+ """
367
+ Args:
368
+ img (PIL Image): Input image.
369
+
370
+ Returns:
371
+ PIL Image: Color jittered image.
372
+ """
373
+ transform = self.get_params(self.brightness, self.contrast,
374
+ self.saturation, self.hue)
375
+ return transform(img, tgt)
376
+
377
+ def __repr__(self):
378
+ format_string = self.__class__.__name__ + '('
379
+ format_string += 'brightness={0}'.format(self.brightness)
380
+ format_string += ', contrast={0}'.format(self.contrast)
381
+ format_string += ', saturation={0}'.format(self.saturation)
382
+ format_string += ', hue={0})'.format(self.hue)
383
+ return format_string
384
+
385
+
386
+ class Normalize(object):
387
+ """Normalize a tensor image with mean and standard deviation.
388
+ Given mean: ``(M1,...,Mn)`` and std: ``(S1,..,Sn)`` for ``n`` channels, this transform
389
+ will normalize each channel of the input ``torch.*Tensor`` i.e.
390
+ ``input[channel] = (input[channel] - mean[channel]) / std[channel]``
391
+
392
+ .. note::
393
+ This transform acts out of place, i.e., it does not mutate the input tensor.
394
+
395
+ Args:
396
+ mean (sequence): Sequence of means for each channel.
397
+ std (sequence): Sequence of standard deviations for each channel.
398
+ """
399
+
400
+ def __init__(self, mean, std, inplace=False):
401
+ self.mean = mean
402
+ self.std = std
403
+ self.inplace = inplace
404
+
405
+ def __call__(self, img, tgt):
406
+ """
407
+ Args:
408
+ tensor (Tensor): Tensor image of size (C, H, W) to be normalized.
409
+
410
+ Returns:
411
+ Tensor: Normalized Tensor image.
412
+ """
413
+ # return F.normalize(img, self.mean, self.std, self.inplace), tgt
414
+ return F.normalize(img, self.mean, self.std), tgt
415
+
416
+ def __repr__(self):
417
+ return self.__class__.__name__ + '(mean={0}, std={1})'.format(self.mean, self.std)
418
+
419
+
420
+ class ToTensor(object):
421
+ """Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor.
422
+
423
+ Converts a PIL Image or numpy.ndarray (H x W x C) in the range
424
+ [0, 255] to a torch.FloatTensor of shape (C x H x W) in the range [0.0, 1.0]
425
+ if the PIL Image belongs to one of the modes (L, LA, P, I, F, RGB, YCbCr, RGBA, CMYK, 1)
426
+ or if the numpy.ndarray has dtype = np.uint8
427
+
428
+ In the other cases, tensors are returned without scaling.
429
+ """
430
+
431
+ def __call__(self, img, tgt):
432
+ """
433
+ Args:
434
+ pic (PIL Image or numpy.ndarray): Image to be converted to tensor.
435
+
436
+ Returns:
437
+ Tensor: Converted image.
438
+ """
439
+ return F.to_tensor(img), tgt
440
+
441
+ def __repr__(self):
442
+ return self.__class__.__name__ + '()'
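A minimal usage sketch (not part of this commit): the paired transforms above keep image and target synchronized by passing both through every `__call__(img, tgt)`. The `Compose` wrapper and the module path are assumptions based on the rest of this transforms module, and the file names are placeholders.

# Sketch only: joint image/mask preprocessing with the paired transforms above.
from PIL import Image
from SegmentationTest.data import transforms as T   # module path assumed from this repo layout

joint = T.Compose([
    T.ColorJitter(brightness=0.2, contrast=0.2),            # jitters the image, target untouched
    T.ToTensor(),                                            # PIL image -> float tensor in [0, 1]
    T.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),  # (x - mean) / std per channel
])

img = Image.open('some_image.png').convert('RGB')   # placeholder input image
tgt = Image.open('some_mask.png')                   # placeholder segmentation mask
img_t, tgt = joint(img, tgt)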
SegmentationTest/imagenet_seg_eval.py ADDED
@@ -0,0 +1,319 @@
1
+ import numpy as np
2
+ import torch
3
+ import torchvision.transforms as transforms
4
+ from torch.utils.data import DataLoader
5
+ from numpy import *
6
+ import argparse
7
+ from PIL import Image
8
+ import imageio
9
+ import os
10
+ from tqdm import tqdm
11
+ from SegmentationTest.utils.metrices import *
12
+
13
+ from SegmentationTest.utils import render
14
+ from SegmentationTest.utils.saver import Saver
15
+ from SegmentationTest.utils.iou import IoU
16
+
17
+ from SegmentationTest.data.Imagenet import Imagenet_Segmentation
18
+
19
+ # Uncomment the expected model below
20
+
21
+ # ViT
22
+ from ViT.ViT import vit_base_patch16_224 as vit
23
+ # from ViT.ViT import vit_large_patch16_224 as vit
24
+
25
+ # ViT-AugReg
26
+ # from ViT.ViT_new import vit_small_patch16_224 as vit
27
+ # from ViT.ViT_new import vit_base_patch16_224 as vit
28
+ # from ViT.ViT_new import vit_large_patch16_224 as vit
29
+
30
+ # DeiT
31
+ # from ViT.ViT import deit_base_patch16_224 as vit
32
+ # from ViT.ViT import deit_small_patch16_224 as vit
33
+
34
+
35
+ from ViT.explainer import generate_relevance, get_image_with_relevance
36
+
37
+ from sklearn.metrics import precision_recall_curve
38
+ import matplotlib.pyplot as plt
39
+
40
+ import torch.nn.functional as F
41
+
42
+ import warnings
43
+ warnings.filterwarnings("ignore")
44
+
45
+ plt.switch_backend('agg')
46
+
47
+ # hyperparameters
48
+ num_workers = 0
49
+ batch_size = 1
50
+
51
+ cls = ['airplane',
52
+ 'bicycle',
53
+ 'bird',
54
+ 'boat',
55
+ 'bottle',
56
+ 'bus',
57
+ 'car',
58
+ 'cat',
59
+ 'chair',
60
+ 'cow',
61
+ 'dining table',
62
+ 'dog',
63
+ 'horse',
64
+ 'motorbike',
65
+ 'person',
66
+ 'potted plant',
67
+ 'sheep',
68
+ 'sofa',
69
+ 'train',
70
+ 'tv'
71
+ ]
72
+
73
+ # Args
74
+ parser = argparse.ArgumentParser(description='Training multi-class classifier')
75
+ parser.add_argument('--arc', type=str, default='vgg', metavar='N',
76
+ help='Model architecture')
77
+ parser.add_argument('--train_dataset', type=str, default='imagenet', metavar='N',
78
+ help='Testing Dataset')
79
+ parser.add_argument('--method', type=str,
80
+ default='grad_rollout',
81
+ choices=['rollout', 'lrp', 'transformer_attribution', 'full_lrp', 'lrp_last_layer',
82
+ 'attn_last_layer', 'attn_gradcam'],
83
+ help='')
84
+ parser.add_argument('--thr', type=float, default=0.,
85
+ help='threshold')
86
+ parser.add_argument('--K', type=int, default=1,
87
+ help='new - top K results')
88
+ parser.add_argument('--save-img', action='store_true',
89
+ default=False,
90
+ help='')
91
+ parser.add_argument('--no-ia', action='store_true',
92
+ default=False,
93
+ help='')
94
+ parser.add_argument('--no-fx', action='store_true',
95
+ default=False,
96
+ help='')
97
+ parser.add_argument('--no-fgx', action='store_true',
98
+ default=False,
99
+ help='')
100
+ parser.add_argument('--no-m', action='store_true',
101
+ default=False,
102
+ help='')
103
+ parser.add_argument('--no-reg', action='store_true',
104
+ default=False,
105
+ help='')
106
+ parser.add_argument('--is-ablation', type=bool,
107
+ default=False,
108
+ help='')
109
+ parser.add_argument('--imagenet-seg-path', type=str, required=True)
110
+ parser.add_argument('--checkpoint', default='', type=str, metavar='PATH',
111
+ help='path to latest checkpoint (default: none)')
112
+ args = parser.parse_args()
113
+
114
+ args.checkname = args.method + '_' + args.arc
115
+
116
+ alpha = 2
117
+
118
+ cuda = torch.cuda.is_available()
119
+ device = torch.device("cuda" if cuda else "cpu")
120
+
121
+ # Define Saver
122
+ saver = Saver(args)
123
+ saver.results_dir = os.path.join(saver.experiment_dir, 'results')
124
+ if not os.path.exists(saver.results_dir):
125
+ os.makedirs(saver.results_dir)
126
+ if not os.path.exists(os.path.join(saver.results_dir, 'input')):
127
+ os.makedirs(os.path.join(saver.results_dir, 'input'))
128
+ if not os.path.exists(os.path.join(saver.results_dir, 'explain')):
129
+ os.makedirs(os.path.join(saver.results_dir, 'explain'))
130
+
131
+ args.exp_img_path = os.path.join(saver.results_dir, 'explain/img')
132
+ if not os.path.exists(args.exp_img_path):
133
+ os.makedirs(args.exp_img_path)
134
+ args.exp_np_path = os.path.join(saver.results_dir, 'explain/np')
135
+ if not os.path.exists(args.exp_np_path):
136
+ os.makedirs(args.exp_np_path)
137
+
138
+ # Data
139
+ normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
140
+ test_img_trans = transforms.Compose([
141
+ transforms.Resize((224, 224)),
142
+ transforms.ToTensor(),
143
+ normalize,
144
+ ])
145
+ test_lbl_trans = transforms.Compose([
146
+ transforms.Resize((224, 224), Image.NEAREST),
147
+ ])
148
+
149
+ ds = Imagenet_Segmentation(args.imagenet_seg_path,
150
+ transform=test_img_trans, target_transform=test_lbl_trans)
151
+ dl = DataLoader(ds, batch_size=batch_size, shuffle=False, num_workers=1, drop_last=False)
152
+
153
+ # Model
154
+ if args.checkpoint:
155
+ print(f"loading model from checkpoint {args.checkpoint}")
156
+ model = vit().cuda()
157
+ checkpoint = torch.load(args.checkpoint)
158
+ model.load_state_dict(checkpoint['state_dict'])
159
+ else:
160
+ model = vit(pretrained=True).cuda()
161
+
162
+ metric = IoU(2, ignore_index=-1)
163
+
164
+ iterator = tqdm(dl)
165
+
166
+ model.eval()
167
+
168
+
169
+ def compute_pred(output):
170
+ pred = output.data.max(1, keepdim=True)[1] # get the index of the max log-probability
171
+ # pred[0, 0] = 282
172
+ # print('Pred cls : ' + str(pred))
173
+ T = pred.squeeze().cpu().numpy()
174
+ T = np.expand_dims(T, 0)
175
+ T = (T[:, np.newaxis] == np.arange(1000)) * 1.0
176
+ T = torch.from_numpy(T).type(torch.FloatTensor)
177
+ Tt = T.cuda()
178
+
179
+ return Tt
180
+
181
+
182
+ def eval_batch(image, labels, evaluator, index):
183
+ evaluator.zero_grad()
184
+ # Save input image
185
+ if args.save_img:
186
+ img = image[0].permute(1, 2, 0).data.cpu().numpy()
187
+ img = 255 * (img - img.min()) / (img.max() - img.min())
188
+ img = img.astype('uint8')
189
+ Image.fromarray(img, 'RGB').save(os.path.join(saver.results_dir, 'input/{}_input.png'.format(index)))
190
+ Image.fromarray((labels.repeat(3, 1, 1).permute(1, 2, 0).data.cpu().numpy() * 255).astype('uint8'), 'RGB').save(
191
+ os.path.join(saver.results_dir, 'input/{}_mask.png'.format(index)))
192
+
193
+ image.requires_grad = True
194
+
195
+ image = image.requires_grad_()
196
+ predictions = evaluator(image)
197
+ Res = generate_relevance(model, image.cuda())
198
+
199
+ # threshold between FG and BG is the mean
200
+ Res = (Res - Res.min()) / (Res.max() - Res.min())
201
+
202
+ ret = Res.mean()
203
+
204
+ Res_1 = Res.gt(ret).type(Res.type())
205
+ Res_0 = Res.le(ret).type(Res.type())
206
+
207
+ Res_1_AP = Res
208
+ Res_0_AP = 1 - Res
209
+
210
+ Res_1[Res_1 != Res_1] = 0
211
+ Res_0[Res_0 != Res_0] = 0
212
+ Res_1_AP[Res_1_AP != Res_1_AP] = 0
213
+ Res_0_AP[Res_0_AP != Res_0_AP] = 0
214
+
215
+ # TEST
216
+ pred = Res.clamp(min=args.thr) / Res.max()
217
+ pred = pred.view(-1).data.cpu().numpy()
218
+ target = labels.view(-1).data.cpu().numpy()
219
+ # print("target", target.shape)
220
+
221
+ output = torch.cat((Res_0, Res_1), 1)
222
+ output_AP = torch.cat((Res_0_AP, Res_1_AP), 1)
223
+
224
+ if args.save_img:
225
+ # Save predicted mask
226
+ mask = F.interpolate(Res_1, [64, 64], mode='bilinear')
227
+ mask = mask[0].squeeze().data.cpu().numpy()
228
+ # mask = Res_1[0].squeeze().data.cpu().numpy()
229
+ mask = 255 * mask
230
+ mask = mask.astype('uint8')
231
+ imageio.imsave(os.path.join(args.exp_img_path, 'mask_' + str(index) + '.jpg'), mask)
232
+
233
+ relevance = F.interpolate(Res, [64, 64], mode='bilinear')
234
+ relevance = relevance[0].permute(1, 2, 0).data.cpu().numpy()
235
+ # relevance = Res[0].permute(1, 2, 0).data.cpu().numpy()
236
+ hm = np.sum(relevance, axis=-1)
237
+ maps = (render.hm_to_rgb(hm, scaling=3, sigma=1, cmap='seismic') * 255).astype(np.uint8)
238
+ imageio.imsave(os.path.join(args.exp_img_path, 'heatmap_' + str(index) + '.jpg'), maps)
239
+
240
+ # Evaluate Segmentation
241
+ batch_inter, batch_union, batch_correct, batch_label = 0, 0, 0, 0
242
+ batch_ap, batch_f1 = 0, 0
243
+
244
+ # Segmentation resutls
245
+ correct, labeled = batch_pix_accuracy(output[0].data.cpu(), labels[0])
246
+ inter, union = batch_intersection_union(output[0].data.cpu(), labels[0], 2)
247
+ batch_correct += correct
248
+ batch_label += labeled
249
+ batch_inter += inter
250
+ batch_union += union
251
+ # print("output", output.shape)
252
+ # print("ap labels", labels.shape)
253
+ # ap = np.nan_to_num(get_ap_scores(output, labels))
254
+ ap = np.nan_to_num(get_ap_scores(output_AP, labels))
255
+ # f1 = np.nan_to_num(get_f1_scores(output[0, 1].data.cpu(), labels[0]))
256
+ batch_ap += ap
257
+ # batch_f1 += f1
258
+
259
+ # return batch_correct, batch_label, batch_inter, batch_union, batch_ap, batch_f1, pred, target
260
+ return batch_correct, batch_label, batch_inter, batch_union, batch_ap, pred, target
261
+
262
+
263
+ total_inter, total_union, total_correct, total_label = np.int64(0), np.int64(0), np.int64(0), np.int64(0)
264
+ total_ap, total_f1 = [], []
265
+
266
+ predictions, targets = [], []
267
+ for batch_idx, (image, labels) in enumerate(iterator):
268
+
269
+ if args.method == "blur":
270
+ images = (image[0].cuda(), image[1].cuda())
271
+ else:
272
+ images = image.cuda()
273
+ labels = labels.cuda()
274
+ # print("image", image.shape)
275
+ # print("lables", labels.shape)
276
+
277
+ # correct, labeled, inter, union, ap, f1, pred, target = eval_batch(images, labels, model, batch_idx)
278
+ correct, labeled, inter, union, ap, pred, target = eval_batch(images, labels, model, batch_idx)
279
+
280
+ predictions.append(pred)
281
+ targets.append(target)
282
+
283
+ total_correct += correct.astype('int64')
284
+ total_label += labeled.astype('int64')
285
+ total_inter += inter.astype('int64')
286
+ total_union += union.astype('int64')
287
+ total_ap += [ap]
288
+ # total_f1 += [f1]
289
+ pixAcc = np.float64(1.0) * total_correct / (np.spacing(1, dtype=np.float64) + total_label)
290
+ IoU = np.float64(1.0) * total_inter / (np.spacing(1, dtype=np.float64) + total_union)
291
+ mIoU = IoU.mean()
292
+ mAp = np.mean(total_ap)
293
+ # mF1 = np.mean(total_f1)
294
+ # iterator.set_description('pixAcc: %.4f, mIoU: %.4f, mAP: %.4f, mF1: %.4f' % (pixAcc, mIoU, mAp, mF1))
295
+ iterator.set_description('pixAcc: %.4f, mIoU: %.4f, mAP: %.4f' % (pixAcc, mIoU, mAp))
296
+
297
+ predictions = np.concatenate(predictions)
298
+ targets = np.concatenate(targets)
299
+ pr, rc, thr = precision_recall_curve(targets, predictions)
300
+ np.save(os.path.join(saver.experiment_dir, 'precision.npy'), pr)
301
+ np.save(os.path.join(saver.experiment_dir, 'recall.npy'), rc)
302
+
303
+ plt.figure()
304
+ plt.plot(rc, pr)
305
+ plt.savefig(os.path.join(saver.experiment_dir, 'PR_curve_{}.png'.format(args.method)))
306
+
307
+ txtfile = os.path.join(saver.experiment_dir, 'result_mIoU_%.4f.txt' % mIoU)
308
+ # txtfile = 'result_mIoU_%.4f.txt' % mIoU
309
+ fh = open(txtfile, 'w')
310
+ print("Mean IoU over %d classes: %.4f\n" % (2, mIoU))
311
+ print("Pixel-wise Accuracy: %2.2f%%\n" % (pixAcc * 100))
312
+ print("Mean AP over %d classes: %.4f\n" % (2, mAp))
313
+ # print("Mean F1 over %d classes: %.4f\n" % (2, mF1))
314
+
315
+ fh.write("Mean IoU over %d classes: %.4f\n" % (2, mIoU))
316
+ fh.write("Pixel-wise Accuracy: %2.2f%%\n" % (pixAcc * 100))
317
+ fh.write("Mean AP over %d classes: %.4f\n" % (2, mAp))
318
+ # fh.write("Mean F1 over %d classes: %.4f\n" % (2, mF1))
319
+ fh.close()
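The heart of `eval_batch` above is the conversion of a continuous relevance map into foreground/background predictions by thresholding at the map's mean; a stand-alone sketch of that step with an invented relevance tensor:

import torch

Res = torch.rand(1, 1, 224, 224)                      # stand-in relevance map
Res = (Res - Res.min()) / (Res.max() - Res.min())     # rescale to [0, 1]
thr = Res.mean()                                      # the mean acts as the FG/BG threshold
Res_1 = Res.gt(thr).type(Res.type())                  # hard foreground mask
Res_0 = Res.le(thr).type(Res.type())                  # hard background mask
output = torch.cat((Res_0, Res_1), 1)                 # (1, 2, H, W) map fed to the segmentation metrics
output_AP = torch.cat((1 - Res, Res), 1)              # soft scores used for average precision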
SegmentationTest/utils/__init__.py ADDED
File without changes
SegmentationTest/utils/confusionmatrix.py ADDED
@@ -0,0 +1,88 @@
1
+ import numpy as np
2
+ import torch
3
+ from . import metric
4
+
5
+
6
+ class ConfusionMatrix(metric.Metric):
7
+ """Constructs a confusion matrix for a multi-class classification problems.
8
+ Does not support multi-label, multi-class problems.
9
+ Keyword arguments:
10
+ - num_classes (int): number of classes in the classification problem.
11
+ - normalized (boolean, optional): Determines whether the confusion
12
+ matrix is normalized. Default: False.
13
+ Modified from: https://github.com/pytorch/tnt/blob/master/torchnet/meter/confusionmeter.py
14
+ """
15
+
16
+ def __init__(self, num_classes, normalized=False):
17
+ super().__init__()
18
+
19
+ self.conf = np.ndarray((num_classes, num_classes), dtype=np.int32)
20
+ self.normalized = normalized
21
+ self.num_classes = num_classes
22
+ self.reset()
23
+
24
+ def reset(self):
25
+ self.conf.fill(0)
26
+
27
+ def add(self, predicted, target):
28
+ """Computes the confusion matrix
29
+ The shape of the confusion matrix is K x K, where K is the number
30
+ of classes.
31
+ Keyword arguments:
32
+ - predicted (Tensor or numpy.ndarray): Can be an N x K tensor/array of
33
+ predicted scores obtained from the model for N examples and K classes,
34
+ or an N-tensor/array of integer values between 0 and K-1.
35
+ - target (Tensor or numpy.ndarray): Can be an N x K tensor/array of
36
+ ground-truth classes for N examples and K classes, or an N-tensor/array
37
+ of integer values between 0 and K-1.
38
+ """
39
+ # If target and/or predicted are tensors, convert them to numpy arrays
40
+ if torch.is_tensor(predicted):
41
+ predicted = predicted.cpu().numpy()
42
+ if torch.is_tensor(target):
43
+ target = target.cpu().numpy()
44
+
45
+ assert predicted.shape[0] == target.shape[0], \
46
+ 'number of targets and predicted outputs do not match'
47
+
48
+ if np.ndim(predicted) != 1:
49
+ assert predicted.shape[1] == self.num_classes, \
50
+ 'number of predictions does not match size of confusion matrix'
51
+ predicted = np.argmax(predicted, 1)
52
+ else:
53
+ assert (predicted.max() < self.num_classes) and (predicted.min() >= 0), \
54
+ 'predicted values are not between 0 and k-1'
55
+
56
+ if np.ndim(target) != 1:
57
+ assert target.shape[1] == self.num_classes, \
58
+ 'Onehot target does not match size of confusion matrix'
59
+ assert (target >= 0).all() and (target <= 1).all(), \
60
+ 'in one-hot encoding, target values should be 0 or 1'
61
+ assert (target.sum(1) == 1).all(), \
62
+ 'multi-label setting is not supported'
63
+ target = np.argmax(target, 1)
64
+ else:
65
+ assert (target.max() < self.num_classes) and (target.min() >= 0), \
66
+ 'target values are not between 0 and k-1'
67
+
68
+ # hack for bincounting 2 arrays together
69
+ x = predicted + self.num_classes * target
70
+ bincount_2d = np.bincount(
71
+ x.astype(np.int32), minlength=self.num_classes**2)
72
+ assert bincount_2d.size == self.num_classes**2
73
+ conf = bincount_2d.reshape((self.num_classes, self.num_classes))
74
+
75
+ self.conf += conf
76
+
77
+ def value(self):
78
+ """
79
+ Returns:
80
+ Confustion matrix of K rows and K columns, where rows corresponds
81
+ to ground-truth targets and columns corresponds to predicted
82
+ targets.
83
+ """
84
+ if self.normalized:
85
+ conf = self.conf.astype(np.float32)
86
+ return conf / conf.sum(1).clip(min=1e-12)[:, None]
87
+ else:
88
+ return self.conf
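A small usage sketch with invented values; rows of the returned matrix correspond to ground-truth classes and columns to predictions:

import torch
from SegmentationTest.utils.confusionmatrix import ConfusionMatrix

cm = ConfusionMatrix(num_classes=2)
pred = torch.tensor([0, 1, 1, 0])   # per-pixel predictions
gt = torch.tensor([0, 1, 0, 0])     # per-pixel ground truth
cm.add(pred, gt)
print(cm.value())
# [[2 1]
#  [0 1]]  -> two correct background pixels, one background pixel predicted as foreground, one correct foreground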
SegmentationTest/utils/iou.py ADDED
@@ -0,0 +1,93 @@
1
+ import torch
2
+ import numpy as np
3
+ from . import metric
4
+ from .confusionmatrix import ConfusionMatrix
5
+
6
+
7
+ class IoU(metric.Metric):
8
+ """Computes the intersection over union (IoU) per class and corresponding
9
+ mean (mIoU).
10
+
11
+ Intersection over union (IoU) is a common evaluation metric for semantic
12
+ segmentation. The predictions are first accumulated in a confusion matrix
13
+ and the IoU is computed from it as follows:
14
+
15
+ IoU = true_positive / (true_positive + false_positive + false_negative).
16
+
17
+ Keyword arguments:
18
+ - num_classes (int): number of classes in the classification problem
19
+ - normalized (boolean, optional): Determines whether the confusion
20
+ matrix is normalized. Default: False.
21
+ - ignore_index (int or iterable, optional): Index of the classes to ignore
22
+ when computing the IoU. Can be an int, or any iterable of ints.
23
+ """
24
+
25
+ def __init__(self, num_classes, normalized=False, ignore_index=None):
26
+ super().__init__()
27
+ self.conf_metric = ConfusionMatrix(num_classes, normalized)
28
+
29
+ if ignore_index is None:
30
+ self.ignore_index = None
31
+ elif isinstance(ignore_index, int):
32
+ self.ignore_index = (ignore_index,)
33
+ else:
34
+ try:
35
+ self.ignore_index = tuple(ignore_index)
36
+ except TypeError:
37
+ raise ValueError("'ignore_index' must be an int or iterable")
38
+
39
+ def reset(self):
40
+ self.conf_metric.reset()
41
+
42
+ def add(self, predicted, target):
43
+ """Adds the predicted and target pair to the IoU metric.
44
+
45
+ Keyword arguments:
46
+ - predicted (Tensor): Can be a (N, K, H, W) tensor of
47
+ predicted scores obtained from the model for N examples and K classes,
48
+ or (N, H, W) tensor of integer values between 0 and K-1.
49
+ - target (Tensor): Can be a (N, K, H, W) tensor of
50
+ target scores for N examples and K classes, or (N, H, W) tensor of
51
+ integer values between 0 and K-1.
52
+
53
+ """
54
+ # Dimensions check
55
+ assert predicted.size(0) == target.size(0), \
56
+ 'number of targets and predicted outputs do not match'
57
+ assert predicted.dim() == 3 or predicted.dim() == 4, \
58
+ "predictions must be of dimension (N, H, W) or (N, K, H, W)"
59
+ assert target.dim() == 3 or target.dim() == 4, \
60
+ "targets must be of dimension (N, H, W) or (N, K, H, W)"
61
+
62
+ # If the tensor is in categorical format convert it to integer format
63
+ if predicted.dim() == 4:
64
+ _, predicted = predicted.max(1)
65
+ if target.dim() == 4:
66
+ _, target = target.max(1)
67
+
68
+ self.conf_metric.add(predicted.view(-1), target.view(-1))
69
+
70
+ def value(self):
71
+ """Computes the IoU and mean IoU.
72
+
73
+ The mean computation ignores NaN elements of the IoU array.
74
+
75
+ Returns:
76
+ Tuple: (IoU, mIoU). The first output is the per class IoU,
77
+ for K classes it is a numpy.ndarray with K elements. The second output
78
+ is the mean IoU.
79
+ """
80
+ conf_matrix = self.conf_metric.value()
81
+ if self.ignore_index is not None:
82
+ for index in self.ignore_index:
83
+ conf_matrix[:, index] = 0
84
+ conf_matrix[index, :] = 0
85
+ true_positive = np.diag(conf_matrix)
86
+ false_positive = np.sum(conf_matrix, 0) - true_positive
87
+ false_negative = np.sum(conf_matrix, 1) - true_positive
88
+
89
+ # Just in case we get a division by 0, ignore/hide the error
90
+ with np.errstate(divide='ignore', invalid='ignore'):
91
+ iou = true_positive / (true_positive + false_positive + false_negative)
92
+
93
+ return iou, np.nanmean(iou)
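A toy example (values invented) working through the IoU formula above:

import torch
from SegmentationTest.utils.iou import IoU

metric = IoU(num_classes=2)
pred = torch.tensor([[[0, 1], [1, 0]]])   # (N, H, W) integer predictions
gt = torch.tensor([[[0, 1], [0, 0]]])     # (N, H, W) integer targets
metric.add(pred, gt)
per_class_iou, miou = metric.value()
# class 0: 2 / (2 + 0 + 1) = 0.67, class 1: 1 / (1 + 1 + 0) = 0.50, mIoU ~ 0.58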
SegmentationTest/utils/metric.py ADDED
@@ -0,0 +1,12 @@
1
+ class Metric(object):
2
+ """Base class for all metrics.
3
+ From: https://github.com/pytorch/tnt/blob/master/torchnet/meter/meter.py
4
+ """
5
+ def reset(self):
6
+ pass
7
+
8
+ def add(self):
9
+ pass
10
+
11
+ def value(self):
12
+ pass
SegmentationTest/utils/metrices.py ADDED
@@ -0,0 +1,208 @@
1
+ import numpy as np
2
+ import torch
3
+ from sklearn.metrics import f1_score, average_precision_score
4
+ from sklearn.metrics import precision_recall_curve, roc_curve
5
+
6
+ SMOOTH = 1e-6
7
+ __all__ = ['get_f1_scores', 'get_ap_scores', 'batch_pix_accuracy', 'batch_intersection_union', 'get_iou', 'get_pr',
8
+ 'get_roc', 'get_ap_multiclass']
9
+
10
+
11
+ def get_iou(outputs: torch.Tensor, labels: torch.Tensor):
12
+ # You can comment out this line if you are passing tensors of equal shape
13
+ # But if you are passing output from UNet or something it will most probably
14
+ # be with the BATCH x 1 x H x W shape
15
+ outputs = outputs.squeeze(1) # BATCH x 1 x H x W => BATCH x H x W
16
+ labels = labels.squeeze(1) # BATCH x 1 x H x W => BATCH x H x W
17
+
18
+ intersection = (outputs & labels).float().sum((1, 2)) # Will be zero if Truth=0 or Prediction=0
19
+ union = (outputs | labels).float().sum((1, 2)) # Will be zero if both are 0
20
+
21
+ iou = (intersection + SMOOTH) / (union + SMOOTH) # We smooth our division to avoid 0/0
22
+
23
+ return iou.cpu().numpy()
24
+
25
+
26
+ def get_f1_scores(predict, target, ignore_index=-1):
27
+ # Tensor process
28
+ batch_size = predict.shape[0]
29
+ predict = predict.data.cpu().numpy().reshape(-1)
30
+ target = target.data.cpu().numpy().reshape(-1)
31
+ pb = predict[target != ignore_index].reshape(batch_size, -1)
32
+ tb = target[target != ignore_index].reshape(batch_size, -1)
33
+
34
+ total = []
35
+ for p, t in zip(pb, tb):
36
+ total.append(np.nan_to_num(f1_score(t, p)))
37
+
38
+ return total
39
+
40
+
41
+ def get_roc(predict, target, ignore_index=-1):
42
+ target_expand = target.unsqueeze(1).expand_as(predict)
43
+ target_expand_numpy = target_expand.data.cpu().numpy().reshape(-1)
44
+ # Tensor process
45
+ x = torch.zeros_like(target_expand)
46
+ t = target.unsqueeze(1).clamp(min=0)
47
+ target_1hot = x.scatter_(1, t, 1)
48
+ batch_size = predict.shape[0]
49
+ predict = predict.data.cpu().numpy().reshape(-1)
50
+ target = target_1hot.data.cpu().numpy().reshape(-1)
51
+ pb = predict[target_expand_numpy != ignore_index].reshape(batch_size, -1)
52
+ tb = target[target_expand_numpy != ignore_index].reshape(batch_size, -1)
53
+
54
+ total = []
55
+ for p, t in zip(pb, tb):
56
+ total.append(roc_curve(t, p))
57
+
58
+ return total
59
+
60
+
61
+ def get_pr(predict, target, ignore_index=-1):
62
+ target_expand = target.unsqueeze(1).expand_as(predict)
63
+ target_expand_numpy = target_expand.data.cpu().numpy().reshape(-1)
64
+ # Tensor process
65
+ x = torch.zeros_like(target_expand)
66
+ t = target.unsqueeze(1).clamp(min=0)
67
+ target_1hot = x.scatter_(1, t, 1)
68
+ batch_size = predict.shape[0]
69
+ predict = predict.data.cpu().numpy().reshape(-1)
70
+ target = target_1hot.data.cpu().numpy().reshape(-1)
71
+ pb = predict[target_expand_numpy != ignore_index].reshape(batch_size, -1)
72
+ tb = target[target_expand_numpy != ignore_index].reshape(batch_size, -1)
73
+
74
+ total = []
75
+ for p, t in zip(pb, tb):
76
+ total.append(precision_recall_curve(t, p))
77
+
78
+ return total
79
+
80
+
81
+ def get_ap_scores(predict, target, ignore_index=-1):
82
+ total = []
83
+ for pred, tgt in zip(predict, target):
84
+ target_expand = tgt.unsqueeze(0).expand_as(pred)
85
+ target_expand_numpy = target_expand.data.cpu().numpy().reshape(-1)
86
+
87
+ # Tensor process
88
+ x = torch.zeros_like(target_expand)
89
+ t = tgt.unsqueeze(0).clamp(min=0).long()
90
+ target_1hot = x.scatter_(0, t, 1)
91
+ predict_flat = pred.data.cpu().numpy().reshape(-1)
92
+ target_flat = target_1hot.data.cpu().numpy().reshape(-1)
93
+
94
+ p = predict_flat[target_expand_numpy != ignore_index]
95
+ t = target_flat[target_expand_numpy != ignore_index]
96
+
97
+ total.append(np.nan_to_num(average_precision_score(t, p)))
98
+
99
+ return total
100
+
101
+
102
+ def get_ap_multiclass(predict, target):
103
+ total = []
104
+ for pred, tgt in zip(predict, target):
105
+ predict_flat = pred.data.cpu().numpy().reshape(-1)
106
+ target_flat = tgt.data.cpu().numpy().reshape(-1)
107
+
108
+ total.append(np.nan_to_num(average_precision_score(target_flat, predict_flat)))
109
+
110
+ return total
111
+
112
+
113
+ def batch_precision_recall(predict, target, thr=0.5):
114
+ """Batch Precision Recall
115
+ Args:
116
+ predict: input 4D tensor
117
+ target: label 4D tensor
118
+ """
119
+ # _, predict = torch.max(predict, 1)
120
+
121
+ predict = predict > thr
122
+ predict = predict.data.cpu().numpy() + 1
123
+ target = target.data.cpu().numpy() + 1
124
+
125
+ tp = np.sum(((predict == 2) * (target == 2)) * (target > 0))
126
+ fp = np.sum(((predict == 2) * (target == 1)) * (target > 0))
127
+ fn = np.sum(((predict == 1) * (target == 2)) * (target > 0))
128
+
129
+ precision = float(np.nan_to_num(tp / (tp + fp)))
130
+ recall = float(np.nan_to_num(tp / (tp + fn)))
131
+
132
+ return precision, recall
133
+
134
+
135
+ def batch_pix_accuracy(predict, target):
136
+ """Batch Pixel Accuracy
137
+ Args:
138
+ predict: input 3D tensor
139
+ target: label 3D tensor
140
+ """
141
+
142
+ # for thr in np.linspace(0, 1, slices):
143
+
144
+ _, predict = torch.max(predict, 0)
145
+ predict = predict.cpu().numpy() + 1
146
+ target = target.cpu().numpy() + 1
147
+ pixel_labeled = np.sum(target > 0)
148
+ pixel_correct = np.sum((predict == target) * (target > 0))
149
+ assert pixel_correct <= pixel_labeled, \
150
+ "Correct area should be smaller than Labeled"
151
+ return pixel_correct, pixel_labeled
152
+
153
+
154
+ def batch_intersection_union(predict, target, nclass):
155
+ """Batch Intersection of Union
156
+ Args:
157
+ predict: input 3D tensor
158
+ target: label 3D tensor
159
+ nclass: number of categories (int)
160
+ """
161
+ _, predict = torch.max(predict, 0)
162
+ mini = 1
163
+ maxi = nclass
164
+ nbins = nclass
165
+ predict = predict.cpu().numpy() + 1
166
+ target = target.cpu().numpy() + 1
167
+
168
+ predict = predict * (target > 0).astype(predict.dtype)
169
+ intersection = predict * (predict == target)
170
+ # areas of intersection and union
171
+ area_inter, _ = np.histogram(intersection, bins=nbins, range=(mini, maxi))
172
+ area_pred, _ = np.histogram(predict, bins=nbins, range=(mini, maxi))
173
+ area_lab, _ = np.histogram(target, bins=nbins, range=(mini, maxi))
174
+ area_union = area_pred + area_lab - area_inter
175
+ assert (area_inter <= area_union).all(), \
176
+ "Intersection area should be smaller than Union area"
177
+ return area_inter, area_union
178
+
179
+
180
+ # ref https://github.com/CSAILVision/sceneparsing/blob/master/evaluationCode/utils_eval.py
181
+ def pixel_accuracy(im_pred, im_lab):
182
+ im_pred = np.asarray(im_pred)
183
+ im_lab = np.asarray(im_lab)
184
+
185
+ # Remove classes from unlabeled pixels in gt image.
186
+ # We should not penalize detections in unlabeled portions of the image.
187
+ pixel_labeled = np.sum(im_lab > 0)
188
+ pixel_correct = np.sum((im_pred == im_lab) * (im_lab > 0))
189
+ # pixel_accuracy = 1.0 * pixel_correct / pixel_labeled
190
+ return pixel_correct, pixel_labeled
191
+
192
+
193
+ def intersection_and_union(im_pred, im_lab, num_class):
194
+ im_pred = np.asarray(im_pred)
195
+ im_lab = np.asarray(im_lab)
196
+ # Remove classes from unlabeled pixels in gt image.
197
+ im_pred = im_pred * (im_lab > 0)
198
+ # Compute area intersection:
199
+ intersection = im_pred * (im_pred == im_lab)
200
+ area_inter, _ = np.histogram(intersection, bins=num_class - 1,
201
+ range=(1, num_class - 1))
202
+ # Compute area union:
203
+ area_pred, _ = np.histogram(im_pred, bins=num_class - 1,
204
+ range=(1, num_class - 1))
205
+ area_lab, _ = np.histogram(im_lab, bins=num_class - 1,
206
+ range=(1, num_class - 1))
207
+ area_union = area_pred + area_lab - area_inter
208
+ return area_inter, area_union
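`batch_pix_accuracy` and `batch_intersection_union` expect a per-class score map shaped (num_classes, H, W) and an integer label map; a toy example with invented scores:

import torch
from SegmentationTest.utils.metrices import batch_pix_accuracy, batch_intersection_union

scores = torch.tensor([[[0.9, 0.2], [0.4, 0.8]],    # class-0 (background) scores
                       [[0.1, 0.8], [0.6, 0.2]]])   # class-1 (foreground) scores
labels = torch.tensor([[0, 1], [0, 0]])             # (H, W) ground truth

correct, labeled = batch_pix_accuracy(scores, labels)        # 3 correct out of 4 labeled pixels
inter, union = batch_intersection_union(scores, labels, 2)   # inter = [2, 1], union = [3, 2]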
SegmentationTest/utils/parallel.py ADDED
@@ -0,0 +1,260 @@
1
+ ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2
+ ## Created by: Hang Zhang
3
+ ## ECE Department, Rutgers University
4
+ ## Email: zhang.hang@rutgers.edu
5
+ ## Copyright (c) 2017
6
+ ##
7
+ ## This source code is licensed under the MIT-style license found in the
8
+ ## LICENSE file in the root directory of this source tree
9
+ ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
10
+
11
+ """Encoding Data Parallel"""
12
+ import threading
13
+ import functools
14
+ import torch
15
+ from torch.autograd import Variable, Function
16
+ import torch.cuda.comm as comm
17
+ from torch.nn.parallel.data_parallel import DataParallel
18
+ from torch.nn.parallel.parallel_apply import get_a_var
19
+ from torch.nn.parallel._functions import ReduceAddCoalesced, Broadcast
20
+
21
+ torch_ver = torch.__version__[:3]
22
+
23
+ __all__ = ['allreduce', 'DataParallelModel', 'DataParallelCriterion',
24
+ 'patch_replication_callback']
25
+
26
+ def allreduce(*inputs):
27
+ """Cross GPU all reduce autograd operation for calculate mean and
28
+ variance in SyncBN.
29
+ """
30
+ return AllReduce.apply(*inputs)
31
+
32
+ class AllReduce(Function):
33
+ @staticmethod
34
+ def forward(ctx, num_inputs, *inputs):
35
+ ctx.num_inputs = num_inputs
36
+ ctx.target_gpus = [inputs[i].get_device() for i in range(0, len(inputs), num_inputs)]
37
+ inputs = [inputs[i:i + num_inputs]
38
+ for i in range(0, len(inputs), num_inputs)]
39
+ # sort before reduce sum
40
+ inputs = sorted(inputs, key=lambda i: i[0].get_device())
41
+ results = comm.reduce_add_coalesced(inputs, ctx.target_gpus[0])
42
+ outputs = comm.broadcast_coalesced(results, ctx.target_gpus)
43
+ return tuple([t for tensors in outputs for t in tensors])
44
+
45
+ @staticmethod
46
+ def backward(ctx, *inputs):
47
+ inputs = [i.data for i in inputs]
48
+ inputs = [inputs[i:i + ctx.num_inputs]
49
+ for i in range(0, len(inputs), ctx.num_inputs)]
50
+ results = comm.reduce_add_coalesced(inputs, ctx.target_gpus[0])
51
+ outputs = comm.broadcast_coalesced(results, ctx.target_gpus)
52
+ return (None,) + tuple([Variable(t) for tensors in outputs for t in tensors])
53
+
54
+
55
+ class Reduce(Function):
56
+ @staticmethod
57
+ def forward(ctx, *inputs):
58
+ ctx.target_gpus = [inputs[i].get_device() for i in range(len(inputs))]
59
+ inputs = sorted(inputs, key=lambda i: i.get_device())
60
+ return comm.reduce_add(inputs)
61
+
62
+ @staticmethod
63
+ def backward(ctx, gradOutput):
64
+ return Broadcast.apply(ctx.target_gpus, gradOutput)
65
+
66
+
67
+ class DataParallelModel(DataParallel):
68
+ """Implements data parallelism at the module level.
69
+
70
+ This container parallelizes the application of the given module by
71
+ splitting the input across the specified devices by chunking in the
72
+ batch dimension.
73
+ In the forward pass, the module is replicated on each device,
74
+ and each replica handles a portion of the input. During the backwards pass, gradients from each replica are summed into the original module.
75
+ Note that the outputs are not gathered, please use compatible
76
+ :class:`encoding.parallel.DataParallelCriterion`.
77
+
78
+ The batch size should be larger than the number of GPUs used. It should
79
+ also be an integer multiple of the number of GPUs so that each chunk is
80
+ the same size (so that each GPU processes the same number of samples).
81
+
82
+ Args:
83
+ module: module to be parallelized
84
+ device_ids: CUDA devices (default: all devices)
85
+
86
+ Reference:
87
+ Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, Ambrish Tyagi,
88
+ Amit Agrawal. "Context Encoding for Semantic Segmentation."
89
+ *The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2018*
90
+
91
+ Example::
92
+
93
+ >>> net = encoding.nn.DataParallelModel(model, device_ids=[0, 1, 2])
94
+ >>> y = net(x)
95
+ """
96
+ def gather(self, outputs, output_device):
97
+ return outputs
98
+
99
+ def replicate(self, module, device_ids):
100
+ modules = super(DataParallelModel, self).replicate(module, device_ids)
101
+ execute_replication_callbacks(modules)
102
+ return modules
103
+
104
+
105
+ class DataParallelCriterion(DataParallel):
106
+ """
107
+ Calculate loss in multiple-GPUs, which balance the memory usage for
108
+ Semantic Segmentation.
109
+
110
+ The targets are split across the specified devices by chunking in
111
+ the batch dimension. Please use together with :class:`encoding.parallel.DataParallelModel`.
112
+
113
+ Reference:
114
+ Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, Ambrish Tyagi,
115
+ Amit Agrawal. "Context Encoding for Semantic Segmentation."
116
+ *The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2018*
117
+
118
+ Example::
119
+
120
+ >>> net = encoding.nn.DataParallelModel(model, device_ids=[0, 1, 2])
121
+ >>> criterion = encoding.nn.DataParallelCriterion(criterion, device_ids=[0, 1, 2])
122
+ >>> y = net(x)
123
+ >>> loss = criterion(y, target)
124
+ """
125
+ def forward(self, inputs, *targets, **kwargs):
126
+ # input should already be scattered
127
+ # scattering the targets instead
128
+ if not self.device_ids:
129
+ return self.module(inputs, *targets, **kwargs)
130
+ targets, kwargs = self.scatter(targets, kwargs, self.device_ids)
131
+ if len(self.device_ids) == 1:
132
+ return self.module(inputs, *targets[0], **kwargs[0])
133
+ replicas = self.replicate(self.module, self.device_ids[:len(inputs)])
134
+ outputs = _criterion_parallel_apply(replicas, inputs, targets, kwargs)
135
+ return Reduce.apply(*outputs) / len(outputs)
136
+ #return self.gather(outputs, self.output_device).mean()
137
+
138
+
139
+ def _criterion_parallel_apply(modules, inputs, targets, kwargs_tup=None, devices=None):
140
+ assert len(modules) == len(inputs)
141
+ assert len(targets) == len(inputs)
142
+ if kwargs_tup:
143
+ assert len(modules) == len(kwargs_tup)
144
+ else:
145
+ kwargs_tup = ({},) * len(modules)
146
+ if devices is not None:
147
+ assert len(modules) == len(devices)
148
+ else:
149
+ devices = [None] * len(modules)
150
+
151
+ lock = threading.Lock()
152
+ results = {}
153
+ if torch_ver != "0.3":
154
+ grad_enabled = torch.is_grad_enabled()
155
+
156
+ def _worker(i, module, input, target, kwargs, device=None):
157
+ if torch_ver != "0.3":
158
+ torch.set_grad_enabled(grad_enabled)
159
+ if device is None:
160
+ device = get_a_var(input).get_device()
161
+ try:
162
+ with torch.cuda.device(device):
163
+ # this also avoids accidental slicing of `input` if it is a Tensor
164
+ if not isinstance(input, (list, tuple)):
165
+ input = (input,)
166
+ if type(input) != type(target):
167
+ if isinstance(target, tuple):
168
+ input = tuple(input)
169
+ elif isinstance(target, list):
170
+ input = list(input)
171
+ else:
172
+ raise Exception("Types problem")
173
+
174
+ output = module(*(input + target), **kwargs)
175
+ with lock:
176
+ results[i] = output
177
+ except Exception as e:
178
+ with lock:
179
+ results[i] = e
180
+
181
+ if len(modules) > 1:
182
+ threads = [threading.Thread(target=_worker,
183
+ args=(i, module, input, target,
184
+ kwargs, device),)
185
+ for i, (module, input, target, kwargs, device) in
186
+ enumerate(zip(modules, inputs, targets, kwargs_tup, devices))]
187
+
188
+ for thread in threads:
189
+ thread.start()
190
+ for thread in threads:
191
+ thread.join()
192
+ else:
193
+ _worker(0, modules[0], inputs[0], targets[0], kwargs_tup[0], devices[0])
194
+
195
+ outputs = []
196
+ for i in range(len(inputs)):
197
+ output = results[i]
198
+ if isinstance(output, Exception):
199
+ raise output
200
+ outputs.append(output)
201
+ return outputs
202
+
203
+
204
+ ###########################################################################
205
+ # Adapted from Synchronized-BatchNorm-PyTorch.
206
+ # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch
207
+ #
208
+ class CallbackContext(object):
209
+ pass
210
+
211
+
212
+ def execute_replication_callbacks(modules):
213
+ """
214
+ Execute a replication callback `__data_parallel_replicate__` on each module created
215
+ by original replication.
216
+
217
+ The callback will be invoked with arguments `__data_parallel_replicate__(ctx, copy_id)`
218
+
219
+ Note that, as all replicated modules are isomorphic, we assign each sub-module a context
220
+ (shared among multiple copies of this module on different devices).
221
+ Through this context, different copies can share some information.
222
+
223
+ We guarantee that the callback on the master copy (the first copy) will be called ahead
224
+ of calling the callback of any slave copies.
225
+ """
226
+ master_copy = modules[0]
227
+ nr_modules = len(list(master_copy.modules()))
228
+ ctxs = [CallbackContext() for _ in range(nr_modules)]
229
+
230
+ for i, module in enumerate(modules):
231
+ for j, m in enumerate(module.modules()):
232
+ if hasattr(m, '__data_parallel_replicate__'):
233
+ m.__data_parallel_replicate__(ctxs[j], i)
234
+
235
+
236
+ def patch_replication_callback(data_parallel):
237
+ """
238
+ Monkey-patch an existing `DataParallel` object. Add the replication callback.
239
+ Useful when you have a customized `DataParallel` implementation.
240
+
241
+ Examples:
242
+ > sync_bn = SynchronizedBatchNorm1d(10, eps=1e-5, affine=False)
243
+ > sync_bn = DataParallel(sync_bn, device_ids=[0, 1])
244
+ > patch_replication_callback(sync_bn)
245
+ # this is equivalent to
246
+ > sync_bn = SynchronizedBatchNorm1d(10, eps=1e-5, affine=False)
247
+ > sync_bn = DataParallelWithCallback(sync_bn, device_ids=[0, 1])
248
+ """
249
+
250
+ assert isinstance(data_parallel, DataParallel)
251
+
252
+ old_replicate = data_parallel.replicate
253
+
254
+ @functools.wraps(old_replicate)
255
+ def new_replicate(module, device_ids):
256
+ modules = old_replicate(module, device_ids)
257
+ execute_replication_callbacks(modules)
258
+ return modules
259
+
260
+ data_parallel.replicate = new_replicate
SegmentationTest/utils/render.py ADDED
@@ -0,0 +1,266 @@
1
+ import numpy as np
2
+ import matplotlib.cm
3
+ import skimage.io
4
+ import skimage.feature
5
+ import skimage.filters
6
+
7
+
8
+ def vec2im(V, shape=()):
9
+ '''
10
+ Transform an array V into a specified shape - or if no shape is given assume a square output format.
11
+
12
+ Parameters
13
+ ----------
14
+
15
+ V : numpy.ndarray
16
+ an array either representing a matrix or vector to be reshaped into a two-dimensional image
17
+
18
+ shape : tuple or list
19
+ optional. Contains the shape information for the output array; if not given, the output is assumed to be square.
20
+
21
+ Returns
22
+ -------
23
+
24
+ W : numpy.ndarray
25
+ with W.shape = shape or W.shape = [np.sqrt(V.size)]*2
26
+
27
+ '''
28
+
29
+ if len(shape) < 2:
30
+ shape = [np.sqrt(V.size)] * 2
31
+ shape = tuple(map(int, shape))
32
+ return np.reshape(V, shape)
33
+
34
+
35
+ def enlarge_image(img, scaling=3):
36
+ '''
37
+ Enlarges a given input matrix by replicating each pixel value scaling times in horizontal and vertical direction.
38
+
39
+ Parameters
40
+ ----------
41
+
42
+ img : numpy.ndarray
43
+ array of shape [H x W] OR [H x W x D]
44
+
45
+ scaling : int
46
+ positive integer value > 0
47
+
48
+ Returns
49
+ -------
50
+
51
+ out : numpy.ndarray
52
+ two-dimensional array of shape [scaling*H x scaling*W]
53
+ OR
54
+ three-dimensional array of shape [scaling*H x scaling*W x D]
55
+ depending on the dimensionality of the input
56
+ '''
57
+
58
+ if scaling < 1 or not isinstance(scaling, int):
59
+ print('scaling factor needs to be an int >= 1')
60
+
61
+ if len(img.shape) == 2:
62
+ H, W = img.shape
63
+
64
+ out = np.zeros((scaling * H, scaling * W))
65
+ for h in range(H):
66
+ fh = scaling * h
67
+ for w in range(W):
68
+ fw = scaling * w
69
+ out[fh:fh + scaling, fw:fw + scaling] = img[h, w]
70
+
71
+ elif len(img.shape) == 3:
72
+ H, W, D = img.shape
73
+
74
+ out = np.zeros((scaling * H, scaling * W, D))
75
+ for h in range(H):
76
+ fh = scaling * h
77
+ for w in range(W):
78
+ fw = scaling * w
79
+ out[fh:fh + scaling, fw:fw + scaling, :] = img[h, w, :]
80
+
81
+ return out
82
+
83
+
84
+ def repaint_corner_pixels(rgbimg, scaling=3):
85
+ '''
86
+ DEPRECATED/OBSOLETE.
87
+
88
+ Recolors the top left and bottom right pixel (groups) with the average rgb value of its three neighboring pixel (groups).
89
+ The recoloring visually masks the opposing pixel values which are a product of stabilizing the scaling.
90
+ Assumes those image areas will pretty much never show evidence.
91
+
92
+ Parameters
93
+ ----------
94
+
95
+ rgbimg : numpy.ndarray
96
+ array of shape [H x W x 3]
97
+
98
+ scaling : int
99
+ positive integer value > 0
100
+
101
+ Returns
102
+ -------
103
+
104
+ rgbimg : numpy.ndarray
105
+ three-dimensional array of shape [scaling*H x scaling*W x 3]
106
+ '''
107
+
108
+ # top left corner.
109
+ rgbimg[0:scaling, 0:scaling, :] = (rgbimg[0, scaling, :] + rgbimg[scaling, 0, :] + rgbimg[scaling, scaling,
110
+ :]) / 3.0
111
+ # bottom right corner
112
+ rgbimg[-scaling:, -scaling:, :] = (rgbimg[-1, -1 - scaling, :] + rgbimg[-1 - scaling, -1, :] + rgbimg[-1 - scaling,
113
+ -1 - scaling,
114
+ :]) / 3.0
115
+ return rgbimg
116
+
117
+
118
+ def digit_to_rgb(X, scaling=3, shape=(), cmap='binary'):
119
+ '''
120
+ Takes as input an intensity array and produces an RGB image according to the chosen color map
121
+
122
+ Parameters
123
+ ----------
124
+
125
+ X : numpy.ndarray
126
+ intensity matrix as array of shape [M x N]
127
+
128
+ scaling : int
129
+ optional. positive integer value > 0
130
+
131
+ shape : tuple or list of ints, length = 2
132
+ optional. if not given, X is reshaped to be square.
133
+
134
+ cmap : str
135
+ name of color map of choice. default is 'binary'
136
+
137
+ Returns
138
+ -------
139
+
140
+ image : numpy.ndarray
141
+ three-dimensional array of shape [scaling*H x scaling*W x 3] , where H*W == M*N
142
+ '''
143
+
144
+ # create color map object from name string
145
+ cmap = eval('matplotlib.cm.{}'.format(cmap))
146
+
147
+ image = enlarge_image(vec2im(X, shape), scaling) # enlarge
148
+ image = cmap(image.flatten())[..., 0:3].reshape([image.shape[0], image.shape[1], 3]) # colorize, reshape
149
+
150
+ return image
151
+
152
+
153
+ def hm_to_rgb(R, X=None, scaling=3, shape=(), sigma=2, cmap='bwr', normalize=True):
154
+ '''
155
+ Takes as input an intensity array and produces an RGB image for the represented heatmap.
156
+ optionally draws the outline of another input on top of it.
157
+
158
+ Parameters
159
+ ----------
160
+
161
+ R : numpy.ndarray
162
+ the heatmap to be visualized, shaped [M x N]
163
+
164
+ X : numpy.ndarray
165
+ optional. some input, usually the data point for which the heatmap R is for, which shall serve
166
+ as a template for a black outline to be drawn on top of the image
167
+ shaped [M x N]
168
+
169
+ scaling: int
170
+ factor, on how to enlarge the heatmap (to control resolution and as an inverse way to control outline thickness)
171
+ after reshaping it using shape.
172
+
173
+ shape: tuple or list, length = 2
174
+ optional. if not given, X is reshaped to be square.
175
+
176
+ sigma : double
177
+ optional. sigma-parameter for the canny algorithm used for edge detection. the found edges are drawn as outlines.
178
+
179
+ cmap : str
180
+ optional. color map of choice
181
+
182
+ normalize : bool
183
+ optional. Whether to normalize the heatmap to [-1, 1] prior to colorization.
184
+
185
+ Returns
186
+ -------
187
+
188
+ rgbimg : numpy.ndarray
189
+ three-dimensional array of shape [scaling*H x scaling*W x 3] , where H*W == M*N
190
+ '''
191
+
192
+ # create color map object from name string
193
+ cmap = eval('matplotlib.cm.{}'.format(cmap))
194
+
195
+ if normalize:
196
+ R = R / np.max(np.abs(R)) # normalize to [-1,1] wrt to max relevance magnitude
197
+ R = (R + 1.) / 2. # shift/normalize to [0,1] for color mapping
198
+
199
+ R = enlarge_image(R, scaling)
200
+ rgb = cmap(R.flatten())[..., 0:3].reshape([R.shape[0], R.shape[1], 3])
201
+ # rgb = repaint_corner_pixels(rgb, scaling) #obsolete due to directly calling the color map with [0,1]-normalized inputs
202
+
203
+ if not X is None: # compute the outline of the input
204
+ # X = enlarge_image(vec2im(X,shape), scaling)
205
+ xdims = X.shape
206
+ Rdims = R.shape
207
+
208
+ # if not np.all(xdims == Rdims):
209
+ # print 'transformed heatmap and data dimension mismatch. data dimensions differ?'
210
+ # print 'R.shape = ',Rdims, 'X.shape = ', xdims
211
+ # print 'skipping drawing of outline\n'
212
+ # else:
213
+ # #edges = skimage.filters.canny(X, sigma=sigma)
214
+ # edges = skimage.feature.canny(X, sigma=sigma)
215
+ # edges = np.invert(np.dstack([edges]*3))*1.0
216
+ # rgb *= edges # set outline pixels to black color
217
+
218
+ return rgb
219
+
220
+
221
+ def save_image(rgb_images, path, gap=2):
222
+ '''
223
+ Takes as input a list of rgb images, places them next to each other with a gap and writes out the result.
224
+
225
+ Parameters
226
+ ----------
227
+
228
+ rgb_images : list or tuple (any collection of images)
229
+ each item in the collection is expected to be an RGB image of shape [H x _ x 3]
230
+ where the width is variable
231
+
232
+ path : str
233
+ the output path of the assembled image
234
+
235
+ gap : int
236
+ optional. sets the width of a black area of pixels realized as an image shaped [H x gap x 3] in between the input images
237
+
238
+ Returns
239
+ -------
240
+
241
+ image : numpy.ndarray
242
+ the assembled image as written out to path
243
+ '''
244
+
245
+ sz = []
246
+ image = []
247
+ for i in range(len(rgb_images)):
248
+ if not sz:
249
+ sz = rgb_images[i].shape
250
+ image = rgb_images[i]
251
+ gap = np.zeros((sz[0], gap, sz[2]))
252
+ continue
253
+ if not (sz[0] == rgb_images[i].shape[0] and sz[2] == rgb_images[i].shape[2]):
254
+ print('image', i, 'differs in size. unable to perform horizontal alignment')
255
+ print('expected: Hx_xD = {0}x_x{1}'.format(sz[0], sz[2]))
256
+ print('got : Hx_xD = {0}x_x{1}'.format(rgb_images[i].shape[0], rgb_images[i].shape[2]))
257
+ print('skipping image\n')
258
+ else:
259
+ image = np.hstack((image, gap, rgb_images[i]))
260
+
261
+ image *= 255
262
+ image = image.astype(np.uint8)
263
+
264
+ print('saving image to ', path)
265
+ skimage.io.imsave(path, image)
266
+ return image
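A usage sketch mirroring the `hm_to_rgb` call in imagenet_seg_eval.py above; the input map and the output path are placeholders:

import numpy as np
from SegmentationTest.utils import render

hm = np.random.randn(14, 14)                                   # invented relevance map
rgb = render.hm_to_rgb(hm, scaling=3, sigma=1, cmap='seismic') # 42x42x3 float image in [0, 1]
render.save_image([rgb, rgb], './heatmap_demo.png', gap=2)     # writes the two maps side by side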
SegmentationTest/utils/saver.py ADDED
@@ -0,0 +1,34 @@
1
+ import os
2
+ import torch
3
+ from collections import OrderedDict
4
+ import glob
5
+
6
+
7
+ class Saver(object):
8
+
9
+ def __init__(self, args):
10
+ self.args = args
11
+ self.directory = os.path.join('run', args.train_dataset, args.checkname)
12
+ self.runs = sorted(glob.glob(os.path.join(self.directory, 'experiment_*')))
13
+ run_id = int(self.runs[-1].split('_')[-1]) + 1 if self.runs else 0
14
+
15
+ self.experiment_dir = os.path.join(self.directory, 'experiment_{}'.format(str(run_id)))
16
+ if not os.path.exists(self.experiment_dir):
17
+ os.makedirs(self.experiment_dir)
18
+
19
+ def save_checkpoint(self, state, filename='checkpoint.pth.tar'):
20
+ """Saves checkpoint to disk"""
21
+ filename = os.path.join(self.experiment_dir, filename)
22
+ torch.save(state, filename)
23
+
24
+ def save_experiment_config(self):
25
+ logfile = os.path.join(self.experiment_dir, 'parameters.txt')
26
+ log_file = open(logfile, 'w')
27
+ p = OrderedDict()
28
+ p['train_dataset'] = self.args.train_dataset
29
+ p['lr'] = self.args.lr
30
+ p['epoch'] = self.args.epochs
31
+
32
+ for key, val in p.items():
33
+ log_file.write(key + ':' + str(val) + '\n')
34
+ log_file.close()
SegmentationTest/utils/summaries.py ADDED
@@ -0,0 +1,11 @@
1
+ import os
2
+ from torch.utils.tensorboard import SummaryWriter
3
+
4
+
5
+ class TensorboardSummary(object):
6
+ def __init__(self, directory):
7
+ self.directory = directory
8
+ self.writer = SummaryWriter(log_dir=os.path.join(self.directory))
9
+
10
+ def add_scalar(self, *args):
11
+ self.writer.add_scalar(*args)
ViT/ViT.py ADDED
@@ -0,0 +1,308 @@
1
+ """ Vision Transformer (ViT) in PyTorch
2
+ Hacked together by / Copyright 2020 Ross Wightman
3
+ """
4
+ import torch
5
+ import torch.nn as nn
6
+ from functools import partial
7
+ from einops import rearrange
8
+
9
+ from ViT.helpers import load_pretrained
10
+ from ViT.weight_init import trunc_normal_
11
+ from ViT.layer_helpers import to_2tuple
12
+
13
+
14
+ def _cfg(url='', **kwargs):
15
+ return {
16
+ 'url': url,
17
+ 'num_classes': 1000, 'input_size': (3, 224, 224), 'pool_size': None,
18
+ 'crop_pct': .9, 'interpolation': 'bicubic',
19
+ 'first_conv': 'patch_embed.proj', 'classifier': 'head',
20
+ **kwargs
21
+ }
22
+
23
+
24
+ default_cfgs = {
25
+ # patch models
26
+ 'vit_small_patch16_224': _cfg(
27
+ url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/vit_small_p16_224-15ec54c9.pth',
28
+ ),
29
+ 'vit_base_patch16_224': _cfg(
30
+ url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_p16_224-80ecf9dd.pth',
31
+ mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5),
32
+ ),
33
+ 'vit_large_patch16_224': _cfg(
34
+ url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_large_p16_224-4ee7a4dc.pth',
35
+ mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
36
+
37
+ # deit models (FB weights)
38
+ 'deit_tiny_patch16_224': _cfg(
39
+ url='https://dl.fbaipublicfiles.com/deit/deit_tiny_patch16_224-a1311bcf.pth'),
40
+ 'deit_small_patch16_224': _cfg(
41
+ url='https://dl.fbaipublicfiles.com/deit/deit_small_patch16_224-cd65a155.pth'),
42
+ 'deit_base_patch16_224': _cfg(
43
+ url='https://dl.fbaipublicfiles.com/deit/deit_base_patch16_224-b5f2ef4d.pth', ),
44
+ 'deit_base_patch16_384': _cfg(
45
+ url='', # no weights yet
46
+ input_size=(3, 384, 384)),
47
+ }
48
+
49
+ class Mlp(nn.Module):
50
+ def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):
51
+ super().__init__()
52
+ out_features = out_features or in_features
53
+ hidden_features = hidden_features or in_features
54
+ self.fc1 = nn.Linear(in_features, hidden_features)
55
+ self.act = act_layer()
56
+ self.fc2 = nn.Linear(hidden_features, out_features)
57
+ self.drop = nn.Dropout(drop)
58
+
59
+ def forward(self, x):
60
+ x = self.fc1(x)
61
+ x = self.act(x)
62
+ x = self.drop(x)
63
+ x = self.fc2(x)
64
+ x = self.drop(x)
65
+ return x
66
+
67
+
68
+ class Attention(nn.Module):
69
+ def __init__(self, dim, num_heads=8, qkv_bias=False,attn_drop=0., proj_drop=0.):
70
+ super().__init__()
71
+ self.num_heads = num_heads
72
+ head_dim = dim // num_heads
73
+ # NOTE scale factor was wrong in my original version, can set manually to be compat with prev weights
74
+ self.scale = head_dim ** -0.5
75
+
76
+ self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
77
+ self.attn_drop = nn.Dropout(attn_drop)
78
+ self.proj = nn.Linear(dim, dim)
79
+ self.proj_drop = nn.Dropout(proj_drop)
80
+
81
+ self.attn_gradients = None
82
+ self.attention_map = None
83
+
84
+ def save_attn_gradients(self, attn_gradients):
85
+ self.attn_gradients = attn_gradients
86
+
87
+ def get_attn_gradients(self):
88
+ return self.attn_gradients
89
+
90
+ def save_attention_map(self, attention_map):
91
+ self.attention_map = attention_map
92
+
93
+ def get_attention_map(self):
94
+ return self.attention_map
95
+
96
+ def forward(self, x, register_hook=False, return_attentions=False):
97
+ b, n, _, h = *x.shape, self.num_heads
98
+
99
+ qkv = self.qkv(x)
100
+ q, k, v = rearrange(qkv, 'b n (qkv h d) -> qkv b h n d', qkv = 3, h = h)
101
+
102
+ dots = torch.einsum('bhid,bhjd->bhij', q, k) * self.scale
103
+
104
+ attn = dots.softmax(dim=-1)
105
+ attn = self.attn_drop(attn)
106
+
107
+ out = torch.einsum('bhij,bhjd->bhid', attn, v)
108
+
109
+ self.save_attention_map(attn)
110
+ if register_hook:
111
+ attn.register_hook(self.save_attn_gradients)
112
+
113
+ out = rearrange(out, 'b h n d -> b n (h d)')
114
+ out = self.proj(out)
115
+ out = self.proj_drop(out)
116
+ if not return_attentions:
117
+ return out
118
+ else:
119
+ return out, attn
120
+
121
+
122
+ class Block(nn.Module):
123
+
124
+ def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, drop=0., attn_drop=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm):
125
+ super().__init__()
126
+ self.norm1 = norm_layer(dim)
127
+ self.attn = Attention(
128
+ dim, num_heads=num_heads, qkv_bias=qkv_bias, attn_drop=attn_drop, proj_drop=drop)
129
+ self.norm2 = norm_layer(dim)
130
+ mlp_hidden_dim = int(dim * mlp_ratio)
131
+ self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop)
132
+
133
+ def forward(self, x, register_hook=False, return_attentions=False):
134
+ if not return_attentions:
135
+ x = x + self.attn(self.norm1(x), register_hook=register_hook)
136
+ else:
137
+ attn_res, attn = self.attn(self.norm1(x), register_hook=register_hook, return_attentions=True)
138
+ x = x + attn_res
139
+ x = x + self.mlp(self.norm2(x))
140
+ if not return_attentions:
141
+ return x
142
+ else:
143
+ return x, attn
144
+
145
+
146
+ class PatchEmbed(nn.Module):
147
+ """ Image to Patch Embedding
148
+ """
149
+ def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768):
150
+ super().__init__()
151
+ img_size = to_2tuple(img_size)
152
+ patch_size = to_2tuple(patch_size)
153
+ num_patches = (img_size[1] // patch_size[1]) * (img_size[0] // patch_size[0])
154
+ self.img_size = img_size
155
+ self.patch_size = patch_size
156
+ self.num_patches = num_patches
157
+
158
+ self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size)
159
+
160
+ def forward(self, x):
161
+ B, C, H, W = x.shape
162
+ # FIXME look at relaxing size constraints
163
+ assert H == self.img_size[0] and W == self.img_size[1], \
164
+ f"Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})."
165
+ x = self.proj(x).flatten(2).transpose(1, 2)
166
+ return x
167
+
168
+ class VisionTransformer(nn.Module):
169
+ """ Vision Transformer
170
+ """
171
+ def __init__(self, img_size=224, patch_size=16, in_chans=3, num_classes=1000, embed_dim=768, depth=12,
172
+ num_heads=12, mlp_ratio=4., qkv_bias=False, drop_rate=0., attn_drop_rate=0., norm_layer=nn.LayerNorm):
173
+ super().__init__()
174
+ self.num_classes = num_classes
175
+ self.num_features = self.embed_dim = embed_dim # num_features for consistency with other models
176
+ self.patch_embed = PatchEmbed(
177
+ img_size=img_size, patch_size=patch_size, in_chans=in_chans, embed_dim=embed_dim)
178
+ num_patches = self.patch_embed.num_patches
179
+
180
+ self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
181
+ self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + 1, embed_dim))
182
+ self.pos_drop = nn.Dropout(p=drop_rate)
183
+
184
+ self.blocks = nn.ModuleList([
185
+ Block(
186
+ dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias,
187
+ drop=drop_rate, attn_drop=attn_drop_rate, norm_layer=norm_layer)
188
+ for i in range(depth)])
189
+ self.norm = norm_layer(embed_dim)
190
+
191
+ # Classifier head
192
+ self.head = nn.Linear(embed_dim, num_classes) if num_classes > 0 else nn.Identity()
193
+
194
+ trunc_normal_(self.pos_embed, std=.02)
195
+ trunc_normal_(self.cls_token, std=.02)
196
+ self.apply(self._init_weights)
197
+
198
+ def _init_weights(self, m):
199
+ if isinstance(m, nn.Linear):
200
+ trunc_normal_(m.weight, std=.02)
201
+ if isinstance(m, nn.Linear) and m.bias is not None:
202
+ nn.init.constant_(m.bias, 0)
203
+ elif isinstance(m, nn.LayerNorm):
204
+ nn.init.constant_(m.bias, 0)
205
+ nn.init.constant_(m.weight, 1.0)
206
+
207
+ @torch.jit.ignore
208
+ def no_weight_decay(self):
209
+ return {'pos_embed', 'cls_token'}
210
+
211
+ def forward(self, x, register_hook=False, return_attentions=False):
212
+ if return_attentions:
213
+ attentions = []
214
+
215
+ B = x.shape[0]
216
+ x = self.patch_embed(x)
217
+
218
+ cls_tokens = self.cls_token.expand(B, -1, -1) # stole cls_tokens impl from Phil Wang, thanks
219
+ x = torch.cat((cls_tokens, x), dim=1)
220
+ x = x + self.pos_embed
221
+ x = self.pos_drop(x)
222
+
223
+ for blk in self.blocks:
224
+ if not return_attentions:
225
+ x = blk(x, register_hook=register_hook)
226
+ else:
227
+ x, attn = blk(x, register_hook=register_hook, return_attentions=True)
228
+ attentions.append(attn)
229
+
230
+ x = self.norm(x)
231
+ x = x[:, 0]
232
+ x = self.head(x)
233
+
234
+ if not return_attentions:
235
+ return x
236
+ else:
237
+ return x, torch.cat(attentions).unsqueeze(0)
238
+
239
+
240
+ def _conv_filter(state_dict, patch_size=16):
241
+ """ convert patch embedding weight from manual patchify + linear proj to conv"""
242
+ out_dict = {}
243
+ for k, v in state_dict.items():
244
+ if 'patch_embed.proj.weight' in k:
245
+ v = v.reshape((v.shape[0], 3, patch_size, patch_size))
246
+ out_dict[k] = v
247
+ return out_dict
248
+
249
+
250
+ def vit_base_patch16_224(pretrained=False, **kwargs):
251
+ model = VisionTransformer(
252
+ patch_size=16, embed_dim=768, depth=12, num_heads=12, mlp_ratio=4, qkv_bias=True,
253
+ norm_layer=partial(nn.LayerNorm, eps=1e-6), **kwargs)
254
+ model.default_cfg = default_cfgs['vit_base_patch16_224']
255
+ if pretrained:
256
+ load_pretrained(
257
+ model, num_classes=model.num_classes, in_chans=kwargs.get('in_chans', 3), filter_fn=_conv_filter)
258
+ return model
259
+
260
+
261
+ def vit_base_finetuned_patch16_224(pretrained=False, **kwargs):
262
+ model = VisionTransformer(
263
+ patch_size=16, embed_dim=768, depth=12, num_heads=12, mlp_ratio=4, qkv_bias=True,
264
+ norm_layer=partial(nn.LayerNorm, eps=1e-6), **kwargs)
265
+ model.default_cfg = default_cfgs['vit_base_patch16_224']  # no separate finetuned entry exists in default_cfgs above; reuse the base config
266
+ if pretrained:
267
+ load_pretrained(
268
+ model, num_classes=model.num_classes, in_chans=kwargs.get('in_chans', 3), filter_fn=_conv_filter)
269
+ return model
270
+
271
+ def vit_large_patch16_224(pretrained=False, **kwargs):
272
+ model = VisionTransformer(
273
+ patch_size=16, embed_dim=1024, depth=24, num_heads=16, mlp_ratio=4, qkv_bias=True,
274
+ norm_layer=partial(nn.LayerNorm, eps=1e-6), **kwargs)
275
+ model.default_cfg = default_cfgs['vit_large_patch16_224']
276
+ if pretrained:
277
+ load_pretrained(model, num_classes=model.num_classes, in_chans=kwargs.get('in_chans', 3))
278
+ return model
279
+
280
+ def deit_tiny_patch16_224(pretrained=False, **kwargs):
281
+ model = VisionTransformer(
282
+ patch_size=16, embed_dim=192, depth=12, num_heads=3, mlp_ratio=4, qkv_bias=True,
283
+ norm_layer=partial(nn.LayerNorm, eps=1e-6), **kwargs)
284
+ model.default_cfg = default_cfgs['deit_tiny_patch16_224']
285
+ if pretrained:
286
+ load_pretrained(
287
+ model, num_classes=model.num_classes, in_chans=kwargs.get('in_chans', 3), filter_fn=lambda x: x['model'])
288
+ return model
289
+
290
+ def deit_small_patch16_224(pretrained=False, **kwargs):
291
+ model = VisionTransformer(
292
+ patch_size=16, embed_dim=384, depth=12, num_heads=6, mlp_ratio=4, qkv_bias=True,
293
+ norm_layer=partial(nn.LayerNorm, eps=1e-6), **kwargs)
294
+ model.default_cfg = default_cfgs['deit_small_patch16_224']
295
+ if pretrained:
296
+ load_pretrained(
297
+ model, num_classes=model.num_classes, in_chans=kwargs.get('in_chans', 3), filter_fn=lambda x: x['model'])
298
+ return model
299
+
300
+ def deit_base_patch16_224(pretrained=False, **kwargs):
301
+ model = VisionTransformer(
302
+ patch_size=16, embed_dim=768, depth=12, num_heads=12, mlp_ratio=4, qkv_bias=True,
303
+ norm_layer=partial(nn.LayerNorm, eps=1e-6), **kwargs)
304
+ model.default_cfg = default_cfgs['deit_base_patch16_224']
305
+ if pretrained:
306
+ load_pretrained(
307
+ model, num_classes=model.num_classes, in_chans=kwargs.get('in_chans', 3), filter_fn=lambda x: x['model'])
308
+ return model
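For orientation, a minimal usage sketch for the factory functions above. It assumes a CUDA device and network access to the pretrained weight URLs; the random tensor merely stands in for a normalized image batch.

import torch
from ViT.ViT import vit_base_patch16_224

model = vit_base_patch16_224(pretrained=True).cuda().eval()
images = torch.randn(1, 3, 224, 224).cuda()           # stand-in for a preprocessed batch
logits, attentions = model(images, return_attentions=True)
# logits: (1, 1000); attentions: the per-block attention maps stacked together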
ViT_new.py → ViT/ViT_new.py RENAMED
File without changes
ViT/__init__.py ADDED
File without changes
ViT/explainer.py ADDED
@@ -0,0 +1,71 @@
1
+ import torch
2
+ import numpy as np
3
+ import cv2
4
+
5
+ # rule 5 from paper
6
+ def avg_heads(cam, grad):
7
+ cam = cam.reshape(-1, cam.shape[-3], cam.shape[-2], cam.shape[-1])
8
+ grad = grad.reshape(-1, cam.shape[-3], grad.shape[-2], grad.shape[-1])
9
+ cam = grad * cam
10
+ cam = cam.clamp(min=0).mean(dim=1)
11
+ return cam
12
+
13
+ # rule 6 from paper
14
+ def apply_self_attention_rules(R_ss, cam_ss):
15
+ R_ss_addition = torch.matmul(cam_ss, R_ss)
16
+ return R_ss_addition
17
+
18
+ def upscale_relevance(relevance):
19
+ relevance = relevance.reshape(-1, 1, 14, 14)
20
+ relevance = torch.nn.functional.interpolate(relevance, scale_factor=16, mode='bilinear')
21
+
22
+ # normalize between 0 and 1
23
+ relevance = relevance.reshape(relevance.shape[0], -1)
24
+ min = relevance.min(1, keepdim=True)[0]
25
+ max = relevance.max(1, keepdim=True)[0]
26
+ relevance = (relevance - min) / (max - min)
27
+
28
+ relevance = relevance.reshape(-1, 1, 224, 224)
29
+ return relevance
30
+
31
+ def generate_relevance(model, input, index=None):
32
+ # a batch of samples
33
+ batch_size = input.shape[0]
34
+ output = model(input, register_hook=True)
35
+ if index is None:
36
+ index = np.argmax(output.cpu().data.numpy(), axis=-1)
37
+ index = torch.tensor(index)
38
+
39
+ one_hot = np.zeros((batch_size, output.shape[-1]), dtype=np.float32)
40
+ one_hot[torch.arange(batch_size), index.data.cpu().numpy()] = 1
41
+ one_hot = torch.from_numpy(one_hot).requires_grad_(True)
42
+ one_hot = torch.sum(one_hot.to(input.device) * output)
43
+ model.zero_grad()
44
+
45
+ num_tokens = model.blocks[0].attn.get_attention_map().shape[-1]
46
+ R = torch.eye(num_tokens, num_tokens).cuda()
47
+ R = R.unsqueeze(0).expand(batch_size, num_tokens, num_tokens)
48
+ for i, blk in enumerate(model.blocks):
49
+ grad = torch.autograd.grad(one_hot, [blk.attn.attention_map], retain_graph=True)[0]
50
+ cam = blk.attn.get_attention_map()
51
+ cam = avg_heads(cam, grad)
52
+ R = R + apply_self_attention_rules(R, cam)
53
+ relevance = R[:, 0, 1:]
54
+ return upscale_relevance(relevance)
55
+
56
+ # create heatmap from mask on image
57
+ def show_cam_on_image(img, mask):
58
+ heatmap = cv2.applyColorMap(np.uint8(255 * mask), cv2.COLORMAP_JET)
59
+ heatmap = np.float32(heatmap) / 255
60
+ cam = heatmap + np.float32(img)
61
+ cam = cam / np.max(cam)
62
+ return cam
63
+
64
+
65
+ def get_image_with_relevance(image, relevance):
66
+ image = image.permute(1, 2, 0)
67
+ relevance = relevance.permute(1, 2, 0)
68
+ image = (image - image.min()) / (image.max() - image.min())
69
+ image = 255 * image
70
+ vis = image * relevance
71
+ return vis.data.cpu().numpy()
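A hedged end-to-end sketch of how these helpers combine to visualize relevance. The preprocessing pipeline and output filename are illustrative assumptions; samples/catdog.png is one of the images included in this commit.

import numpy as np
import cv2
import torch
from PIL import Image
from torchvision import transforms
from ViT.ViT import vit_base_patch16_224
from ViT.explainer import generate_relevance, show_cam_on_image

model = vit_base_patch16_224(pretrained=True).cuda()
preprocess = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])
img = Image.open('samples/catdog.png').convert('RGB')
x = preprocess(img).unsqueeze(0).cuda()

relevance = generate_relevance(model, x)               # (1, 1, 224, 224), normalized to [0, 1]
mask = relevance[0, 0].detach().cpu().numpy()
overlay = show_cam_on_image(np.array(img.resize((224, 224))) / 255., mask)
cv2.imwrite('relevance_overlay.jpg', np.uint8(255 * overlay))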
ViT/helpers.py ADDED
@@ -0,0 +1,295 @@
1
+ """ Model creation / weight loading / state_dict helpers
2
+
3
+ Hacked together by / Copyright 2020 Ross Wightman
4
+ """
5
+ import logging
6
+ import os
7
+ import math
8
+ from collections import OrderedDict
9
+ from copy import deepcopy
10
+ from typing import Callable
11
+
12
+ import torch
13
+ import torch.nn as nn
14
+ import torch.utils.model_zoo as model_zoo
15
+
16
+ _logger = logging.getLogger(__name__)
17
+
18
+
19
+ def load_state_dict(checkpoint_path, use_ema=False):
20
+ if checkpoint_path and os.path.isfile(checkpoint_path):
21
+ checkpoint = torch.load(checkpoint_path, map_location='cpu')
22
+ state_dict_key = 'state_dict'
23
+ if isinstance(checkpoint, dict):
24
+ if use_ema and 'state_dict_ema' in checkpoint:
25
+ state_dict_key = 'state_dict_ema'
26
+ if state_dict_key and state_dict_key in checkpoint:
27
+ new_state_dict = OrderedDict()
28
+ for k, v in checkpoint[state_dict_key].items():
29
+ # strip `module.` prefix
30
+ name = k[7:] if k.startswith('module') else k
31
+ new_state_dict[name] = v
32
+ state_dict = new_state_dict
33
+ else:
34
+ state_dict = checkpoint
35
+ _logger.info("Loaded {} from checkpoint '{}'".format(state_dict_key, checkpoint_path))
36
+ return state_dict
37
+ else:
38
+ _logger.error("No checkpoint found at '{}'".format(checkpoint_path))
39
+ raise FileNotFoundError()
40
+
41
+
42
+ def load_checkpoint(model, checkpoint_path, use_ema=False, strict=True):
43
+ state_dict = load_state_dict(checkpoint_path, use_ema)
44
+ model.load_state_dict(state_dict, strict=strict)
45
+
46
+
47
+ def resume_checkpoint(model, checkpoint_path, optimizer=None, loss_scaler=None, log_info=True):
48
+ resume_epoch = None
49
+ if os.path.isfile(checkpoint_path):
50
+ checkpoint = torch.load(checkpoint_path, map_location='cpu')
51
+ if isinstance(checkpoint, dict) and 'state_dict' in checkpoint:
52
+ if log_info:
53
+ _logger.info('Restoring model state from checkpoint...')
54
+ new_state_dict = OrderedDict()
55
+ for k, v in checkpoint['state_dict'].items():
56
+ name = k[7:] if k.startswith('module') else k
57
+ new_state_dict[name] = v
58
+ model.load_state_dict(new_state_dict)
59
+
60
+ if optimizer is not None and 'optimizer' in checkpoint:
61
+ if log_info:
62
+ _logger.info('Restoring optimizer state from checkpoint...')
63
+ optimizer.load_state_dict(checkpoint['optimizer'])
64
+
65
+ if loss_scaler is not None and loss_scaler.state_dict_key in checkpoint:
66
+ if log_info:
67
+ _logger.info('Restoring AMP loss scaler state from checkpoint...')
68
+ loss_scaler.load_state_dict(checkpoint[loss_scaler.state_dict_key])
69
+
70
+ if 'epoch' in checkpoint:
71
+ resume_epoch = checkpoint['epoch']
72
+ if 'version' in checkpoint and checkpoint['version'] > 1:
73
+ resume_epoch += 1 # start at the next epoch, old checkpoints incremented before save
74
+
75
+ if log_info:
76
+ _logger.info("Loaded checkpoint '{}' (epoch {})".format(checkpoint_path, checkpoint['epoch']))
77
+ else:
78
+ model.load_state_dict(checkpoint)
79
+ if log_info:
80
+ _logger.info("Loaded checkpoint '{}'".format(checkpoint_path))
81
+ return resume_epoch
82
+ else:
83
+ _logger.error("No checkpoint found at '{}'".format(checkpoint_path))
84
+ raise FileNotFoundError()
85
+
86
+
87
+ def load_pretrained(model, cfg=None, num_classes=1000, in_chans=3, filter_fn=None, strict=True):
88
+ if cfg is None:
89
+ cfg = getattr(model, 'default_cfg')
90
+ if cfg is None or 'url' not in cfg or not cfg['url']:
91
+ _logger.warning("Pretrained model URL is invalid, using random initialization.")
92
+ return
93
+
94
+ state_dict = model_zoo.load_url(cfg['url'], progress=False, map_location='cpu')
95
+
96
+ if filter_fn is not None:
97
+ state_dict = filter_fn(state_dict)
98
+
99
+ if in_chans == 1:
100
+ conv1_name = cfg['first_conv']
101
+ _logger.info('Converting first conv (%s) pretrained weights from 3 to 1 channel' % conv1_name)
102
+ conv1_weight = state_dict[conv1_name + '.weight']
103
+ # Some weights are in torch.half, ensure it's float for sum on CPU
104
+ conv1_type = conv1_weight.dtype
105
+ conv1_weight = conv1_weight.float()
106
+ O, I, J, K = conv1_weight.shape
107
+ if I > 3:
108
+ assert conv1_weight.shape[1] % 3 == 0
109
+ # For models with space2depth stems
110
+ conv1_weight = conv1_weight.reshape(O, I // 3, 3, J, K)
111
+ conv1_weight = conv1_weight.sum(dim=2, keepdim=False)
112
+ else:
113
+ conv1_weight = conv1_weight.sum(dim=1, keepdim=True)
114
+ conv1_weight = conv1_weight.to(conv1_type)
115
+ state_dict[conv1_name + '.weight'] = conv1_weight
116
+ elif in_chans != 3:
117
+ conv1_name = cfg['first_conv']
118
+ conv1_weight = state_dict[conv1_name + '.weight']
119
+ conv1_type = conv1_weight.dtype
120
+ conv1_weight = conv1_weight.float()
121
+ O, I, J, K = conv1_weight.shape
122
+ if I != 3:
123
+ _logger.warning('Deleting first conv (%s) from pretrained weights.' % conv1_name)
124
+ del state_dict[conv1_name + '.weight']
125
+ strict = False
126
+ else:
127
+ # NOTE this strategy should be better than random init, but there could be other combinations of
128
+ # the original RGB input layer weights that'd work better for specific cases.
129
+ _logger.info('Repeating first conv (%s) weights in channel dim.' % conv1_name)
130
+ repeat = int(math.ceil(in_chans / 3))
131
+ conv1_weight = conv1_weight.repeat(1, repeat, 1, 1)[:, :in_chans, :, :]
132
+ conv1_weight *= (3 / float(in_chans))
133
+ conv1_weight = conv1_weight.to(conv1_type)
134
+ state_dict[conv1_name + '.weight'] = conv1_weight
135
+
136
+ classifier_name = cfg['classifier']
137
+ if num_classes == 1000 and cfg['num_classes'] == 1001:
138
+ # special case for imagenet trained models with extra background class in pretrained weights
139
+ classifier_weight = state_dict[classifier_name + '.weight']
140
+ state_dict[classifier_name + '.weight'] = classifier_weight[1:]
141
+ classifier_bias = state_dict[classifier_name + '.bias']
142
+ state_dict[classifier_name + '.bias'] = classifier_bias[1:]
143
+ elif num_classes != cfg['num_classes']:
144
+ # completely discard fully connected for all other differences between pretrained and created model
145
+ del state_dict[classifier_name + '.weight']
146
+ del state_dict[classifier_name + '.bias']
147
+ strict = False
148
+
149
+ model.load_state_dict(state_dict, strict=strict)
150
+
151
+
152
+ def extract_layer(model, layer):
153
+ layer = layer.split('.')
154
+ module = model
155
+ if hasattr(model, 'module') and layer[0] != 'module':
156
+ module = model.module
157
+ if not hasattr(model, 'module') and layer[0] == 'module':
158
+ layer = layer[1:]
159
+ for l in layer:
160
+ if hasattr(module, l):
161
+ if not l.isdigit():
162
+ module = getattr(module, l)
163
+ else:
164
+ module = module[int(l)]
165
+ else:
166
+ return module
167
+ return module
168
+
169
+
170
+ def set_layer(model, layer, val):
171
+ layer = layer.split('.')
172
+ module = model
173
+ if hasattr(model, 'module') and layer[0] != 'module':
174
+ module = model.module
175
+ lst_index = 0
176
+ module2 = module
177
+ for l in layer:
178
+ if hasattr(module2, l):
179
+ if not l.isdigit():
180
+ module2 = getattr(module2, l)
181
+ else:
182
+ module2 = module2[int(l)]
183
+ lst_index += 1
184
+ lst_index -= 1
185
+ for l in layer[:lst_index]:
186
+ if not l.isdigit():
187
+ module = getattr(module, l)
188
+ else:
189
+ module = module[int(l)]
190
+ l = layer[lst_index]
191
+ setattr(module, l, val)
192
+
193
+
194
+ def adapt_model_from_string(parent_module, model_string):
195
+ separator = '***'
196
+ state_dict = {}
197
+ lst_shape = model_string.split(separator)
198
+ for k in lst_shape:
199
+ k = k.split(':')
200
+ key = k[0]
201
+ shape = k[1][1:-1].split(',')
202
+ if shape[0] != '':
203
+ state_dict[key] = [int(i) for i in shape]
204
+
205
+ new_module = deepcopy(parent_module)
206
+ for n, m in parent_module.named_modules():
207
+ old_module = extract_layer(parent_module, n)
208
+ if isinstance(old_module, nn.Conv2d) or isinstance(old_module, Conv2dSame):
209
+ if isinstance(old_module, Conv2dSame):
210
+ conv = Conv2dSame
211
+ else:
212
+ conv = nn.Conv2d
213
+ s = state_dict[n + '.weight']
214
+ in_channels = s[1]
215
+ out_channels = s[0]
216
+ g = 1
217
+ if old_module.groups > 1:
218
+ in_channels = out_channels
219
+ g = in_channels
220
+ new_conv = conv(
221
+ in_channels=in_channels, out_channels=out_channels, kernel_size=old_module.kernel_size,
222
+ bias=old_module.bias is not None, padding=old_module.padding, dilation=old_module.dilation,
223
+ groups=g, stride=old_module.stride)
224
+ set_layer(new_module, n, new_conv)
225
+ if isinstance(old_module, nn.BatchNorm2d):
226
+ new_bn = nn.BatchNorm2d(
227
+ num_features=state_dict[n + '.weight'][0], eps=old_module.eps, momentum=old_module.momentum,
228
+ affine=old_module.affine, track_running_stats=True)
229
+ set_layer(new_module, n, new_bn)
230
+ if isinstance(old_module, nn.Linear):
231
+ # FIXME extra checks to ensure this is actually the FC classifier layer and not a diff Linear layer?
232
+ num_features = state_dict[n + '.weight'][1]
233
+ new_fc = nn.Linear(
234
+ in_features=num_features, out_features=old_module.out_features, bias=old_module.bias is not None)
235
+ set_layer(new_module, n, new_fc)
236
+ if hasattr(new_module, 'num_features'):
237
+ new_module.num_features = num_features
238
+ new_module.eval()
239
+ parent_module.eval()
240
+
241
+ return new_module
242
+
243
+
244
+ def adapt_model_from_file(parent_module, model_variant):
245
+ adapt_file = os.path.join(os.path.dirname(__file__), 'pruned', model_variant + '.txt')
246
+ with open(adapt_file, 'r') as f:
247
+ return adapt_model_from_string(parent_module, f.read().strip())
248
+
249
+
250
+ def build_model_with_cfg(
251
+ model_cls: Callable,
252
+ variant: str,
253
+ pretrained: bool,
254
+ default_cfg: dict,
255
+ model_cfg: dict = None,
256
+ feature_cfg: dict = None,
257
+ pretrained_strict: bool = True,
258
+ pretrained_filter_fn: Callable = None,
259
+ **kwargs):
260
+ pruned = kwargs.pop('pruned', False)
261
+ features = False
262
+ feature_cfg = feature_cfg or {}
263
+
264
+ if kwargs.pop('features_only', False):
265
+ features = True
266
+ feature_cfg.setdefault('out_indices', (0, 1, 2, 3, 4))
267
+ if 'out_indices' in kwargs:
268
+ feature_cfg['out_indices'] = kwargs.pop('out_indices')
269
+
270
+ model = model_cls(**kwargs) if model_cfg is None else model_cls(cfg=model_cfg, **kwargs)
271
+ model.default_cfg = deepcopy(default_cfg)
272
+
273
+ if pruned:
274
+ model = adapt_model_from_file(model, variant)
275
+
276
+ if pretrained:
277
+ load_pretrained(
278
+ model,
279
+ num_classes=kwargs.get('num_classes', 0),
280
+ in_chans=kwargs.get('in_chans', 3),
281
+ filter_fn=pretrained_filter_fn, strict=pretrained_strict)
282
+
283
+ if features:
284
+ feature_cls = FeatureListNet
285
+ if 'feature_cls' in feature_cfg:
286
+ feature_cls = feature_cfg.pop('feature_cls')
287
+ if isinstance(feature_cls, str):
288
+ feature_cls = feature_cls.lower()
289
+ if 'hook' in feature_cls:
290
+ feature_cls = FeatureHookNet
291
+ else:
292
+ assert False, f'Unknown feature class {feature_cls}'
293
+ model = feature_cls(model, **feature_cfg)
294
+
295
+ return model
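A small usage sketch (my illustration, not in the source): load_checkpoint pairs with the checkpoints written by the training scripts later in this commit, which save their best weights as model_best.pth.tar inside the experiment folder. The path below is a placeholder.

from ViT.ViT import vit_base_patch16_224
from ViT.helpers import load_checkpoint

model = vit_base_patch16_224(pretrained=False)
# load_state_dict() strips the 'module.' prefix added by (Distributed)DataParallel,
# so checkpoints saved from wrapped models load cleanly.
load_checkpoint(model, 'experiment/<run_name>/model_best.pth.tar', strict=True)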
ViT/layer_helpers.py ADDED
@@ -0,0 +1,21 @@
1
+ """ Layer/Module Helpers
2
+ Hacked together by / Copyright 2020 Ross Wightman
3
+ """
4
+ from itertools import repeat
5
+ import collections.abc
6
+
7
+
8
+ # From PyTorch internals
9
+ def _ntuple(n):
10
+ def parse(x):
11
+ if isinstance(x, collections.abc.Iterable):
12
+ return x
13
+ return tuple(repeat(x, n))
14
+ return parse
15
+
16
+
17
+ to_1tuple = _ntuple(1)
18
+ to_2tuple = _ntuple(2)
19
+ to_3tuple = _ntuple(3)
20
+ to_4tuple = _ntuple(4)
21
+ to_ntuple = _ntuple
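As a quick illustration (not part of the source), the tuple helpers broadcast scalars and pass iterables through unchanged:

to_2tuple(7)        # -> (7, 7)
to_2tuple((7, 14))  # -> (7, 14)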
ViT/weight_init.py ADDED
@@ -0,0 +1,60 @@
1
+ import torch
2
+ import math
3
+ import warnings
4
+
5
+
6
+ def _no_grad_trunc_normal_(tensor, mean, std, a, b):
7
+ # Cut & paste from PyTorch official master until it's in a few official releases - RW
8
+ # Method based on https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf
9
+ def norm_cdf(x):
10
+ # Computes standard normal cumulative distribution function
11
+ return (1. + math.erf(x / math.sqrt(2.))) / 2.
12
+
13
+ if (mean < a - 2 * std) or (mean > b + 2 * std):
14
+ warnings.warn("mean is more than 2 std from [a, b] in nn.init.trunc_normal_. "
15
+ "The distribution of values may be incorrect.",
16
+ stacklevel=2)
17
+
18
+ with torch.no_grad():
19
+ # Values are generated by using a truncated uniform distribution and
20
+ # then using the inverse CDF for the normal distribution.
21
+ # Get upper and lower cdf values
22
+ l = norm_cdf((a - mean) / std)
23
+ u = norm_cdf((b - mean) / std)
24
+
25
+ # Uniformly fill tensor with values from [l, u], then translate to
26
+ # [2l-1, 2u-1].
27
+ tensor.uniform_(2 * l - 1, 2 * u - 1)
28
+
29
+ # Use inverse cdf transform for normal distribution to get truncated
30
+ # standard normal
31
+ tensor.erfinv_()
32
+
33
+ # Transform to proper mean, std
34
+ tensor.mul_(std * math.sqrt(2.))
35
+ tensor.add_(mean)
36
+
37
+ # Clamp to ensure it's in the proper range
38
+ tensor.clamp_(min=a, max=b)
39
+ return tensor
40
+
41
+
42
+ def trunc_normal_(tensor, mean=0., std=1., a=-2., b=2.):
43
+ # type: (Tensor, float, float, float, float) -> Tensor
44
+ r"""Fills the input Tensor with values drawn from a truncated
45
+ normal distribution. The values are effectively drawn from the
46
+ normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)`
47
+ with values outside :math:`[a, b]` redrawn until they are within
48
+ the bounds. The method used for generating the random values works
49
+ best when :math:`a \leq \text{mean} \leq b`.
50
+ Args:
51
+ tensor: an n-dimensional `torch.Tensor`
52
+ mean: the mean of the normal distribution
53
+ std: the standard deviation of the normal distribution
54
+ a: the minimum cutoff value
55
+ b: the maximum cutoff value
56
+ Examples:
57
+ >>> w = torch.empty(3, 5)
58
+ >>> nn.init.trunc_normal_(w)
59
+ """
60
+ return _no_grad_trunc_normal_(tensor, mean, std, a, b)
imagenet_ablation_gt.py ADDED
@@ -0,0 +1,590 @@
1
+ import argparse
2
+ import os
3
+ import random
4
+ import shutil
5
+ import time
6
+ import warnings
7
+
8
+ import torch
9
+ import torch.nn as nn
10
+ import torch.nn.parallel
11
+ import torch.backends.cudnn as cudnn
12
+ import torch.distributed as dist
13
+ import torch.optim
14
+ import torch.multiprocessing as mp
15
+ import torch.utils.data
16
+ import torch.utils.data.distributed
17
+ import torchvision.transforms as transforms
18
+ import torchvision.datasets as datasets
19
+ import torchvision.models as models
20
+ from segmentation_dataset import SegmentationDataset, VAL_PARTITION, TRAIN_PARTITION
21
+
22
+ # Uncomment the expected model below
23
+
24
+ # ViT
25
+ from ViT.ViT import vit_base_patch16_224 as vit
26
+ # from ViT.ViT import vit_large_patch16_224 as vit
27
+
28
+ # ViT-AugReg
29
+ # from ViT.ViT_new import vit_small_patch16_224 as vit
30
+ # from ViT.ViT_new import vit_base_patch16_224 as vit
31
+ # from ViT.ViT_new import vit_large_patch16_224 as vit
32
+
33
+ # DeiT
34
+ # from ViT.ViT import deit_base_patch16_224 as vit
35
+ # from ViT.ViT import deit_small_patch16_224 as vit
36
+
37
+ from ViT.explainer import generate_relevance, get_image_with_relevance
38
+ import torchvision
39
+ import cv2
40
+ from torch.utils.tensorboard import SummaryWriter
41
+ import json
42
+
43
+ model_names = sorted(name for name in models.__dict__
44
+ if name.islower() and not name.startswith("__")
45
+ and callable(models.__dict__[name]))
46
+ model_names.append("vit")
47
+
48
+ parser = argparse.ArgumentParser(description='PyTorch ImageNet Training')
49
+ parser.add_argument('--data', metavar='DATA',
50
+ help='path to dataset')
51
+ parser.add_argument('--seg_data', metavar='SEG_DATA',
52
+ help='path to segmentation dataset')
53
+ parser.add_argument('-a', '--arch', metavar='ARCH', default='resnet18',
54
+ choices=model_names,
55
+ help='model architecture: ' +
56
+ ' | '.join(model_names) +
57
+ ' (default: resnet18)')
58
+ parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
59
+ help='number of data loading workers (default: 4)')
60
+ parser.add_argument('--epochs', default=150, type=int, metavar='N',
61
+ help='number of total epochs to run')
62
+ parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
63
+ help='manual epoch number (useful on restarts)')
64
+ parser.add_argument('-b', '--batch-size', default=8, type=int,
65
+ metavar='N',
66
+ help='mini-batch size (default: 256), this is the total '
67
+ 'batch size of all GPUs on the current node when '
68
+ 'using Data Parallel or Distributed Data Parallel')
69
+ parser.add_argument('--lr', '--learning-rate', default=3e-6, type=float,
70
+ metavar='LR', help='initial learning rate', dest='lr')
71
+ parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
72
+ help='momentum')
73
+ parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
74
+ metavar='W', help='weight decay (default: 1e-4)',
75
+ dest='weight_decay')
76
+ parser.add_argument('-p', '--print-freq', default=10, type=int,
77
+ metavar='N', help='print frequency (default: 10)')
78
+ parser.add_argument('--resume', default='', type=str, metavar='PATH',
79
+ help='path to latest checkpoint (default: none)')
80
+ parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true',
81
+ help='evaluate model on validation set')
82
+ parser.add_argument('--pretrained', dest='pretrained', action='store_true',
83
+ help='use pre-trained model')
84
+ parser.add_argument('--world-size', default=-1, type=int,
85
+ help='number of nodes for distributed training')
86
+ parser.add_argument('--rank', default=-1, type=int,
87
+ help='node rank for distributed training')
88
+ parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str,
89
+ help='url used to set up distributed training')
90
+ parser.add_argument('--dist-backend', default='nccl', type=str,
91
+ help='distributed backend')
92
+ parser.add_argument('--seed', default=None, type=int,
93
+ help='seed for initializing training. ')
94
+ parser.add_argument('--gpu', default=None, type=int,
95
+ help='GPU id to use.')
96
+ parser.add_argument('--save_interval', default=20, type=int,
97
+ help='interval to save segmentation results.')
98
+ parser.add_argument('--num_samples', default=3, type=int,
99
+ help='number of samples per class for training')
100
+ parser.add_argument('--multiprocessing-distributed', action='store_true',
101
+ help='Use multi-processing distributed training to launch '
102
+ 'N processes per node, which has N GPUs. This is the '
103
+ 'fastest way to use PyTorch for either single node or '
104
+ 'multi node data parallel training')
105
+ parser.add_argument('--lambda_seg', default=0.8, type=float,
106
+ help='influence of segmentation loss.')
107
+ parser.add_argument('--lambda_acc', default=0.2, type=float,
108
+ help='influence of accuracy loss.')
109
+ parser.add_argument('--experiment_folder', default=None, type=str,
110
+ help='path to folder to use for experiment.')
111
+ parser.add_argument('--dilation', default=0, type=float,
112
+ help='Use dilation on the segmentation maps.')
113
+ parser.add_argument('--lambda_background', default=2, type=float,
114
+ help='coefficient of loss for segmentation background.')
115
+ parser.add_argument('--lambda_foreground', default=0.3, type=float,
116
+ help='coefficient of loss for segmentation foreground.')
117
+ parser.add_argument('--num_classes', default=500, type=int,
118
+ help='number of classes to use for training.')
119
+ parser.add_argument('--temperature', default=1, type=float,
120
+ help='temperature for softmax (mostly for DeiT).')
121
+
122
+ best_loss = float('inf')
123
+
124
+ def main():
125
+ args = parser.parse_args()
126
+
127
+ if args.experiment_folder is None:
128
+ args.experiment_folder = f'experiment/' \
129
+ f'lr_{args.lr}_seg_{args.lambda_seg}_acc_{args.lambda_acc}' \
130
+ f'_bckg_{args.lambda_background}_fgd_{args.lambda_foreground}'
131
+ if args.temperature != 1:
132
+ args.experiment_folder = args.experiment_folder + f'_tempera_{args.temperature}'
133
+ if args.batch_size != 8:
134
+ args.experiment_folder = args.experiment_folder + f'_bs_{args.batch_size}'
135
+ if args.num_classes != 500:
136
+ args.experiment_folder = args.experiment_folder + f'_num_classes_{args.num_classes}'
137
+ if args.num_samples != 3:
138
+ args.experiment_folder = args.experiment_folder + f'_num_samples_{args.num_samples}'
139
+ if args.epochs != 150:
140
+ args.experiment_folder = args.experiment_folder + f'_num_epochs_{args.epochs}'
141
+
142
+ if os.path.exists(args.experiment_folder):
143
+ raise Exception(f"Experiment path {args.experiment_folder} already exists!")
144
+ os.mkdir(args.experiment_folder)
145
+ os.mkdir(f'{args.experiment_folder}/train_samples')
146
+ os.mkdir(f'{args.experiment_folder}/val_samples')
147
+
148
+ with open(f'{args.experiment_folder}/commandline_args.txt', 'w') as f:
149
+ json.dump(args.__dict__, f, indent=2)
150
+
151
+ if args.seed is not None:
152
+ random.seed(args.seed)
153
+ torch.manual_seed(args.seed)
154
+ cudnn.deterministic = True
155
+ warnings.warn('You have chosen to seed training. '
156
+ 'This will turn on the CUDNN deterministic setting, '
157
+ 'which can slow down your training considerably! '
158
+ 'You may see unexpected behavior when restarting '
159
+ 'from checkpoints.')
160
+
161
+ if args.gpu is not None:
162
+ warnings.warn('You have chosen a specific GPU. This will completely '
163
+ 'disable data parallelism.')
164
+
165
+ if args.dist_url == "env://" and args.world_size == -1:
166
+ args.world_size = int(os.environ["WORLD_SIZE"])
167
+
168
+ args.distributed = args.world_size > 1 or args.multiprocessing_distributed
169
+
170
+ ngpus_per_node = torch.cuda.device_count()
171
+ if args.multiprocessing_distributed:
172
+ # Since we have ngpus_per_node processes per node, the total world_size
173
+ # needs to be adjusted accordingly
174
+ args.world_size = ngpus_per_node * args.world_size
175
+ # Use torch.multiprocessing.spawn to launch distributed processes: the
176
+ # main_worker process function
177
+ mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
178
+ else:
179
+ # Simply call main_worker function
180
+ main_worker(args.gpu, ngpus_per_node, args)
181
+
182
+
183
+ def main_worker(gpu, ngpus_per_node, args):
184
+ global best_loss
185
+ args.gpu = gpu
186
+
187
+ if args.gpu is not None:
188
+ print("Use GPU: {} for training".format(args.gpu))
189
+
190
+ if args.distributed:
191
+ if args.dist_url == "env://" and args.rank == -1:
192
+ args.rank = int(os.environ["RANK"])
193
+ if args.multiprocessing_distributed:
194
+ # For multiprocessing distributed training, rank needs to be the
195
+ # global rank among all the processes
196
+ args.rank = args.rank * ngpus_per_node + gpu
197
+ dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
198
+ world_size=args.world_size, rank=args.rank)
199
+ # create model
200
+ if args.pretrained:
201
+ print("=> using pre-trained model '{}'".format(args.arch))
202
+ model = models.__dict__[args.arch](pretrained=True)
203
+ else:
204
+ print("=> creating model '{}'".format(args.arch))
205
+ #model = models.__dict__[args.arch]()
206
+ model = vit(pretrained=True).cuda()
207
+ model.train()
208
+ print("done")
209
+
210
+ if not torch.cuda.is_available():
211
+ print('using CPU, this will be slow')
212
+ elif args.distributed:
213
+ # For multiprocessing distributed, DistributedDataParallel constructor
214
+ # should always set the single device scope, otherwise,
215
+ # DistributedDataParallel will use all available devices.
216
+ if args.gpu is not None:
217
+ torch.cuda.set_device(args.gpu)
218
+ model.cuda(args.gpu)
219
+ # When using a single GPU per process and per
220
+ # DistributedDataParallel, we need to divide the batch size
221
+ # ourselves based on the total number of GPUs we have
222
+ args.batch_size = int(args.batch_size / ngpus_per_node)
223
+ args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node)
224
+ model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
225
+ else:
226
+ model.cuda()
227
+ # DistributedDataParallel will divide and allocate batch_size to all
228
+ # available GPUs if device_ids are not set
229
+ model = torch.nn.parallel.DistributedDataParallel(model)
230
+ elif args.gpu is not None:
231
+ torch.cuda.set_device(args.gpu)
232
+ model = model.cuda(args.gpu)
233
+ else:
234
+ # DataParallel will divide and allocate batch_size to all available GPUs
235
+ if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
236
+ model.features = torch.nn.DataParallel(model.features)
237
+ model.cuda()
238
+ else:
239
+ print("start")
240
+ model = torch.nn.DataParallel(model).cuda()
241
+
242
+ # define loss function (criterion) and optimizer
243
+ criterion = nn.CrossEntropyLoss().cuda(args.gpu)
244
+ optimizer = torch.optim.AdamW(model.parameters(), args.lr, weight_decay=args.weight_decay)
245
+
246
+ # optionally resume from a checkpoint
247
+ if args.resume:
248
+ if os.path.isfile(args.resume):
249
+ print("=> loading checkpoint '{}'".format(args.resume))
250
+ if args.gpu is None:
251
+ checkpoint = torch.load(args.resume)
252
+ else:
253
+ # Map model to be loaded to specified single gpu.
254
+ loc = 'cuda:{}'.format(args.gpu)
255
+ checkpoint = torch.load(args.resume, map_location=loc)
256
+ args.start_epoch = checkpoint['epoch']
257
+ best_loss = checkpoint['best_loss']
258
+ if args.gpu is not None:
259
+ # best_loss may be from a checkpoint from a different GPU
260
+ best_loss = best_loss.to(args.gpu)
261
+ model.load_state_dict(checkpoint['state_dict'])
262
+ optimizer.load_state_dict(checkpoint['optimizer'])
263
+ print("=> loaded checkpoint '{}' (epoch {})"
264
+ .format(args.resume, checkpoint['epoch']))
265
+ else:
266
+ print("=> no checkpoint found at '{}'".format(args.resume))
267
+
268
+ cudnn.benchmark = True
269
+
270
+ train_dataset = SegmentationDataset(args.seg_data, args.data, partition=TRAIN_PARTITION, train_classes=args.num_classes,
271
+ num_samples=args.num_samples)
272
+
273
+ if args.distributed:
274
+ train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
275
+ else:
276
+ train_sampler = None
277
+
278
+ train_loader = torch.utils.data.DataLoader(
279
+ train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None),
280
+ num_workers=args.workers, pin_memory=True, sampler=train_sampler)
281
+
282
+ val_dataset = SegmentationDataset(args.seg_data, args.data, partition=VAL_PARTITION, train_classes=args.num_classes,
283
+ num_samples=1)
284
+
285
+ val_loader = torch.utils.data.DataLoader(
286
+ val_dataset, batch_size=10, shuffle=False,
287
+ num_workers=args.workers, pin_memory=True)
288
+
289
+ if args.evaluate:
290
+ validate(val_loader, model, criterion, 0, args)
291
+ return
292
+
293
+ for epoch in range(args.start_epoch, args.epochs):
294
+ if args.distributed:
295
+ train_sampler.set_epoch(epoch)
296
+ adjust_learning_rate(optimizer, epoch, args)
297
+
298
+ log_dir = os.path.join(args.experiment_folder, 'logs')
299
+ logger = SummaryWriter(log_dir=log_dir)
300
+ args.logger = logger
301
+
302
+ # train for one epoch
303
+ train(train_loader, model, criterion, optimizer, epoch, args)
304
+
305
+ # evaluate on validation set
306
+ loss1 = validate(val_loader, model, criterion, epoch, args)
307
+
308
+ # remember best acc@1 and save checkpoint
309
+ is_best = loss1 <= best_loss
310
+ best_loss = min(loss1, best_loss)
311
+
312
+ if not args.multiprocessing_distributed or (args.multiprocessing_distributed
313
+ and args.rank % ngpus_per_node == 0):
314
+ save_checkpoint({
315
+ 'epoch': epoch + 1,
316
+ 'arch': args.arch,
317
+ 'state_dict': model.state_dict(),
318
+ 'best_loss': best_loss,
319
+ 'optimizer' : optimizer.state_dict(),
320
+ }, is_best, folder=args.experiment_folder)
321
+
322
+
323
+ def train(train_loader, model, criterion, optimizer, epoch, args):
324
+ mse_criterion = torch.nn.MSELoss(reduction='mean')
325
+
326
+ losses = AverageMeter('Loss', ':.4e')
327
+ top1 = AverageMeter('Acc@1', ':6.2f')
328
+ top5 = AverageMeter('Acc@5', ':6.2f')
329
+ orig_top1 = AverageMeter('Acc@1_orig', ':6.2f')
330
+ orig_top5 = AverageMeter('Acc@5_orig', ':6.2f')
331
+ progress = ProgressMeter(
332
+ len(train_loader),
333
+ [losses, top1, top5, orig_top1, orig_top5],
334
+ prefix="Epoch: [{}]".format(epoch))
335
+
336
+ orig_model = vit(pretrained=True).cuda()
337
+ orig_model.eval()
338
+
339
+ # switch to train mode
340
+ model.train()
341
+
342
+ for i, (seg_map, image_ten, class_name) in enumerate(train_loader):
343
+ if torch.cuda.is_available():
344
+ image_ten = image_ten.cuda(args.gpu, non_blocking=True)
345
+ seg_map = seg_map.cuda(args.gpu, non_blocking=True)
346
+ class_name = class_name.cuda(args.gpu, non_blocking=True)
347
+
348
+ # segmentation loss
349
+ relevance = generate_relevance(model, image_ten, index=class_name)
350
+
351
+ reverse_seg_map = seg_map.clone()
352
+ reverse_seg_map[reverse_seg_map == 1] = -1
353
+ reverse_seg_map[reverse_seg_map == 0] = 1
354
+ reverse_seg_map[reverse_seg_map == -1] = 0
355
+ background_loss = mse_criterion(relevance * reverse_seg_map, torch.zeros_like(relevance))
356
+ foreground_loss = mse_criterion(relevance * seg_map, seg_map)
357
+ segmentation_loss = args.lambda_background * background_loss
358
+ segmentation_loss += args.lambda_foreground * foreground_loss
359
+
360
+ # classification loss
361
+ output = model(image_ten)
362
+ with torch.no_grad():
363
+ output_orig = orig_model(image_ten)
364
+
365
+ _, pred = output.topk(1, 1, True, True)
366
+ pred = pred.flatten()
367
+
368
+ if args.temperature != 1:
369
+ output = output / args.temperature
370
+ classification_loss = criterion(output, class_name.flatten())
371
+
372
+ loss = args.lambda_seg * segmentation_loss + args.lambda_acc * classification_loss
373
+
374
+ # debugging output
375
+ if i % args.save_interval == 0:
376
+ orig_relevance = generate_relevance(orig_model, image_ten, index=class_name)
377
+ for j in range(image_ten.shape[0]):
378
+ image = get_image_with_relevance(image_ten[j], torch.ones_like(image_ten[j]))
379
+ new_vis = get_image_with_relevance(image_ten[j], relevance[j])
380
+ old_vis = get_image_with_relevance(image_ten[j], orig_relevance[j])
381
+ gt = get_image_with_relevance(image_ten[j], seg_map[j])
382
+ h_img = cv2.hconcat([image, gt, old_vis, new_vis])
383
+ cv2.imwrite(f'{args.experiment_folder}/train_samples/res_{i}_{j}.jpg', h_img)
384
+
385
+ # measure accuracy and record loss
386
+ acc1, acc5 = accuracy(output, class_name, topk=(1, 5))
387
+ losses.update(loss.item(), image_ten.size(0))
388
+ top1.update(acc1[0], image_ten.size(0))
389
+ top5.update(acc5[0], image_ten.size(0))
390
+
391
+ # metrics for original vit
392
+ acc1_orig, acc5_orig = accuracy(output_orig, class_name, topk=(1, 5))
393
+ orig_top1.update(acc1_orig[0], image_ten.size(0))
394
+ orig_top5.update(acc5_orig[0], image_ten.size(0))
395
+
396
+ # compute gradient and do SGD step
397
+ optimizer.zero_grad()
398
+ loss.backward()
399
+ optimizer.step()
400
+
401
+ if i % args.print_freq == 0:
402
+ progress.display(i)
403
+ args.logger.add_scalar('{}/{}'.format('train', 'segmentation_loss'), segmentation_loss,
404
+ epoch*len(train_loader)+i)
405
+ args.logger.add_scalar('{}/{}'.format('train', 'classification_loss'), classification_loss,
406
+ epoch * len(train_loader) + i)
407
+ args.logger.add_scalar('{}/{}'.format('train', 'orig_top1'), acc1_orig,
408
+ epoch * len(train_loader) + i)
409
+ args.logger.add_scalar('{}/{}'.format('train', 'top1'), acc1,
410
+ epoch * len(train_loader) + i)
411
+ args.logger.add_scalar('{}/{}'.format('train', 'orig_top5'), acc5_orig,
412
+ epoch * len(train_loader) + i)
413
+ args.logger.add_scalar('{}/{}'.format('train', 'top5'), acc5,
414
+ epoch * len(train_loader) + i)
415
+ args.logger.add_scalar('{}/{}'.format('train', 'tot_loss'), loss,
416
+ epoch * len(train_loader) + i)
417
+
418
+
419
+ def validate(val_loader, model, criterion, epoch, args):
420
+ mse_criterion = torch.nn.MSELoss(reduction='mean')
421
+
422
+ losses = AverageMeter('Loss', ':.4e')
423
+ top1 = AverageMeter('Acc@1', ':6.2f')
424
+ top5 = AverageMeter('Acc@5', ':6.2f')
425
+ orig_top1 = AverageMeter('Acc@1_orig', ':6.2f')
426
+ orig_top5 = AverageMeter('Acc@5_orig', ':6.2f')
427
+ progress = ProgressMeter(
428
+ len(val_loader),
429
+ [losses, top1, top5, orig_top1, orig_top5],
430
+ prefix="Epoch: [{}]".format(val_loader))
431
+
432
+ # switch to evaluate mode
433
+ model.eval()
434
+
435
+ orig_model = vit(pretrained=True).cuda()
436
+ orig_model.eval()
437
+
438
+ with torch.no_grad():
439
+ for i, (seg_map, image_ten, class_name) in enumerate(val_loader):
440
+ if args.gpu is not None:
441
+ image_ten = image_ten.cuda(args.gpu, non_blocking=True)
442
+ if torch.cuda.is_available():
443
+ seg_map = seg_map.cuda(args.gpu, non_blocking=True)
444
+ class_name = class_name.cuda(args.gpu, non_blocking=True)
445
+
446
+ # segmentation loss
447
+ with torch.enable_grad():
448
+ relevance = generate_relevance(model, image_ten, index=class_name)
449
+
450
+ reverse_seg_map = seg_map.clone()
451
+ reverse_seg_map[reverse_seg_map == 1] = -1
452
+ reverse_seg_map[reverse_seg_map == 0] = 1
453
+ reverse_seg_map[reverse_seg_map == -1] = 0
454
+ background_loss = mse_criterion(relevance * reverse_seg_map, torch.zeros_like(relevance))
455
+ foreground_loss = mse_criterion(relevance * seg_map, seg_map)
456
+ segmentation_loss = args.lambda_background * background_loss
457
+ segmentation_loss += args.lambda_foreground * foreground_loss
458
+
459
+ # classification loss
460
+ with torch.no_grad():
461
+ output = model(image_ten)
462
+ output_orig = orig_model(image_ten)
463
+
464
+ _, pred = output.topk(1, 1, True, True)
465
+ pred = pred.flatten()
466
+ if args.temperature != 1:
467
+ output = output / args.temperature
468
+ classification_loss = criterion(output, class_name.flatten())
469
+
470
+ loss = args.lambda_seg * segmentation_loss + args.lambda_acc * classification_loss
471
+
472
+ # save results
473
+ if i % args.save_interval == 0:
474
+ with torch.enable_grad():
475
+ orig_relevance = generate_relevance(orig_model, image_ten, index=class_name)
476
+ for j in range(image_ten.shape[0]):
477
+ image = get_image_with_relevance(image_ten[j], torch.ones_like(image_ten[j]))
478
+ new_vis = get_image_with_relevance(image_ten[j], relevance[j])
479
+ old_vis = get_image_with_relevance(image_ten[j], orig_relevance[j])
480
+ gt = get_image_with_relevance(image_ten[j], seg_map[j])
481
+ h_img = cv2.hconcat([image, gt, old_vis, new_vis])
482
+ cv2.imwrite(f'{args.experiment_folder}/val_samples/res_{i}_{j}.jpg', h_img)
483
+
484
+ # measure accuracy and record loss
485
+ acc1, acc5 = accuracy(output, class_name, topk=(1, 5))
486
+ losses.update(loss.item(), image_ten.size(0))
487
+ top1.update(acc1[0], image_ten.size(0))
488
+ top5.update(acc5[0], image_ten.size(0))
489
+
490
+ # metrics for original vit
491
+ acc1_orig, acc5_orig = accuracy(output_orig, class_name, topk=(1, 5))
492
+ orig_top1.update(acc1_orig[0], image_ten.size(0))
493
+ orig_top5.update(acc5_orig[0], image_ten.size(0))
494
+
495
+ if i % args.print_freq == 0:
496
+ progress.display(i)
497
+ args.logger.add_scalar('{}/{}'.format('val', 'segmentation_loss'), segmentation_loss,
498
+ epoch * len(val_loader) + i)
499
+ args.logger.add_scalar('{}/{}'.format('val', 'classification_loss'), classification_loss,
500
+ epoch * len(val_loader) + i)
501
+ args.logger.add_scalar('{}/{}'.format('val', 'orig_top1'), acc1_orig,
502
+ epoch * len(val_loader) + i)
503
+ args.logger.add_scalar('{}/{}'.format('val', 'top1'), acc1,
504
+ epoch * len(val_loader) + i)
505
+ args.logger.add_scalar('{}/{}'.format('val', 'orig_top5'), acc5_orig,
506
+ epoch * len(val_loader) + i)
507
+ args.logger.add_scalar('{}/{}'.format('val', 'top5'), acc5,
508
+ epoch * len(val_loader) + i)
509
+ args.logger.add_scalar('{}/{}'.format('val', 'tot_loss'), loss,
510
+ epoch * len(val_loader) + i)
511
+
512
+ # TODO: this should also be done with the ProgressMeter
513
+ print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'
514
+ .format(top1=top1, top5=top5))
515
+
516
+ return losses.avg
517
+
518
+
519
+ def save_checkpoint(state, is_best, folder, filename='checkpoint.pth.tar'):
520
+ torch.save(state, f'{folder}/{filename}')
521
+ if is_best:
522
+ shutil.copyfile(f'{folder}/{filename}', f'{folder}/model_best.pth.tar')
523
+
524
+
525
+ class AverageMeter(object):
526
+ """Computes and stores the average and current value"""
527
+ def __init__(self, name, fmt=':f'):
528
+ self.name = name
529
+ self.fmt = fmt
530
+ self.reset()
531
+
532
+ def reset(self):
533
+ self.val = 0
534
+ self.avg = 0
535
+ self.sum = 0
536
+ self.count = 0
537
+
538
+ def update(self, val, n=1):
539
+ self.val = val
540
+ self.sum += val * n
541
+ self.count += n
542
+ self.avg = self.sum / self.count
543
+
544
+ def __str__(self):
545
+ fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
546
+ return fmtstr.format(**self.__dict__)
547
+
548
+
549
+ class ProgressMeter(object):
550
+ def __init__(self, num_batches, meters, prefix=""):
551
+ self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
552
+ self.meters = meters
553
+ self.prefix = prefix
554
+
555
+ def display(self, batch):
556
+ entries = [self.prefix + self.batch_fmtstr.format(batch)]
557
+ entries += [str(meter) for meter in self.meters]
558
+ print('\t'.join(entries))
559
+
560
+ def _get_batch_fmtstr(self, num_batches):
561
+ num_digits = len(str(num_batches // 1))
562
+ fmt = '{:' + str(num_digits) + 'd}'
563
+ return '[' + fmt + '/' + fmt.format(num_batches) + ']'
564
+
565
+ def adjust_learning_rate(optimizer, epoch, args):
566
+ """Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
567
+ lr = args.lr * (0.85 ** (epoch // 2))
568
+ for param_group in optimizer.param_groups:
569
+ param_group['lr'] = lr
570
+
571
+
572
+ def accuracy(output, target, topk=(1,)):
573
+ """Computes the accuracy over the k top predictions for the specified values of k"""
574
+ with torch.no_grad():
575
+ maxk = max(topk)
576
+ batch_size = target.size(0)
577
+
578
+ _, pred = output.topk(maxk, 1, True, True)
579
+ pred = pred.t()
580
+ correct = pred.eq(target.view(1, -1).expand_as(pred))
581
+
582
+ res = []
583
+ for k in topk:
584
+ correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
585
+ res.append(correct_k.mul_(100.0 / batch_size))
586
+ return res
587
+
588
+
589
+ if __name__ == '__main__':
590
+ main()
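To clarify the objective optimized above, here is a minimal, self-contained restatement of the relevance-guided loss from train()/validate(), using toy tensors and the script's default coefficients; it is an illustration only, not an additional entry point.

import torch

mse = torch.nn.MSELoss(reduction='mean')
ce = torch.nn.CrossEntropyLoss()

# toy stand-ins: relevance and seg_map are (B, 1, 224, 224) maps in [0, 1]
relevance = torch.rand(2, 1, 224, 224)
seg_map = (torch.rand(2, 1, 224, 224) > 0.5).float()
output = torch.randn(2, 1000)
target = torch.tensor([3, 7])

background = 1 - seg_map                  # equivalent to the reverse_seg_map relabelling above
background_loss = mse(relevance * background, torch.zeros_like(relevance))
foreground_loss = mse(relevance * seg_map, seg_map)
segmentation_loss = 2.0 * background_loss + 0.3 * foreground_loss   # --lambda_background, --lambda_foreground defaults

classification_loss = ce(output, target)
loss = 0.8 * segmentation_loss + 0.2 * classification_loss          # --lambda_seg, --lambda_acc defaults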
imagenet_classes.json ADDED
@@ -0,0 +1,1002 @@
1
+ {
2
+ "n01440764": 0,
3
+ "n01443537": 1,
4
+ "n01484850": 2,
5
+ "n01491361": 3,
6
+ "n01494475": 4,
7
+ "n01496331": 5,
8
+ "n01498041": 6,
9
+ "n01514668": 7,
10
+ "n01514859": 8,
11
+ "n01518878": 9,
12
+ "n01530575": 10,
13
+ "n01531178": 11,
14
+ "n01532829": 12,
15
+ "n01534433": 13,
16
+ "n01537544": 14,
17
+ "n01558993": 15,
18
+ "n01560419": 16,
19
+ "n01580077": 17,
20
+ "n01582220": 18,
21
+ "n01592084": 19,
22
+ "n01601694": 20,
23
+ "n01608432": 21,
24
+ "n01614925": 22,
25
+ "n01616318": 23,
26
+ "n01622779": 24,
27
+ "n01629819": 25,
28
+ "n01630670": 26,
29
+ "n01631663": 27,
30
+ "n01632458": 28,
31
+ "n01632777": 29,
32
+ "n01641577": 30,
33
+ "n01644373": 31,
34
+ "n01644900": 32,
35
+ "n01664065": 33,
36
+ "n01665541": 34,
37
+ "n01667114": 35,
38
+ "n01667778": 36,
39
+ "n01669191": 37,
40
+ "n01675722": 38,
41
+ "n01677366": 39,
42
+ "n01682714": 40,
43
+ "n01685808": 41,
44
+ "n01687978": 42,
45
+ "n01688243": 43,
46
+ "n01689811": 44,
47
+ "n01692333": 45,
48
+ "n01693334": 46,
49
+ "n01694178": 47,
50
+ "n01695060": 48,
51
+ "n01697457": 49,
52
+ "n01698640": 50,
53
+ "n01704323": 51,
54
+ "n01728572": 52,
55
+ "n01728920": 53,
56
+ "n01729322": 54,
57
+ "n01729977": 55,
58
+ "n01734418": 56,
59
+ "n01735189": 57,
60
+ "n01737021": 58,
61
+ "n01739381": 59,
62
+ "n01740131": 60,
63
+ "n01742172": 61,
64
+ "n01744401": 62,
65
+ "n01748264": 63,
66
+ "n01749939": 64,
67
+ "n01751748": 65,
68
+ "n01753488": 66,
69
+ "n01755581": 67,
70
+ "n01756291": 68,
71
+ "n01768244": 69,
72
+ "n01770081": 70,
73
+ "n01770393": 71,
74
+ "n01773157": 72,
75
+ "n01773549": 73,
76
+ "n01773797": 74,
77
+ "n01774384": 75,
78
+ "n01774750": 76,
79
+ "n01775062": 77,
80
+ "n01776313": 78,
81
+ "n01784675": 79,
82
+ "n01795545": 80,
83
+ "n01796340": 81,
84
+ "n01797886": 82,
85
+ "n01798484": 83,
86
+ "n01806143": 84,
87
+ "n01806567": 85,
88
+ "n01807496": 86,
89
+ "n01817953": 87,
90
+ "n01818515": 88,
91
+ "n01819313": 89,
92
+ "n01820546": 90,
93
+ "n01824575": 91,
94
+ "n01828970": 92,
95
+ "n01829413": 93,
96
+ "n01833805": 94,
97
+ "n01843065": 95,
98
+ "n01843383": 96,
99
+ "n01847000": 97,
100
+ "n01855032": 98,
101
+ "n01855672": 99,
102
+ "n01860187": 100,
103
+ "n01871265": 101,
104
+ "n01872401": 102,
105
+ "n01873310": 103,
106
+ "n01877812": 104,
107
+ "n01882714": 105,
108
+ "n01883070": 106,
109
+ "n01910747": 107,
110
+ "n01914609": 108,
111
+ "n01917289": 109,
112
+ "n01924916": 110,
113
+ "n01930112": 111,
114
+ "n01943899": 112,
115
+ "n01944390": 113,
116
+ "n01945685": 114,
117
+ "n01950731": 115,
118
+ "n01955084": 116,
119
+ "n01968897": 117,
120
+ "n01978287": 118,
121
+ "n01978455": 119,
122
+ "n01980166": 120,
123
+ "n01981276": 121,
124
+ "n01983481": 122,
125
+ "n01984695": 123,
126
+ "n01985128": 124,
127
+ "n01986214": 125,
128
+ "n01990800": 126,
129
+ "n02002556": 127,
130
+ "n02002724": 128,
131
+ "n02006656": 129,
132
+ "n02007558": 130,
133
+ "n02009229": 131,
134
+ "n02009912": 132,
135
+ "n02011460": 133,
136
+ "n02012849": 134,
137
+ "n02013706": 135,
138
+ "n02017213": 136,
139
+ "n02018207": 137,
140
+ "n02018795": 138,
141
+ "n02025239": 139,
142
+ "n02027492": 140,
143
+ "n02028035": 141,
144
+ "n02033041": 142,
145
+ "n02037110": 143,
146
+ "n02051845": 144,
147
+ "n02056570": 145,
148
+ "n02058221": 146,
149
+ "n02066245": 147,
150
+ "n02071294": 148,
151
+ "n02074367": 149,
152
+ "n02077923": 150,
153
+ "n02085620": 151,
154
+ "n02085782": 152,
155
+ "n02085936": 153,
156
+ "n02086079": 154,
157
+ "n02086240": 155,
158
+ "n02086646": 156,
159
+ "n02086910": 157,
160
+ "n02087046": 158,
161
+ "n02087394": 159,
162
+ "n02088094": 160,
163
+ "n02088238": 161,
164
+ "n02088364": 162,
165
+ "n02088466": 163,
166
+ "n02088632": 164,
167
+ "n02089078": 165,
168
+ "n02089867": 166,
169
+ "n02089973": 167,
170
+ "n02090379": 168,
171
+ "n02090622": 169,
172
+ "n02090721": 170,
173
+ "n02091032": 171,
174
+ "n02091134": 172,
175
+ "n02091244": 173,
176
+ "n02091467": 174,
177
+ "n02091635": 175,
178
+ "n02091831": 176,
179
+ "n02092002": 177,
180
+ "n02092339": 178,
181
+ "n02093256": 179,
182
+ "n02093428": 180,
183
+ "n02093647": 181,
184
+ "n02093754": 182,
185
+ "n02093859": 183,
186
+ "n02093991": 184,
187
+ "n02094114": 185,
188
+ "n02094258": 186,
189
+ "n02094433": 187,
190
+ "n02095314": 188,
191
+ "n02095570": 189,
192
+ "n02095889": 190,
193
+ "n02096051": 191,
194
+ "n02096177": 192,
195
+ "n02096294": 193,
196
+ "n02096437": 194,
197
+ "n02096585": 195,
198
+ "n02097047": 196,
199
+ "n02097130": 197,
200
+ "n02097209": 198,
201
+ "n02097298": 199,
202
+ "n02097474": 200,
203
+ "n02097658": 201,
204
+ "n02098105": 202,
205
+ "n02098286": 203,
206
+ "n02098413": 204,
207
+ "n02099267": 205,
208
+ "n02099429": 206,
209
+ "n02099601": 207,
210
+ "n02099712": 208,
211
+ "n02099849": 209,
212
+ "n02100236": 210,
213
+ "n02100583": 211,
214
+ "n02100735": 212,
215
+ "n02100877": 213,
216
+ "n02101006": 214,
217
+ "n02101388": 215,
218
+ "n02101556": 216,
219
+ "n02102040": 217,
220
+ "n02102177": 218,
221
+ "n02102318": 219,
222
+ "n02102480": 220,
223
+ "n02102973": 221,
224
+ "n02104029": 222,
225
+ "n02104365": 223,
226
+ "n02105056": 224,
227
+ "n02105162": 225,
228
+ "n02105251": 226,
229
+ "n02105412": 227,
230
+ "n02105505": 228,
231
+ "n02105641": 229,
232
+ "n02105855": 230,
233
+ "n02106030": 231,
234
+ "n02106166": 232,
235
+ "n02106382": 233,
236
+ "n02106550": 234,
237
+ "n02106662": 235,
238
+ "n02107142": 236,
239
+ "n02107312": 237,
240
+ "n02107574": 238,
241
+ "n02107683": 239,
242
+ "n02107908": 240,
243
+ "n02108000": 241,
244
+ "n02108089": 242,
245
+ "n02108422": 243,
246
+ "n02108551": 244,
247
+ "n02108915": 245,
248
+ "n02109047": 246,
249
+ "n02109525": 247,
250
+ "n02109961": 248,
251
+ "n02110063": 249,
252
+ "n02110185": 250,
253
+ "n02110341": 251,
254
+ "n02110627": 252,
255
+ "n02110806": 253,
256
+ "n02110958": 254,
257
+ "n02111129": 255,
258
+ "n02111277": 256,
259
+ "n02111500": 257,
260
+ "n02111889": 258,
261
+ "n02112018": 259,
262
+ "n02112137": 260,
263
+ "n02112350": 261,
264
+ "n02112706": 262,
265
+ "n02113023": 263,
266
+ "n02113186": 264,
267
+ "n02113624": 265,
268
+ "n02113712": 266,
269
+ "n02113799": 267,
270
+ "n02113978": 268,
271
+ "n02114367": 269,
272
+ "n02114548": 270,
273
+ "n02114712": 271,
274
+ "n02114855": 272,
275
+ "n02115641": 273,
276
+ "n02115913": 274,
277
+ "n02116738": 275,
278
+ "n02117135": 276,
279
+ "n02119022": 277,
280
+ "n02119789": 278,
281
+ "n02120079": 279,
282
+ "n02120505": 280,
283
+ "n02123045": 281,
284
+ "n02123159": 282,
285
+ "n02123394": 283,
286
+ "n02123597": 284,
287
+ "n02124075": 285,
288
+ "n02125311": 286,
289
+ "n02127052": 287,
290
+ "n02128385": 288,
291
+ "n02128757": 289,
292
+ "n02128925": 290,
293
+ "n02129165": 291,
294
+ "n02129604": 292,
295
+ "n02130308": 293,
296
+ "n02132136": 294,
297
+ "n02133161": 295,
298
+ "n02134084": 296,
299
+ "n02134418": 297,
300
+ "n02137549": 298,
301
+ "n02138441": 299,
302
+ "n02165105": 300,
303
+ "n02165456": 301,
304
+ "n02167151": 302,
305
+ "n02168699": 303,
306
+ "n02169497": 304,
307
+ "n02172182": 305,
308
+ "n02174001": 306,
309
+ "n02177972": 307,
310
+ "n02190166": 308,
311
+ "n02206856": 309,
312
+ "n02219486": 310,
313
+ "n02226429": 311,
314
+ "n02229544": 312,
315
+ "n02231487": 313,
316
+ "n02233338": 314,
317
+ "n02236044": 315,
318
+ "n02256656": 316,
319
+ "n02259212": 317,
320
+ "n02264363": 318,
321
+ "n02268443": 319,
322
+ "n02268853": 320,
323
+ "n02276258": 321,
324
+ "n02277742": 322,
325
+ "n02279972": 323,
326
+ "n02280649": 324,
327
+ "n02281406": 325,
328
+ "n02281787": 326,
329
+ "n02317335": 327,
330
+ "n02319095": 328,
331
+ "n02321529": 329,
332
+ "n02325366": 330,
333
+ "n02326432": 331,
334
+ "n02328150": 332,
335
+ "n02342885": 333,
336
+ "n02346627": 334,
337
+ "n02356798": 335,
338
+ "n02361337": 336,
339
+ "n02363005": 337,
340
+ "n02364673": 338,
341
+ "n02389026": 339,
342
+ "n02391049": 340,
343
+ "n02395406": 341,
344
+ "n02396427": 342,
345
+ "n02397096": 343,
346
+ "n02398521": 344,
347
+ "n02403003": 345,
348
+ "n02408429": 346,
349
+ "n02410509": 347,
350
+ "n02412080": 348,
351
+ "n02415577": 349,
352
+ "n02417914": 350,
353
+ "n02422106": 351,
354
+ "n02422699": 352,
355
+ "n02423022": 353,
356
+ "n02437312": 354,
357
+ "n02437616": 355,
358
+ "n02441942": 356,
359
+ "n02442845": 357,
360
+ "n02443114": 358,
361
+ "n02443484": 359,
362
+ "n02444819": 360,
363
+ "n02445715": 361,
364
+ "n02447366": 362,
365
+ "n02454379": 363,
366
+ "n02457408": 364,
367
+ "n02480495": 365,
368
+ "n02480855": 366,
369
+ "n02481823": 367,
370
+ "n02483362": 368,
371
+ "n02483708": 369,
372
+ "n02484975": 370,
373
+ "n02486261": 371,
374
+ "n02486410": 372,
375
+ "n02487347": 373,
376
+ "n02488291": 374,
377
+ "n02488702": 375,
378
+ "n02489166": 376,
379
+ "n02490219": 377,
380
+ "n02492035": 378,
381
+ "n02492660": 379,
382
+ "n02493509": 380,
383
+ "n02493793": 381,
384
+ "n02494079": 382,
385
+ "n02497673": 383,
386
+ "n02500267": 384,
387
+ "n02504013": 385,
388
+ "n02504458": 386,
389
+ "n02509815": 387,
390
+ "n02510455": 388,
391
+ "n02514041": 389,
392
+ "n02526121": 390,
393
+ "n02536864": 391,
394
+ "n02606052": 392,
395
+ "n02607072": 393,
396
+ "n02640242": 394,
397
+ "n02641379": 395,
398
+ "n02643566": 396,
399
+ "n02655020": 397,
400
+ "n02666196": 398,
401
+ "n02667093": 399,
402
+ "n02669723": 400,
403
+ "n02672831": 401,
404
+ "n02676566": 402,
405
+ "n02687172": 403,
406
+ "n02690373": 404,
407
+ "n02692877": 405,
408
+ "n02699494": 406,
409
+ "n02701002": 407,
410
+ "n02704792": 408,
411
+ "n02708093": 409,
412
+ "n02727426": 410,
413
+ "n02730930": 411,
414
+ "n02747177": 412,
415
+ "n02749479": 413,
416
+ "n02769748": 414,
417
+ "n02776631": 415,
418
+ "n02777292": 416,
419
+ "n02782093": 417,
420
+ "n02783161": 418,
421
+ "n02786058": 419,
422
+ "n02787622": 420,
423
+ "n02788148": 421,
424
+ "n02790996": 422,
425
+ "n02791124": 423,
426
+ "n02791270": 424,
427
+ "n02793495": 425,
428
+ "n02794156": 426,
429
+ "n02795169": 427,
430
+ "n02797295": 428,
431
+ "n02799071": 429,
432
+ "n02802426": 430,
433
+ "n02804414": 431,
434
+ "n02804610": 432,
435
+ "n02807133": 433,
436
+ "n02808304": 434,
437
+ "n02808440": 435,
438
+ "n02814533": 436,
439
+ "n02814860": 437,
440
+ "n02815834": 438,
441
+ "n02817516": 439,
442
+ "n02823428": 440,
443
+ "n02823750": 441,
444
+ "n02825657": 442,
445
+ "n02834397": 443,
446
+ "n02835271": 444,
447
+ "n02837789": 445,
448
+ "n02840245": 446,
449
+ "n02841315": 447,
450
+ "n02843684": 448,
451
+ "n02859443": 449,
452
+ "n02860847": 450,
453
+ "n02865351": 451,
454
+ "n02869837": 452,
455
+ "n02870880": 453,
456
+ "n02871525": 454,
457
+ "n02877765": 455,
458
+ "n02879718": 456,
459
+ "n02883205": 457,
460
+ "n02892201": 458,
461
+ "n02892767": 459,
462
+ "n02894605": 460,
463
+ "n02895154": 461,
464
+ "n02906734": 462,
465
+ "n02909870": 463,
466
+ "n02910353": 464,
467
+ "n02916936": 465,
468
+ "n02917067": 466,
469
+ "n02927161": 467,
470
+ "n02930766": 468,
471
+ "n02939185": 469,
472
+ "n02948072": 470,
473
+ "n02950826": 471,
474
+ "n02951358": 472,
475
+ "n02951585": 473,
476
+ "n02963159": 474,
477
+ "n02965783": 475,
478
+ "n02966193": 476,
479
+ "n02966687": 477,
480
+ "n02971356": 478,
481
+ "n02974003": 479,
482
+ "n02977058": 480,
483
+ "n02978881": 481,
484
+ "n02979186": 482,
485
+ "n02980441": 483,
486
+ "n02981792": 484,
487
+ "n02988304": 485,
488
+ "n02992211": 486,
489
+ "n02992529": 487,
490
+ "n02999410": 488,
491
+ "n03000134": 489,
492
+ "n03000247": 490,
493
+ "n03000684": 491,
494
+ "n03014705": 492,
495
+ "n03016953": 493,
496
+ "n03017168": 494,
497
+ "n03018349": 495,
498
+ "n03026506": 496,
499
+ "n03028079": 497,
500
+ "n03032252": 498,
501
+ "n03041632": 499,
502
+ "n03042490": 500,
503
+ "n03045698": 501,
504
+ "n03047690": 502,
505
+ "n03062245": 503,
506
+ "n03063599": 504,
507
+ "n03063689": 505,
508
+ "n03065424": 506,
509
+ "n03075370": 507,
510
+ "n03085013": 508,
511
+ "n03089624": 509,
512
+ "n03095699": 510,
513
+ "n03100240": 511,
514
+ "n03109150": 512,
515
+ "n03110669": 513,
516
+ "n03124043": 514,
517
+ "n03124170": 515,
518
+ "n03125729": 516,
519
+ "n03126707": 517,
520
+ "n03127747": 518,
521
+ "n03127925": 519,
522
+ "n03131574": 520,
523
+ "n03133878": 521,
524
+ "n03134739": 522,
525
+ "n03141823": 523,
526
+ "n03146219": 524,
527
+ "n03160309": 525,
528
+ "n03179701": 526,
529
+ "n03180011": 527,
530
+ "n03187595": 528,
531
+ "n03188531": 529,
532
+ "n03196217": 530,
533
+ "n03197337": 531,
534
+ "n03201208": 532,
535
+ "n03207743": 533,
536
+ "n03207941": 534,
537
+ "n03208938": 535,
538
+ "n03216828": 536,
539
+ "n03218198": 537,
540
+ "n03220513": 538,
541
+ "n03223299": 539,
542
+ "n03240683": 540,
543
+ "n03249569": 541,
544
+ "n03250847": 542,
545
+ "n03255030": 543,
546
+ "n03259280": 544,
547
+ "n03271574": 545,
548
+ "n03272010": 546,
549
+ "n03272562": 547,
550
+ "n03290653": 548,
551
+ "n03291819": 549,
552
+ "n03297495": 550,
553
+ "n03314780": 551,
554
+ "n03325584": 552,
555
+ "n03337140": 553,
556
+ "n03344393": 554,
557
+ "n03345487": 555,
558
+ "n03347037": 556,
559
+ "n03355925": 557,
560
+ "n03372029": 558,
561
+ "n03376595": 559,
562
+ "n03379051": 560,
563
+ "n03384352": 561,
564
+ "n03388043": 562,
565
+ "n03388183": 563,
566
+ "n03388549": 564,
567
+ "n03393912": 565,
568
+ "n03394916": 566,
569
+ "n03400231": 567,
570
+ "n03404251": 568,
571
+ "n03417042": 569,
572
+ "n03424325": 570,
573
+ "n03425413": 571,
574
+ "n03443371": 572,
575
+ "n03444034": 573,
576
+ "n03445777": 574,
577
+ "n03445924": 575,
578
+ "n03447447": 576,
579
+ "n03447721": 577,
580
+ "n03450230": 578,
581
+ "n03452741": 579,
582
+ "n03457902": 580,
583
+ "n03459775": 581,
584
+ "n03461385": 582,
585
+ "n03467068": 583,
586
+ "n03476684": 584,
587
+ "n03476991": 585,
588
+ "n03478589": 586,
589
+ "n03481172": 587,
590
+ "n03482405": 588,
591
+ "n03483316": 589,
592
+ "n03485407": 590,
593
+ "n03485794": 591,
594
+ "n03492542": 592,
595
+ "n03494278": 593,
596
+ "n03495258": 594,
597
+ "n03496892": 595,
598
+ "n03498962": 596,
599
+ "n03527444": 597,
600
+ "n03529860": 598,
601
+ "n03530642": 599,
602
+ "n03532672": 600,
603
+ "n03534580": 601,
604
+ "n03535780": 602,
605
+ "n03538406": 603,
606
+ "n03544143": 604,
607
+ "n03584254": 605,
608
+ "n03584829": 606,
609
+ "n03590841": 607,
610
+ "n03594734": 608,
611
+ "n03594945": 609,
612
+ "n03595614": 610,
613
+ "n03598930": 611,
614
+ "n03599486": 612,
615
+ "n03602883": 613,
616
+ "n03617480": 614,
617
+ "n03623198": 615,
618
+ "n03627232": 616,
619
+ "n03630383": 617,
620
+ "n03633091": 618,
621
+ "n03637318": 619,
622
+ "n03642806": 620,
623
+ "n03649909": 621,
624
+ "n03657121": 622,
625
+ "n03658185": 623,
626
+ "n03661043": 624,
627
+ "n03662601": 625,
628
+ "n03666591": 626,
629
+ "n03670208": 627,
630
+ "n03673027": 628,
631
+ "n03676483": 629,
632
+ "n03680355": 630,
633
+ "n03690938": 631,
634
+ "n03691459": 632,
635
+ "n03692522": 633,
636
+ "n03697007": 634,
637
+ "n03706229": 635,
638
+ "n03709823": 636,
639
+ "n03710193": 637,
640
+ "n03710637": 638,
641
+ "n03710721": 639,
642
+ "n03717622": 640,
643
+ "n03720891": 641,
644
+ "n03721384": 642,
645
+ "n03724870": 643,
646
+ "n03729826": 644,
647
+ "n03733131": 645,
648
+ "n03733281": 646,
649
+ "n03733805": 647,
650
+ "n03742115": 648,
651
+ "n03743016": 649,
652
+ "n03759954": 650,
653
+ "n03761084": 651,
654
+ "n03763968": 652,
655
+ "n03764736": 653,
656
+ "n03769881": 654,
657
+ "n03770439": 655,
658
+ "n03770679": 656,
659
+ "n03773504": 657,
660
+ "n03775071": 658,
661
+ "n03775546": 659,
662
+ "n03776460": 660,
663
+ "n03777568": 661,
664
+ "n03777754": 662,
665
+ "n03781244": 663,
666
+ "n03782006": 664,
667
+ "n03785016": 665,
668
+ "n03786901": 666,
669
+ "n03787032": 667,
670
+ "n03788195": 668,
671
+ "n03788365": 669,
672
+ "n03791053": 670,
673
+ "n03792782": 671,
674
+ "n03792972": 672,
675
+ "n03793489": 673,
676
+ "n03794056": 674,
677
+ "n03796401": 675,
678
+ "n03803284": 676,
679
+ "n03804744": 677,
680
+ "n03814639": 678,
681
+ "n03814906": 679,
682
+ "n03825788": 680,
683
+ "n03832673": 681,
684
+ "n03837869": 682,
685
+ "n03838899": 683,
686
+ "n03840681": 684,
687
+ "n03841143": 685,
688
+ "n03843555": 686,
689
+ "n03854065": 687,
690
+ "n03857828": 688,
691
+ "n03866082": 689,
692
+ "n03868242": 690,
693
+ "n03868863": 691,
694
+ "n03871628": 692,
695
+ "n03873416": 693,
696
+ "n03874293": 694,
697
+ "n03874599": 695,
698
+ "n03876231": 696,
699
+ "n03877472": 697,
700
+ "n03877845": 698,
701
+ "n03884397": 699,
702
+ "n03887697": 700,
703
+ "n03888257": 701,
704
+ "n03888605": 702,
705
+ "n03891251": 703,
706
+ "n03891332": 704,
707
+ "n03895866": 705,
708
+ "n03899768": 706,
709
+ "n03902125": 707,
710
+ "n03903868": 708,
711
+ "n03908618": 709,
712
+ "n03908714": 710,
713
+ "n03916031": 711,
714
+ "n03920288": 712,
715
+ "n03924679": 713,
716
+ "n03929660": 714,
717
+ "n03929855": 715,
718
+ "n03930313": 716,
719
+ "n03930630": 717,
720
+ "n03933933": 718,
721
+ "n03935335": 719,
722
+ "n03937543": 720,
723
+ "n03938244": 721,
724
+ "n03942813": 722,
725
+ "n03944341": 723,
726
+ "n03947888": 724,
727
+ "n03950228": 725,
728
+ "n03954731": 726,
729
+ "n03956157": 727,
730
+ "n03958227": 728,
731
+ "n03961711": 729,
732
+ "n03967562": 730,
733
+ "n03970156": 731,
734
+ "n03976467": 732,
735
+ "n03976657": 733,
736
+ "n03977966": 734,
737
+ "n03980874": 735,
738
+ "n03982430": 736,
739
+ "n03983396": 737,
740
+ "n03991062": 738,
741
+ "n03992509": 739,
742
+ "n03995372": 740,
743
+ "n03998194": 741,
744
+ "n04004767": 742,
745
+ "n04005630": 743,
746
+ "n04008634": 744,
747
+ "n04009552": 745,
748
+ "n04019541": 746,
749
+ "n04023962": 747,
750
+ "n04026417": 748,
751
+ "n04033901": 749,
752
+ "n04033995": 750,
753
+ "n04037443": 751,
754
+ "n04039381": 752,
755
+ "n04040759": 753,
756
+ "n04041544": 754,
757
+ "n04044716": 755,
758
+ "n04049303": 756,
759
+ "n04065272": 757,
760
+ "n04067472": 758,
761
+ "n04069434": 759,
762
+ "n04070727": 760,
763
+ "n04074963": 761,
764
+ "n04081281": 762,
765
+ "n04086273": 763,
766
+ "n04090263": 764,
767
+ "n04099969": 765,
768
+ "n04111531": 766,
769
+ "n04116512": 767,
770
+ "n04118538": 768,
771
+ "n04118776": 769,
772
+ "n04120489": 770,
773
+ "n04125021": 771,
774
+ "n04127249": 772,
775
+ "n04131690": 773,
776
+ "n04133789": 774,
777
+ "n04136333": 775,
778
+ "n04141076": 776,
779
+ "n04141327": 777,
780
+ "n04141975": 778,
781
+ "n04146614": 779,
782
+ "n04147183": 780,
783
+ "n04149813": 781,
784
+ "n04152593": 782,
785
+ "n04153751": 783,
786
+ "n04154565": 784,
787
+ "n04162706": 785,
788
+ "n04179913": 786,
789
+ "n04192698": 787,
790
+ "n04200800": 788,
791
+ "n04201297": 789,
792
+ "n04204238": 790,
793
+ "n04204347": 791,
794
+ "n04208210": 792,
795
+ "n04209133": 793,
796
+ "n04209239": 794,
797
+ "n04228054": 795,
798
+ "n04229816": 796,
799
+ "n04235860": 797,
800
+ "n04238763": 798,
801
+ "n04239074": 799,
802
+ "n04243546": 800,
803
+ "n04251144": 801,
804
+ "n04252077": 802,
805
+ "n04252225": 803,
806
+ "n04254120": 804,
807
+ "n04254680": 805,
808
+ "n04254777": 806,
809
+ "n04258138": 807,
810
+ "n04259630": 808,
811
+ "n04263257": 809,
812
+ "n04264628": 810,
813
+ "n04265275": 811,
814
+ "n04266014": 812,
815
+ "n04270147": 813,
816
+ "n04273569": 814,
817
+ "n04275548": 815,
818
+ "n04277352": 816,
819
+ "n04285008": 817,
820
+ "n04286575": 818,
821
+ "n04296562": 819,
822
+ "n04310018": 820,
823
+ "n04311004": 821,
824
+ "n04311174": 822,
825
+ "n04317175": 823,
826
+ "n04325704": 824,
827
+ "n04326547": 825,
828
+ "n04328186": 826,
829
+ "n04330267": 827,
830
+ "n04332243": 828,
831
+ "n04335435": 829,
832
+ "n04336792": 830,
833
+ "n04344873": 831,
834
+ "n04346328": 832,
835
+ "n04347754": 833,
836
+ "n04350905": 834,
837
+ "n04355338": 835,
838
+ "n04355933": 836,
839
+ "n04356056": 837,
840
+ "n04357314": 838,
841
+ "n04366367": 839,
842
+ "n04367480": 840,
843
+ "n04370456": 841,
844
+ "n04371430": 842,
845
+ "n04371774": 843,
846
+ "n04372370": 844,
847
+ "n04376876": 845,
848
+ "n04380533": 846,
849
+ "n04389033": 847,
850
+ "n04392985": 848,
851
+ "n04398044": 849,
852
+ "n04399382": 850,
853
+ "n04404412": 851,
854
+ "n04409515": 852,
855
+ "n04417672": 853,
856
+ "n04418357": 854,
857
+ "n04423845": 855,
858
+ "n04428191": 856,
859
+ "n04429376": 857,
860
+ "n04435653": 858,
861
+ "n04442312": 859,
862
+ "n04443257": 860,
863
+ "n04447861": 861,
864
+ "n04456115": 862,
865
+ "n04458633": 863,
866
+ "n04461696": 864,
867
+ "n04462240": 865,
868
+ "n04465501": 866,
869
+ "n04467665": 867,
870
+ "n04476259": 868,
871
+ "n04479046": 869,
872
+ "n04482393": 870,
873
+ "n04483307": 871,
874
+ "n04485082": 872,
875
+ "n04486054": 873,
876
+ "n04487081": 874,
877
+ "n04487394": 875,
878
+ "n04493381": 876,
879
+ "n04501370": 877,
880
+ "n04505470": 878,
881
+ "n04507155": 879,
882
+ "n04509417": 880,
883
+ "n04515003": 881,
884
+ "n04517823": 882,
885
+ "n04522168": 883,
886
+ "n04523525": 884,
887
+ "n04525038": 885,
888
+ "n04525305": 886,
889
+ "n04532106": 887,
890
+ "n04532670": 888,
891
+ "n04536866": 889,
892
+ "n04540053": 890,
893
+ "n04542943": 891,
894
+ "n04548280": 892,
895
+ "n04548362": 893,
896
+ "n04550184": 894,
897
+ "n04552348": 895,
898
+ "n04553703": 896,
899
+ "n04554684": 897,
900
+ "n04557648": 898,
901
+ "n04560804": 899,
902
+ "n04562935": 900,
903
+ "n04579145": 901,
904
+ "n04579432": 902,
905
+ "n04584207": 903,
906
+ "n04589890": 904,
907
+ "n04590129": 905,
908
+ "n04591157": 906,
909
+ "n04591713": 907,
910
+ "n04592741": 908,
911
+ "n04596742": 909,
912
+ "n04597913": 910,
913
+ "n04599235": 911,
914
+ "n04604644": 912,
915
+ "n04606251": 913,
916
+ "n04612504": 914,
917
+ "n04613696": 915,
918
+ "n06359193": 916,
919
+ "n06596364": 917,
920
+ "n06785654": 918,
921
+ "n06794110": 919,
922
+ "n06874185": 920,
923
+ "n07248320": 921,
924
+ "n07565083": 922,
925
+ "n07579787": 923,
926
+ "n07583066": 924,
927
+ "n07584110": 925,
928
+ "n07590611": 926,
929
+ "n07613480": 927,
930
+ "n07614500": 928,
931
+ "n07615774": 929,
932
+ "n07684084": 930,
933
+ "n07693725": 931,
934
+ "n07695742": 932,
935
+ "n07697313": 933,
936
+ "n07697537": 934,
937
+ "n07711569": 935,
938
+ "n07714571": 936,
939
+ "n07714990": 937,
940
+ "n07715103": 938,
941
+ "n07716358": 939,
942
+ "n07716906": 940,
943
+ "n07717410": 941,
944
+ "n07717556": 942,
945
+ "n07718472": 943,
946
+ "n07718747": 944,
947
+ "n07720875": 945,
948
+ "n07730033": 946,
949
+ "n07734744": 947,
950
+ "n07742313": 948,
951
+ "n07745940": 949,
952
+ "n07747607": 950,
953
+ "n07749582": 951,
954
+ "n07753113": 952,
955
+ "n07753275": 953,
956
+ "n07753592": 954,
957
+ "n07754684": 955,
958
+ "n07760859": 956,
959
+ "n07768694": 957,
960
+ "n07802026": 958,
961
+ "n07831146": 959,
962
+ "n07836838": 960,
963
+ "n07860988": 961,
964
+ "n07871810": 962,
965
+ "n07873807": 963,
966
+ "n07875152": 964,
967
+ "n07880968": 965,
968
+ "n07892512": 966,
969
+ "n07920052": 967,
970
+ "n07930864": 968,
971
+ "n07932039": 969,
972
+ "n09193705": 970,
973
+ "n09229709": 971,
974
+ "n09246464": 972,
975
+ "n09256479": 973,
976
+ "n09288635": 974,
977
+ "n09332890": 975,
978
+ "n09399592": 976,
979
+ "n09421951": 977,
980
+ "n09428293": 978,
981
+ "n09468604": 979,
982
+ "n09472597": 980,
983
+ "n09835506": 981,
984
+ "n10148035": 982,
985
+ "n10565667": 983,
986
+ "n11879895": 984,
987
+ "n11939491": 985,
988
+ "n12057211": 986,
989
+ "n12144580": 987,
990
+ "n12267677": 988,
991
+ "n12620546": 989,
992
+ "n12768682": 990,
993
+ "n12985857": 991,
994
+ "n12998815": 992,
995
+ "n13037406": 993,
996
+ "n13040303": 994,
997
+ "n13044778": 995,
998
+ "n13052670": 996,
999
+ "n13054560": 997,
1000
+ "n13133613": 998,
1001
+ "n15075141": 999
1002
+ }
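
The mapping above assigns each WordNet synset ID its ImageNet-1k class index (0-999). As a quick sanity check, here is a minimal sketch of loading and inverting such a mapping, e.g. to decode a predicted class index back to its synset ID; the file name and helper names below are assumptions for illustration, not part of the repository code:

import json

# Load the wnid -> class-index mapping shown above
# (assuming it is saved as a JSON file named imagenet_classes.json).
with open("imagenet_classes.json") as f:
    wnid_to_idx = json.load(f)

# Invert it so a predicted class index can be mapped back to its WordNet synset ID.
idx_to_wnid = {idx: wnid for wnid, idx in wnid_to_idx.items()}

assert len(wnid_to_idx) == 1000
assert idx_to_wnid[0] == "n01440764"
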
imagenet_eval_robustness.py ADDED
@@ -0,0 +1,337 @@
1
+ import argparse
2
+ import os
3
+ import random
4
+ import shutil
5
+ import time
6
+ import warnings
7
+
8
+ import torch
9
+ import torch.nn as nn
10
+ import torch.nn.parallel
11
+ import torch.backends.cudnn as cudnn
12
+ import torch.distributed as dist
13
+ import torch.optim
14
+ import torch.multiprocessing as mp
15
+ import torch.utils.data
16
+ import torch.utils.data.distributed
17
+ import torchvision.transforms as transforms
18
+ import torchvision.datasets as datasets
19
+ import torchvision.models as models
20
+
21
+ # Uncomment the expected model below
22
+
23
+ # ViT
24
+ from ViT.ViT import vit_base_patch16_224 as vit
25
+ # from ViT.ViT import vit_large_patch16_224 as vit
26
+
27
+ # ViT-AugReg
28
+ # from ViT.ViT_new import vit_small_patch16_224 as vit
29
+ # from ViT.ViT_new import vit_base_patch16_224 as vit
30
+ # from ViT.ViT_new import vit_large_patch16_224 as vit
31
+
32
+ # DeiT
33
+ # from ViT.ViT import deit_base_patch16_224 as vit
34
+ # from ViT.ViT import deit_small_patch16_224 as vit
35
+
36
+ from robustness_dataset import RobustnessDataset
37
+ from objectnet_dataset import ObjectNetDataset
38
+ model_names = sorted(name for name in models.__dict__
39
+ if name.islower() and not name.startswith("__")
40
+ and callable(models.__dict__[name]))
41
+ model_names.append("vit")
42
+
43
+ parser = argparse.ArgumentParser(description='PyTorch ImageNet Robustness Evaluation')
44
+ parser.add_argument('--data', metavar='DIR',
45
+ help='path to dataset')
46
+ parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
47
+ help='number of data loading workers (default: 4)')
48
+ parser.add_argument('--epochs', default=150, type=int, metavar='N',
49
+ help='number of total epochs to run')
50
+ parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
51
+ help='manual epoch number (useful on restarts)')
52
+ parser.add_argument('-b', '--batch-size', default=256, type=int,
53
+ metavar='N',
54
+ help='mini-batch size (default: 256), this is the total '
55
+ 'batch size of all GPUs on the current node when '
56
+ 'using Data Parallel or Distributed Data Parallel')
57
+ parser.add_argument('--lr', '--learning-rate', default=5e-4, type=float,
58
+ metavar='LR', help='initial learning rate', dest='lr')
59
+ parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
60
+ help='momentum')
61
+ parser.add_argument('--wd', '--weight-decay', default=0.05, type=float,
62
+ metavar='W', help='weight decay (default: 1e-4)',
63
+ dest='weight_decay')
64
+ parser.add_argument('-p', '--print-freq', default=10, type=int,
65
+ metavar='N', help='print frequency (default: 10)')
66
+ parser.add_argument('--checkpoint', default='', type=str, metavar='PATH',
67
+ help='path to latest checkpoint (default: none)')
68
+ parser.add_argument('--resume', default='', type=str, metavar='PATH',
69
+ help='path to resume checkpoint (default: none)')
70
+ parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true',
71
+ help='evaluate model on validation set')
72
+ parser.add_argument('--pretrained', dest='pretrained', action='store_true',
73
+ help='use pre-trained model')
74
+ parser.add_argument('--world-size', default=-1, type=int,
75
+ help='number of nodes for distributed training')
76
+ parser.add_argument('--rank', default=-1, type=int,
77
+ help='node rank for distributed training')
78
+ parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str,
79
+ help='url used to set up distributed training')
80
+ parser.add_argument('--dist-backend', default='nccl', type=str,
81
+ help='distributed backend')
82
+ parser.add_argument('--seed', default=None, type=int,
83
+ help='seed for initializing training. ')
84
+ parser.add_argument('--gpu', default=None, type=int,
85
+ help='GPU id to use.')
86
+ parser.add_argument('--multiprocessing-distributed', action='store_true',
87
+ help='Use multi-processing distributed training to launch '
88
+ 'N processes per node, which has N GPUs. This is the '
89
+ 'fastest way to use PyTorch for either single node or '
90
+ 'multi node data parallel training')
91
+ parser.add_argument("--isV2", default=False, action='store_true',
92
+ help='is dataset imagenet V2.')
93
+ parser.add_argument("--isSI", default=False, action='store_true',
94
+ help='is dataset SI-score.')
95
+ parser.add_argument("--isObjectNet", default=False, action='store_true',
96
+ help='is dataset SI-score.')
97
+
98
+
99
+ def main():
100
+ args = parser.parse_args()
101
+
102
+ if args.seed is not None:
103
+ random.seed(args.seed)
104
+ torch.manual_seed(args.seed)
105
+ cudnn.deterministic = True
106
+ warnings.warn('You have chosen to seed training. '
107
+ 'This will turn on the CUDNN deterministic setting, '
108
+ 'which can slow down your training considerably! '
109
+ 'You may see unexpected behavior when restarting '
110
+ 'from checkpoints.')
111
+
112
+ if args.gpu is not None:
113
+ warnings.warn('You have chosen a specific GPU. This will completely '
114
+ 'disable data parallelism.')
115
+
116
+ if args.dist_url == "env://" and args.world_size == -1:
117
+ args.world_size = int(os.environ["WORLD_SIZE"])
118
+
119
+ args.distributed = args.world_size > 1 or args.multiprocessing_distributed
120
+
121
+ ngpus_per_node = torch.cuda.device_count()
122
+ if args.multiprocessing_distributed:
123
+ # Since we have ngpus_per_node processes per node, the total world_size
124
+ # needs to be adjusted accordingly
125
+ args.world_size = ngpus_per_node * args.world_size
126
+ # Use torch.multiprocessing.spawn to launch distributed processes: the
127
+ # main_worker process function
128
+ mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
129
+ else:
130
+ # Simply call main_worker function
131
+ main_worker(args.gpu, ngpus_per_node, args)
132
+
133
+
134
+ def main_worker(gpu, ngpus_per_node, args):
135
+ global best_acc1
136
+ args.gpu = gpu
137
+
138
+ if args.gpu is not None:
139
+ print("Use GPU: {} for training".format(args.gpu))
140
+
141
+ if args.distributed:
142
+ if args.dist_url == "env://" and args.rank == -1:
143
+ args.rank = int(os.environ["RANK"])
144
+ if args.multiprocessing_distributed:
145
+ # For multiprocessing distributed training, rank needs to be the
146
+ # global rank among all the processes
147
+ args.rank = args.rank * ngpus_per_node + gpu
148
+ dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
149
+ world_size=args.world_size, rank=args.rank)
150
+ # create model
151
+ print("=> creating model")
152
+ if args.checkpoint:
153
+ model = vit().cuda()
154
+ checkpoint = torch.load(args.checkpoint)
155
+ model.load_state_dict(checkpoint['state_dict'])
156
+ else:
157
+ model = vit(pretrained=True).cuda()
158
+ print("done")
159
+
160
+ if not torch.cuda.is_available():
161
+ print('using CPU, this will be slow')
162
+ elif args.distributed:
163
+ # For multiprocessing distributed, DistributedDataParallel constructor
164
+ # should always set the single device scope, otherwise,
165
+ # DistributedDataParallel will use all available devices.
166
+ if args.gpu is not None:
167
+ torch.cuda.set_device(args.gpu)
168
+ model.cuda(args.gpu)
169
+ # When using a single GPU per process and per
170
+ # DistributedDataParallel, we need to divide the batch size
171
+ # ourselves based on the total number of GPUs we have
172
+ args.batch_size = int(args.batch_size / ngpus_per_node)
173
+ args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node)
174
+ model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
175
+ else:
176
+ model.cuda()
177
+ # DistributedDataParallel will divide and allocate batch_size to all
178
+ # available GPUs if device_ids are not set
179
+ model = torch.nn.parallel.DistributedDataParallel(model)
180
+ elif args.gpu is not None:
181
+ torch.cuda.set_device(args.gpu)
182
+ model = model.cuda(args.gpu)
183
+ else:
184
+ print("start")
185
+ model = torch.nn.DataParallel(model).cuda()
186
+
187
+ # optionally resume from a checkpoint
188
+ if args.resume:
189
+ if os.path.isfile(args.resume):
190
+ print("=> loading checkpoint '{}'".format(args.resume))
191
+ if args.gpu is None:
192
+ checkpoint = torch.load(args.resume)
193
+ else:
194
+ # Map model to be loaded to specified single gpu.
195
+ loc = 'cuda:{}'.format(args.gpu)
196
+ checkpoint = torch.load(args.resume, map_location=loc)
197
+ args.start_epoch = checkpoint['epoch']
198
+ best_acc1 = checkpoint['best_acc1']
199
+ if args.gpu is not None:
200
+ # best_acc1 may be from a checkpoint from a different GPU
201
+ best_acc1 = best_acc1.to(args.gpu)
202
+ model.load_state_dict(checkpoint['state_dict'])
203
+ print("=> loaded checkpoint '{}' (epoch {})"
204
+ .format(args.resume, checkpoint['epoch']))
205
+ else:
206
+ print("=> no checkpoint found at '{}'".format(args.resume))
207
+
208
+ cudnn.benchmark = True
209
+
210
+ if args.isObjectNet:
211
+ val_dataset = ObjectNetDataset(args.data)
212
+ else:
213
+ val_dataset = RobustnessDataset(args.data, isV2=args.isV2, isSI=args.isSI)
214
+
215
+ val_loader = torch.utils.data.DataLoader(
216
+ val_dataset, batch_size=args.batch_size, shuffle=False,
217
+ num_workers=args.workers, pin_memory=True)
218
+
219
+ if args.evaluate:
220
+ validate(val_loader, model, args)
221
+ return
222
+
223
+ def validate(val_loader, model, args):
224
+ batch_time = AverageMeter('Time', ':6.3f')
225
+ losses = AverageMeter('Loss', ':.4e')
226
+ top1 = AverageMeter('Acc@1', ':6.2f')
227
+ top5 = AverageMeter('Acc@5', ':6.2f')
228
+ progress = ProgressMeter(
229
+ len(val_loader),
230
+ [batch_time, losses, top1, top5],
231
+ prefix='Test: ')
232
+
233
+ # switch to evaluate mode
234
+ model.eval()
235
+
236
+ with torch.no_grad():
237
+ end = time.time()
238
+ for i, (images, target) in enumerate(val_loader):
239
+ if args.gpu is not None:
240
+ images = images.cuda(args.gpu, non_blocking=True)
241
+ if torch.cuda.is_available():
242
+ target = target.cuda(args.gpu, non_blocking=True)
243
+
244
+ # compute output
245
+ output = model(images)
246
+
247
+ # measure accuracy and record loss
248
+ acc1, acc5 = accuracy(output, target, topk=(1, 5))
249
+ top1.update(acc1[0], images.size(0))
250
+ top5.update(acc5[0], images.size(0))
251
+
252
+ # measure elapsed time
253
+ batch_time.update(time.time() - end)
254
+ end = time.time()
255
+
256
+ if i % args.print_freq == 0:
257
+ progress.display(i)
258
+
259
+ # TODO: this should also be done with the ProgressMeter
260
+ print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'
261
+ .format(top1=top1, top5=top5))
262
+
263
+ return top1.avg
264
+
265
+
266
+ def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
267
+ torch.save(state, filename)
268
+ if is_best:
269
+ shutil.copyfile(filename, 'model_best.pth.tar')
270
+
271
+
272
+ class AverageMeter(object):
273
+ """Computes and stores the average and current value"""
274
+ def __init__(self, name, fmt=':f'):
275
+ self.name = name
276
+ self.fmt = fmt
277
+ self.reset()
278
+
279
+ def reset(self):
280
+ self.val = 0
281
+ self.avg = 0
282
+ self.sum = 0
283
+ self.count = 0
284
+
285
+ def update(self, val, n=1):
286
+ self.val = val
287
+ self.sum += val * n
288
+ self.count += n
289
+ self.avg = self.sum / self.count
290
+
291
+ def __str__(self):
292
+ fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
293
+ return fmtstr.format(**self.__dict__)
294
+
295
+
296
+ class ProgressMeter(object):
297
+ def __init__(self, num_batches, meters, prefix=""):
298
+ self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
299
+ self.meters = meters
300
+ self.prefix = prefix
301
+
302
+ def display(self, batch):
303
+ entries = [self.prefix + self.batch_fmtstr.format(batch)]
304
+ entries += [str(meter) for meter in self.meters]
305
+ print('\t'.join(entries))
306
+
307
+ def _get_batch_fmtstr(self, num_batches):
308
+ num_digits = len(str(num_batches // 1))
309
+ fmt = '{:' + str(num_digits) + 'd}'
310
+ return '[' + fmt + '/' + fmt.format(num_batches) + ']'
311
+
312
+ def adjust_learning_rate(optimizer, epoch, args):
313
+ """Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
314
+ lr = args.lr * (0.85 ** (epoch // 2))
315
+ for param_group in optimizer.param_groups:
316
+ param_group['lr'] = lr
317
+
318
+
319
+ def accuracy(output, target, topk=(1,)):
320
+ """Computes the accuracy over the k top predictions for the specified values of k"""
321
+ with torch.no_grad():
322
+ maxk = max(topk)
323
+ batch_size = target.size(0)
324
+
325
+ _, pred = output.topk(maxk, 1, True, True)
326
+ pred = pred.t()
327
+ correct = pred.eq(target.view(1, -1).expand_as(pred))
328
+
329
+ res = []
330
+ for k in topk:
331
+ correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
332
+ res.append(correct_k.mul_(100.0 / batch_size))
333
+ return res
334
+
335
+
336
+ if __name__ == '__main__':
337
+ main()
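
As a follow-up to the accuracy helper defined above, here is a minimal, self-contained sketch (toy tensors only, not part of the script) that exercises the same top-k logic and shows the scale of the returned values. It assumes the accuracy() function above is in scope (e.g. run in the same module or imported from imagenet_eval_robustness):

import torch

# Toy batch of 3 samples over 4 classes; each row's argmax is the top-1 prediction.
output = torch.tensor([[0.1, 0.7, 0.1, 0.1],   # top-1: class 1
                       [0.5, 0.1, 0.3, 0.1],   # top-1: class 0, top-2: {0, 2}
                       [0.2, 0.2, 0.5, 0.1]])  # top-1: class 2
target = torch.tensor([1, 2, 2])

# With only 4 classes, use topk=(1, 2) instead of the script's (1, 5).
acc1, acc2 = accuracy(output, target, topk=(1, 2))
print(acc1.item(), acc2.item())  # ~66.67 and 100.0 on this toy batch
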
imagenet_eval_robustness_per_class.py ADDED
@@ -0,0 +1,343 @@
1
+ import argparse
2
+ import os
3
+ import random
4
+ import shutil
5
+ import time
6
+ import warnings
7
+
8
+ import torch
9
+ import torch.nn as nn
10
+ import torch.nn.parallel
11
+ import torch.backends.cudnn as cudnn
12
+ import torch.distributed as dist
13
+ import torch.optim
14
+ import torch.multiprocessing as mp
15
+ import torch.utils.data
16
+ import torch.utils.data.distributed
17
+ import torchvision.transforms as transforms
18
+ import torchvision.datasets as datasets
19
+ import torchvision.models as models
20
+
21
+ # Uncomment the expected model below
22
+
23
+ # ViT
24
+ from ViT.ViT import vit_base_patch16_224 as vit
25
+ # from ViT.ViT import vit_large_patch16_224 as vit
26
+
27
+ # ViT-AugReg
28
+ # from ViT.ViT_new import vit_small_patch16_224 as vit
29
+ # from ViT.ViT_new import vit_base_patch16_224 as vit
30
+ # from ViT.ViT_new import vit_large_patch16_224 as vit
31
+
32
+ # DeiT
33
+ # from ViT.ViT import deit_base_patch16_224 as vit
34
+ # from ViT.ViT import deit_small_patch16_224 as vit
35
+
36
+ from robustness_dataset_per_class import RobustnessDataset
37
+ from objectnet_dataset import ObjectNetDataset
38
+ model_names = sorted(name for name in models.__dict__
39
+ if name.islower() and not name.startswith("__")
40
+ and callable(models.__dict__[name]))
41
+ model_names.append("vit")
42
+
43
+ parser = argparse.ArgumentParser(description='PyTorch ImageNet Robustness Evaluation (per class)')
44
+ parser.add_argument('--data', metavar='DIR',
45
+ help='path to dataset')
46
+ parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
47
+ help='number of data loading workers (default: 4)')
48
+ parser.add_argument('--epochs', default=150, type=int, metavar='N',
49
+ help='number of total epochs to run')
50
+ parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
51
+ help='manual epoch number (useful on restarts)')
52
+ parser.add_argument('-b', '--batch-size', default=256, type=int,
53
+ metavar='N',
54
+ help='mini-batch size (default: 256), this is the total '
55
+ 'batch size of all GPUs on the current node when '
56
+ 'using Data Parallel or Distributed Data Parallel')
57
+ parser.add_argument('--lr', '--learning-rate', default=5e-4, type=float,
58
+ metavar='LR', help='initial learning rate', dest='lr')
59
+ parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
60
+ help='momentum')
61
+ parser.add_argument('--wd', '--weight-decay', default=0.05, type=float,
62
+ metavar='W', help='weight decay (default: 0.05)',
63
+ dest='weight_decay')
64
+ parser.add_argument('-p', '--print-freq', default=10, type=int,
65
+ metavar='N', help='print frequency (default: 10)')
66
+ parser.add_argument('--checkpoint', default='', type=str, metavar='PATH',
67
+ help='path to latest checkpoint (default: none)')
68
+ parser.add_argument('--resume', default='', type=str, metavar='PATH',
69
+ help='path to resume checkpoint (default: none)')
70
+ parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true',
71
+ help='evaluate model on validation set')
72
+ parser.add_argument('--pretrained', dest='pretrained', action='store_true',
73
+ help='use pre-trained model')
74
+ parser.add_argument('--world-size', default=-1, type=int,
75
+ help='number of nodes for distributed training')
76
+ parser.add_argument('--rank', default=-1, type=int,
77
+ help='node rank for distributed training')
78
+ parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str,
79
+ help='url used to set up distributed training')
80
+ parser.add_argument('--dist-backend', default='nccl', type=str,
81
+ help='distributed backend')
82
+ parser.add_argument('--seed', default=None, type=int,
83
+ help='seed for initializing training. ')
84
+ parser.add_argument('--gpu', default=None, type=int,
85
+ help='GPU id to use.')
86
+ parser.add_argument('--multiprocessing-distributed', action='store_true',
87
+ help='Use multi-processing distributed training to launch '
88
+ 'N processes per node, which has N GPUs. This is the '
89
+ 'fastest way to use PyTorch for either single node or '
90
+ 'multi node data parallel training')
91
+ parser.add_argument("--isV2", default=False, action='store_true',
92
+ help='is dataset imagenet V2.')
93
+ parser.add_argument("--isSI", default=False, action='store_true',
94
+ help='is dataset SI-score.')
95
+ parser.add_argument("--isObjectNet", default=False, action='store_true',
96
+ help='is dataset SI-score.')
97
+
98
+
99
+ def main():
100
+ args = parser.parse_args()
101
+
102
+ if args.seed is not None:
103
+ random.seed(args.seed)
104
+ torch.manual_seed(args.seed)
105
+ cudnn.deterministic = True
106
+ warnings.warn('You have chosen to seed training. '
107
+ 'This will turn on the CUDNN deterministic setting, '
108
+ 'which can slow down your training considerably! '
109
+ 'You may see unexpected behavior when restarting '
110
+ 'from checkpoints.')
111
+
112
+ if args.gpu is not None:
113
+ warnings.warn('You have chosen a specific GPU. This will completely '
114
+ 'disable data parallelism.')
115
+
116
+ if args.dist_url == "env://" and args.world_size == -1:
117
+ args.world_size = int(os.environ["WORLD_SIZE"])
118
+
119
+ args.distributed = args.world_size > 1 or args.multiprocessing_distributed
120
+
121
+ ngpus_per_node = torch.cuda.device_count()
122
+ if args.multiprocessing_distributed:
123
+ # Since we have ngpus_per_node processes per node, the total world_size
124
+ # needs to be adjusted accordingly
125
+ args.world_size = ngpus_per_node * args.world_size
126
+ # Use torch.multiprocessing.spawn to launch distributed processes: the
127
+ # main_worker process function
128
+ mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
129
+ else:
130
+ # Simply call main_worker function
131
+ main_worker(args.gpu, ngpus_per_node, args)
132
+
133
+
134
+ def main_worker(gpu, ngpus_per_node, args):
135
+ global best_acc1
136
+ args.gpu = gpu
137
+
138
+ if args.gpu is not None:
139
+ print("Use GPU: {} for training".format(args.gpu))
140
+
141
+ if args.distributed:
142
+ if args.dist_url == "env://" and args.rank == -1:
143
+ args.rank = int(os.environ["RANK"])
144
+ if args.multiprocessing_distributed:
145
+ # For multiprocessing distributed training, rank needs to be the
146
+ # global rank among all the processes
147
+ args.rank = args.rank * ngpus_per_node + gpu
148
+ dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
149
+ world_size=args.world_size, rank=args.rank)
150
+ # create model
151
+ print("=> creating model")
152
+ if args.checkpoint:
153
+ model = vit().cuda()
154
+ checkpoint = torch.load(args.checkpoint)
155
+ model.load_state_dict(checkpoint['state_dict'])
156
+ else:
157
+ model = vit(pretrained=True).cuda()
158
+ print("done")
159
+
160
+ if not torch.cuda.is_available():
161
+ print('using CPU, this will be slow')
162
+ elif args.distributed:
163
+ # For multiprocessing distributed, DistributedDataParallel constructor
164
+ # should always set the single device scope, otherwise,
165
+ # DistributedDataParallel will use all available devices.
166
+ if args.gpu is not None:
167
+ torch.cuda.set_device(args.gpu)
168
+ model.cuda(args.gpu)
169
+ # When using a single GPU per process and per
170
+ # DistributedDataParallel, we need to divide the batch size
171
+ # ourselves based on the total number of GPUs we have
172
+ args.batch_size = int(args.batch_size / ngpus_per_node)
173
+ args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node)
174
+ model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
175
+ else:
176
+ model.cuda()
177
+ # DistributedDataParallel will divide and allocate batch_size to all
178
+ # available GPUs if device_ids are not set
179
+ model = torch.nn.parallel.DistributedDataParallel(model)
180
+ elif args.gpu is not None:
181
+ torch.cuda.set_device(args.gpu)
182
+ model = model.cuda(args.gpu)
183
+ else:
184
+ # DataParallel will divide and allocate batch_size to all available GPUs
185
+ print("start")
186
+ model = torch.nn.DataParallel(model).cuda()
187
+
188
+ # optionally resume from a checkpoint
189
+ if args.resume:
190
+ if os.path.isfile(args.resume):
191
+ print("=> loading checkpoint '{}'".format(args.resume))
192
+ if args.gpu is None:
193
+ checkpoint = torch.load(args.resume)
194
+ else:
195
+ # Map model to be loaded to specified single gpu.
196
+ loc = 'cuda:{}'.format(args.gpu)
197
+ checkpoint = torch.load(args.resume, map_location=loc)
198
+ args.start_epoch = checkpoint['epoch']
199
+ best_acc1 = checkpoint['best_acc1']
200
+ if args.gpu is not None:
201
+ # best_acc1 may be from a checkpoint from a different GPU
202
+ best_acc1 = best_acc1.to(args.gpu)
203
+ model.load_state_dict(checkpoint['state_dict'])
204
+ print("=> loaded checkpoint '{}' (epoch {})"
205
+ .format(args.resume, checkpoint['epoch']))
206
+ else:
207
+ print("=> no checkpoint found at '{}'".format(args.resume))
208
+
209
+ cudnn.benchmark = True
210
+
211
+ # Data loading code
212
+
213
+ top1_per_class = {}
214
+ top5_per_class = {}
215
+ for folder in os.listdir(args.data):
216
+ val_dataset = RobustnessDataset(args.data, folder=folder, isV2=args.isV2, isSI=args.isSI)
217
+ print("len: ", len(val_dataset))
218
+ val_loader = torch.utils.data.DataLoader(
219
+ val_dataset, batch_size=args.batch_size, shuffle=False,
220
+ num_workers=args.workers, pin_memory=True)
221
+ class_name = val_dataset.get_classname()
222
+ top1, top5 = validate(val_loader, model, args)
223
+ top1_per_class[class_name] = top1.item()
224
+ top5_per_class[class_name] = top5.item()
225
+
226
+ print("overall top1 per class: ", top1_per_class)
227
+ print("overall top5 per class: ", top5_per_class)
228
+
229
+ def validate(val_loader, model, args):
230
+ batch_time = AverageMeter('Time', ':6.3f')
231
+ losses = AverageMeter('Loss', ':.4e')
232
+ top1 = AverageMeter('Acc@1', ':6.2f')
233
+ top5 = AverageMeter('Acc@5', ':6.2f')
234
+ progress = ProgressMeter(
235
+ len(val_loader),
236
+ [batch_time, losses, top1, top5],
237
+ prefix='Test: ')
238
+
239
+ # switch to evaluate mode
240
+ model.eval()
241
+
242
+ with torch.no_grad():
243
+ end = time.time()
244
+ for i, (images, target) in enumerate(val_loader):
245
+ if args.gpu is not None:
246
+ images = images.cuda(args.gpu, non_blocking=True)
247
+ if torch.cuda.is_available():
248
+ target = target.cuda(args.gpu, non_blocking=True)
249
+
250
+ # compute output
251
+ output = model(images)
252
+
253
+ # measure accuracy and record loss
254
+ acc1, acc5 = accuracy(output, target, topk=(1, 5))
255
+ top1.update(acc1[0], images.size(0))
256
+ top5.update(acc5[0], images.size(0))
257
+
258
+ # measure elapsed time
259
+ batch_time.update(time.time() - end)
260
+ end = time.time()
261
+
262
+ if i % args.print_freq == 0:
263
+ progress.display(i)
264
+
265
+ # TODO: this should also be done with the ProgressMeter
266
+ print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'
267
+ .format(top1=top1, top5=top5))
268
+
269
+ return top1.avg, top5.avg
270
+
271
+
272
+ def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
273
+ torch.save(state, filename)
274
+ if is_best:
275
+ shutil.copyfile(filename, 'model_best.pth.tar')
276
+
277
+
278
+ class AverageMeter(object):
279
+ """Computes and stores the average and current value"""
280
+ def __init__(self, name, fmt=':f'):
281
+ self.name = name
282
+ self.fmt = fmt
283
+ self.reset()
284
+
285
+ def reset(self):
286
+ self.val = 0
287
+ self.avg = 0
288
+ self.sum = 0
289
+ self.count = 0
290
+
291
+ def update(self, val, n=1):
292
+ self.val = val
293
+ self.sum += val * n
294
+ self.count += n
295
+ self.avg = self.sum / self.count
296
+
297
+ def __str__(self):
298
+ fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
299
+ return fmtstr.format(**self.__dict__)
300
+
301
+
302
+ class ProgressMeter(object):
303
+ def __init__(self, num_batches, meters, prefix=""):
304
+ self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
305
+ self.meters = meters
306
+ self.prefix = prefix
307
+
308
+ def display(self, batch):
309
+ entries = [self.prefix + self.batch_fmtstr.format(batch)]
310
+ entries += [str(meter) for meter in self.meters]
311
+ print('\t'.join(entries))
312
+
313
+ def _get_batch_fmtstr(self, num_batches):
314
+ num_digits = len(str(num_batches // 1))
315
+ fmt = '{:' + str(num_digits) + 'd}'
316
+ return '[' + fmt + '/' + fmt.format(num_batches) + ']'
317
+
318
+ def adjust_learning_rate(optimizer, epoch, args):
319
+ """Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
320
+ lr = args.lr * (0.85 ** (epoch // 2))
321
+ for param_group in optimizer.param_groups:
322
+ param_group['lr'] = lr
323
+
324
+
325
+ def accuracy(output, target, topk=(1,)):
326
+ """Computes the accuracy over the k top predictions for the specified values of k"""
327
+ with torch.no_grad():
328
+ maxk = max(topk)
329
+ batch_size = target.size(0)
330
+
331
+ _, pred = output.topk(maxk, 1, True, True)
332
+ pred = pred.t()
333
+ correct = pred.eq(target.view(1, -1).expand_as(pred))
334
+
335
+ res = []
336
+ for k in topk:
337
+ correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
338
+ res.append(correct_k.mul_(100.0 / batch_size))
339
+ return res
340
+
341
+
342
+ if __name__ == '__main__':
343
+ main()
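
The per-class loop above ends by printing top1_per_class and top5_per_class dictionaries keyed by class name. A minimal sketch (hypothetical helper and values, not part of the script) of turning such a dictionary into a macro-averaged accuracy, where every class counts equally regardless of how many images it has:

def macro_average(per_class_acc):
    """Average per-class accuracies with equal weight per class."""
    return sum(per_class_acc.values()) / max(len(per_class_acc), 1)

# Example with the kind of dictionary the script prints (values are made up).
top1_per_class = {"goldfish": 87.5, "tiger shark": 62.5, "hammerhead": 75.0}
print(f"macro top-1: {macro_average(top1_per_class):.2f}")  # 75.00
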
imagenet_finetune.py ADDED
@@ -0,0 +1,567 @@
1
+ import argparse
2
+ import os
3
+ import random
4
+ import shutil
5
+ import time
6
+ import warnings
7
+
8
+ import torch
9
+ import torch.nn as nn
10
+ import torch.nn.parallel
11
+ import torch.backends.cudnn as cudnn
12
+ import torch.distributed as dist
13
+ import torch.optim
14
+ import torch.multiprocessing as mp
15
+ import torch.utils.data
16
+ import torch.utils.data.distributed
17
+ import torchvision.transforms as transforms
18
+ import torchvision.datasets as datasets
19
+ import torchvision.models as models
20
+ from segmentation_dataset import SegmentationDataset, VAL_PARTITION, TRAIN_PARTITION
21
+
22
+ # Uncomment the expected model below
23
+
24
+ # ViT
25
+ from ViT.ViT import vit_base_patch16_224 as vit
26
+ # from ViT.ViT import vit_large_patch16_224 as vit
27
+
28
+ # ViT-AugReg
29
+ # from ViT.ViT_new import vit_small_patch16_224 as vit
30
+ # from ViT.ViT_new import vit_base_patch16_224 as vit
31
+ # from ViT.ViT_new import vit_large_patch16_224 as vit
32
+
33
+ # DeiT
34
+ # from ViT.ViT import deit_base_patch16_224 as vit
35
+ # from ViT.ViT import deit_small_patch16_224 as vit
36
+
37
+ from ViT.explainer import generate_relevance, get_image_with_relevance
38
+ import torchvision
39
+ import cv2
40
+ from torch.utils.tensorboard import SummaryWriter
41
+ import json
42
+
43
+ model_names = sorted(name for name in models.__dict__
44
+ if name.islower() and not name.startswith("__")
45
+ and callable(models.__dict__[name]))
46
+ model_names.append("vit")
47
+
48
+ parser = argparse.ArgumentParser(description='PyTorch ImageNet Training')
49
+ parser.add_argument('--data', metavar='DATA',
50
+ help='path to dataset')
51
+ parser.add_argument('--seg_data', metavar='SEG_DATA',
52
+ help='path to segmentation dataset')
53
+ parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
54
+ help='number of data loading workers (default: 4)')
55
+ parser.add_argument('--epochs', default=50, type=int, metavar='N',
56
+ help='number of total epochs to run')
57
+ parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
58
+ help='manual epoch number (useful on restarts)')
59
+ parser.add_argument('-b', '--batch-size', default=8, type=int,
60
+ metavar='N',
61
+ help='mini-batch size (default: 256), this is the total '
62
+ 'batch size of all GPUs on the current node when '
63
+ 'using Data Parallel or Distributed Data Parallel')
64
+ parser.add_argument('--lr', '--learning-rate', default=3e-6, type=float,
65
+ metavar='LR', help='initial learning rate', dest='lr')
66
+ parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
67
+ help='momentum')
68
+ parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
69
+ metavar='W', help='weight decay (default: 1e-4)',
70
+ dest='weight_decay')
71
+ parser.add_argument('-p', '--print-freq', default=10, type=int,
72
+ metavar='N', help='print frequency (default: 10)')
73
+ parser.add_argument('--resume', default='', type=str, metavar='PATH',
74
+ help='path to latest checkpoint (default: none)')
75
+ parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true',
76
+ help='evaluate model on validation set')
77
+ parser.add_argument('--pretrained', dest='pretrained', action='store_true',
78
+ help='use pre-trained model')
79
+ parser.add_argument('--world-size', default=-1, type=int,
80
+ help='number of nodes for distributed training')
81
+ parser.add_argument('--rank', default=-1, type=int,
82
+ help='node rank for distributed training')
83
+ parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str,
84
+ help='url used to set up distributed training')
85
+ parser.add_argument('--dist-backend', default='nccl', type=str,
86
+ help='distributed backend')
87
+ parser.add_argument('--gpu', default=None, type=int,
88
+ help='GPU id to use.')
89
+ parser.add_argument('--save_interval', default=20, type=int,
90
+ help='interval to save segmentation results.')
91
+ parser.add_argument('--num_samples', default=3, type=int,
92
+ help='number of samples per class for training')
93
+ parser.add_argument('--multiprocessing-distributed', action='store_true',
94
+ help='Use multi-processing distributed training to launch '
95
+ 'N processes per node, which has N GPUs. This is the '
96
+ 'fastest way to use PyTorch for either single node or '
97
+ 'multi node data parallel training')
98
+ parser.add_argument('--lambda_seg', default=0.8, type=float,
99
+ help='influence of segmentation loss.')
100
+ parser.add_argument('--lambda_acc', default=0.2, type=float,
101
+ help='influence of accuracy loss.')
102
+ parser.add_argument('--experiment_folder', default=None, type=str,
103
+ help='path to folder to use for experiment.')
104
+ parser.add_argument('--dilation', default=0, type=float,
105
+ help='Use dilation on the segmentation maps.')
106
+ parser.add_argument('--lambda_background', default=2, type=float,
107
+ help='coefficient of loss for segmentation background.')
108
+ parser.add_argument('--lambda_foreground', default=0.3, type=float,
109
+ help='coefficient of loss for segmentation foreground.')
110
+ parser.add_argument('--num_classes', default=500, type=int,
111
+ help='coefficient of loss for segmentation foreground.')
112
+ parser.add_argument('--temperature', default=1, type=float,
113
+ help='temperature for softmax (mostly for DeiT).')
114
+ parser.add_argument('--class_seed', default=None, type=int,
115
+ help='seed to randomly shuffle classes chosen for training.')
116
+
117
+ best_loss = float('inf')
118
+
119
+ def main():
120
+ args = parser.parse_args()
121
+
122
+ if args.experiment_folder is None:
123
+ args.experiment_folder = f'experiment/' \
124
+ f'lr_{args.lr}_seg_{args.lambda_seg}_acc_{args.lambda_acc}' \
125
+ f'_bckg_{args.lambda_background}_fgd_{args.lambda_foreground}'
126
+ if args.temperature != 1:
127
+ args.experiment_folder = args.experiment_folder + f'_tempera_{args.temperature}'
128
+ if args.batch_size != 8:
129
+ args.experiment_folder = args.experiment_folder + f'_bs_{args.batch_size}'
130
+ if args.num_classes != 500:
131
+ args.experiment_folder = args.experiment_folder + f'_num_classes_{args.num_classes}'
132
+ if args.num_samples != 3:
133
+ args.experiment_folder = args.experiment_folder + f'_num_samples_{args.num_samples}'
134
+ if args.epochs != 150:
135
+ args.experiment_folder = args.experiment_folder + f'_num_epochs_{args.epochs}'
136
+ if args.class_seed is not None:
137
+ args.experiment_folder = args.experiment_folder + f'_seed_{args.class_seed}'
138
+
139
+ if os.path.exists(args.experiment_folder):
140
+ raise Exception(f"Experiment path {args.experiment_folder} already exists!")
141
+ os.mkdir(args.experiment_folder)
142
+ os.mkdir(f'{args.experiment_folder}/train_samples')
143
+ os.mkdir(f'{args.experiment_folder}/val_samples')
144
+
145
+ with open(f'{args.experiment_folder}/commandline_args.txt', 'w') as f:
146
+ json.dump(args.__dict__, f, indent=2)
147
+
148
+ if args.gpu is not None:
149
+ warnings.warn('You have chosen a specific GPU. This will completely '
150
+ 'disable data parallelism.')
151
+
152
+ if args.dist_url == "env://" and args.world_size == -1:
153
+ args.world_size = int(os.environ["WORLD_SIZE"])
154
+
155
+ args.distributed = args.world_size > 1 or args.multiprocessing_distributed
156
+
157
+ ngpus_per_node = torch.cuda.device_count()
158
+ if args.multiprocessing_distributed:
159
+ # Since we have ngpus_per_node processes per node, the total world_size
160
+ # needs to be adjusted accordingly
161
+ args.world_size = ngpus_per_node * args.world_size
162
+ # Use torch.multiprocessing.spawn to launch distributed processes: the
163
+ # main_worker process function
164
+ mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
165
+ else:
166
+ # Simply call main_worker function
167
+ main_worker(args.gpu, ngpus_per_node, args)
168
+
169
+
170
+ def main_worker(gpu, ngpus_per_node, args):
171
+ global best_loss
172
+ args.gpu = gpu
173
+
174
+ if args.gpu is not None:
175
+ print("Use GPU: {} for training".format(args.gpu))
176
+
177
+ if args.distributed:
178
+ if args.dist_url == "env://" and args.rank == -1:
179
+ args.rank = int(os.environ["RANK"])
180
+ if args.multiprocessing_distributed:
181
+ # For multiprocessing distributed training, rank needs to be the
182
+ # global rank among all the processes
183
+ args.rank = args.rank * ngpus_per_node + gpu
184
+ dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
185
+ world_size=args.world_size, rank=args.rank)
186
+ # create model
187
+ print("=> creating model")
188
+ model = vit(pretrained=True).cuda()
189
+ model.train()
190
+ print("done")
191
+
192
+ if not torch.cuda.is_available():
193
+ print('using CPU, this will be slow')
194
+ elif args.distributed:
195
+ # For multiprocessing distributed, DistributedDataParallel constructor
196
+ # should always set the single device scope, otherwise,
197
+ # DistributedDataParallel will use all available devices.
198
+ if args.gpu is not None:
199
+ torch.cuda.set_device(args.gpu)
200
+ model.cuda(args.gpu)
201
+ # When using a single GPU per process and per
202
+ # DistributedDataParallel, we need to divide the batch size
203
+ # ourselves based on the total number of GPUs we have
204
+ args.batch_size = int(args.batch_size / ngpus_per_node)
205
+ args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node)
206
+ model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
207
+ else:
208
+ model.cuda()
209
+ # DistributedDataParallel will divide and allocate batch_size to all
210
+ # available GPUs if device_ids are not set
211
+ model = torch.nn.parallel.DistributedDataParallel(model)
212
+ elif args.gpu is not None:
213
+ torch.cuda.set_device(args.gpu)
214
+ model = model.cuda(args.gpu)
215
+ else:
216
+ # DataParallel will divide and allocate batch_size to all available GPUs
217
+ print("start")
218
+ model = torch.nn.DataParallel(model).cuda()
219
+
220
+ # define loss function (criterion) and optimizer
221
+ criterion = nn.CrossEntropyLoss().cuda(args.gpu)
222
+ optimizer = torch.optim.AdamW(model.parameters(), args.lr, weight_decay=args.weight_decay)
223
+
224
+ # optionally resume from a checkpoint
225
+ if args.resume:
226
+ if os.path.isfile(args.resume):
227
+ print("=> loading checkpoint '{}'".format(args.resume))
228
+ if args.gpu is None:
229
+ checkpoint = torch.load(args.resume)
230
+ else:
231
+ # Map model to be loaded to specified single gpu.
232
+ loc = 'cuda:{}'.format(args.gpu)
233
+ checkpoint = torch.load(args.resume, map_location=loc)
234
+ args.start_epoch = checkpoint['epoch']
235
+ best_loss = checkpoint['best_loss']
236
+ if args.gpu is not None:
237
+ # best_loss may be from a checkpoint from a different GPU
238
+ best_loss = best_loss.to(args.gpu)
239
+ model.load_state_dict(checkpoint['state_dict'])
240
+ optimizer.load_state_dict(checkpoint['optimizer'])
241
+ print("=> loaded checkpoint '{}' (epoch {})"
242
+ .format(args.resume, checkpoint['epoch']))
243
+ else:
244
+ print("=> no checkpoint found at '{}'".format(args.resume))
245
+
246
+ cudnn.benchmark = True
247
+
248
+ train_dataset = SegmentationDataset(args.seg_data, args.data, partition=TRAIN_PARTITION, train_classes=args.num_classes,
249
+ num_samples=args.num_samples, seed=args.class_seed)
250
+
251
+ if args.distributed:
252
+ train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
253
+ else:
254
+ train_sampler = None
255
+
256
+ train_loader = torch.utils.data.DataLoader(
257
+ train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None),
258
+ num_workers=args.workers, pin_memory=True, sampler=train_sampler)
259
+
260
+ val_dataset = SegmentationDataset(args.seg_data, args.data, partition=VAL_PARTITION, train_classes=args.num_classes,
261
+ num_samples=1, seed=args.class_seed)
262
+
263
+ val_loader = torch.utils.data.DataLoader(
264
+ val_dataset, batch_size=10, shuffle=False,
265
+ num_workers=args.workers, pin_memory=True)
266
+
267
+ if args.evaluate:
268
+ validate(val_loader, model, criterion, 0, args)
269
+ return
270
+
271
+ for epoch in range(args.start_epoch, args.epochs):
272
+ if args.distributed:
273
+ train_sampler.set_epoch(epoch)
274
+ adjust_learning_rate(optimizer, epoch, args)
275
+
276
+ log_dir = os.path.join(args.experiment_folder, 'logs')
277
+ logger = SummaryWriter(log_dir=log_dir)
278
+ args.logger = logger
279
+
280
+ # train for one epoch
281
+ train(train_loader, model, criterion, optimizer, epoch, args)
282
+
283
+ # evaluate on validation set
284
+ loss1 = validate(val_loader, model, criterion, epoch, args)
285
+
286
+ # remember best (lowest) validation loss and save checkpoint
287
+ is_best = loss1 <= best_loss
288
+ best_loss = min(loss1, best_loss)
289
+
290
+ if not args.multiprocessing_distributed or (args.multiprocessing_distributed
291
+ and args.rank % ngpus_per_node == 0):
292
+ save_checkpoint({
293
+ 'epoch': epoch + 1,
294
+ 'state_dict': model.state_dict(),
295
+ 'best_loss': best_loss,
296
+ 'optimizer' : optimizer.state_dict(),
297
+ }, is_best, folder=args.experiment_folder)
298
+
299
+
300
+ def train(train_loader, model, criterion, optimizer, epoch, args):
301
+ mse_criterion = torch.nn.MSELoss(reduction='mean')
302
+
303
+ losses = AverageMeter('Loss', ':.4e')
304
+ top1 = AverageMeter('Acc@1', ':6.2f')
305
+ top5 = AverageMeter('Acc@5', ':6.2f')
306
+ orig_top1 = AverageMeter('Acc@1_orig', ':6.2f')
307
+ orig_top5 = AverageMeter('Acc@5_orig', ':6.2f')
308
+ progress = ProgressMeter(
309
+ len(train_loader),
310
+ [losses, top1, top5, orig_top1, orig_top5],
311
+ prefix="Epoch: [{}]".format(epoch))
312
+
313
+ orig_model = vit(pretrained=True).cuda()
314
+ orig_model.eval()
315
+
316
+ # switch to train mode
317
+ model.train()
318
+
319
+ for i, (seg_map, image_ten, class_name) in enumerate(train_loader):
320
+ if torch.cuda.is_available():
321
+ image_ten = image_ten.cuda(args.gpu, non_blocking=True)
322
+ seg_map = seg_map.cuda(args.gpu, non_blocking=True)
323
+ class_name = class_name.cuda(args.gpu, non_blocking=True)
324
+
325
+ # segmentation loss
326
+ relevance = generate_relevance(model, image_ten, index=class_name)
327
+
328
+ reverse_seg_map = seg_map.clone()
329
+ reverse_seg_map[reverse_seg_map == 1] = -1
330
+ reverse_seg_map[reverse_seg_map == 0] = 1
331
+ reverse_seg_map[reverse_seg_map == -1] = 0
332
+ background_loss = mse_criterion(relevance * reverse_seg_map, torch.zeros_like(relevance))
333
+ foreground_loss = mse_criterion(relevance * seg_map, seg_map)
334
+ segmentation_loss = args.lambda_background * background_loss
335
+ segmentation_loss += args.lambda_foreground * foreground_loss
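+ # note: reverse_seg_map equals 1 - seg_map for a binary mask; the background term drives relevance
+ # outside the object toward zero, while the foreground term drives relevance inside the object toward 1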
336
+
337
+ # classification loss
338
+ output = model(image_ten)
339
+ with torch.no_grad():
340
+ output_orig = orig_model(image_ten)
341
+
342
+ _, pred = output.topk(1, 1, True, True)
343
+ pred = pred.flatten()
344
+
345
+ if args.temperature != 1:
346
+ output = output / args.temperature
347
+ classification_loss = criterion(output, pred)
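+ # note: the classification target here is the model's own top-1 prediction (pred) rather than the
+ # ground-truth label, so this term penalizes loss of confidence in the model's current decision
+ # instead of fitting labels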
348
+
349
+ loss = args.lambda_seg * segmentation_loss + args.lambda_acc * classification_loss
350
+
351
+ # debugging output
352
+ if i % args.save_interval == 0:
353
+ orig_relevance = generate_relevance(orig_model, image_ten, index=class_name)
354
+ for j in range(image_ten.shape[0]):
355
+ image = get_image_with_relevance(image_ten[j], torch.ones_like(image_ten[j]))
356
+ new_vis = get_image_with_relevance(image_ten[j], relevance[j])
357
+ old_vis = get_image_with_relevance(image_ten[j], orig_relevance[j])
358
+ gt = get_image_with_relevance(image_ten[j], seg_map[j])
359
+ h_img = cv2.hconcat([image, gt, old_vis, new_vis])
360
+ cv2.imwrite(f'{args.experiment_folder}/train_samples/res_{i}_{j}.jpg', h_img)
361
+
362
+ # measure accuracy and record loss
363
+ acc1, acc5 = accuracy(output, class_name, topk=(1, 5))
364
+ losses.update(loss.item(), image_ten.size(0))
365
+ top1.update(acc1[0], image_ten.size(0))
366
+ top5.update(acc5[0], image_ten.size(0))
367
+
368
+ # metrics for original vit
369
+ acc1_orig, acc5_orig = accuracy(output_orig, class_name, topk=(1, 5))
370
+ orig_top1.update(acc1_orig[0], image_ten.size(0))
371
+ orig_top5.update(acc5_orig[0], image_ten.size(0))
372
+
373
+ # compute gradient and do SGD step
374
+ optimizer.zero_grad()
375
+ loss.backward()
376
+ optimizer.step()
377
+
378
+ if i % args.print_freq == 0:
379
+ progress.display(i)
380
+ args.logger.add_scalar('{}/{}'.format('train', 'segmentation_loss'), segmentation_loss,
381
+ epoch*len(train_loader)+i)
382
+ args.logger.add_scalar('{}/{}'.format('train', 'classification_loss'), classification_loss,
383
+ epoch * len(train_loader) + i)
384
+ args.logger.add_scalar('{}/{}'.format('train', 'orig_top1'), acc1_orig,
385
+ epoch * len(train_loader) + i)
386
+ args.logger.add_scalar('{}/{}'.format('train', 'top1'), acc1,
387
+ epoch * len(train_loader) + i)
388
+ args.logger.add_scalar('{}/{}'.format('train', 'orig_top5'), acc5_orig,
389
+ epoch * len(train_loader) + i)
390
+ args.logger.add_scalar('{}/{}'.format('train', 'top5'), acc5,
391
+ epoch * len(train_loader) + i)
392
+ args.logger.add_scalar('{}/{}'.format('train', 'tot_loss'), loss,
393
+ epoch * len(train_loader) + i)
394
+
395
+
396
+ def validate(val_loader, model, criterion, epoch, args):
397
+ mse_criterion = torch.nn.MSELoss(reduction='mean')
398
+
399
+ losses = AverageMeter('Loss', ':.4e')
400
+ top1 = AverageMeter('Acc@1', ':6.2f')
401
+ top5 = AverageMeter('Acc@5', ':6.2f')
402
+ orig_top1 = AverageMeter('Acc@1_orig', ':6.2f')
403
+ orig_top5 = AverageMeter('Acc@5_orig', ':6.2f')
404
+ progress = ProgressMeter(
405
+ len(val_loader),
406
+ [losses, top1, top5, orig_top1, orig_top5],
407
+ prefix="Epoch: [{}]".format(val_loader))
408
+
409
+ # switch to evaluate mode
410
+ model.eval()
411
+
412
+ orig_model = vit(pretrained=True).cuda()
413
+ orig_model.eval()
414
+
415
+ with torch.no_grad():
416
+ for i, (seg_map, image_ten, class_name) in enumerate(val_loader):
417
+ if args.gpu is not None:
418
+ image_ten = image_ten.cuda(args.gpu, non_blocking=True)
419
+ if torch.cuda.is_available():
420
+ seg_map = seg_map.cuda(args.gpu, non_blocking=True)
421
+ class_name = class_name.cuda(args.gpu, non_blocking=True)
422
+
423
+ # segmentation loss
424
+ with torch.enable_grad():
425
+ relevance = generate_relevance(model, image_ten, index=class_name)
426
+
427
+ reverse_seg_map = seg_map.clone()
428
+ reverse_seg_map[reverse_seg_map == 1] = -1
429
+ reverse_seg_map[reverse_seg_map == 0] = 1
430
+ reverse_seg_map[reverse_seg_map == -1] = 0
431
+ background_loss = mse_criterion(relevance * reverse_seg_map, torch.zeros_like(relevance))
432
+ foreground_loss = mse_criterion(relevance * seg_map, seg_map)
433
+ segmentation_loss = args.lambda_background * background_loss
434
+ segmentation_loss += args.lambda_foreground * foreground_loss
435
+
436
+ # classification loss
437
+ with torch.no_grad():
438
+ output = model(image_ten)
439
+ output_orig = orig_model(image_ten)
440
+
441
+ _, pred = output.topk(1, 1, True, True)
442
+ pred = pred.flatten()
443
+ if args.temperature != 1:
444
+ output = output / args.temperature
445
+ classification_loss = criterion(output, pred)
446
+
447
+ loss = args.lambda_seg * segmentation_loss + args.lambda_acc * classification_loss
448
+
449
+ # save results
450
+ if i % args.save_interval == 0:
451
+ with torch.enable_grad():
452
+ orig_relevance = generate_relevance(orig_model, image_ten, index=class_name)
453
+ for j in range(image_ten.shape[0]):
454
+ image = get_image_with_relevance(image_ten[j], torch.ones_like(image_ten[j]))
455
+ new_vis = get_image_with_relevance(image_ten[j], relevance[j])
456
+ old_vis = get_image_with_relevance(image_ten[j], orig_relevance[j])
457
+ gt = get_image_with_relevance(image_ten[j], seg_map[j])
458
+ h_img = cv2.hconcat([image, gt, old_vis, new_vis])
459
+ cv2.imwrite(f'{args.experiment_folder}/val_samples/res_{i}_{j}.jpg', h_img)
460
+
461
+ # measure accuracy and record loss
462
+ acc1, acc5 = accuracy(output, class_name, topk=(1, 5))
463
+ losses.update(loss.item(), image_ten.size(0))
464
+ top1.update(acc1[0], image_ten.size(0))
465
+ top5.update(acc5[0], image_ten.size(0))
466
+
467
+ # metrics for original vit
468
+ acc1_orig, acc5_orig = accuracy(output_orig, class_name, topk=(1, 5))
469
+ orig_top1.update(acc1_orig[0], image_ten.size(0))
470
+ orig_top5.update(acc5_orig[0], image_ten.size(0))
471
+
472
+ if i % args.print_freq == 0:
473
+ progress.display(i)
474
+ args.logger.add_scalar('{}/{}'.format('val', 'segmentation_loss'), segmentation_loss,
475
+ epoch * len(val_loader) + i)
476
+ args.logger.add_scalar('{}/{}'.format('val', 'classification_loss'), classification_loss,
477
+ epoch * len(val_loader) + i)
478
+ args.logger.add_scalar('{}/{}'.format('val', 'orig_top1'), acc1_orig,
479
+ epoch * len(val_loader) + i)
480
+ args.logger.add_scalar('{}/{}'.format('val', 'top1'), acc1,
481
+ epoch * len(val_loader) + i)
482
+ args.logger.add_scalar('{}/{}'.format('val', 'orig_top5'), acc5_orig,
483
+ epoch * len(val_loader) + i)
484
+ args.logger.add_scalar('{}/{}'.format('val', 'top5'), acc5,
485
+ epoch * len(val_loader) + i)
486
+ args.logger.add_scalar('{}/{}'.format('val', 'tot_loss'), loss,
487
+ epoch * len(val_loader) + i)
488
+
489
+ # TODO: this should also be done with the ProgressMeter
490
+ print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'
491
+ .format(top1=top1, top5=top5))
492
+
493
+ return losses.avg
494
+
495
+
496
+ def save_checkpoint(state, is_best, folder, filename='checkpoint.pth.tar'):
497
+ torch.save(state, f'{folder}/{filename}')
498
+ if is_best:
499
+ shutil.copyfile(f'{folder}/{filename}', f'{folder}/model_best.pth.tar')
500
+
501
+
502
+ class AverageMeter(object):
503
+ """Computes and stores the average and current value"""
504
+ def __init__(self, name, fmt=':f'):
505
+ self.name = name
506
+ self.fmt = fmt
507
+ self.reset()
508
+
509
+ def reset(self):
510
+ self.val = 0
511
+ self.avg = 0
512
+ self.sum = 0
513
+ self.count = 0
514
+
515
+ def update(self, val, n=1):
516
+ self.val = val
517
+ self.sum += val * n
518
+ self.count += n
519
+ self.avg = self.sum / self.count
520
+
521
+ def __str__(self):
522
+ fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
523
+ return fmtstr.format(**self.__dict__)
524
+
525
+
526
+ class ProgressMeter(object):
527
+ def __init__(self, num_batches, meters, prefix=""):
528
+ self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
529
+ self.meters = meters
530
+ self.prefix = prefix
531
+
532
+ def display(self, batch):
533
+ entries = [self.prefix + self.batch_fmtstr.format(batch)]
534
+ entries += [str(meter) for meter in self.meters]
535
+ print('\t'.join(entries))
536
+
537
+ def _get_batch_fmtstr(self, num_batches):
538
+ num_digits = len(str(num_batches // 1))
539
+ fmt = '{:' + str(num_digits) + 'd}'
540
+ return '[' + fmt + '/' + fmt.format(num_batches) + ']'
541
+
542
+ def adjust_learning_rate(optimizer, epoch, args):
543
+ """Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
544
+ lr = args.lr * (0.85 ** (epoch // 2))
545
+ for param_group in optimizer.param_groups:
546
+ param_group['lr'] = lr
547
+
548
+
549
+ def accuracy(output, target, topk=(1,)):
550
+ """Computes the accuracy over the k top predictions for the specified values of k"""
551
+ with torch.no_grad():
552
+ maxk = max(topk)
553
+ batch_size = target.size(0)
554
+
555
+ _, pred = output.topk(maxk, 1, True, True)
556
+ pred = pred.t()
557
+ correct = pred.eq(target.view(1, -1).expand_as(pred))
558
+
559
+ res = []
560
+ for k in topk:
561
+ correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
562
+ res.append(correct_k.mul_(100.0 / batch_size))
563
+ return res
564
+
565
+
566
+ if __name__ == '__main__':
567
+ main()
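
Editor's note: the fine-tuning script above ties a relevance-guided segmentation loss to a classification-consistency term. The sketch below is a minimal, self-contained illustration of that loss, not the repository's API: relevance and seg_map are toy tensors standing in for the output of generate_relevance and the binary masks from segmentation_dataset, and the default weights mirror the command-line defaults above (lambda_background=2, lambda_foreground=0.3, lambda_seg=0.8, lambda_acc=0.2).

# Minimal sketch (editor's addition): relevance-guided loss with toy tensors.
import torch
import torch.nn.functional as F

def relevance_guided_loss(relevance, seg_map, output,
                          lambda_background=2.0, lambda_foreground=0.3,
                          lambda_seg=0.8, lambda_acc=0.2, temperature=1.0):
    background = 1.0 - seg_map  # same result as the reverse_seg_map assignments in train()
    background_loss = F.mse_loss(relevance * background, torch.zeros_like(relevance))
    foreground_loss = F.mse_loss(relevance * seg_map, seg_map)
    segmentation_loss = lambda_background * background_loss + lambda_foreground * foreground_loss

    # the classification target is the model's own top-1 prediction, as in train()
    pred = output.argmax(dim=1)
    classification_loss = F.cross_entropy(output / temperature, pred)
    return lambda_seg * segmentation_loss + lambda_acc * classification_loss

# toy usage with placeholder shapes
relevance = torch.rand(2, 224, 224)
seg_map = (torch.rand(2, 224, 224) > 0.5).float()
output = torch.randn(2, 1000)
print(relevance_guided_loss(relevance, seg_map, output))
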
imagenet_finetune_gradmask.py ADDED
@@ -0,0 +1,586 @@
1
+ import argparse
2
+ import os
3
+ import random
4
+ import shutil
5
+ import time
6
+ import warnings
7
+
8
+ import torch
9
+ import torch.nn as nn
10
+ import torch.nn.parallel
11
+ import torch.backends.cudnn as cudnn
12
+ import torch.distributed as dist
13
+ import torch.optim
14
+ import torch.multiprocessing as mp
15
+ import torch.utils.data
16
+ import torch.utils.data.distributed
17
+ import torchvision.transforms as transforms
18
+ import torchvision.datasets as datasets
19
+ import torchvision.models as models
20
+ from segmentation_dataset import SegmentationDataset, VAL_PARTITION, TRAIN_PARTITION
21
+ import numpy as np
22
+
23
+ # Uncomment the expected model below
24
+
25
+ # ViT
26
+ from ViT.ViT import vit_base_patch16_224 as vit
27
+ # from ViT.ViT import vit_large_patch16_224 as vit
28
+
29
+ # ViT-AugReg
30
+ # from ViT.ViT_new import vit_small_patch16_224 as vit
31
+ # from ViT.ViT_new import vit_base_patch16_224 as vit
32
+ # from ViT.ViT_new import vit_large_patch16_224 as vit
33
+
34
+ # DeiT
35
+ # from ViT.ViT import deit_base_patch16_224 as vit
36
+ # from ViT.ViT import deit_small_patch16_224 as vit
37
+
38
+ from ViT.explainer import generate_relevance, get_image_with_relevance
39
+ import torchvision
40
+ import cv2
41
+ from torch.utils.tensorboard import SummaryWriter
42
+ import json
43
+
44
+ model_names = sorted(name for name in models.__dict__
45
+ if name.islower() and not name.startswith("__")
46
+ and callable(models.__dict__[name]))
47
+ model_names.append("vit")
48
+
49
+ parser = argparse.ArgumentParser(description='PyTorch ImageNet Training')
50
+ parser.add_argument('--data', metavar='DATA',
51
+ help='path to dataset')
52
+ parser.add_argument('--seg_data', metavar='SEG_DATA',
53
+ help='path to segmentation dataset')
54
+ parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
55
+ help='number of data loading workers (default: 4)')
56
+ parser.add_argument('--epochs', default=50, type=int, metavar='N',
57
+ help='number of total epochs to run')
58
+ parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
59
+ help='manual epoch number (useful on restarts)')
60
+ parser.add_argument('-b', '--batch-size', default=8, type=int,
61
+ metavar='N',
62
+ help='mini-batch size (default: 8); this is the total '
63
+ 'batch size of all GPUs on the current node when '
64
+ 'using Data Parallel or Distributed Data Parallel')
65
+ parser.add_argument('--lr', '--learning-rate', default=3e-6, type=float,
66
+ metavar='LR', help='initial learning rate', dest='lr')
67
+ parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
68
+ help='momentum')
69
+ parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
70
+ metavar='W', help='weight decay (default: 1e-4)',
71
+ dest='weight_decay')
72
+ parser.add_argument('-p', '--print-freq', default=10, type=int,
73
+ metavar='N', help='print frequency (default: 10)')
74
+ parser.add_argument('--resume', default='', type=str, metavar='PATH',
75
+ help='path to latest checkpoint (default: none)')
76
+ parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true',
77
+ help='evaluate model on validation set')
78
+ parser.add_argument('--pretrained', dest='pretrained', action='store_true',
79
+ help='use pre-trained model')
80
+ parser.add_argument('--world-size', default=-1, type=int,
81
+ help='number of nodes for distributed training')
82
+ parser.add_argument('--rank', default=-1, type=int,
83
+ help='node rank for distributed training')
84
+ parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str,
85
+ help='url used to set up distributed training')
86
+ parser.add_argument('--dist-backend', default='nccl', type=str,
87
+ help='distributed backend')
88
+ parser.add_argument('--seed', default=None, type=int,
89
+ help='seed for initializing training. ')
90
+ parser.add_argument('--gpu', default=None, type=int,
91
+ help='GPU id to use.')
92
+ parser.add_argument('--save_interval', default=20, type=int,
93
+ help='interval to save segmentation results.')
94
+ parser.add_argument('--num_samples', default=3, type=int,
95
+ help='number of samples per class for training')
96
+ parser.add_argument('--multiprocessing-distributed', action='store_true',
97
+ help='Use multi-processing distributed training to launch '
98
+ 'N processes per node, which has N GPUs. This is the '
99
+ 'fastest way to use PyTorch for either single node or '
100
+ 'multi node data parallel training')
101
+ parser.add_argument('--lambda_seg', default=0.8, type=float,
102
+ help='influence of segmentation loss.')
103
+ parser.add_argument('--lambda_acc', default=0.2, type=float,
104
+ help='influence of accuracy loss.')
105
+ parser.add_argument('--experiment_folder', default=None, type=str,
106
+ help='path to folder to use for experiment.')
107
+ parser.add_argument('--num_classes', default=500, type=int,
108
+ help='number of ImageNet classes to use for training.')
109
+ parser.add_argument('--temperature', default=1, type=float,
110
+ help='temperature for softmax (mostly for DeiT).')
111
+
112
+ best_loss = float('inf')
113
+
114
+ def main():
115
+ args = parser.parse_args()
116
+
117
+ if args.experiment_folder is None:
118
+ args.experiment_folder = f'experiment/' \
119
+ f'lr_{args.lr}_seg_{args.lambda_seg}_acc_{args.lambda_acc}'
120
+ if args.temperature != 1:
121
+ args.experiment_folder = args.experiment_folder + f'_tempera_{args.temperature}'
122
+ if args.batch_size != 8:
123
+ args.experiment_folder = args.experiment_folder + f'_bs_{args.batch_size}'
124
+ if args.num_classes != 500:
125
+ args.experiment_folder = args.experiment_folder + f'_num_classes_{args.num_classes}'
126
+ if args.num_samples != 3:
127
+ args.experiment_folder = args.experiment_folder + f'_num_samples_{args.num_samples}'
128
+ if args.epochs != 150:
129
+ args.experiment_folder = args.experiment_folder + f'_num_epochs_{args.epochs}'
130
+
131
+ if os.path.exists(args.experiment_folder):
132
+ raise Exception(f"Experiment path {args.experiment_folder} already exists!")
133
+ os.mkdir(args.experiment_folder)
134
+ os.mkdir(f'{args.experiment_folder}/train_samples')
135
+ os.mkdir(f'{args.experiment_folder}/val_samples')
136
+
137
+ with open(f'{args.experiment_folder}/commandline_args.txt', 'w') as f:
138
+ json.dump(args.__dict__, f, indent=2)
139
+
140
+ if args.seed is not None:
141
+ random.seed(args.seed)
142
+ torch.manual_seed(args.seed)
143
+ cudnn.deterministic = True
144
+ warnings.warn('You have chosen to seed training. '
145
+ 'This will turn on the CUDNN deterministic setting, '
146
+ 'which can slow down your training considerably! '
147
+ 'You may see unexpected behavior when restarting '
148
+ 'from checkpoints.')
149
+
150
+ if args.gpu is not None:
151
+ warnings.warn('You have chosen a specific GPU. This will completely '
152
+ 'disable data parallelism.')
153
+
154
+ if args.dist_url == "env://" and args.world_size == -1:
155
+ args.world_size = int(os.environ["WORLD_SIZE"])
156
+
157
+ args.distributed = args.world_size > 1 or args.multiprocessing_distributed
158
+
159
+ ngpus_per_node = torch.cuda.device_count()
160
+ if args.multiprocessing_distributed:
161
+ # Since we have ngpus_per_node processes per node, the total world_size
162
+ # needs to be adjusted accordingly
163
+ args.world_size = ngpus_per_node * args.world_size
164
+ # Use torch.multiprocessing.spawn to launch distributed processes: the
165
+ # main_worker process function
166
+ mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
167
+ else:
168
+ # Simply call main_worker function
169
+ main_worker(args.gpu, ngpus_per_node, args)
170
+
171
+
172
+ def main_worker(gpu, ngpus_per_node, args):
173
+ global best_loss
174
+ args.gpu = gpu
175
+
176
+ if args.gpu is not None:
177
+ print("Use GPU: {} for training".format(args.gpu))
178
+
179
+ if args.distributed:
180
+ if args.dist_url == "env://" and args.rank == -1:
181
+ args.rank = int(os.environ["RANK"])
182
+ if args.multiprocessing_distributed:
183
+ # For multiprocessing distributed training, rank needs to be the
184
+ # global rank among all the processes
185
+ args.rank = args.rank * ngpus_per_node + gpu
186
+ dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
187
+ world_size=args.world_size, rank=args.rank)
188
+ # create model
189
+ print("=> creating model")
190
+ model = vit(pretrained=True).cuda()
191
+ model.train()
192
+ print("done")
193
+
194
+ if not torch.cuda.is_available():
195
+ print('using CPU, this will be slow')
196
+ elif args.distributed:
197
+ # For multiprocessing distributed, DistributedDataParallel constructor
198
+ # should always set the single device scope, otherwise,
199
+ # DistributedDataParallel will use all available devices.
200
+ if args.gpu is not None:
201
+ torch.cuda.set_device(args.gpu)
202
+ model.cuda(args.gpu)
203
+ # When using a single GPU per process and per
204
+ # DistributedDataParallel, we need to divide the batch size
205
+ # ourselves based on the total number of GPUs we have
206
+ args.batch_size = int(args.batch_size / ngpus_per_node)
207
+ args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node)
208
+ model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
209
+ else:
210
+ model.cuda()
211
+ # DistributedDataParallel will divide and allocate batch_size to all
212
+ # available GPUs if device_ids are not set
213
+ model = torch.nn.parallel.DistributedDataParallel(model)
214
+ elif args.gpu is not None:
215
+ torch.cuda.set_device(args.gpu)
216
+ model = model.cuda(args.gpu)
217
+ else:
218
+ # DataParallel will divide and allocate batch_size to all available GPUs
219
+ print("start")
220
+ model = torch.nn.DataParallel(model).cuda()
221
+
222
+ # define loss function (criterion) and optimizer
223
+ criterion = nn.CrossEntropyLoss().cuda(args.gpu)
224
+ optimizer = torch.optim.AdamW(model.parameters(), args.lr, weight_decay=args.weight_decay)
225
+
226
+ # optionally resume from a checkpoint
227
+ if args.resume:
228
+ if os.path.isfile(args.resume):
229
+ print("=> loading checkpoint '{}'".format(args.resume))
230
+ if args.gpu is None:
231
+ checkpoint = torch.load(args.resume)
232
+ else:
233
+ # Map model to be loaded to specified single gpu.
234
+ loc = 'cuda:{}'.format(args.gpu)
235
+ checkpoint = torch.load(args.resume, map_location=loc)
236
+ args.start_epoch = checkpoint['epoch']
237
+ best_loss = checkpoint['best_loss']
238
+ if args.gpu is not None:
239
+ # best_loss may be from a checkpoint from a different GPU
240
+ best_loss = best_loss.to(args.gpu)
241
+ model.load_state_dict(checkpoint['state_dict'])
242
+ optimizer.load_state_dict(checkpoint['optimizer'])
243
+ print("=> loaded checkpoint '{}' (epoch {})"
244
+ .format(args.resume, checkpoint['epoch']))
245
+ else:
246
+ print("=> no checkpoint found at '{}'".format(args.resume))
247
+
248
+ cudnn.benchmark = True
249
+
250
+ train_dataset = SegmentationDataset(args.seg_data, args.data, partition=TRAIN_PARTITION, train_classes=args.num_classes,
251
+ num_samples=args.num_samples)
252
+
253
+ if args.distributed:
254
+ train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
255
+ else:
256
+ train_sampler = None
257
+
258
+ train_loader = torch.utils.data.DataLoader(
259
+ train_dataset, batch_size=args.batch_size, shuffle=False,
260
+ num_workers=args.workers, pin_memory=True, sampler=train_sampler)
261
+
262
+ val_dataset = SegmentationDataset(args.seg_data, args.data, partition=VAL_PARTITION, train_classes=args.num_classes,
263
+ num_samples=1)
264
+
265
+ val_loader = torch.utils.data.DataLoader(
266
+ val_dataset, batch_size=5, shuffle=False,
267
+ num_workers=args.workers, pin_memory=True)
268
+
269
+ if args.evaluate:
270
+ validate(val_loader, model, criterion, 0, args)
271
+ return
272
+
273
+ for epoch in range(args.start_epoch, args.epochs):
274
+ if args.distributed:
275
+ train_sampler.set_epoch(epoch)
276
+ adjust_learning_rate(optimizer, epoch, args)
277
+
278
+ log_dir = os.path.join(args.experiment_folder, 'logs')
279
+ logger = SummaryWriter(log_dir=log_dir)
280
+ args.logger = logger
281
+
282
+ # train for one epoch
283
+ train(train_loader, model, criterion, optimizer, epoch, args)
284
+
285
+ # evaluate on validation set
286
+ loss1 = validate(val_loader, model, criterion, epoch, args)
287
+
288
+ # remember best (lowest) validation loss and save checkpoint
289
+ is_best = loss1 < best_loss
290
+ best_loss = min(loss1, best_loss)
291
+
292
+ if not args.multiprocessing_distributed or (args.multiprocessing_distributed
293
+ and args.rank % ngpus_per_node == 0):
294
+ save_checkpoint({
295
+ 'epoch': epoch + 1,
296
+ 'state_dict': model.state_dict(),
297
+ 'best_loss': best_loss,
298
+ 'optimizer' : optimizer.state_dict(),
299
+ }, is_best, folder=args.experiment_folder)
300
+
301
+ def train(train_loader, model, criterion, optimizer, epoch, args):
302
+ mse_criterion = torch.nn.MSELoss(reduction='mean')
303
+
304
+ losses = AverageMeter('Loss', ':.4e')
305
+ top1 = AverageMeter('Acc@1', ':6.2f')
306
+ top5 = AverageMeter('Acc@5', ':6.2f')
307
+ orig_top1 = AverageMeter('Acc@1_orig', ':6.2f')
308
+ orig_top5 = AverageMeter('Acc@5_orig', ':6.2f')
309
+ progress = ProgressMeter(
310
+ len(train_loader),
311
+ [losses, top1, top5, orig_top1, orig_top5],
312
+ prefix="Epoch: [{}]".format(epoch))
313
+
314
+ orig_model = vit(pretrained=True).cuda()
315
+ orig_model.eval()
316
+
317
+ # switch to train mode
318
+ model.train()
319
+
320
+ for i, (seg_map, image_ten, class_name) in enumerate(train_loader):
321
+ if torch.cuda.is_available():
322
+ image_ten = image_ten.cuda(args.gpu, non_blocking=True)
323
+ seg_map = seg_map.cuda(args.gpu, non_blocking=True)
324
+ class_name = class_name.cuda(args.gpu, non_blocking=True)
325
+
326
+
327
+ image_ten.requires_grad = True
328
+ output = model(image_ten)
329
+
330
+ # segmentation loss
331
+ batch_size = image_ten.shape[0]
332
+ index = class_name
333
+ if index is None:
334
+ index = np.argmax(output.cpu().data.numpy(), axis=-1)
335
+ index = torch.tensor(index)
336
+
337
+ one_hot = np.zeros((batch_size, output.shape[-1]), dtype=np.float32)
338
+ one_hot[torch.arange(batch_size), index.data.cpu().numpy()] = 1
339
+ one_hot = torch.from_numpy(one_hot).requires_grad_(True)
340
+ one_hot = torch.sum(one_hot.to(image_ten.device) * output)
341
+ model.zero_grad()
342
+
343
+ relevance = torch.autograd.grad(one_hot, image_ten, retain_graph=True)[0]
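+ # relevance here is the raw gradient of the summed target-class logits w.r.t. the input pixels
+ # (an input-gradient saliency map), rather than a transformer relevance map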
344
+
345
+ reverse_seg_map = seg_map.clone()
346
+ reverse_seg_map[reverse_seg_map == 1] = -1
347
+ reverse_seg_map[reverse_seg_map == 0] = 1
348
+ reverse_seg_map[reverse_seg_map == -1] = 0
349
+ grad_loss = mse_criterion(relevance * reverse_seg_map, torch.zeros_like(relevance))
350
+ segmentation_loss = grad_loss
351
+
352
+ # classification loss
353
+ with torch.no_grad():
354
+ output_orig = orig_model(image_ten)
355
+ if args.temperature != 1:
356
+ output = output / args.temperature
357
+ classification_loss = criterion(output, class_name.flatten())
358
+
359
+ loss = args.lambda_seg * segmentation_loss + args.lambda_acc * classification_loss
360
+
361
+ # debugging output
362
+ if i % args.save_interval == 0:
363
+ orig_relevance = generate_relevance(orig_model, image_ten, index=class_name)
364
+ for j in range(image_ten.shape[0]):
365
+ image = get_image_with_relevance(image_ten[j], torch.ones_like(image_ten[j]))
366
+ new_vis = get_image_with_relevance(image_ten[j]*relevance[j], torch.ones_like(image_ten[j]))
367
+ old_vis = get_image_with_relevance(image_ten[j], orig_relevance[j])
368
+ gt = get_image_with_relevance(image_ten[j], seg_map[j])
369
+ h_img = cv2.hconcat([image, gt, old_vis, new_vis])
370
+ cv2.imwrite(f'{args.experiment_folder}/train_samples/res_{i}_{j}.jpg', h_img)
371
+
372
+ # measure accuracy and record loss
373
+ acc1, acc5 = accuracy(output, class_name, topk=(1, 5))
374
+ losses.update(loss.item(), image_ten.size(0))
375
+ top1.update(acc1[0], image_ten.size(0))
376
+ top5.update(acc5[0], image_ten.size(0))
377
+
378
+ # metrics for original vit
379
+ acc1_orig, acc5_orig = accuracy(output_orig, class_name, topk=(1, 5))
380
+ orig_top1.update(acc1_orig[0], image_ten.size(0))
381
+ orig_top5.update(acc5_orig[0], image_ten.size(0))
382
+
383
+ # compute gradient and do SGD step
384
+ optimizer.zero_grad()
385
+ loss.backward()
386
+ optimizer.step()
387
+
388
+ if i % args.print_freq == 0:
389
+ progress.display(i)
390
+ args.logger.add_scalar('{}/{}'.format('train', 'segmentation_loss'), segmentation_loss,
391
+ epoch*len(train_loader)+i)
392
+ args.logger.add_scalar('{}/{}'.format('train', 'classification_loss'), classification_loss,
393
+ epoch * len(train_loader) + i)
394
+ args.logger.add_scalar('{}/{}'.format('train', 'orig_top1'), acc1_orig,
395
+ epoch * len(train_loader) + i)
396
+ args.logger.add_scalar('{}/{}'.format('train', 'top1'), acc1,
397
+ epoch * len(train_loader) + i)
398
+ args.logger.add_scalar('{}/{}'.format('train', 'orig_top5'), acc5_orig,
399
+ epoch * len(train_loader) + i)
400
+ args.logger.add_scalar('{}/{}'.format('train', 'top5'), acc5,
401
+ epoch * len(train_loader) + i)
402
+ args.logger.add_scalar('{}/{}'.format('train', 'tot_loss'), loss,
403
+ epoch * len(train_loader) + i)
404
+
405
+
406
+ def validate(val_loader, model, criterion, epoch, args):
407
+ mse_criterion = torch.nn.MSELoss(reduction='mean')
408
+
409
+ losses = AverageMeter('Loss', ':.4e')
410
+ top1 = AverageMeter('Acc@1', ':6.2f')
411
+ top5 = AverageMeter('Acc@5', ':6.2f')
412
+ orig_top1 = AverageMeter('Acc@1_orig', ':6.2f')
413
+ orig_top5 = AverageMeter('Acc@5_orig', ':6.2f')
414
+ progress = ProgressMeter(
415
+ len(val_loader),
416
+ [losses, top1, top5, orig_top1, orig_top5],
417
+ prefix="Epoch: [{}]".format(val_loader))
418
+
419
+ # switch to evaluate mode
420
+ model.eval()
421
+
422
+ orig_model = vit(pretrained=True).cuda()
423
+ orig_model.eval()
424
+
425
+ with torch.no_grad():
426
+ for i, (seg_map, image_ten, class_name) in enumerate(val_loader):
427
+ if args.gpu is not None:
428
+ image_ten = image_ten.cuda(args.gpu, non_blocking=True)
429
+ if torch.cuda.is_available():
430
+ seg_map = seg_map.cuda(args.gpu, non_blocking=True)
431
+ class_name = class_name.cuda(args.gpu, non_blocking=True)
432
+
433
+ with torch.enable_grad():
434
+ image_ten.requires_grad = True
435
+ output = model(image_ten)
436
+
437
+ # segmentation loss
438
+ batch_size = image_ten.shape[0]
439
+ index = class_name
440
+ if index is None:
441
+ index = np.argmax(output.cpu().data.numpy(), axis=-1)
442
+ index = torch.tensor(index)
443
+
444
+ one_hot = np.zeros((batch_size, output.shape[-1]), dtype=np.float32)
445
+ one_hot[torch.arange(batch_size), index.data.cpu().numpy()] = 1
446
+ one_hot = torch.from_numpy(one_hot).requires_grad_(True)
447
+ one_hot = torch.sum(one_hot.to(image_ten.device) * output)
448
+ model.zero_grad()
449
+ relevance = torch.autograd.grad(one_hot, image_ten)[0]
450
+
451
+ reverse_seg_map = seg_map.clone()
452
+ reverse_seg_map[reverse_seg_map == 1] = -1
453
+ reverse_seg_map[reverse_seg_map == 0] = 1
454
+ reverse_seg_map[reverse_seg_map == -1] = 0
455
+ grad_loss = mse_criterion(relevance * reverse_seg_map, torch.zeros_like(relevance))
456
+ segmentation_loss = grad_loss
457
+
458
+ # classification loss
459
+ output = model(image_ten)
460
+ with torch.no_grad():
461
+ output_orig = orig_model(image_ten)
462
+ if args.temperature != 1:
463
+ output = output / args.temperature
464
+ classification_loss = criterion(output, class_name.flatten())
465
+
466
+ loss = args.lambda_seg * segmentation_loss + args.lambda_acc * classification_loss
467
+
468
+ # save results
469
+ if i % args.save_interval == 0:
470
+ with torch.enable_grad():
471
+ orig_relevance = generate_relevance(orig_model, image_ten, index=class_name)
472
+ for j in range(image_ten.shape[0]):
473
+ image = get_image_with_relevance(image_ten[j], torch.ones_like(image_ten[j]))
474
+ new_vis = get_image_with_relevance(image_ten[j]*relevance[j], torch.ones_like(image_ten[j]))
475
+ old_vis = get_image_with_relevance(image_ten[j], orig_relevance[j])
476
+ gt = get_image_with_relevance(image_ten[j], seg_map[j])
477
+ h_img = cv2.hconcat([image, gt, old_vis, new_vis])
478
+ cv2.imwrite(f'{args.experiment_folder}/val_samples/res_{i}_{j}.jpg', h_img)
479
+
480
+ # measure accuracy and record loss
481
+ acc1, acc5 = accuracy(output, class_name, topk=(1, 5))
482
+ losses.update(loss.item(), image_ten.size(0))
483
+ top1.update(acc1[0], image_ten.size(0))
484
+ top5.update(acc5[0], image_ten.size(0))
485
+
486
+ # metrics for original vit
487
+ acc1_orig, acc5_orig = accuracy(output_orig, class_name, topk=(1, 5))
488
+ orig_top1.update(acc1_orig[0], image_ten.size(0))
489
+ orig_top5.update(acc5_orig[0], image_ten.size(0))
490
+
491
+ if i % args.print_freq == 0:
492
+ progress.display(i)
493
+ args.logger.add_scalar('{}/{}'.format('val', 'segmentation_loss'), segmentation_loss,
494
+ epoch * len(val_loader) + i)
495
+ args.logger.add_scalar('{}/{}'.format('val', 'classification_loss'), classification_loss,
496
+ epoch * len(val_loader) + i)
497
+ args.logger.add_scalar('{}/{}'.format('val', 'orig_top1'), acc1_orig,
498
+ epoch * len(val_loader) + i)
499
+ args.logger.add_scalar('{}/{}'.format('val', 'top1'), acc1,
500
+ epoch * len(val_loader) + i)
501
+ args.logger.add_scalar('{}/{}'.format('val', 'orig_top5'), acc5_orig,
502
+ epoch * len(val_loader) + i)
503
+ args.logger.add_scalar('{}/{}'.format('val', 'top5'), acc5,
504
+ epoch * len(val_loader) + i)
505
+ args.logger.add_scalar('{}/{}'.format('val', 'tot_loss'), loss,
506
+ epoch * len(val_loader) + i)
507
+
508
+ # TODO: this should also be done with the ProgressMeter
509
+ print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'
510
+ .format(top1=top1, top5=top5))
511
+
512
+ return losses.avg
513
+
514
+
515
+ def save_checkpoint(state, is_best, folder, filename='checkpoint.pth.tar'):
516
+ torch.save(state, f'{folder}/{filename}')
517
+ if is_best:
518
+ shutil.copyfile(f'{folder}/{filename}', f'{folder}/model_best.pth.tar')
519
+
520
+
521
+ class AverageMeter(object):
522
+ """Computes and stores the average and current value"""
523
+ def __init__(self, name, fmt=':f'):
524
+ self.name = name
525
+ self.fmt = fmt
526
+ self.reset()
527
+
528
+ def reset(self):
529
+ self.val = 0
530
+ self.avg = 0
531
+ self.sum = 0
532
+ self.count = 0
533
+
534
+ def update(self, val, n=1):
535
+ self.val = val
536
+ self.sum += val * n
537
+ self.count += n
538
+ self.avg = self.sum / self.count
539
+
540
+ def __str__(self):
541
+ fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
542
+ return fmtstr.format(**self.__dict__)
543
+
544
+
545
+ class ProgressMeter(object):
546
+ def __init__(self, num_batches, meters, prefix=""):
547
+ self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
548
+ self.meters = meters
549
+ self.prefix = prefix
550
+
551
+ def display(self, batch):
552
+ entries = [self.prefix + self.batch_fmtstr.format(batch)]
553
+ entries += [str(meter) for meter in self.meters]
554
+ print('\t'.join(entries))
555
+
556
+ def _get_batch_fmtstr(self, num_batches):
557
+ num_digits = len(str(num_batches // 1))
558
+ fmt = '{:' + str(num_digits) + 'd}'
559
+ return '[' + fmt + '/' + fmt.format(num_batches) + ']'
560
+
561
+ def adjust_learning_rate(optimizer, epoch, args):
562
+ """Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
563
+ lr = args.lr * (0.85 ** (epoch // 2))
564
+ for param_group in optimizer.param_groups:
565
+ param_group['lr'] = lr
566
+
567
+
568
+ def accuracy(output, target, topk=(1,)):
569
+ """Computes the accuracy over the k top predictions for the specified values of k"""
570
+ with torch.no_grad():
571
+ maxk = max(topk)
572
+ batch_size = target.size(0)
573
+
574
+ _, pred = output.topk(maxk, 1, True, True)
575
+ pred = pred.t()
576
+ correct = pred.eq(target.view(1, -1).expand_as(pred))
577
+
578
+ res = []
579
+ for k in topk:
580
+ correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
581
+ res.append(correct_k.mul_(100.0 / batch_size))
582
+ return res
583
+
584
+
585
+ if __name__ == '__main__':
586
+ main()
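
Editor's note: imagenet_finetune_gradmask.py above swaps the transformer relevance map for a plain input-gradient saliency and penalizes the gradient of the target-class logits wherever it falls outside the ground-truth mask. The sketch below illustrates that idea under toy assumptions (the small model, tensor shapes, and function name are placeholders, not the repository's API); create_graph=True is used here so the gradient penalty itself can be backpropagated.

import torch
import torch.nn as nn
import torch.nn.functional as F

def gradmask_background_loss(model, images, targets, seg_map):
    """Penalize input-gradient energy outside the object mask (sketch)."""
    images = images.clone().requires_grad_(True)
    output = model(images)
    # summed target-class logits, equivalent to the one_hot * output construction above
    target_score = output.gather(1, targets.view(-1, 1)).sum()
    grad = torch.autograd.grad(target_score, images, create_graph=True)[0]
    background = 1.0 - seg_map
    return F.mse_loss(grad * background, torch.zeros_like(grad))

# toy usage
model = nn.Sequential(nn.Flatten(), nn.Linear(3 * 8 * 8, 10))
images = torch.rand(2, 3, 8, 8)
targets = torch.tensor([1, 7])
seg_map = (torch.rand(2, 1, 8, 8) > 0.5).float()  # broadcasts over channels
print(gradmask_background_loss(model, images, targets, seg_map))
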
imagenet_finetune_rrr.py ADDED
@@ -0,0 +1,570 @@
1
+ import argparse
2
+ import os
3
+ import random
4
+ import shutil
5
+ import time
6
+ import warnings
7
+
8
+ import torch
9
+ import torch.nn as nn
10
+ import torch.nn.parallel
11
+ import torch.backends.cudnn as cudnn
12
+ import torch.distributed as dist
13
+ import torch.optim
14
+ import torch.multiprocessing as mp
15
+ import torch.utils.data
16
+ import torch.utils.data.distributed
17
+ import torchvision.transforms as transforms
18
+ import torchvision.datasets as datasets
19
+ import torchvision.models as models
20
+ import torch.nn.functional as F
21
+ from segmentation_dataset import SegmentationDataset, VAL_PARTITION, TRAIN_PARTITION
22
+ import numpy as np
23
+
24
+ # Uncomment the expected model below
25
+
26
+ # ViT
27
+ from ViT.ViT import vit_base_patch16_224 as vit
28
+ # from ViT.ViT import vit_large_patch16_224 as vit
29
+
30
+ # ViT-AugReg
31
+ # from ViT.ViT_new import vit_small_patch16_224 as vit
32
+ # from ViT.ViT_new import vit_base_patch16_224 as vit
33
+ # from ViT.ViT_new import vit_large_patch16_224 as vit
34
+
35
+ # DeiT
36
+ # from ViT.ViT import deit_base_patch16_224 as vit
37
+ # from ViT.ViT import deit_small_patch16_224 as vit
38
+
39
+ from ViT.explainer import generate_relevance, get_image_with_relevance
40
+ import torchvision
41
+ import cv2
42
+ from torch.utils.tensorboard import SummaryWriter
43
+ import json
44
+
45
+ model_names = sorted(name for name in models.__dict__
46
+ if name.islower() and not name.startswith("__")
47
+ and callable(models.__dict__[name]))
48
+ model_names.append("vit")
49
+
50
+ parser = argparse.ArgumentParser(description='PyTorch ImageNet Training')
51
+ parser.add_argument('--data', metavar='DATA',
52
+ help='path to dataset')
53
+ parser.add_argument('--seg_data', metavar='SEG_DATA',
54
+ help='path to segmentation dataset')
55
+ parser.add_argument('-a', '--arch', metavar='ARCH', default='resnet18',
56
+ choices=model_names,
57
+ help='model architecture: ' +
58
+ ' | '.join(model_names) +
59
+ ' (default: resnet18)')
60
+ parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
61
+ help='number of data loading workers (default: 4)')
62
+ parser.add_argument('--epochs', default=50, type=int, metavar='N',
63
+ help='number of total epochs to run')
64
+ parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
65
+ help='manual epoch number (useful on restarts)')
66
+ parser.add_argument('-b', '--batch-size', default=8, type=int,
67
+ metavar='N',
68
+ help='mini-batch size (default: 8); this is the total '
69
+ 'batch size of all GPUs on the current node when '
70
+ 'using Data Parallel or Distributed Data Parallel')
71
+ parser.add_argument('--lr', '--learning-rate', default=3e-6, type=float,
72
+ metavar='LR', help='initial learning rate', dest='lr')
73
+ parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
74
+ help='momentum')
75
+ parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
76
+ metavar='W', help='weight decay (default: 1e-4)',
77
+ dest='weight_decay')
78
+ parser.add_argument('-p', '--print-freq', default=10, type=int,
79
+ metavar='N', help='print frequency (default: 10)')
80
+ parser.add_argument('--resume', default='', type=str, metavar='PATH',
81
+ help='path to latest checkpoint (default: none)')
82
+ parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true',
83
+ help='evaluate model on validation set')
84
+ parser.add_argument('--pretrained', dest='pretrained', action='store_true',
85
+ help='use pre-trained model')
86
+ parser.add_argument('--world-size', default=-1, type=int,
87
+ help='number of nodes for distributed training')
88
+ parser.add_argument('--rank', default=-1, type=int,
89
+ help='node rank for distributed training')
90
+ parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str,
91
+ help='url used to set up distributed training')
92
+ parser.add_argument('--dist-backend', default='nccl', type=str,
93
+ help='distributed backend')
94
+ parser.add_argument('--seed', default=None, type=int,
95
+ help='seed for initializing training. ')
96
+ parser.add_argument('--gpu', default=None, type=int,
97
+ help='GPU id to use.')
98
+ parser.add_argument('--save_interval', default=20, type=int,
99
+ help='interval to save segmentation results.')
100
+ parser.add_argument('--num_samples', default=3, type=int,
101
+ help='number of samples per class for training')
102
+ parser.add_argument('--multiprocessing-distributed', action='store_true',
103
+ help='Use multi-processing distributed training to launch '
104
+ 'N processes per node, which has N GPUs. This is the '
105
+ 'fastest way to use PyTorch for either single node or '
106
+ 'multi node data parallel training')
107
+ parser.add_argument('--lambda_seg', default=0.8, type=float,
108
+ help='influence of segmentation loss.')
109
+ parser.add_argument('--lambda_acc', default=0.2, type=float,
110
+ help='influence of accuracy loss.')
111
+ parser.add_argument('--experiment_folder', default=None, type=str,
112
+ help='path to folder to use for experiment.')
113
+ parser.add_argument('--num_classes', default=500, type=int,
114
+ help='number of ImageNet classes to use for training.')
115
+ parser.add_argument('--temperature', default=1, type=float,
116
+ help='temperature for softmax (mostly for DeiT).')
117
+
118
+ best_loss = float('inf')
119
+
120
+ def main():
121
+ args = parser.parse_args()
122
+
123
+ if args.experiment_folder is None:
124
+ args.experiment_folder = f'experiment/' \
125
+ f'lr_{args.lr}_seg_{args.lambda_seg}_acc_{args.lambda_acc}'
126
+ if args.temperature != 1:
127
+ args.experiment_folder = args.experiment_folder + f'_tempera_{args.temperature}'
128
+ if args.batch_size != 8:
129
+ args.experiment_folder = args.experiment_folder + f'_bs_{args.batch_size}'
130
+ if args.num_classes != 500:
131
+ args.experiment_folder = args.experiment_folder + f'_num_classes_{args.num_classes}'
132
+ if args.num_samples != 3:
133
+ args.experiment_folder = args.experiment_folder + f'_num_samples_{args.num_samples}'
134
+ if args.epochs != 150:
135
+ args.experiment_folder = args.experiment_folder + f'_num_epochs_{args.epochs}'
136
+
137
+ if os.path.exists(args.experiment_folder):
138
+ raise Exception(f"Experiment path {args.experiment_folder} already exists!")
139
+ os.mkdir(args.experiment_folder)
140
+ os.mkdir(f'{args.experiment_folder}/train_samples')
141
+ os.mkdir(f'{args.experiment_folder}/val_samples')
142
+
143
+ with open(f'{args.experiment_folder}/commandline_args.txt', 'w') as f:
144
+ json.dump(args.__dict__, f, indent=2)
145
+
146
+ if args.seed is not None:
147
+ random.seed(args.seed)
148
+ torch.manual_seed(args.seed)
149
+ cudnn.deterministic = True
150
+ warnings.warn('You have chosen to seed training. '
151
+ 'This will turn on the CUDNN deterministic setting, '
152
+ 'which can slow down your training considerably! '
153
+ 'You may see unexpected behavior when restarting '
154
+ 'from checkpoints.')
155
+
156
+ if args.gpu is not None:
157
+ warnings.warn('You have chosen a specific GPU. This will completely '
158
+ 'disable data parallelism.')
159
+
160
+ if args.dist_url == "env://" and args.world_size == -1:
161
+ args.world_size = int(os.environ["WORLD_SIZE"])
162
+
163
+ args.distributed = args.world_size > 1 or args.multiprocessing_distributed
164
+
165
+ ngpus_per_node = torch.cuda.device_count()
166
+ if args.multiprocessing_distributed:
167
+ # Since we have ngpus_per_node processes per node, the total world_size
168
+ # needs to be adjusted accordingly
169
+ args.world_size = ngpus_per_node * args.world_size
170
+ # Use torch.multiprocessing.spawn to launch distributed processes: the
171
+ # main_worker process function
172
+ mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
173
+ else:
174
+ # Simply call main_worker function
175
+ main_worker(args.gpu, ngpus_per_node, args)
176
+
177
+
178
+ def main_worker(gpu, ngpus_per_node, args):
179
+ global best_loss
180
+ args.gpu = gpu
181
+
182
+ if args.gpu is not None:
183
+ print("Use GPU: {} for training".format(args.gpu))
184
+
185
+ if args.distributed:
186
+ if args.dist_url == "env://" and args.rank == -1:
187
+ args.rank = int(os.environ["RANK"])
188
+ if args.multiprocessing_distributed:
189
+ # For multiprocessing distributed training, rank needs to be the
190
+ # global rank among all the processes
191
+ args.rank = args.rank * ngpus_per_node + gpu
192
+ dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
193
+ world_size=args.world_size, rank=args.rank)
194
+ # create model
195
+ print("=> creating model")
196
+ model = vit(pretrained=True).cuda()
197
+ model.train()
198
+ print("done")
199
+
200
+ if not torch.cuda.is_available():
201
+ print('using CPU, this will be slow')
202
+ elif args.distributed:
203
+ # For multiprocessing distributed, DistributedDataParallel constructor
204
+ # should always set the single device scope, otherwise,
205
+ # DistributedDataParallel will use all available devices.
206
+ if args.gpu is not None:
207
+ torch.cuda.set_device(args.gpu)
208
+ model.cuda(args.gpu)
209
+ # When using a single GPU per process and per
210
+ # DistributedDataParallel, we need to divide the batch size
211
+ # ourselves based on the total number of GPUs we have
212
+ args.batch_size = int(args.batch_size / ngpus_per_node)
213
+ args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node)
214
+ model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
215
+ else:
216
+ model.cuda()
217
+ # DistributedDataParallel will divide and allocate batch_size to all
218
+ # available GPUs if device_ids are not set
219
+ model = torch.nn.parallel.DistributedDataParallel(model)
220
+ elif args.gpu is not None:
221
+ torch.cuda.set_device(args.gpu)
222
+ model = model.cuda(args.gpu)
223
+ else:
224
+ # DataParallel will divide and allocate batch_size to all available GPUs
225
+ print("start")
226
+ model = torch.nn.DataParallel(model).cuda()
227
+
228
+ # define loss function (criterion) and optimizer
229
+ criterion = nn.CrossEntropyLoss().cuda(args.gpu)
230
+ optimizer = torch.optim.AdamW(model.parameters(), args.lr, weight_decay=args.weight_decay)
231
+
232
+ # optionally resume from a checkpoint
233
+ if args.resume:
234
+ if os.path.isfile(args.resume):
235
+ print("=> loading checkpoint '{}'".format(args.resume))
236
+ if args.gpu is None:
237
+ checkpoint = torch.load(args.resume)
238
+ else:
239
+ # Map model to be loaded to specified single gpu.
240
+ loc = 'cuda:{}'.format(args.gpu)
241
+ checkpoint = torch.load(args.resume, map_location=loc)
242
+ args.start_epoch = checkpoint['epoch']
243
+ best_loss = checkpoint['best_loss']
244
+ if args.gpu is not None:
245
+ # best_loss may be from a checkpoint from a different GPU
246
+ best_loss = best_loss.to(args.gpu)
247
+ model.load_state_dict(checkpoint['state_dict'])
248
+ optimizer.load_state_dict(checkpoint['optimizer'])
249
+ print("=> loaded checkpoint '{}' (epoch {})"
250
+ .format(args.resume, checkpoint['epoch']))
251
+ else:
252
+ print("=> no checkpoint found at '{}'".format(args.resume))
253
+
254
+ cudnn.benchmark = True
255
+
256
+ train_dataset = SegmentationDataset(args.seg_data, args.data, partition=TRAIN_PARTITION, train_classes=args.num_classes,
257
+ num_samples=args.num_samples)
258
+
259
+ if args.distributed:
260
+ train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
261
+ else:
262
+ train_sampler = None
263
+
264
+ train_loader = torch.utils.data.DataLoader(
265
+ train_dataset, batch_size=args.batch_size, shuffle=False,
266
+ num_workers=args.workers, pin_memory=True, sampler=train_sampler)
267
+
268
+ val_dataset = SegmentationDataset(args.seg_data, args.data, partition=VAL_PARTITION, train_classes=args.num_classes,
269
+ num_samples=1)
270
+
271
+ val_loader = torch.utils.data.DataLoader(
272
+ val_dataset, batch_size=5, shuffle=False,
273
+ num_workers=args.workers, pin_memory=True)
274
+
275
+ if args.evaluate:
276
+ validate(val_loader, model, criterion, 0, args)
277
+ return
278
+
279
+ for epoch in range(args.start_epoch, args.epochs):
280
+ if args.distributed:
281
+ train_sampler.set_epoch(epoch)
282
+ adjust_learning_rate(optimizer, epoch, args)
283
+
284
+ log_dir = os.path.join(args.experiment_folder, 'logs')
285
+ logger = SummaryWriter(log_dir=log_dir)
286
+ args.logger = logger
287
+
288
+ # train for one epoch
289
+ train(train_loader, model, criterion, optimizer, epoch, args)
290
+
291
+ # evaluate on validation set
292
+ loss1 = validate(val_loader, model, criterion, epoch, args)
293
+
294
+ # remember best (lowest) validation loss and save checkpoint
295
+ is_best = loss1 < best_loss
296
+ best_loss = min(loss1, best_loss)
297
+
298
+ if not args.multiprocessing_distributed or (args.multiprocessing_distributed
299
+ and args.rank % ngpus_per_node == 0):
300
+ save_checkpoint({
301
+ 'epoch': epoch + 1,
302
+ 'state_dict': model.state_dict(),
303
+ 'best_loss': best_loss,
304
+ 'optimizer' : optimizer.state_dict(),
305
+ }, is_best, folder=args.experiment_folder)
306
+
307
+ def train(train_loader, model, criterion, optimizer, epoch, args):
308
+ losses = AverageMeter('Loss', ':.4e')
309
+ top1 = AverageMeter('Acc@1', ':6.2f')
310
+ top5 = AverageMeter('Acc@5', ':6.2f')
311
+ orig_top1 = AverageMeter('Acc@1_orig', ':6.2f')
312
+ orig_top5 = AverageMeter('Acc@5_orig', ':6.2f')
313
+ progress = ProgressMeter(
314
+ len(train_loader),
315
+ [losses, top1, top5, orig_top1, orig_top5],
316
+ prefix="Epoch: [{}]".format(epoch))
317
+
318
+ orig_model = vit(pretrained=True).cuda()
319
+ orig_model.eval()
320
+
321
+ # switch to train mode
322
+ model.train()
323
+
324
+ for i, (seg_map, image_ten, class_name) in enumerate(train_loader):
325
+ if torch.cuda.is_available():
326
+ image_ten = image_ten.cuda(args.gpu, non_blocking=True)
327
+ seg_map = seg_map.cuda(args.gpu, non_blocking=True)
328
+ class_name = class_name.cuda(args.gpu, non_blocking=True)
329
+
330
+
331
+ image_ten.requires_grad = True
332
+ output = model(image_ten)
333
+
334
+ # segmentation loss
335
+ EPS = 10e-12
336
+ y_pred = torch.sum(torch.log(F.softmax(output, dim=1) + EPS))
337
+ relevance = torch.autograd.grad(y_pred, image_ten, retain_graph=True)[0]
338
+ reverse_seg_map = seg_map.clone()
339
+ reverse_seg_map[reverse_seg_map == 1] = -1
340
+ reverse_seg_map[reverse_seg_map == 0] = 1
341
+ reverse_seg_map[reverse_seg_map == -1] = 0
342
+ rrr_loss = (relevance * reverse_seg_map)**2
343
+ segmentation_loss = rrr_loss.sum()
344
+
345
+ # classification loss
346
+ with torch.no_grad():
347
+ output_orig = orig_model(image_ten)
348
+ if args.temperature != 1:
349
+ output = output / args.temperature
350
+ classification_loss = criterion(output, class_name.flatten())
351
+
352
+ loss = args.lambda_seg * segmentation_loss + args.lambda_acc * classification_loss
353
+
354
+ # debugging output
355
+ if i % args.save_interval == 0:
356
+ orig_relevance = generate_relevance(orig_model, image_ten, index=class_name)
357
+ for j in range(image_ten.shape[0]):
358
+ image = get_image_with_relevance(image_ten[j], torch.ones_like(image_ten[j]))
359
+ new_vis = get_image_with_relevance(image_ten[j]*relevance[j], torch.ones_like(image_ten[j]))
360
+ old_vis = get_image_with_relevance(image_ten[j], orig_relevance[j])
361
+ gt = get_image_with_relevance(image_ten[j], seg_map[j])
362
+ h_img = cv2.hconcat([image, gt, old_vis, new_vis])
363
+ cv2.imwrite(f'{args.experiment_folder}/train_samples/res_{i}_{j}.jpg', h_img)
364
+
365
+ # measure accuracy and record loss
366
+ acc1, acc5 = accuracy(output, class_name, topk=(1, 5))
367
+ losses.update(loss.item(), image_ten.size(0))
368
+ top1.update(acc1[0], image_ten.size(0))
369
+ top5.update(acc5[0], image_ten.size(0))
370
+
371
+ # metrics for original vit
372
+ acc1_orig, acc5_orig = accuracy(output_orig, class_name, topk=(1, 5))
373
+ orig_top1.update(acc1_orig[0], image_ten.size(0))
374
+ orig_top5.update(acc5_orig[0], image_ten.size(0))
375
+
376
+ # compute gradient and do SGD step
377
+ optimizer.zero_grad()
378
+ loss.backward()
379
+ optimizer.step()
380
+
381
+ if i % args.print_freq == 0:
382
+ progress.display(i)
383
+ args.logger.add_scalar('{}/{}'.format('train', 'segmentation_loss'), segmentation_loss,
384
+ epoch*len(train_loader)+i)
385
+ args.logger.add_scalar('{}/{}'.format('train', 'classification_loss'), classification_loss,
386
+ epoch * len(train_loader) + i)
387
+ args.logger.add_scalar('{}/{}'.format('train', 'orig_top1'), acc1_orig,
388
+ epoch * len(train_loader) + i)
389
+ args.logger.add_scalar('{}/{}'.format('train', 'top1'), acc1,
390
+ epoch * len(train_loader) + i)
391
+ args.logger.add_scalar('{}/{}'.format('train', 'orig_top5'), acc5_orig,
392
+ epoch * len(train_loader) + i)
393
+ args.logger.add_scalar('{}/{}'.format('train', 'top5'), acc5,
394
+ epoch * len(train_loader) + i)
395
+ args.logger.add_scalar('{}/{}'.format('train', 'tot_loss'), loss,
396
+ epoch * len(train_loader) + i)
397
+
398
+
399
+ def validate(val_loader, model, criterion, epoch, args):
400
+ mse_criterion = torch.nn.MSELoss(reduction='mean')
401
+
402
+ losses = AverageMeter('Loss', ':.4e')
403
+ top1 = AverageMeter('Acc@1', ':6.2f')
404
+ top5 = AverageMeter('Acc@5', ':6.2f')
405
+ orig_top1 = AverageMeter('Acc@1_orig', ':6.2f')
406
+ orig_top5 = AverageMeter('Acc@5_orig', ':6.2f')
407
+ progress = ProgressMeter(
408
+ len(val_loader),
409
+ [losses, top1, top5, orig_top1, orig_top5],
410
+ prefix="Epoch: [{}]".format(val_loader))
411
+
412
+ # switch to evaluate mode
413
+ model.eval()
414
+
415
+ orig_model = vit(pretrained=True).cuda()
416
+ orig_model.eval()
417
+
418
+ with torch.no_grad():
419
+ for i, (seg_map, image_ten, class_name) in enumerate(val_loader):
420
+ if args.gpu is not None:
421
+ image_ten = image_ten.cuda(args.gpu, non_blocking=True)
422
+ if torch.cuda.is_available():
423
+ seg_map = seg_map.cuda(args.gpu, non_blocking=True)
424
+ class_name = class_name.cuda(args.gpu, non_blocking=True)
425
+
426
+ with torch.enable_grad():
427
+ image_ten.requires_grad = True
428
+ output = model(image_ten)
429
+
430
+ # segmentation loss
431
+ EPS = 10e-12
432
+ y_pred = torch.sum(torch.log(F.softmax(output, dim=1) + EPS))
433
+ relevance = torch.autograd.grad(y_pred, image_ten, retain_graph=True)[0]
434
+
435
+ reverse_seg_map = seg_map.clone()
436
+ reverse_seg_map[reverse_seg_map == 1] = -1
437
+ reverse_seg_map[reverse_seg_map == 0] = 1
438
+ reverse_seg_map[reverse_seg_map == -1] = 0
439
+ rrr_loss = (relevance * reverse_seg_map) ** 2
440
+ segmentation_loss = rrr_loss.sum()
441
+
442
+ # classification loss
443
+ output = model(image_ten)
444
+ with torch.no_grad():
445
+ output_orig = orig_model(image_ten)
446
+ if args.temperature != 1:
447
+ output = output / args.temperature
448
+ classification_loss = criterion(output, class_name.flatten())
449
+
450
+ loss = args.lambda_seg * segmentation_loss + args.lambda_acc * classification_loss
451
+
452
+ # save results
453
+ if i % args.save_interval == 0:
454
+ with torch.enable_grad():
455
+ orig_relevance = generate_relevance(orig_model, image_ten, index=class_name)
456
+ for j in range(image_ten.shape[0]):
457
+ image = get_image_with_relevance(image_ten[j], torch.ones_like(image_ten[j]))
458
+ new_vis = get_image_with_relevance(image_ten[j]*relevance[j], torch.ones_like(image_ten[j]))
459
+ old_vis = get_image_with_relevance(image_ten[j], orig_relevance[j])
460
+ gt = get_image_with_relevance(image_ten[j], seg_map[j])
461
+ h_img = cv2.hconcat([image, gt, old_vis, new_vis])
462
+ cv2.imwrite(f'{args.experiment_folder}/val_samples/res_{i}_{j}.jpg', h_img)
463
+
464
+ # measure accuracy and record loss
465
+ acc1, acc5 = accuracy(output, class_name, topk=(1, 5))
466
+ losses.update(loss.item(), image_ten.size(0))
467
+ top1.update(acc1[0], image_ten.size(0))
468
+ top5.update(acc5[0], image_ten.size(0))
469
+
470
+ # metrics for original vit
471
+ acc1_orig, acc5_orig = accuracy(output_orig, class_name, topk=(1, 5))
472
+ orig_top1.update(acc1_orig[0], image_ten.size(0))
473
+ orig_top5.update(acc5_orig[0], image_ten.size(0))
474
+
475
+ if i % args.print_freq == 0:
476
+ progress.display(i)
477
+ args.logger.add_scalar('{}/{}'.format('val', 'segmentation_loss'), segmentation_loss,
478
+ epoch * len(val_loader) + i)
479
+ args.logger.add_scalar('{}/{}'.format('val', 'classification_loss'), classification_loss,
480
+ epoch * len(val_loader) + i)
481
+ args.logger.add_scalar('{}/{}'.format('val', 'orig_top1'), acc1_orig,
482
+ epoch * len(val_loader) + i)
483
+ args.logger.add_scalar('{}/{}'.format('val', 'top1'), acc1,
484
+ epoch * len(val_loader) + i)
485
+ args.logger.add_scalar('{}/{}'.format('val', 'orig_top5'), acc5_orig,
486
+ epoch * len(val_loader) + i)
487
+ args.logger.add_scalar('{}/{}'.format('val', 'top5'), acc5,
488
+ epoch * len(val_loader) + i)
489
+ args.logger.add_scalar('{}/{}'.format('val', 'tot_loss'), loss,
490
+ epoch * len(val_loader) + i)
491
+
492
+ # TODO: this should also be done with the ProgressMeter
493
+ print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'
494
+ .format(top1=top1, top5=top5))
495
+
496
+ return losses.avg
497
+
498
+
499
+ def save_checkpoint(state, is_best, folder, filename='checkpoint.pth.tar'):
500
+ torch.save(state, f'{folder}/{filename}')
501
+ if is_best:
502
+ shutil.copyfile(f'{folder}/{filename}', f'{folder}/model_best.pth.tar')
503
+
504
+
505
+ class AverageMeter(object):
506
+ """Computes and stores the average and current value"""
507
+ def __init__(self, name, fmt=':f'):
508
+ self.name = name
509
+ self.fmt = fmt
510
+ self.reset()
511
+
512
+ def reset(self):
513
+ self.val = 0
514
+ self.avg = 0
515
+ self.sum = 0
516
+ self.count = 0
517
+
518
+ def update(self, val, n=1):
519
+ self.val = val
520
+ self.sum += val * n
521
+ self.count += n
522
+ self.avg = self.sum / self.count
523
+
524
+ def __str__(self):
525
+ fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
526
+ return fmtstr.format(**self.__dict__)
527
+
528
+
529
+ class ProgressMeter(object):
530
+ def __init__(self, num_batches, meters, prefix=""):
531
+ self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
532
+ self.meters = meters
533
+ self.prefix = prefix
534
+
535
+ def display(self, batch):
536
+ entries = [self.prefix + self.batch_fmtstr.format(batch)]
537
+ entries += [str(meter) for meter in self.meters]
538
+ print('\t'.join(entries))
539
+
540
+ def _get_batch_fmtstr(self, num_batches):
541
+ num_digits = len(str(num_batches // 1))
542
+ fmt = '{:' + str(num_digits) + 'd}'
543
+ return '[' + fmt + '/' + fmt.format(num_batches) + ']'
544
+
545
+ def adjust_learning_rate(optimizer, epoch, args):
546
+ """Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
547
+ lr = args.lr * (0.85 ** (epoch // 2))
548
+ for param_group in optimizer.param_groups:
549
+ param_group['lr'] = lr
550
+
551
+
552
+ def accuracy(output, target, topk=(1,)):
553
+ """Computes the accuracy over the k top predictions for the specified values of k"""
554
+ with torch.no_grad():
555
+ maxk = max(topk)
556
+ batch_size = target.size(0)
557
+
558
+ _, pred = output.topk(maxk, 1, True, True)
559
+ pred = pred.t()
560
+ correct = pred.eq(target.view(1, -1).expand_as(pred))
561
+
562
+ res = []
563
+ for k in topk:
564
+ correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
565
+ res.append(correct_k.mul_(100.0 / batch_size))
566
+ return res
567
+
568
+
569
+ if __name__ == '__main__':
570
+ main()
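
Note on the loss above: the RRR ("right for the right reasons") term penalizes the gradient of the summed log-probabilities with respect to the input pixels wherever the inverted segmentation map marks background. A minimal standalone sketch of that term, assuming a binary seg_map broadcastable to the image shape (the function and variable names here are illustrative, not part of the repository API):

import torch
import torch.nn.functional as F

def rrr_background_penalty(model, images, seg_map, eps=1e-12):
    # images: (B, 3, H, W); seg_map: binary, 1 = foreground, 0 = background
    images = images.clone().requires_grad_(True)
    log_probs = torch.log(F.softmax(model(images), dim=1) + eps).sum()
    # input-gradient "relevance", kept differentiable so the penalty itself can be backpropagated
    relevance = torch.autograd.grad(log_probs, images, create_graph=True)[0]
    background = 1.0 - seg_map          # same effect as the 1 / -1 / 0 value swap in the script
    return (relevance * background).pow(2).sum()

In the script itself this term is scaled by --lambda_seg and added to the cross-entropy term scaled by --lambda_acc.
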
imagenet_finetune_tokencut.py ADDED
@@ -0,0 +1,577 @@
1
+ import argparse
2
+ import os
3
+ import random
4
+ import shutil
5
+ import time
6
+ import warnings
7
+
8
+ import torch
9
+ import torch.nn as nn
10
+ import torch.nn.parallel
11
+ import torch.backends.cudnn as cudnn
12
+ import torch.distributed as dist
13
+ import torch.optim
14
+ import torch.multiprocessing as mp
15
+ import torch.utils.data
16
+ import torch.utils.data.distributed
17
+ import torchvision.transforms as transforms
18
+ import torchvision.datasets as datasets
19
+ import torchvision.models as models
20
+ from tokencut_dataset import SegmentationDataset, VAL_PARTITION, TRAIN_PARTITION
21
+
22
+ # Uncomment the expected model below
23
+
24
+ # ViT
25
+ from ViT.ViT import vit_base_patch16_224 as vit
26
+ # from ViT.ViT import vit_large_patch16_224 as vit
27
+
28
+ # ViT-AugReg
29
+ # from ViT.ViT_new import vit_small_patch16_224 as vit
30
+ # from ViT.ViT_new import vit_base_patch16_224 as vit
31
+ # from ViT.ViT_new import vit_large_patch16_224 as vit
32
+
33
+ # DeiT
34
+ # from ViT.ViT import deit_base_patch16_224 as vit
35
+ # from ViT.ViT import deit_small_patch16_224 as vit
36
+
37
+ from ViT.explainer import generate_relevance, get_image_with_relevance
38
+ import torchvision
39
+ import cv2
40
+ from torch.utils.tensorboard import SummaryWriter
41
+ import json
42
+
43
+ model_names = sorted(name for name in models.__dict__
44
+ if name.islower() and not name.startswith("__")
45
+ and callable(models.__dict__[name]))
46
+ model_names.append("vit")
47
+
48
+ parser = argparse.ArgumentParser(description='PyTorch ImageNet Training')
49
+ parser.add_argument('--data', metavar='DATA',
50
+ help='path to dataset')
51
+ parser.add_argument('--seg_data', metavar='SEG_DATA',
52
+ help='path to segmentation dataset')
53
+ parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
54
+ help='number of data loading workers (default: 4)')
55
+ parser.add_argument('--epochs', default=150, type=int, metavar='N',
56
+ help='number of total epochs to run')
57
+ parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
58
+ help='manual epoch number (useful on restarts)')
59
+ parser.add_argument('-b', '--batch-size', default=10, type=int,
60
+ metavar='N',
61
+ help='mini-batch size (default: 10), this is the total '
62
+ 'batch size of all GPUs on the current node when '
63
+ 'using Data Parallel or Distributed Data Parallel')
64
+ parser.add_argument('--lr', '--learning-rate', default=3e-6, type=float,
65
+ metavar='LR', help='initial learning rate', dest='lr')
66
+ parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
67
+ help='momentum')
68
+ parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
69
+ metavar='W', help='weight decay (default: 1e-4)',
70
+ dest='weight_decay')
71
+ parser.add_argument('-p', '--print-freq', default=10, type=int,
72
+ metavar='N', help='print frequency (default: 10)')
73
+ parser.add_argument('--resume', default='', type=str, metavar='PATH',
74
+ help='path to latest checkpoint (default: none)')
75
+ parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true',
76
+ help='evaluate model on validation set')
77
+ parser.add_argument('--pretrained', dest='pretrained', action='store_true',
78
+ help='use pre-trained model')
79
+ parser.add_argument('--world-size', default=-1, type=int,
80
+ help='number of nodes for distributed training')
81
+ parser.add_argument('--rank', default=-1, type=int,
82
+ help='node rank for distributed training')
83
+ parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str,
84
+ help='url used to set up distributed training')
85
+ parser.add_argument('--dist-backend', default='nccl', type=str,
86
+ help='distributed backend')
87
+ parser.add_argument('--seed', default=None, type=int,
88
+ help='seed for initializing training. ')
89
+ parser.add_argument('--gpu', default=None, type=int,
90
+ help='GPU id to use.')
91
+ parser.add_argument('--save_interval', default=20, type=int,
92
+ help='interval to save segmentation results.')
93
+ parser.add_argument('--num_samples', default=3, type=int,
94
+ help='number of samples per class for training')
95
+ parser.add_argument('--multiprocessing-distributed', action='store_true',
96
+ help='Use multi-processing distributed training to launch '
97
+ 'N processes per node, which has N GPUs. This is the '
98
+ 'fastest way to use PyTorch for either single node or '
99
+ 'multi node data parallel training')
100
+ parser.add_argument('--lambda_seg', default=0.1, type=float,
101
+ help='influence of segmentation loss.')
102
+ parser.add_argument('--lambda_acc', default=1, type=float,
103
+ help='influence of accuracy loss.')
104
+ parser.add_argument('--experiment_folder', default=None, type=str,
105
+ help='path to folder to use for experiment.')
106
+ parser.add_argument('--dilation', default=0, type=float,
107
+ help='Use dilation on the segmentation maps.')
108
+ parser.add_argument('--lambda_background', default=1, type=float,
109
+ help='coefficient of loss for segmentation background.')
110
+ parser.add_argument('--lambda_foreground', default=0.3, type=float,
111
+ help='coefficient of loss for segmentation foreground.')
112
+ parser.add_argument('--num_classes', default=500, type=int,
113
+ help='number of classes used for training (default: 500).')
114
+ parser.add_argument('--temperature', default=1, type=float,
115
+ help='temperature for softmax (mostly for DeiT).')
116
+
117
+ best_loss = float('inf')
118
+
119
+ def main():
120
+ args = parser.parse_args()
121
+
122
+ if args.experiment_folder is None:
123
+ args.experiment_folder = f'experiment/' \
124
+ f'lr_{args.lr}_seg_{args.lambda_seg}_acc_{args.lambda_acc}' \
125
+ f'_bckg_{args.lambda_background}_fgd_{args.lambda_foreground}'
126
+ if args.temperature != 1:
127
+ args.experiment_folder = args.experiment_folder + f'_tempera_{args.temperature}'
128
+ if args.batch_size != 8:
129
+ args.experiment_folder = args.experiment_folder + f'_bs_{args.batch_size}'
130
+ if args.num_classes != 500:
131
+ args.experiment_folder = args.experiment_folder + f'_num_classes_{args.num_classes}'
132
+ if args.num_samples != 3:
133
+ args.experiment_folder = args.experiment_folder + f'_num_samples_{args.num_samples}'
134
+ if args.epochs != 150:
135
+ args.experiment_folder = args.experiment_folder + f'_num_epochs_{args.epochs}'
136
+
137
+ if os.path.exists(args.experiment_folder):
138
+ raise Exception(f"Experiment path {args.experiment_folder} already exists!")
139
+ os.mkdir(args.experiment_folder)
140
+ os.mkdir(f'{args.experiment_folder}/train_samples')
141
+ os.mkdir(f'{args.experiment_folder}/val_samples')
142
+
143
+ with open(f'{args.experiment_folder}/commandline_args.txt', 'w') as f:
144
+ json.dump(args.__dict__, f, indent=2)
145
+
146
+ if args.seed is not None:
147
+ random.seed(args.seed)
148
+ torch.manual_seed(args.seed)
149
+ cudnn.deterministic = True
150
+ warnings.warn('You have chosen to seed training. '
151
+ 'This will turn on the CUDNN deterministic setting, '
152
+ 'which can slow down your training considerably! '
153
+ 'You may see unexpected behavior when restarting '
154
+ 'from checkpoints.')
155
+
156
+ if args.gpu is not None:
157
+ warnings.warn('You have chosen a specific GPU. This will completely '
158
+ 'disable data parallelism.')
159
+
160
+ if args.dist_url == "env://" and args.world_size == -1:
161
+ args.world_size = int(os.environ["WORLD_SIZE"])
162
+
163
+ args.distributed = args.world_size > 1 or args.multiprocessing_distributed
164
+
165
+ ngpus_per_node = torch.cuda.device_count()
166
+ if args.multiprocessing_distributed:
167
+ # Since we have ngpus_per_node processes per node, the total world_size
168
+ # needs to be adjusted accordingly
169
+ args.world_size = ngpus_per_node * args.world_size
170
+ # Use torch.multiprocessing.spawn to launch distributed processes: the
171
+ # main_worker process function
172
+ mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
173
+ else:
174
+ # Simply call main_worker function
175
+ main_worker(args.gpu, ngpus_per_node, args)
176
+
177
+
178
+ def main_worker(gpu, ngpus_per_node, args):
179
+ global best_loss
180
+ args.gpu = gpu
181
+
182
+ if args.gpu is not None:
183
+ print("Use GPU: {} for training".format(args.gpu))
184
+
185
+ if args.distributed:
186
+ if args.dist_url == "env://" and args.rank == -1:
187
+ args.rank = int(os.environ["RANK"])
188
+ if args.multiprocessing_distributed:
189
+ # For multiprocessing distributed training, rank needs to be the
190
+ # global rank among all the processes
191
+ args.rank = args.rank * ngpus_per_node + gpu
192
+ dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
193
+ world_size=args.world_size, rank=args.rank)
194
+ # create model
195
+ print("=> creating model")
196
+ model = vit(pretrained=True).cuda()
197
+ model.train()
198
+ print("done")
199
+
200
+ if not torch.cuda.is_available():
201
+ print('using CPU, this will be slow')
202
+ elif args.distributed:
203
+ # For multiprocessing distributed, DistributedDataParallel constructor
204
+ # should always set the single device scope, otherwise,
205
+ # DistributedDataParallel will use all available devices.
206
+ if args.gpu is not None:
207
+ torch.cuda.set_device(args.gpu)
208
+ model.cuda(args.gpu)
209
+ # When using a single GPU per process and per
210
+ # DistributedDataParallel, we need to divide the batch size
211
+ # ourselves based on the total number of GPUs we have
212
+ args.batch_size = int(args.batch_size / ngpus_per_node)
213
+ args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node)
214
+ model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
215
+ else:
216
+ model.cuda()
217
+ # DistributedDataParallel will divide and allocate batch_size to all
218
+ # available GPUs if device_ids are not set
219
+ model = torch.nn.parallel.DistributedDataParallel(model)
220
+ elif args.gpu is not None:
221
+ torch.cuda.set_device(args.gpu)
222
+ model = model.cuda(args.gpu)
223
+ else:
224
+ # DataParallel will divide and allocate batch_size to all available GPUs
225
+ print("start")
226
+ model = torch.nn.DataParallel(model).cuda()
227
+
228
+ # define loss function (criterion) and optimizer
229
+ criterion = nn.CrossEntropyLoss().cuda(args.gpu)
230
+ optimizer = torch.optim.AdamW(model.parameters(), args.lr, weight_decay=args.weight_decay)
231
+
232
+ # optionally resume from a checkpoint
233
+ if args.resume:
234
+ if os.path.isfile(args.resume):
235
+ print("=> loading checkpoint '{}'".format(args.resume))
236
+ if args.gpu is None:
237
+ checkpoint = torch.load(args.resume)
238
+ else:
239
+ # Map model to be loaded to specified single gpu.
240
+ loc = 'cuda:{}'.format(args.gpu)
241
+ checkpoint = torch.load(args.resume, map_location=loc)
242
+ args.start_epoch = checkpoint['epoch']
243
+ best_loss = checkpoint['best_loss']
244
+ if args.gpu is not None:
245
+ # best_loss may be from a checkpoint from a different GPU
246
+ best_loss = best_loss.to(args.gpu)
247
+ model.load_state_dict(checkpoint['state_dict'])
248
+ optimizer.load_state_dict(checkpoint['optimizer'])
249
+ print("=> loaded checkpoint '{}' (epoch {})"
250
+ .format(args.resume, checkpoint['epoch']))
251
+ else:
252
+ print("=> no checkpoint found at '{}'".format(args.resume))
253
+
254
+ cudnn.benchmark = True
255
+
256
+ train_dataset = SegmentationDataset(args.seg_data, args.data, partition=TRAIN_PARTITION, train_classes=args.num_classes,
257
+ num_samples=args.num_samples)
258
+
259
+ if args.distributed:
260
+ train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
261
+ else:
262
+ train_sampler = None
263
+
264
+ train_loader = torch.utils.data.DataLoader(
265
+ train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None),
266
+ num_workers=args.workers, pin_memory=True, sampler=train_sampler)
267
+
268
+ val_dataset = SegmentationDataset(args.seg_data, args.data, partition=VAL_PARTITION, train_classes=args.num_classes,
269
+ num_samples=1)
270
+
271
+ val_loader = torch.utils.data.DataLoader(
272
+ val_dataset, batch_size=10, shuffle=False,
273
+ num_workers=args.workers, pin_memory=True)
274
+
275
+ if args.evaluate:
276
+ validate(val_loader, model, criterion, 0, args)
277
+ return
278
+
279
+ for epoch in range(args.start_epoch, args.epochs):
280
+ if args.distributed:
281
+ train_sampler.set_epoch(epoch)
282
+ adjust_learning_rate(optimizer, epoch, args)
283
+
284
+ log_dir = os.path.join(args.experiment_folder, 'logs')
285
+ logger = SummaryWriter(log_dir=log_dir)
286
+ args.logger = logger
287
+
288
+ # train for one epoch
289
+ train(train_loader, model, criterion, optimizer, epoch, args)
290
+
291
+ # evaluate on validation set
292
+ loss1 = validate(val_loader, model, criterion, epoch, args)
293
+
294
+ # remember best acc@1 and save checkpoint
295
+ is_best = loss1 <= best_loss
296
+ best_loss = min(loss1, best_loss)
297
+
298
+ if not args.multiprocessing_distributed or (args.multiprocessing_distributed
299
+ and args.rank % ngpus_per_node == 0):
300
+ save_checkpoint({
301
+ 'epoch': epoch + 1,
302
+ 'state_dict': model.state_dict(),
303
+ 'best_loss': best_loss,
304
+ 'optimizer' : optimizer.state_dict(),
305
+ }, is_best, folder=args.experiment_folder)
306
+
307
+
308
+ def train(train_loader, model, criterion, optimizer, epoch, args):
309
+ mse_criterion = torch.nn.MSELoss(reduction='mean')
310
+ losses = AverageMeter('Loss', ':.4e')
311
+ top1 = AverageMeter('Acc@1', ':6.2f')
312
+ top5 = AverageMeter('Acc@5', ':6.2f')
313
+ orig_top1 = AverageMeter('Acc@1_orig', ':6.2f')
314
+ orig_top5 = AverageMeter('Acc@5_orig', ':6.2f')
315
+ progress = ProgressMeter(
316
+ len(train_loader),
317
+ # [batch_time, data_time, losses, top1, top5, orig_top1, orig_top5],
318
+ [losses, top1, top5, orig_top1, orig_top5],
319
+ prefix="Epoch: [{}]".format(epoch))
320
+
321
+ orig_model = vit(pretrained=True).cuda()
322
+ orig_model.eval()
323
+
324
+ # switch to train mode
325
+ model.train()
326
+
327
+ end = time.time()
328
+ for i, (seg_map, image_ten, class_name) in enumerate(train_loader):
329
+
330
+ if torch.cuda.is_available():
331
+ image_ten = image_ten.cuda(args.gpu, non_blocking=True)
332
+ seg_map = seg_map.cuda(args.gpu, non_blocking=True)
333
+ class_name = class_name.cuda(args.gpu, non_blocking=True)
334
+
335
+ # compute output
336
+
337
+ # segmentation loss
338
+ relevance = generate_relevance(model, image_ten, index=class_name)
339
+
340
+ reverse_seg_map = seg_map.clone()
341
+ reverse_seg_map[reverse_seg_map == 1] = -1
342
+ reverse_seg_map[reverse_seg_map == 0] = 1
343
+ reverse_seg_map[reverse_seg_map == -1] = 0
344
+ background_loss = mse_criterion(relevance * reverse_seg_map, torch.zeros_like(relevance))
345
+ foreground_loss = mse_criterion(relevance * seg_map, seg_map)
346
+ segmentation_loss = args.lambda_background * background_loss
347
+ segmentation_loss += args.lambda_foreground * foreground_loss
348
+
349
+ # classification loss
350
+ output = model(image_ten)
351
+ with torch.no_grad():
352
+ output_orig = orig_model(image_ten)
353
+
354
+ _, pred = output.topk(1, 1, True, True)
355
+ pred = pred.flatten()
356
+ if args.temperature != 1:
357
+ output = output / args.temperature
358
+ classification_loss = criterion(output, pred)
359
+
360
+ loss = args.lambda_seg * segmentation_loss + args.lambda_acc * classification_loss
361
+
362
+ # debugging output
363
+ if i % args.save_interval == 0:
364
+ orig_relevance = generate_relevance(orig_model, image_ten, index=class_name)
365
+ for j in range(image_ten.shape[0]):
366
+ image = get_image_with_relevance(image_ten[j], torch.ones_like(image_ten[j]))
367
+ new_vis = get_image_with_relevance(image_ten[j], relevance[j])
368
+ old_vis = get_image_with_relevance(image_ten[j], orig_relevance[j])
369
+ gt = get_image_with_relevance(image_ten[j], seg_map[j])
370
+ h_img = cv2.hconcat([image, gt, old_vis, new_vis])
371
+ cv2.imwrite(f'{args.experiment_folder}/train_samples/res_{i}_{j}.jpg', h_img)
372
+
373
+ # measure accuracy and record loss
374
+ acc1, acc5 = accuracy(output, class_name, topk=(1, 5))
375
+ losses.update(loss.item(), image_ten.size(0))
376
+ top1.update(acc1[0], image_ten.size(0))
377
+ top5.update(acc5[0], image_ten.size(0))
378
+
379
+ # metrics for original vit
380
+ acc1_orig, acc5_orig = accuracy(output_orig, class_name, topk=(1, 5))
381
+ orig_top1.update(acc1_orig[0], image_ten.size(0))
382
+ orig_top5.update(acc5_orig[0], image_ten.size(0))
383
+
384
+ # compute gradient and do SGD step
385
+ optimizer.zero_grad()
386
+ loss.backward()
387
+ optimizer.step()
388
+
389
+ if i % args.print_freq == 0:
390
+ progress.display(i)
391
+ args.logger.add_scalar('{}/{}'.format('train', 'segmentation_loss'), segmentation_loss,
392
+ epoch*len(train_loader)+i)
393
+ args.logger.add_scalar('{}/{}'.format('train', 'classification_loss'), classification_loss,
394
+ epoch * len(train_loader) + i)
395
+ args.logger.add_scalar('{}/{}'.format('train', 'orig_top1'), acc1_orig,
396
+ epoch * len(train_loader) + i)
397
+ args.logger.add_scalar('{}/{}'.format('train', 'top1'), acc1,
398
+ epoch * len(train_loader) + i)
399
+ args.logger.add_scalar('{}/{}'.format('train', 'orig_top5'), acc5_orig,
400
+ epoch * len(train_loader) + i)
401
+ args.logger.add_scalar('{}/{}'.format('train', 'top5'), acc5,
402
+ epoch * len(train_loader) + i)
403
+ args.logger.add_scalar('{}/{}'.format('train', 'tot_loss'), loss,
404
+ epoch * len(train_loader) + i)
405
+
406
+
407
+ def validate(val_loader, model, criterion, epoch, args):
408
+ mse_criterion = torch.nn.MSELoss(reduction='mean')
409
+ losses = AverageMeter('Loss', ':.4e')
410
+ top1 = AverageMeter('Acc@1', ':6.2f')
411
+ top5 = AverageMeter('Acc@5', ':6.2f')
412
+ orig_top1 = AverageMeter('Acc@1_orig', ':6.2f')
413
+ orig_top5 = AverageMeter('Acc@5_orig', ':6.2f')
414
+ progress = ProgressMeter(
415
+ len(val_loader),
416
+ [losses, top1, top5, orig_top1, orig_top5],
417
+ prefix="Epoch: [{}]".format(val_loader))
418
+
419
+ # switch to evaluate mode
420
+ model.eval()
421
+
422
+ orig_model = vit(pretrained=True).cuda()
423
+ orig_model.eval()
424
+
425
+ with torch.no_grad():
426
+ end = time.time()
427
+ for i, (seg_map, image_ten, class_name) in enumerate(val_loader):
428
+ if args.gpu is not None:
429
+ image_ten = image_ten.cuda(args.gpu, non_blocking=True)
430
+ if torch.cuda.is_available():
431
+ seg_map = seg_map.cuda(args.gpu, non_blocking=True)
432
+ class_name = class_name.cuda(args.gpu, non_blocking=True)
433
+
434
+ # segmentation loss
435
+ with torch.enable_grad():
436
+ relevance = generate_relevance(model, image_ten, index=class_name)
437
+
438
+ reverse_seg_map = seg_map.clone()
439
+ reverse_seg_map[reverse_seg_map == 1] = -1
440
+ reverse_seg_map[reverse_seg_map == 0] = 1
441
+ reverse_seg_map[reverse_seg_map == -1] = 0
442
+ background_loss = mse_criterion(relevance * reverse_seg_map, torch.zeros_like(relevance))
443
+ foreground_loss = mse_criterion(relevance * seg_map, seg_map)
444
+ segmentation_loss = args.lambda_background * background_loss
445
+ segmentation_loss += args.lambda_foreground * foreground_loss
446
+
447
+ # classification loss
448
+ with torch.no_grad():
449
+ output = model(image_ten)
450
+ output_orig = orig_model(image_ten)
451
+
452
+ _, pred = output.topk(1, 1, True, True)
453
+ pred = pred.flatten()
454
+ if args.temperature != 1:
455
+ output = output / args.temperature
456
+ classification_loss = criterion(output, pred)
457
+ loss = args.lambda_seg * segmentation_loss + args.lambda_acc * classification_loss
458
+
459
+ # save results
460
+ if i % args.save_interval == 0:
461
+ with torch.enable_grad():
462
+ orig_relevance = generate_relevance(orig_model, image_ten, index=class_name)
463
+ for j in range(image_ten.shape[0]):
464
+ image = get_image_with_relevance(image_ten[j], torch.ones_like(image_ten[j]))
465
+ new_vis = get_image_with_relevance(image_ten[j], relevance[j])
466
+ old_vis = get_image_with_relevance(image_ten[j], orig_relevance[j])
467
+ gt = get_image_with_relevance(image_ten[j], seg_map[j])
468
+ h_img = cv2.hconcat([image, gt, old_vis, new_vis])
469
+ cv2.imwrite(f'{args.experiment_folder}/val_samples/res_{i}_{j}.jpg', h_img)
470
+
471
+ # measure accuracy and record loss
472
+ acc1, acc5 = accuracy(output, class_name, topk=(1, 5))
473
+ losses.update(loss.item(), image_ten.size(0))
474
+ top1.update(acc1[0], image_ten.size(0))
475
+ top5.update(acc5[0], image_ten.size(0))
476
+
477
+ # metrics for original vit
478
+ acc1_orig, acc5_orig = accuracy(output_orig, class_name, topk=(1, 5))
479
+ orig_top1.update(acc1_orig[0], image_ten.size(0))
480
+ orig_top5.update(acc5_orig[0], image_ten.size(0))
481
+
482
+ if i % args.print_freq == 0:
483
+ progress.display(i)
484
+ args.logger.add_scalar('{}/{}'.format('val', 'segmentation_loss'), segmentation_loss,
485
+ epoch * len(val_loader) + i)
486
+ args.logger.add_scalar('{}/{}'.format('val', 'classification_loss'), classification_loss,
487
+ epoch * len(val_loader) + i)
488
+ args.logger.add_scalar('{}/{}'.format('val', 'orig_top1'), acc1_orig,
489
+ epoch * len(val_loader) + i)
490
+ args.logger.add_scalar('{}/{}'.format('val', 'top1'), acc1,
491
+ epoch * len(val_loader) + i)
492
+ args.logger.add_scalar('{}/{}'.format('val', 'orig_top5'), acc5_orig,
493
+ epoch * len(val_loader) + i)
494
+ args.logger.add_scalar('{}/{}'.format('val', 'top5'), acc5,
495
+ epoch * len(val_loader) + i)
496
+ args.logger.add_scalar('{}/{}'.format('val', 'tot_loss'), loss,
497
+ epoch * len(val_loader) + i)
498
+
499
+ # TODO: this should also be done with the ProgressMeter
500
+ print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'
501
+ .format(top1=top1, top5=top5))
502
+
503
+ return losses.avg
504
+
505
+
506
+ def save_checkpoint(state, is_best, folder, filename='checkpoint.pth.tar'):
507
+ torch.save(state, f'{folder}/{filename}')
508
+ if is_best:
509
+ shutil.copyfile(f'{folder}/{filename}', f'{folder}/model_best.pth.tar')
510
+
511
+
512
+ class AverageMeter(object):
513
+ """Computes and stores the average and current value"""
514
+ def __init__(self, name, fmt=':f'):
515
+ self.name = name
516
+ self.fmt = fmt
517
+ self.reset()
518
+
519
+ def reset(self):
520
+ self.val = 0
521
+ self.avg = 0
522
+ self.sum = 0
523
+ self.count = 0
524
+
525
+ def update(self, val, n=1):
526
+ self.val = val
527
+ self.sum += val * n
528
+ self.count += n
529
+ self.avg = self.sum / self.count
530
+
531
+ def __str__(self):
532
+ fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
533
+ return fmtstr.format(**self.__dict__)
534
+
535
+
536
+ class ProgressMeter(object):
537
+ def __init__(self, num_batches, meters, prefix=""):
538
+ self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
539
+ self.meters = meters
540
+ self.prefix = prefix
541
+
542
+ def display(self, batch):
543
+ entries = [self.prefix + self.batch_fmtstr.format(batch)]
544
+ entries += [str(meter) for meter in self.meters]
545
+ print('\t'.join(entries))
546
+
547
+ def _get_batch_fmtstr(self, num_batches):
548
+ num_digits = len(str(num_batches // 1))
549
+ fmt = '{:' + str(num_digits) + 'd}'
550
+ return '[' + fmt + '/' + fmt.format(num_batches) + ']'
551
+
552
+ def adjust_learning_rate(optimizer, epoch, args):
553
+ """Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
554
+ lr = args.lr * (0.85 ** (epoch // 2))
555
+ for param_group in optimizer.param_groups:
556
+ param_group['lr'] = lr
557
+
558
+
559
+ def accuracy(output, target, topk=(1,)):
560
+ """Computes the accuracy over the k top predictions for the specified values of k"""
561
+ with torch.no_grad():
562
+ maxk = max(topk)
563
+ batch_size = target.size(0)
564
+
565
+ _, pred = output.topk(maxk, 1, True, True)
566
+ pred = pred.t()
567
+ correct = pred.eq(target.view(1, -1).expand_as(pred))
568
+
569
+ res = []
570
+ for k in topk:
571
+ correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
572
+ res.append(correct_k.mul_(100.0 / batch_size))
573
+ return res
574
+
575
+
576
+ if __name__ == '__main__':
577
+ main()
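
Note on the TokenCut variant above: the relevance map from generate_relevance is regularized with two MSE terms against the TokenCut-derived pseudo mask (background relevance pushed toward zero, foreground relevance toward the mask), and the classification term uses the model's own top-1 prediction as a pseudo-label. A condensed sketch of that combination, mirroring the training loop above (names are illustrative):

import torch
import torch.nn.functional as F

mse = torch.nn.MSELoss(reduction='mean')

def tokencut_losses(relevance, seg_map, output, lambda_bg=1.0, lambda_fg=0.3):
    # relevance, seg_map: maps in [0, 1] with matching shapes; output: (B, num_classes) logits
    background = 1.0 - seg_map
    background_loss = mse(relevance * background, torch.zeros_like(relevance))
    foreground_loss = mse(relevance * seg_map, seg_map)
    seg_loss = lambda_bg * background_loss + lambda_fg * foreground_loss

    pseudo_label = output.argmax(dim=1)          # model's own top-1 prediction
    cls_loss = F.cross_entropy(output, pseudo_label)
    return seg_loss, cls_loss

The two pieces are then weighted by --lambda_seg and --lambda_acc exactly as in the RRR script.
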
label_str_to_imagenet_classes.py ADDED
@@ -0,0 +1,133 @@
1
+ # Copyright 2020 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Dictionary mapping labels (strings) to imagenet classes (ints).
16
+
17
+ Generated manually.
18
+ """
19
+
20
+ label_str_to_imagenet_classes = {
21
+ 'ambulance': 407,
22
+ 'armadillo': 363,
23
+ 'artichoke': 944,
24
+ 'backpack': 414,
25
+ 'bagel': 931,
26
+ 'balance beam': 416,
27
+ 'banana': 954,
28
+ 'band-aid': 419,
29
+ 'beaker': 438,
30
+ 'bell pepper': 945,
31
+ 'billiard table': 736,
32
+ 'binoculars': 447,
33
+ 'broccoli': 937,
34
+ 'brown bear': 294,
35
+ 'burrito': 965,
36
+ 'candle': 470,
37
+ 'canoe': 472,
38
+ 'cello': 486,
39
+ 'cheetah': 293,
40
+ 'cocktail shaker': 503,
41
+ 'common fig': 952,
42
+ 'computer mouse': 673,
43
+ 'cowboy hat': 515,
44
+ 'cucumber': 943,
45
+ 'diaper': 529,
46
+ 'digital clock': 530,
47
+ 'dumbbell': 543,
48
+ 'envelope': 549,
49
+ 'eraser': 767,
50
+ 'filing cabinet': 553,
51
+ 'flowerpot': 738,
52
+ 'flute': 558,
53
+ 'frying pan': 567,
54
+ 'golf ball': 574,
55
+ 'goose': 99,
56
+ 'guacamole': 924,
57
+ 'hair dryer': 589,
58
+ 'hair spray': 585,
59
+ 'hammer': 587,
60
+ 'hamster': 333,
61
+ 'harmonica': 593,
62
+ 'hedgehog': 334,
63
+ 'hippopotamus': 344,
64
+ 'hot dog': 934,
65
+ 'ipod': 605,
66
+ 'jeans': 608,
67
+ 'kite': 21,
68
+ 'koala': 105,
69
+ 'ladle': 618,
70
+ 'laptop': 620,
71
+ 'lemon': 951,
72
+ 'light switch': 844,
73
+ 'lighthouse': 437,
74
+ 'limousine': 627,
75
+ 'lipstick': 629,
76
+ 'lynx': 287,
77
+ 'magpie': 18,
78
+ 'maracas': 641,
79
+ 'measuring cup': 647,
80
+ 'microwave oven': 651,
81
+ 'miniskirt': 655,
82
+ 'missile': 657,
83
+ 'mixing bowl': 659,
84
+ 'mobile phone': 487,
85
+ 'mushroom': 947,
86
+ 'orange': 950,
87
+ 'ostrich': 9,
88
+ 'otter': 360,
89
+ 'paper towel': 700,
90
+ 'pencil case': 709,
91
+ 'pig': 341,
92
+ 'pillow': 721,
93
+ 'pitcher (container)': 725,
94
+ 'pizza': 963,
95
+ 'plastic bag': 728,
96
+ 'polar bear': 296,
97
+ 'pomegranate': 957,
98
+ 'pretzel': 932,
99
+ 'printer': 742,
100
+ 'punching bag': 747,
101
+ 'racket': 752,
102
+ 'red panda': 387,
103
+ 'remote control': 761,
104
+ 'rugby ball': 768,
105
+ 'ruler': 769,
106
+ 'saxophone': 776,
107
+ 'screwdriver': 784,
108
+ 'sea lion': 150,
109
+ 'seat belt': 785,
110
+ 'skunk': 361,
111
+ 'snowmobile': 802,
112
+ 'soap dispenser': 804,
113
+ 'sock': 806,
114
+ 'sombrero': 808,
115
+ 'spatula': 813,
116
+ 'starfish': 327,
117
+ 'strawberry': 949,
118
+ 'studio couch': 831,
119
+ 'taxi': 468,
120
+ 'teapot': 849,
121
+ 'teddy bear': 850,
122
+ 'tennis ball': 852,
123
+ 'toaster': 859,
124
+ 'toilet paper': 999,
125
+ 'torch': 862,
126
+ 'traffic light': 920,
127
+ 'vase': 883,
128
+ 'volleyball (ball)': 890,
129
+ 'washing machine': 897,
130
+ 'wok': 909,
131
+ 'zebra': 340,
132
+ 'zucchini': 939
133
+ }
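
A quick usage sketch for the mapping above (it is consumed by the robustness loaders below when isSI=True):

from label_str_to_imagenet_classes import label_str_to_imagenet_classes

print(label_str_to_imagenet_classes['zebra'])   # 340
print(len(label_str_to_imagenet_classes))       # number of mapped label strings
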
objectnet_dataset.py ADDED
@@ -0,0 +1,117 @@
1
+ import json
2
+ from torch.utils import data
3
+ from torchvision.datasets import ImageFolder
4
+ import torch
5
+ import os
6
+ from PIL import Image
7
+ import numpy as np
8
+ import argparse
9
+ from tqdm import tqdm
10
+ from munkres import Munkres
11
+ import multiprocessing
12
+ from multiprocessing import Process, Manager
13
+ import collections
14
+ import torchvision.transforms as transforms
15
+ import torchvision.transforms.functional as TF
16
+ import random
17
+ import torchvision
18
+ import cv2
19
+ from label_str_to_imagenet_classes import label_str_to_imagenet_classes
20
+
21
+ torch.manual_seed(0)
22
+ normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5],
23
+ std=[0.5, 0.5, 0.5])
24
+
25
+ transform = transforms.Compose([
26
+ transforms.Resize(256),
27
+ transforms.CenterCrop(224),
28
+ transforms.ToTensor(),
29
+ normalize,
30
+ ])
31
+
32
+ class ObjectNetDataset(ImageFolder):
33
+ def __init__(self, imagenet_path):
34
+ self._imagenet_path = imagenet_path
35
+ self._all_images = []
36
+
37
+ o_dataset = ImageFolder(self._imagenet_path)
38
+ # get mappings folder
39
+ mappings_folder = os.path.abspath(
40
+ os.path.join(self._imagenet_path, "../mappings")
41
+ )
42
+
43
+ # get ObjectNet label to ImageNet label mapping
44
+ with open(
45
+ os.path.join(mappings_folder, "objectnet_to_imagenet_1k.json")
46
+ ) as file_handle:
47
+ o_label_to_all_i_labels = json.load(file_handle)
48
+
49
+ # now remove double i labels to avoid confusion
50
+ o_label_to_i_labels = {
51
+ o_label: all_i_label.split("; ")
52
+ for o_label, all_i_label in o_label_to_all_i_labels.items()
53
+ }
54
+
55
+ # some in-between mappings ...
56
+ o_folder_to_o_idx = o_dataset.class_to_idx
57
+ with open(
58
+ os.path.join(mappings_folder, "folder_to_objectnet_label.json")
59
+ ) as file_handle:
60
+ o_folder_o_label = json.load(file_handle)
61
+
62
+ # now get mapping from o_label to o_idx
63
+ o_label_to_o_idx = {
64
+ o_label: o_folder_to_o_idx[o_folder]
65
+ for o_folder, o_label in o_folder_o_label.items()
66
+ }
67
+
68
+ # some in-between mappings ...
69
+ with open(
70
+ os.path.join(mappings_folder, "pytorch_to_imagenet_2012_id.json")
71
+ ) as file_handle:
72
+ i_idx_to_i_line = json.load(file_handle)
73
+ with open(
74
+ os.path.join(mappings_folder, "imagenet_to_label_2012_v2")
75
+ ) as file_handle:
76
+ i_line_to_i_label = file_handle.readlines()
77
+
78
+ i_line_to_i_label = {
79
+ i_line: i_label[:-1]
80
+ for i_line, i_label in enumerate(i_line_to_i_label)
81
+ }
82
+
83
+ # now get mapping from i_label to i_idx
84
+ i_label_to_i_idx = {
85
+ i_line_to_i_label[i_line]: int(i_idx)
86
+ for i_idx, i_line in i_idx_to_i_line.items()
87
+ }
88
+
89
+ # now get the final mapping of interest!!!
90
+ o_idx_to_i_idxs = {
91
+ o_label_to_o_idx[o_label]: [
92
+ i_label_to_i_idx[i_label] for i_label in i_labels
93
+ ]
94
+ for o_label, i_labels in o_label_to_i_labels.items()
95
+ }
96
+
97
+ self._tag_list = []
98
+ # now get a list of files of interest
99
+ for filepath, o_idx in o_dataset.samples:
100
+ if o_idx not in o_idx_to_i_idxs:
101
+ continue
102
+ rel_file = os.path.relpath(filepath, self._imagenet_path)
103
+ if o_idx_to_i_idxs[o_idx][0] not in self._tag_list:
104
+ self._tag_list.append(o_idx_to_i_idxs[o_idx][0])
105
+ self._all_images.append((rel_file, o_idx_to_i_idxs[o_idx][0]))
106
+
107
+ def __getitem__(self, item):
108
+ image_path, classification = self._all_images[item]
109
+ image_path = os.path.join(self._imagenet_path, image_path)
110
+ image = Image.open(image_path)
111
+ image = image.convert('RGB')
112
+ image = transform(image)
113
+
114
+ return image, classification
115
+
116
+ def __len__(self):
117
+ return len(self._all_images)
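
Usage sketch for the ObjectNet loader above: it expects the ObjectNet images/ folder with the standard mappings/ directory next to it, and keeps only the classes that overlap ImageNet-1k (the path below is a placeholder):

import torch
from objectnet_dataset import ObjectNetDataset

dataset = ObjectNetDataset('/path/to/objectnet-1.0/images')
loader = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=False, num_workers=4)

for images, targets in loader:
    # images: (B, 3, 224, 224) normalized tensors; targets: ImageNet-1k class indices
    break
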
robustness_dataset.py ADDED
@@ -0,0 +1,66 @@
1
+ import json
2
+ from torch.utils import data
3
+ from torchvision.datasets import ImageFolder
4
+ import torch
5
+ import os
6
+ from PIL import Image
7
+ import numpy as np
8
+ import argparse
9
+ from tqdm import tqdm
10
+ from munkres import Munkres
11
+ import multiprocessing
12
+ from multiprocessing import Process, Manager
13
+ import collections
14
+ import torchvision.transforms as transforms
15
+ import torchvision.transforms.functional as TF
16
+ import random
17
+ import torchvision
18
+ import cv2
19
+ from label_str_to_imagenet_classes import label_str_to_imagenet_classes
20
+
21
+ torch.manual_seed(0)
22
+
23
+ ImageItem = collections.namedtuple('ImageItem', ('image_name', 'tag'))
24
+ normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5],
25
+ std=[0.5, 0.5, 0.5])
26
+
27
+ transform = transforms.Compose([
28
+ transforms.Resize(256),
29
+ transforms.CenterCrop(224),
30
+ transforms.ToTensor(),
31
+ normalize,
32
+ ])
33
+
34
+ class RobustnessDataset(ImageFolder):
35
+ def __init__(self, imagenet_path, imagenet_classes_path='imagenet_classes.json', isV2=False, isSI=False):
36
+ self._isV2 = isV2
37
+ self._isSI = isSI
38
+ self._imagenet_path = imagenet_path
39
+ with open(imagenet_classes_path, 'r') as f:
40
+ self._imagenet_classes = json.load(f)
41
+ self._tag_list = [tag for tag in os.listdir(self._imagenet_path)]
42
+ self._all_images = []
43
+ for tag in self._tag_list:
44
+ base_dir = os.path.join(self._imagenet_path, tag)
45
+ for i, file in enumerate(os.listdir(base_dir)):
46
+ self._all_images.append(ImageItem(file, tag))
47
+
48
+
49
+ def __getitem__(self, item):
50
+ image_item = self._all_images[item]
51
+ image_path = os.path.join(self._imagenet_path, image_item.tag, image_item.image_name)
52
+ image = Image.open(image_path)
53
+ image = image.convert('RGB')
54
+ image = transform(image)
55
+
56
+ if self._isV2:
57
+ class_name = int(image_item.tag)
58
+ elif self._isSI:
59
+ class_name = int(label_str_to_imagenet_classes[image_item.tag])
60
+ else:
61
+ class_name = int(self._imagenet_classes[image_item.tag])
62
+
63
+ return image, class_name
64
+
65
+ def __len__(self):
66
+ return len(self._all_images)
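
Usage sketch for RobustnessDataset above: it expects one sub-folder per class, and interprets the folder name as a numeric class id (isV2=True), a label string resolved through label_str_to_imagenet_classes (isSI=True), or a key of imagenet_classes.json by default (paths are placeholders):

import torch
from robustness_dataset import RobustnessDataset

dataset = RobustnessDataset('/path/to/imagenet-a')     # layout: <root>/<class_folder>/<image>.jpg
loader = torch.utils.data.DataLoader(dataset, batch_size=64, shuffle=False, num_workers=4)

images, labels = next(iter(loader))
print(images.shape, labels[:5])
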
robustness_dataset_per_class.py ADDED
@@ -0,0 +1,65 @@
1
+ import json
2
+ from torchvision.datasets import ImageFolder
3
+ import torch
4
+ import os
5
+ from PIL import Image
6
+ import collections
7
+ import torchvision.transforms as transforms
8
+ from label_str_to_imagenet_classes import label_str_to_imagenet_classes
9
+
10
+ torch.manual_seed(0)
11
+
12
+ ImageItem = collections.namedtuple('ImageItem', ('image_name', 'tag'))
13
+
14
+ normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5],
15
+ std=[0.5, 0.5, 0.5])
16
+
17
+ transform = transforms.Compose([
18
+ transforms.Resize(256),
19
+ transforms.CenterCrop(224),
20
+ transforms.ToTensor(),
21
+ normalize,
22
+ ])
23
+
24
+ class RobustnessDataset(ImageFolder):
25
+ def __init__(self, imagenet_path, folder, imagenet_classes_path='imagenet_classes.json', isV2=False, isSI=False):
26
+ self._isV2 = isV2
27
+ self._isSI = isSI
28
+ self._folder = folder
29
+ self._imagenet_path = imagenet_path
30
+ with open(imagenet_classes_path, 'r') as f:
31
+ self._imagenet_classes = json.load(f)
32
+ self._all_images = []
33
+
34
+ base_dir = os.path.join(self._imagenet_path, folder)
35
+ for i, file in enumerate(os.listdir(base_dir)):
36
+ self._all_images.append(ImageItem(file, folder))
37
+
38
+
39
+ def __getitem__(self, item):
40
+ image_item = self._all_images[item]
41
+ image_path = os.path.join(self._imagenet_path, image_item.tag, image_item.image_name)
42
+ image = Image.open(image_path)
43
+ image = image.convert('RGB')
44
+ image = transform(image)
45
+
46
+ if self._isV2:
47
+ class_name = int(image_item.tag)
48
+ elif self._isSI:
49
+ class_name = int(label_str_to_imagenet_classes[image_item.tag])
50
+ else:
51
+ class_name = int(self._imagenet_classes[image_item.tag])
52
+
53
+ return image, class_name
54
+
55
+ def __len__(self):
56
+ return len(self._all_images)
57
+
58
+ def get_classname(self):
59
+ if self._isV2:
60
+ class_name = int(self._folder)
61
+ elif self._isSI:
62
+ class_name = int(label_str_to_imagenet_classes[self._folder])
63
+ else:
64
+ class_name = int(self._imagenet_classes[self._folder])
65
+ return class_name
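
The per-class variant above loads a single class folder at a time and exposes get_classname(), which is convenient for per-class robustness breakdowns. A sketch of such a loop (paths and batch size are placeholders):

import os
import torch
from robustness_dataset_per_class import RobustnessDataset

root = '/path/to/imagenet-a'
for folder in sorted(os.listdir(root)):
    dataset = RobustnessDataset(root, folder)
    loader = torch.utils.data.DataLoader(dataset, batch_size=64, shuffle=False)
    target = dataset.get_classname()
    # run the model over `loader` and accumulate top-1 accuracy for class `target`
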
samples/augreg_base/1_in.png ADDED
samples/augreg_base/2_in.png ADDED
samples/augreg_base/3_in.png ADDED
samples/augreg_base/a.png ADDED
samples/augreg_base/a_2.png ADDED
samples/augreg_base/a_3.png ADDED
samples/catdog.png ADDED
samples/deit_base/1_in.png ADDED
samples/deit_base/2_in.png ADDED
samples/deit_base/3_in.png ADDED
samples/deit_base/a.png ADDED
samples/deit_base/a_2.png ADDED