# Prompt templates used to turn a class name into a natural-language sentence.
#
# Lists whose templates contain a bare `{}` take the class name positionally
# (e.g. `template.format(name)`); `ViLD_templates` uses the named fields
# `{article}` and `{category}` instead.
#
# NOTE(review): the numbers attached to the `vN` labels below (e.g. 59.0875)
# look like evaluation scores for each template selection, but the metric and
# dataset are not stated in this file -- confirm before relying on them.

# source: https://github.com/openai/CLIP/blob/main/notebooks/Prompt_Engineering_for_ImageNet.ipynb
# Template wording is kept verbatim (including "a origami" / "a embroidered");
# do not "fix" the grammar, the strings are data.
IMAGENET_TEMPLATES = [
    'a bad photo of a {}.',
    'a photo of many {}.',
    'a sculpture of a {}.',
    'a photo of the hard to see {}.',
    'a low resolution photo of the {}.',
    'a rendering of a {}.',
    'graffiti of a {}.',
    'a bad photo of the {}.',
    'a cropped photo of the {}.',
    'a tattoo of a {}.',
    'the embroidered {}.',
    'a photo of a hard to see {}.',
    'a bright photo of a {}.',
    'a photo of a clean {}.',
    'a photo of a dirty {}.',
    'a dark photo of the {}.',
    'a drawing of a {}.',
    'a photo of my {}.',
    'the plastic {}.',
    'a photo of the cool {}.',
    'a close-up photo of a {}.',
    'a black and white photo of the {}.',
    'a painting of the {}.',
    'a painting of a {}.',
    'a pixelated photo of the {}.',
    'a sculpture of the {}.',
    'a bright photo of the {}.',
    'a cropped photo of a {}.',
    'a plastic {}.',
    'a photo of the dirty {}.',
    'a jpeg corrupted photo of a {}.',
    'a blurry photo of the {}.',
    'a photo of the {}.',
    'a good photo of the {}.',
    'a rendering of the {}.',
    'a {} in a video game.',
    'a photo of one {}.',
    'a doodle of a {}.',
    'a close-up photo of the {}.',
    'a photo of a {}.',
    'the origami {}.',
    'the {} in a video game.',
    'a sketch of a {}.',
    'a doodle of the {}.',
    'a origami {}.',
    'a low resolution photo of a {}.',
    'the toy {}.',
    'a rendition of the {}.',
    'a photo of the clean {}.',
    'a photo of a large {}.',
    'a rendition of a {}.',
    'a photo of a nice {}.',
    'a photo of a weird {}.',
    'a blurry photo of a {}.',
    'a cartoon {}.',
    'art of a {}.',
    'a sketch of the {}.',
    'a embroidered {}.',
    'a pixelated photo of a {}.',
    'itap of the {}.',
    'a jpeg corrupted photo of the {}.',
    'a good photo of a {}.',
    'a plushie {}.',
    'a photo of the nice {}.',
    'a photo of the small {}.',
    'a photo of the weird {}.',
    'the cartoon {}.',
    'art of the {}.',
    'a drawing of the {}.',
    'a photo of the large {}.',
    'a black and white photo of a {}.',
    'the plushie {}.',
    'a dark photo of a {}.',
    'itap of a {}.',
    'graffiti of the {}.',
    'a toy {}.',
    'itap of my {}.',
    'a photo of a cool {}.',
    'a photo of a small {}.',
    'a tattoo of the {}.',
    # 'A photo of a {} in the scene.',
]

# Hand-picked subset of templates; this is the active definition of
# IMAGENET_TEMPLATES_SELECT. The last entry intentionally has no trailing
# period in the original -- kept as is.
# v1: 59.0875
IMAGENET_TEMPLATES_SELECT = [
    'itap of a {}.',
    'a bad photo of the {}.',
    'a origami {}.',
    'a photo of the large {}.',
    'a {} in a video game.',
    'art of the {}.',
    'a photo of the small {}.',
    'A photo of a {} in the scene',
]

# Earlier/alternative selections (v2 - v9), kept commented out for reference.

# v2: 58.2584
# IMAGENET_TEMPLATES_SELECT = [
#     'itap of a {}',
#     'a bad photo of the {}',
#     'a origami {}',
#     'a photo of the large {}',
#     'art of the {}',
#     'a photo of the small {}',
#     'A photo of a {} in the scene',
# ]

# v3: 59.1006
# IMAGENET_TEMPLATES_SELECT = [
#     'itap of a {}.',
#     'a bad photo of the {}.',
#     'a origami {}.',
#     'a photo of the large {}.',
#     'art of the {}.',
#     'a photo of the small {}.',
#     'a cropped photo of a {}.',
#     'A photo of a {} in the scene',
#     'itap of a {} in the scene',
#     'a bad photo of the {} in the scene',
#     'a origami {} in the scene',
#     'a photo of the large {} in the scene',
#     'art of the {} in the scene',
#     'a photo of the small {} in the scene',
#     'a cropped photo of a {} in the scene',
# ]

# v4: 59.8659
# IMAGENET_TEMPLATES_SELECT = [
#     'a bad photo of the {}.',
#     'a photo of the large {}.',
#     'art of the {}.',
#     'a photo of the small {}.',
#     'a cropped photo of a {}.',
#     'A photo of a {} in the scene',
#     'a bad photo of the {} in the scene',
#     'a photo of the large {} in the scene',
#     'art of the {} in the scene',
#     'a photo of the small {} in the scene',
#     'a cropped photo of a {} in the scene',
#     'a photo of a masked {} in the scene',
# ]

# v5: 59.9346
# IMAGENET_TEMPLATES_SELECT = [
#     'a bad photo of the {}.',
#     'a photo of the large {}.',
#     'art of the {}.',
#     'a photo of the small {}.',
#     'a cropped photo of a {}.',
#     'This is a photo of a {}',
#     'This is a photo of a small {}',
#     'This is a photo of a medium {}',
#     'This is a photo of a large {}',
#     'A photo of a {} in the scene',
#     'a bad photo of the {} in the scene',
#     'a photo of the large {} in the scene',
#     'art of the {} in the scene',
#     'a photo of the small {} in the scene',
#     'a cropped photo of a {} in the scene',
#     'a photo of a masked {} in the scene',
#     'There is a {} in the scene',
#     'There is the {} in the scene',
#     'This is a {} in the scene',
#     'This is the {} in the scene',
#     'This is one {} in the scene',
# ]

# v6: 60.6611
# IMAGENET_TEMPLATES_SELECT = [
#     'a bad photo of the {}.',
#     'a photo of the large {}.',
#     'art of the {}.',
#     'a photo of the small {}.',
#     'a cropped photo of a {}.',
#     'This is a photo of a {}',
#     'This is a photo of a small {}',
#     'This is a photo of a medium {}',
#     'This is a photo of a large {}',
#     'A photo of a {} in the scene',
#     'a bad photo of the {} in the scene',
#     'a photo of the large {} in the scene',
#     'art of the {} in the scene',
#     'a photo of the small {} in the scene',
#     'a cropped photo of a {} in the scene',
#     'a photo of a masked {} in the scene',
#     'There is a {} in the scene',
#     'There is the {} in the scene',
#     'This is a {} in the scene',
#     'This is the {} in the scene',
#     'This is one {} in the scene',
#
#     'There is a masked {} in the scene',
#     'There is the masked {} in the scene',
#     'This is a masked {} in the scene',
#     'This is the masked {} in the scene',
#     'This is one masked {} in the scene',
# ]

# v7: 60.4529
# IMAGENET_TEMPLATES_SELECT = [
#     'a bad photo of the {}.',
#     'a photo of the large {}.',
#     'art of the {}.',
#     'a photo of the small {}.',
#     'a cropped photo of a {}.',
#     'This is a photo of a {}',
#     'This is a photo of a small {}',
#     'This is a photo of a medium {}',
#     'This is a photo of a large {}',
#     'A photo of a {} in the scene',
#     'a bad photo of the {} in the scene',
#     'a photo of the large {} in the scene',
#     'art of the {} in the scene',
#     'a photo of the small {} in the scene',
#     'a cropped photo of a {} in the scene',
#     'a photo of a masked {} in the scene',
#     'There is a {} in the scene',
#     'There is the {} in the scene',
#     'This is a {} in the scene',
#     'This is the {} in the scene',
#     'This is one {} in the scene',
#
#     'There is a cropped {} in the scene',
#     'There is the cropped {} in the scene',
#     'This is a cropped {} in the scene',
#     'This is the cropped {} in the scene',
#     'This is one cropped {} in the scene',
#
#     'a cropped photo of the {}',
#     'a cropped photo of a {}',
#     'a cropped photo of one {}',
#
#     'There is a masked {} in the scene',
#     'There is the masked {} in the scene',
#     'This is a masked {} in the scene',
#     'This is the masked {} in the scene',
#     'This is one masked {} in the scene',
# ]

# v8: 60.7057
# IMAGENET_TEMPLATES_SELECT = [
#     'a bad photo of the {}.',
#     'a photo of the large {}.',
#     'a photo of the small {}.',
#     'a cropped photo of a {}.',
#     'This is a photo of a {}',
#     'This is a photo of a small {}',
#     'This is a photo of a medium {}',
#     'This is a photo of a large {}',
#
#     'This is a masked photo of a {}',
#     'This is a masked photo of a small {}',
#     'This is a masked photo of a medium {}',
#     'This is a masked photo of a large {}',
#
#     'A photo of a {} in the scene',
#     'a bad photo of the {} in the scene',
#     'a photo of the large {} in the scene',
#     'a photo of the small {} in the scene',
#     'a cropped photo of a {} in the scene',
#     'a photo of a masked {} in the scene',
#     'There is a {} in the scene',
#     'There is the {} in the scene',
#     'This is a {} in the scene',
#     'This is the {} in the scene',
#     'This is one {} in the scene',
#
#     'There is a masked {} in the scene',
#     'There is the masked {} in the scene',
#     'This is a masked {} in the scene',
#     'This is the masked {} in the scene',
#     'This is one masked {} in the scene',
# ]

# v9: 60.8775
# IMAGENET_TEMPLATES_SELECT = [
#     'a bad photo of the {}.',
#     'a photo of the large {}.',
#     'a photo of the small {}.',
#     'a cropped photo of a {}.',
#     'This is a photo of a {}',
#     'This is a photo of a small {}',
#     'This is a photo of a medium {}',
#     'This is a photo of a large {}',
#
#     'This is a masked photo of a {}',
#     'This is a masked photo of a small {}',
#     'This is a masked photo of a medium {}',
#     'This is a masked photo of a large {}',
#
#     'This is a cropped photo of a {}',
#     'This is a cropped photo of a small {}',
#     'This is a cropped photo of a medium {}',
#     'This is a cropped photo of a large {}',
#
#     'A photo of a {} in the scene',
#     'a bad photo of the {} in the scene',
#     'a photo of the large {} in the scene',
#     'a photo of the small {} in the scene',
#     'a cropped photo of a {} in the scene',
#     'a photo of a masked {} in the scene',
#     'There is a {} in the scene',
#     'There is the {} in the scene',
#     'This is a {} in the scene',
#     'This is the {} in the scene',
#     'This is one {} in the scene',
#
#     'There is a masked {} in the scene',
#     'There is the masked {} in the scene',
#     'This is a masked {} in the scene',
#     'This is the masked {} in the scene',
#     'This is one masked {} in the scene',
# ]

# Active selection bound to IMAGENET_TEMPLATES_SELECT_CLIP; its entries are
# the same as the commented-out v9 list above.
# v9
IMAGENET_TEMPLATES_SELECT_CLIP = [
    'a bad photo of the {}.',
    'a photo of the large {}.',
    'a photo of the small {}.',
    'a cropped photo of a {}.',
    'This is a photo of a {}',
    'This is a photo of a small {}',
    'This is a photo of a medium {}',
    'This is a photo of a large {}',
    'This is a masked photo of a {}',
    'This is a masked photo of a small {}',
    'This is a masked photo of a medium {}',
    'This is a masked photo of a large {}',
    'This is a cropped photo of a {}',
    'This is a cropped photo of a small {}',
    'This is a cropped photo of a medium {}',
    'This is a cropped photo of a large {}',
    'A photo of a {} in the scene',
    'a bad photo of the {} in the scene',
    'a photo of the large {} in the scene',
    'a photo of the small {} in the scene',
    'a cropped photo of a {} in the scene',
    'a photo of a masked {} in the scene',
    'There is a {} in the scene',
    'There is the {} in the scene',
    'This is a {} in the scene',
    'This is the {} in the scene',
    'This is one {} in the scene',
    'There is a masked {} in the scene',
    'There is the masked {} in the scene',
    'This is a masked {} in the scene',
    'This is the masked {} in the scene',
    'This is one masked {} in the scene',
]

# v10, for comparison
# IMAGENET_TEMPLATES_SELECT_CLIP = [
#     'a photo of a {}.',
#
#     'This is a photo of a {}',
#     'This is a photo of a small {}',
#     'This is a photo of a medium {}',
#     'This is a photo of a large {}',
#
#     'This is a photo of a {}',
#     'This is a photo of a small {}',
#     'This is a photo of a medium {}',
#     'This is a photo of a large {}',
#
#     'a photo of a {} in the scene',
#     'a photo of a {} in the scene',
#
#     'There is a {} in the scene',
#     'There is the {} in the scene',
#     'This is a {} in the scene',
#     'This is the {} in the scene',
#     'This is one {} in the scene',
# ]

# Templates with named fields. Every entry contains `{category}`; entries that
# also contain `{article}` need both keywords when formatted, e.g.
# `t.format(article='an', category='apple')` (extra keywords are ignored by
# `str.format`, so passing both to every entry is safe).
ViLD_templates = [
    'There is {article} {category} in the scene.',
    'There is the {category} in the scene.',
    'a photo of {article} {category} in the scene.',
    'a photo of the {category} in the scene.',
    'a photo of one {category} in the scene.',
    'itap of {article} {category}.',
    'itap of my {category}.',
    'itap of the {category}.',
    'a photo of {article} {category}.',
    'a photo of my {category}.',
    'a photo of the {category}.',
    'a photo of one {category}.',
    'a photo of many {category}.',
    'a good photo of {article} {category}.',
    'a good photo of the {category}.',
    'a bad photo of {article} {category}.',
    'a bad photo of the {category}.',
    'a photo of a nice {category}.',
    'a photo of the nice {category}.',
    'a photo of a cool {category}.',
    'a photo of the cool {category}.',
    'a photo of a weird {category}.',
    'a photo of the weird {category}.',
    'a photo of a small {category}.',
    'a photo of the small {category}.',
    'a photo of a large {category}.',
    'a photo of the large {category}.',
    'a photo of a clean {category}.',
    'a photo of the clean {category}.',
    'a photo of a dirty {category}.',
    'a photo of the dirty {category}.',
    'a bright photo of {article} {category}.',
    'a bright photo of the {category}.',
    'a dark photo of {article} {category}.',
    'a dark photo of the {category}.',
    'a photo of a hard to see {category}.',
    'a photo of the hard to see {category}.',
    'a low resolution photo of {article} {category}.',
    'a low resolution photo of the {category}.',
    'a cropped photo of {article} {category}.',
    'a cropped photo of the {category}.',
    'a close-up photo of {article} {category}.',
    'a close-up photo of the {category}.',
    'a jpeg corrupted photo of {article} {category}.',
    'a jpeg corrupted photo of the {category}.',
    'a blurry photo of {article} {category}.',
    'a blurry photo of the {category}.',
    'a pixelated photo of {article} {category}.',
    'a pixelated photo of the {category}.',
    'a black and white photo of the {category}.',
    'a black and white photo of {article} {category}.',
    'a plastic {category}.',
    'the plastic {category}.',
    'a toy {category}.',
    'the toy {category}.',
    'a plushie {category}.',
    'the plushie {category}.',
    'a cartoon {category}.',
    'the cartoon {category}.',
    'an embroidered {category}.',
    'the embroidered {category}.',
    'a painting of the {category}.',
    'a painting of a {category}.',
]