Spaces:
Sleeping
Sleeping
File size: 2,065 Bytes
859c3ef 3d04a5c 859c3ef 3d04a5c 859c3ef |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 |
from transformers import ViTFeatureExtractor, ViTMAEForPreTraining
import numpy as np
import torch
from PIL import Image
import gradio as gr
# Hub checkpoint that supplies both the preprocessing config and the MAE weights.
_CHECKPOINT = 'andrewbo29/vit-mae-base-formula1'

# Loaded once at import time so every request reuses the same objects.
# NOTE(review): ViTFeatureExtractor looks deprecated in recent transformers
# releases in favour of ViTImageProcessor — confirm before upgrading.
feature_extractor = ViTFeatureExtractor.from_pretrained(_CHECKPOINT)
model = ViTMAEForPreTraining.from_pretrained(_CHECKPOINT)

# Per-channel normalisation statistics taken from the extractor config and
# kept as NumPy arrays; prep_image uses them to undo the normalisation.
imagenet_mean, imagenet_std = (
    np.array(stats)
    for stats in (feature_extractor.image_mean, feature_extractor.image_std)
)
def prep_image(image, mean=None, std=None):
    """Undo channel normalisation and return a displayable 8-bit image array.

    Args:
        image: torch tensor of normalised pixel values with the channel axis
            last, so the per-channel statistics broadcast against it
            (reconstruct passes one channels-last image at a time).
        mean: Optional per-channel mean used for the normalisation. Defaults
            to the module-level ``imagenet_mean``.
        std: Optional per-channel standard deviation. Defaults to the
            module-level ``imagenet_std``.

    Returns:
        A NumPy ``uint8`` array with the same shape as ``image`` and values
        clipped to the range 0..255.
    """
    if mean is None:
        mean = imagenet_mean
    if std is None:
        std = imagenet_std

    # Convert the statistics to tensors explicitly instead of relying on
    # implicit torch/NumPy mixing in the arithmetic below.
    mean_t = torch.as_tensor(mean, device=image.device)
    std_t = torch.as_tensor(std, device=image.device)

    pixels = (image * std_t + mean_t) * 255

    # Cast to uint8 (not int32): that is the dtype image consumers such as
    # PIL expect for 0-255 RGB data. Values are clipped first, so the cast
    # only drops the fractional part.
    return torch.clip(pixels, 0, 255).to(torch.uint8).cpu().numpy()
def reconstruct(img):
    """Run the masked autoencoder on one image and return three views of it.

    Args:
        img: Image array accepted by ``PIL.Image.fromarray`` (the value
            delivered by the image input wired to this function), or
            ``None`` when no image was supplied.

    Returns:
        A list of ``(image_array, caption)`` pairs captioned 'masked',
        'reconstruction' and 'reconstruction + visible', or an empty list
        when ``img`` is ``None``.
    """
    # Nothing uploaded yet: return an empty result instead of crashing
    # inside Image.fromarray.
    if img is None:
        return []

    image = Image.fromarray(img)
    pixel_values = feature_extractor(image, return_tensors='pt').pixel_values

    # Pure inference: no autograd graph is needed for any of these tensors.
    with torch.no_grad():
        outputs = model(pixel_values)

        # Per-patch predictions back to an image, channels last.
        y = model.unpatchify(outputs.logits)
        y = torch.einsum('nchw->nhwc', y).cpu()

        # Expand the per-patch mask to pixel resolution so it can be
        # multiplied with the images.
        mask = outputs.mask
        mask = mask.unsqueeze(-1).repeat(1, 1, model.config.patch_size ** 2 * 3)  # (N, H*W, p*p*3)
        mask = model.unpatchify(mask)  # 1 is removing, 0 is keeping
        mask = torch.einsum('nchw->nhwc', mask).cpu()

        x = torch.einsum('nchw->nhwc', pixel_values).cpu()

    # Input with the masked patches zeroed out.
    im_masked = x * (1 - mask)
    # MAE reconstruction pasted with visible patches.
    im_paste = im_masked + y * mask

    return [
        (prep_image(im_masked[0]), 'masked'),
        (prep_image(y[0]), 'reconstruction'),
        (prep_image(im_paste[0]), 'reconstruction + visible'),
    ]
# Demo UI: an image input next to a trigger button, with a gallery that shows
# the three outputs of reconstruct.
# NOTE(review): nesting was reconstructed from a flattened source (row holds
# the input and button, gallery sits below inside the panel) — confirm layout.
with gr.Blocks() as demo:
    with gr.Column(variant="panel"):
        with gr.Row():
            img = gr.Image(
                label="Input image",
                container=False,
            )
            btn = gr.Button("Reconstruct", scale=0)
        gallery = gr.Gallery(
            label="Reconstructions",
            show_label=False,
            elem_id="gallery",
            columns=[3],
            rows=[1],
            object_fit="contain",
            height='auto',
            container=True,
        )
    # Clicking the button sends the uploaded image through reconstruct and
    # shows the returned (image, caption) pairs in the gallery.
    btn.click(reconstruct, img, gallery)

if __name__ == "__main__":
    demo.launch()
|