tennant committed on
Commit
35d1065
1 Parent(s): 1ceff34
Files changed (2) hide show
  1. app.py +11 -7
  2. cat.jpeg +0 -0
app.py CHANGED
@@ -25,12 +25,12 @@ if torch.cuda.is_available():
25
  model.eval()
26
 
27
  @torch.no_grad()
28
- def visual_recon(x, model):
29
  target = model.patchify(x)
30
  mean = target.mean(dim=-1, keepdim=True)
31
  var = target.var(dim=-1, keepdim=True)
32
 
33
- latent, mask, ids_restore, _ = model.forward_encoder(x, mask_ratio=0.75)
34
  y, _ = model.forward_decoder(latent, ids_restore)
35
  y = y * (var + 1.e-6)**.5 + mean
36
  y = model.unpatchify(y)
@@ -82,7 +82,7 @@ def caption(max_len, latent, model, tokenizer, prefix='a photo of a'):
82
  return ' '.join(words)
83
 
84
 
85
- def gr_caption(x):
86
  imagenet_mean = np.array([0.485, 0.456, 0.406])
87
  imagenet_std = np.array([0.229, 0.224, 0.225])
88
  x = np.array(x) / 255.
@@ -100,8 +100,8 @@ def gr_caption(x):
100
  img = img * imagenet_std + imagenet_mean
101
  return np.clip(img, a_min=0., a_max=1.)
102
 
103
- masked, masked_recon, recon, latent = visual_recon(x, model)
104
- caption_from_model = caption(20, latent, model, tokenizer, )
105
 
106
  masked, masked_recon, recon = map(unnorm_pix, (masked, masked_recon, recon))
107
  return_img = np.concatenate([masked, masked_recon, recon], axis=1)
@@ -111,8 +111,12 @@ def gr_caption(x):
111
  import gradio as gr
112
 
113
  demo = gr.Interface(gr_caption,
114
- inputs=[gr.Image(shape=(224, 224))],
 
 
 
115
  outputs=[gr.Image(shape=(224, 224 * 3)),
116
- 'text'])
 
117
  demo.launch()
118
 
25
  model.eval()
26
 
27
  @torch.no_grad()
28
+ def visual_recon(x, model, mask_ratio=0.75):
29
  target = model.patchify(x)
30
  mean = target.mean(dim=-1, keepdim=True)
31
  var = target.var(dim=-1, keepdim=True)
32
 
33
+ latent, mask, ids_restore, _ = model.forward_encoder(x, mask_ratio=mask_ratio)
34
  y, _ = model.forward_decoder(latent, ids_restore)
35
  y = y * (var + 1.e-6)**.5 + mean
36
  y = model.unpatchify(y)
82
  return ' '.join(words)
83
 
84
 
85
+ def gr_caption(x, mask_ratio=0.75, max_len=20, prefix='a'):
86
  imagenet_mean = np.array([0.485, 0.456, 0.406])
87
  imagenet_std = np.array([0.229, 0.224, 0.225])
88
  x = np.array(x) / 255.
100
  img = img * imagenet_std + imagenet_mean
101
  return np.clip(img, a_min=0., a_max=1.)
102
 
103
+ masked, masked_recon, recon, latent = visual_recon(x, model, mask_ratio=mask_ratio)
104
+ caption_from_model = caption(max_len, latent, model, tokenizer, prefix=prefix)
105
 
106
  masked, masked_recon, recon = map(unnorm_pix, (masked, masked_recon, recon))
107
  return_img = np.concatenate([masked, masked_recon, recon], axis=1)
111
  import gradio as gr
112
 
113
  demo = gr.Interface(gr_caption,
114
+ inputs=[gr.Image(shape=(224, 224)),
115
+ 'number',
116
+ 'number',
117
+ 'text'],
118
  outputs=[gr.Image(shape=(224, 224 * 3)),
119
+ 'text'],
120
+ examples=[['cat.jpeg', 0.75, 20, 'a photo of a']],)
121
  demo.launch()
122
 
cat.jpeg ADDED