imthanhlv committed
Commit 4ad89ee
Parent: 547178e

added optional image

Files changed (1): app.py (+6, -1)
app.py CHANGED
@@ -238,11 +238,15 @@ def inference(img, text, is_translate):
     use_beam_search = True
     if is_translate:
         # encode text
+        if text is None:
+            return "No text provided"
         text = clip_model.tokenize([text]).to(device)
         with torch.no_grad():
             prefix = clip_model.encode_text(text).to(device, dtype=torch.float32)
 
     else:
+        if img is None:
+            return "No image"
         image = io.imread(img.name)
         pil_image = PIL.Image.fromarray(image)
         image = preprocess(pil_image).unsqueeze(0).to(device)
@@ -251,6 +255,7 @@ def inference(img, text, is_translate):
         prefix = clip_model.encode_image(image).to(device, dtype=torch.float32)
 
     prefix_embed = model.clip_project(prefix).reshape(1, prefix_length, -1)
+
     if use_beam_search:
         generated_text_prefix = generate_beam(model, tokenizer, embed=prefix_embed)[0]
     else:
@@ -262,7 +267,7 @@ description = "You can translate English sentence to Vietnamese sentence or gene
 examples=[["drug.jpg","", False], ["", "What is your name?", True]]
 
 inputs = [
-    gr.inputs.Image(type="file", label="Image to generate Vietnamese caption"),
+    gr.inputs.Image(type="file", label="Image to generate Vietnamese caption", optional=True),
     gr.inputs.Textbox(lines=2, placeholder="English sentence for translation"),
     gr.inputs.Checkbox()
 ]
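For context, a minimal sketch of how the two halves of this commit fit together: with optional=True, the legacy gr.inputs.Image component passes None to the function when no file is uploaded, which is exactly the case the new guards in inference() handle. The stand-in function inference_stub and the outputs="text" wiring below are illustrative assumptions, not part of this commit.

import gradio as gr

def inference_stub(img, text, is_translate):
    # Same guard structure as the patched inference() above.
    if is_translate:
        if text is None or text == "":
            return "No text provided"
        return f"would translate: {text}"
    if img is None:
        return "No image"
    # With type="file", Gradio hands over a temp-file object exposing .name
    return f"would caption file at: {img.name}"

inputs = [
    gr.inputs.Image(type="file", label="Image to generate Vietnamese caption", optional=True),
    gr.inputs.Textbox(lines=2, placeholder="English sentence for translation"),
    gr.inputs.Checkbox(),
]

# Assumed wiring; title/description/examples from app.py are omitted here.
gr.Interface(fn=inference_stub, inputs=inputs, outputs="text").launch()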