imthanhlv committed
Commit 4ad89ee
Parent: 547178e

added optional image

Files changed (1): app.py (+6, -1)
app.py CHANGED
@@ -238,11 +238,15 @@ def inference(img, text, is_translate):
     use_beam_search = True
     if is_translate:
         # encode text
+        if text is None:
+            return "No text provided"
         text = clip_model.tokenize([text]).to(device)
         with torch.no_grad():
             prefix = clip_model.encode_text(text).to(device, dtype=torch.float32)
 
     else:
+        if img is None:
+            return "No image"
         image = io.imread(img.name)
         pil_image = PIL.Image.fromarray(image)
         image = preprocess(pil_image).unsqueeze(0).to(device)
@@ -251,6 +255,7 @@ def inference(img, text, is_translate):
         prefix = clip_model.encode_image(image).to(device, dtype=torch.float32)
 
     prefix_embed = model.clip_project(prefix).reshape(1, prefix_length, -1)
+
     if use_beam_search:
         generated_text_prefix = generate_beam(model, tokenizer, embed=prefix_embed)[0]
     else:
@@ -262,7 +267,7 @@ description = "You can translate English sentence to Vietnamese sentence or gene
 examples=[["drug.jpg","", False], ["", "What is your name?", True]]
 
 inputs = [
-    gr.inputs.Image(type="file", label="Image to generate Vietnamese caption"),
+    gr.inputs.Image(type="file", label="Image to generate Vietnamese caption", optional=True),
     gr.inputs.Textbox(lines=2, placeholder="English sentence for translation"),
     gr.inputs.Checkbox()
 ]
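For context, a minimal sketch of how the two halves of this commit fit together: with optional=True, the legacy gr.inputs.Image component passes None to the function when no file is uploaded, which is exactly the case the new guards in inference() handle. The stand-in function inference_stub and the outputs="text" wiring below are illustrative assumptions, not part of this commit.

import gradio as gr

def inference_stub(img, text, is_translate):
    # Same guard structure as the patched inference() above.
    if is_translate:
        if text is None or text == "":
            return "No text provided"
        return f"would translate: {text}"
    if img is None:
        return "No image"
    # With type="file", Gradio hands over a temp-file object exposing .name
    return f"would caption file at: {img.name}"

inputs = [
    gr.inputs.Image(type="file", label="Image to generate Vietnamese caption", optional=True),
    gr.inputs.Textbox(lines=2, placeholder="English sentence for translation"),
    gr.inputs.Checkbox(),
]

# Assumed wiring; title/description/examples from app.py are omitted here.
gr.Interface(fn=inference_stub, inputs=inputs, outputs="text").launch()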