imthanhlv committed
Commit e1cba3c
1 Parent(s): 2c2c1dc

add prompt

Files changed (1)
  1. app.py +9 -8
app.py CHANGED
@@ -231,7 +231,7 @@ from transformers import AutoTokenizer
 tokenizer = AutoTokenizer.from_pretrained("imthanhlv/gpt2news")
 
 
-def inference(img, text, is_translation):
+def inference(img, text, is_translation, prompt=None):
     prefix_length = 10
     model = ClipCaptionModel(prefix_length)
     model_path = 'sat_019.pt'
@@ -247,7 +247,7 @@ def inference(img, text, is_translation):
         with torch.no_grad():
             prefix = clip_model.encode_text(text).to(device, dtype=torch.float32)
             prefix_embed = model.clip_project(prefix).reshape(1, prefix_length, -1)
-            generated_text_prefix = generate_beam(model, tokenizer, embed=prefix_embed)[0]
+            generated_text_prefix = generate_beam(model, tokenizer, embed=prefix_embed, prompt=prompt)[0]
 
     else:
         if img is None:
@@ -266,17 +266,18 @@ def inference(img, text, is_translation):
 title = "CLIP Dual encoder"
 description = "You can translate English to Vietnamese or generate Vietnamese caption from image"
 examples=[
-    ["examples/drug.jpg","", False],
-    ["examples/harry.jpeg","", False],
-    ["examples/OldTrafford.jpeg","", False],
-    ["examples/coffee.jpg","", False],
-    ["", "What is your name?", True]
+    ["examples/drug.jpg","", False, "Một bức ảnh về"],
+    ["examples/harry.jpeg","", False, "Một bức ảnh về"],
+    ["examples/OldTrafford.jpeg","", False, "Một bức ảnh về"],
+    ["examples/coffee.jpg","", False, "Một bức ảnh về"],
+    ["", "What is your name?", True, "trong Tiếng Việt có nghĩa là"]
 ]
 
 inputs = [
     gr.inputs.Image(type="file", label="Image to generate Vietnamese caption", optional=True),
     gr.inputs.Textbox(lines=2, placeholder="English sentence for translation"),
-    gr.inputs.Checkbox()
+    gr.inputs.Checkbox(),
+    gr.inputs.Textbox(lines=1, placeholder="Prompt [Optional]", optional=True)
 ]
 
 gr.Interface(
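
For context on what this change does: the new fourth input is forwarded as `prompt` to `generate_beam`, which is expected to seed beam search with the prompt tokens placed after the projected CLIP prefix. A minimal sketch of that idea follows; it is not this repo's actual `generate_beam`, and both the helper name `embed_with_prompt` and the attribute path `model.gpt.transformer.wte` are assumptions (they match the ClipCap-style models this Space resembles, but are unverified here).

import torch

def embed_with_prompt(model, tokenizer, prefix_embed, prompt=None, device="cpu"):
    """Append prompt-token embeddings after the projected CLIP prefix (sketch only)."""
    if prompt is None:
        return prefix_embed
    # Assumption: the caption model wraps a GPT-2 whose token-embedding table
    # is reachable as model.gpt.transformer.wte, as in ClipCap-style code.
    prompt_ids = torch.tensor(tokenizer.encode(prompt), device=device).unsqueeze(0)
    prompt_embed = model.gpt.transformer.wte(prompt_ids)   # (1, prompt_len, hidden)
    return torch.cat((prefix_embed, prompt_embed), dim=1)  # decoding continues after the prompt

Read this way, the image example rows start generation after "Một bức ảnh về" ("A picture of") and the translation example after "trong Tiếng Việt có nghĩa là" ("in Vietnamese means"), which is exactly what the new example columns and the optional prompt textbox supply.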