imthanhlv committed
Commit e1cba3c
1 Parent(s): 2c2c1dc

add prompt

Files changed (1)
  1. app.py +9 -8
app.py CHANGED
@@ -231,7 +231,7 @@ from transformers import AutoTokenizer
 tokenizer = AutoTokenizer.from_pretrained("imthanhlv/gpt2news")
 
 
-def inference(img, text, is_translation):
+def inference(img, text, is_translation, prompt=None):
     prefix_length = 10
     model = ClipCaptionModel(prefix_length)
     model_path = 'sat_019.pt'
@@ -247,7 +247,7 @@ def inference(img, text, is_translation):
         with torch.no_grad():
             prefix = clip_model.encode_text(text).to(device, dtype=torch.float32)
             prefix_embed = model.clip_project(prefix).reshape(1, prefix_length, -1)
-            generated_text_prefix = generate_beam(model, tokenizer, embed=prefix_embed)[0]
+            generated_text_prefix = generate_beam(model, tokenizer, embed=prefix_embed, prompt=prompt)[0]
 
     else:
         if img is None:
@@ -266,17 +266,18 @@ def inference(img, text, is_translation):
 title = "CLIP Dual encoder"
 description = "You can translate English to Vietnamese or generate Vietnamese caption from image"
 examples=[
-    ["examples/drug.jpg","", False],
-    ["examples/harry.jpeg","", False],
-    ["examples/OldTrafford.jpeg","", False],
-    ["examples/coffee.jpg","", False],
-    ["", "What is your name?", True]
+    ["examples/drug.jpg","", False, "Một bức ảnh về"],
+    ["examples/harry.jpeg","", False, "Một bức ảnh về"],
+    ["examples/OldTrafford.jpeg","", False, "Một bức ảnh về"],
+    ["examples/coffee.jpg","", False, "Một bức ảnh về"],
+    ["", "What is your name?", True, "trong Tiếng Việt có nghĩa là"]
 ]
 
 inputs = [
     gr.inputs.Image(type="file", label="Image to generate Vietnamese caption", optional=True),
     gr.inputs.Textbox(lines=2, placeholder="English sentence for translation"),
-    gr.inputs.Checkbox()
+    gr.inputs.Checkbox(),
+    gr.inputs.Textbox(lines=1, placeholder="Prompt [Optional]", optional=True)
 ]
 
 gr.Interface(
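
For context on what this change does: the new fourth input is forwarded as `prompt` to `generate_beam`, which is expected to seed beam search with the prompt tokens placed after the projected CLIP prefix. A minimal sketch of that idea follows; it is not this repo's actual `generate_beam`, and both the helper name `embed_with_prompt` and the attribute path `model.gpt.transformer.wte` are assumptions (they match the ClipCap-style models this Space resembles, but are unverified here).

import torch

def embed_with_prompt(model, tokenizer, prefix_embed, prompt=None, device="cpu"):
    """Append prompt-token embeddings after the projected CLIP prefix (sketch only)."""
    if prompt is None:
        return prefix_embed
    # Assumption: the caption model wraps a GPT-2 whose token-embedding table
    # is reachable as model.gpt.transformer.wte, as in ClipCap-style code.
    prompt_ids = torch.tensor(tokenizer.encode(prompt), device=device).unsqueeze(0)
    prompt_embed = model.gpt.transformer.wte(prompt_ids)   # (1, prompt_len, hidden)
    return torch.cat((prefix_embed, prompt_embed), dim=1)  # decoding continues after the prompt

Read this way, the image example rows start generation after "Một bức ảnh về" ("A picture of") and the translation example after "trong Tiếng Việt có nghĩa là" ("in Vietnamese means"), which is exactly what the new example columns and the optional prompt textbox supply.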