ixxan committed on
Commit
05c3988
1 Parent(s): 647ec6f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -1
app.py CHANGED
@@ -66,6 +66,7 @@ def vilt_vqa(image, question):
66
  with torch.no_grad():
67
  outputs = vilt_model(**inputs)
68
  logits = outputs.logits
 
69
  idx = logits.argmax(-1).item()
70
  answer = vilt_model.config.id2label[idx]
71
  logger.info("ViLT: " + answer)
@@ -126,12 +127,16 @@ Supported 107 Languages: Afrikaans, Albanian, Amharic, Arabic, Armenian, Azerbai
126
  # Load example images
127
  torch.hub.download_url_to_file('http://farm3.staticflickr.com/2710/4520550856_7a9f9ea59d_z.jpg', 'apple.jpg')
128
  torch.hub.download_url_to_file('http://images.cocodataset.org/val2017/000000039769.jpg', 'cats.jpg')
 
129
 
130
  # Define home page variables
131
  image = gr.Image(type="pil")
132
  question = gr.Textbox(label="Question")
133
  answer = gr.Textbox(label="Predicted answer")
134
- examples = [["apple.jpg", "In French, what is in my hand?"], ["cats.jpg", "What are the cats doing, in German?"], ["apple.jpg", "What color is this? Answer in Uyghur."], ["cats.jpg", "How many cats are here?"]]
 
 
 
135
 
136
  demo = gr.Interface(fn=vqa_main,
137
  inputs=[image, question],
 
66
  with torch.no_grad():
67
  outputs = vilt_model(**inputs)
68
  logits = outputs.logits
69
+ logger.info("ViLT logits: " + str(logits))
70
  idx = logits.argmax(-1).item()
71
  answer = vilt_model.config.id2label[idx]
72
  logger.info("ViLT: " + answer)
 
127
  # Load example images
128
  torch.hub.download_url_to_file('http://farm3.staticflickr.com/2710/4520550856_7a9f9ea59d_z.jpg', 'apple.jpg')
129
  torch.hub.download_url_to_file('http://images.cocodataset.org/val2017/000000039769.jpg', 'cats.jpg')
130
+ torch.hub.download_url_to_file('https://media.istockphoto.com/id/1174602891/photo/two-monkeys-mom-and-cub-eat-bananas.jpg?s=612x612&w=0&k=20&c=r7VXi9d1wHhyq3iAk9D2Z3yTZiOJMlLNtjdVRBEjG7g=', 'monkey.jpg')
131
 
132
  # Define home page variables
133
  image = gr.Image(type="pil")
134
  question = gr.Textbox(label="Question")
135
  answer = gr.Textbox(label="Predicted answer")
136
+ examples = [
137
+ ["monkey.jpg", "Tell me what is going on in Korean."],
138
+ ["apple.jpg", "What color is this? Answer in Uyghur."],
139
+ ["cats.jpg", "How many cats are here?"]]
140
 
141
  demo = gr.Interface(fn=vqa_main,
142
  inputs=[image, question],