Update app.py
Browse files
app.py
CHANGED
@@ -29,9 +29,9 @@ images = [gr.inputs.Image(type="pil"), gr.inputs.Image(type="pil")]
|
|
29 |
text = gr.inputs.Textbox(lines=2, label="Sentence")
|
30 |
answer = gr.outputs.Textbox(label="Predicted answer")
|
31 |
|
32 |
-
example_sentence_1 = "
|
33 |
example_sentence_2 = "One image shows exactly two brown acorns in back-to-back caps on green foliage."
|
34 |
-
examples = [["image1.jpg", "image2.jpg", example_sentence_1], ["
|
35 |
|
36 |
title = "Interactive demo: natural language visual reasoning with ViLT"
|
37 |
description = "Gradio Demo for ViLT (Vision and Language Transformer), fine-tuned on NLVR2. To use it, simply upload a pair of images and type a sentence and click 'submit', or click one of the examples to load them. The model will predict whether the sentence is true or false, based on the 2 images. Read more at the links below."
|
|
|
29 |
text = gr.inputs.Textbox(lines=2, label="Sentence")
|
30 |
answer = gr.outputs.Textbox(label="Predicted answer")
|
31 |
|
32 |
+
example_sentence_1 = "The left image contains twice the number of dogs as the right image, and at least two dogs in total are standing."
|
33 |
example_sentence_2 = "One image shows exactly two brown acorns in back-to-back caps on green foliage."
|
34 |
+
examples = [["image1.jpg", "image2.jpg", example_sentence_1], ["image3.jpg", "image4.jpg", example_sentence_2]]
|
35 |
|
36 |
title = "Interactive demo: natural language visual reasoning with ViLT"
|
37 |
description = "Gradio Demo for ViLT (Vision and Language Transformer), fine-tuned on NLVR2. To use it, simply upload a pair of images and type a sentence and click 'submit', or click one of the examples to load them. The model will predict whether the sentence is true or false, based on the 2 images. Read more at the links below."
|