xj
committed on
Commit
•
6f5c44e
1
Parent(s):
5cd9ad6
[feat] add benchmark results and huge ckpt/logs
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitignore +1 -0
- benchmark/guesswhat_grounding/huge-alt-1/20230516-132049.json +0 -0
- benchmark/guesswhat_grounding/huge-main-0.7630/20230512-185344.json +0 -0
- benchmark/guesswhat_grounding/huge-main-0.7740/20230512-185556.json +0 -0
- benchmark/guesswhat_grounding/invig-large/20230514-044035.json +0 -0
- benchmark/guesswhat_grounding/invig-large/20230516-125105.json +0 -0
- benchmark/guesswhat_grounding/invig-large/20230516-183341.json +0 -0
- benchmark/guesswhat_grounding/invig-m-dialog/20230513-033458.json +0 -0
- benchmark/guesswhat_grounding/invig-m-grounding/20230514-111541.json +0 -0
- benchmark/guesswhat_grounding/large-invig+guesswhat/20230516-163011.json +0 -0
- benchmark/guesswhat_grounding/large-o-grounding+guesswhat/20230512-175038.json +0 -0
- benchmark/guesswhat_grounding/large-o-guesswhat/20230512-173248.json +0 -0
- benchmark/guesswhat_grounding/large-o-invig/20230515-212817.json +0 -0
- benchmark/guesswhat_grounding_end2end/huge-alt-1/20230516-130735.json +0 -0
- benchmark/guesswhat_grounding_end2end/huge-main-0.7630/20230513-011551.json +0 -0
- benchmark/guesswhat_grounding_end2end/huge-main-0.7740/20230513-012209.json +0 -0
- benchmark/guesswhat_grounding_end2end/invig-large/20230514-044110.json +0 -0
- benchmark/guesswhat_grounding_end2end/invig-large/20230516-183512.json +0 -0
- benchmark/guesswhat_grounding_end2end/invig-large/20230516-183613.json +0 -0
- benchmark/guesswhat_grounding_end2end/invig-large/20230516-183750.json +0 -0
- benchmark/guesswhat_grounding_end2end/invig-m-dialog/20230513-034220.json +0 -0
- benchmark/guesswhat_grounding_end2end/invig-m-grounding/20230514-074730.json +0 -0
- benchmark/guesswhat_grounding_end2end/large-invig+guesswhat/20230516-163838.json +0 -0
- benchmark/guesswhat_grounding_end2end/large-invig+guesswhat/20230516-164712.json +0 -0
- benchmark/guesswhat_grounding_end2end/large-invig+guesswhat/20230516-165115.json +0 -0
- benchmark/guesswhat_grounding_end2end/large-o-grounding+guesswhat/20230513-012017.json +0 -0
- benchmark/guesswhat_grounding_end2end/large-o-guesswhat/20230513-011852.json +0 -0
- benchmark/guesswhat_grounding_end2end/large-o-invig/20230515-213019.json +0 -0
- benchmark/guesswhat_oracle/huge-main-0.7630/20230512-213406.json +0 -0
- benchmark/guesswhat_oracle/invig-large/20230514-043946.json +0 -0
- benchmark/guesswhat_oracle/invig-large/20230516-183935.json +0 -0
- benchmark/guesswhat_oracle/invig-m-dialog/20230513-034055.json +0 -0
- benchmark/guesswhat_oracle/invig-m-grounding/20230514-115536.json +0 -0
- benchmark/guesswhat_oracle/large-invig+guesswhat/20230516-163305.json +0 -0
- benchmark/guesswhat_oracle/large-o-grounding+guesswhat/20230512-164938.json +0 -0
- benchmark/guesswhat_oracle/large-o-guesswhat/20230512-160114.json +17 -0
- benchmark/guesswhat_oracle/large-o-guesswhat/20230512-175119.json +0 -0
- benchmark/guesswhat_oracle/large-o-invig/20230515-213052.json +0 -0
- benchmark/guesswhat_oracle/xvlm/20230517-022316.json +4 -0
- benchmark/human_eval/ours_20230515-194448.json +130 -0
- benchmark/human_eval/xvlm_20230515-192335.json +130 -0
- benchmark/invig_grounding/huge-main-0.7630/20230512-181257.json +0 -0
- benchmark/invig_grounding/huge-main-0.7740/20230512-182551.json +0 -0
- benchmark/invig_grounding/invig-large/20230514-043824.json +0 -0
- benchmark/invig_grounding/invig-large/20230516-090623.json +0 -0
- benchmark/invig_grounding/invig-large/20230516-091634.json +0 -0
- benchmark/invig_grounding/invig-large/20230516-113956.json +0 -0
- benchmark/invig_grounding/invig-large/20230516-141445.json +0 -0
- benchmark/invig_grounding/invig-large/20230516-165140.json +0 -0
- benchmark/invig_grounding/invig-m-dialog/20230513-033645.json +0 -0
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
**/.ipynb_checkpoints
|
benchmark/guesswhat_grounding/huge-alt-1/20230516-132049.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
benchmark/guesswhat_grounding/huge-main-0.7630/20230512-185344.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
benchmark/guesswhat_grounding/huge-main-0.7740/20230512-185556.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
benchmark/guesswhat_grounding/invig-large/20230514-044035.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
benchmark/guesswhat_grounding/invig-large/20230516-125105.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
benchmark/guesswhat_grounding/invig-large/20230516-183341.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
benchmark/guesswhat_grounding/invig-m-dialog/20230513-033458.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
benchmark/guesswhat_grounding/invig-m-grounding/20230514-111541.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
benchmark/guesswhat_grounding/large-invig+guesswhat/20230516-163011.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
benchmark/guesswhat_grounding/large-o-grounding+guesswhat/20230512-175038.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
benchmark/guesswhat_grounding/large-o-guesswhat/20230512-173248.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
benchmark/guesswhat_grounding/large-o-invig/20230515-212817.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
benchmark/guesswhat_grounding_end2end/huge-alt-1/20230516-130735.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
benchmark/guesswhat_grounding_end2end/huge-main-0.7630/20230513-011551.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
benchmark/guesswhat_grounding_end2end/huge-main-0.7740/20230513-012209.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
benchmark/guesswhat_grounding_end2end/invig-large/20230514-044110.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
benchmark/guesswhat_grounding_end2end/invig-large/20230516-183512.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
benchmark/guesswhat_grounding_end2end/invig-large/20230516-183613.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
benchmark/guesswhat_grounding_end2end/invig-large/20230516-183750.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
benchmark/guesswhat_grounding_end2end/invig-m-dialog/20230513-034220.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
benchmark/guesswhat_grounding_end2end/invig-m-grounding/20230514-074730.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
benchmark/guesswhat_grounding_end2end/large-invig+guesswhat/20230516-163838.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
benchmark/guesswhat_grounding_end2end/large-invig+guesswhat/20230516-164712.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
benchmark/guesswhat_grounding_end2end/large-invig+guesswhat/20230516-165115.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
benchmark/guesswhat_grounding_end2end/large-o-grounding+guesswhat/20230513-012017.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
benchmark/guesswhat_grounding_end2end/large-o-guesswhat/20230513-011852.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
benchmark/guesswhat_grounding_end2end/large-o-invig/20230515-213019.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
benchmark/guesswhat_oracle/huge-main-0.7630/20230512-213406.json
ADDED
Binary file (92 MB). View file
|
|
benchmark/guesswhat_oracle/invig-large/20230514-043946.json
ADDED
Binary file (92 MB). View file
|
|
benchmark/guesswhat_oracle/invig-large/20230516-183935.json
ADDED
Binary file (92 MB). View file
|
|
benchmark/guesswhat_oracle/invig-m-dialog/20230513-034055.json
ADDED
Binary file (92 MB). View file
|
|
benchmark/guesswhat_oracle/invig-m-grounding/20230514-115536.json
ADDED
Binary file (92 MB). View file
|
|
benchmark/guesswhat_oracle/large-invig+guesswhat/20230516-163305.json
ADDED
Binary file (92 MB). View file
|
|
benchmark/guesswhat_oracle/large-o-grounding+guesswhat/20230512-164938.json
ADDED
Binary file (92 MB). View file
|
|
benchmark/guesswhat_oracle/large-o-guesswhat/20230512-160114.json
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
["index", "text_input", "text_output", "text_gen"]
|
2 |
+
[0, " \n#instruction: answer the question based on the region with yes or no.\n#context: \"human: is it a guy?\"\n#region: <bin_325> <bin_574> <bin_689> <bin_849>", " no.", " no."]
|
3 |
+
[1, " \n#instruction: answer the question based on the region with yes or no.\n#context: \"human: is it a guy? agent: no. human: is it a racket?\"\n#region: <bin_325> <bin_574> <bin_689> <bin_849>", " yes.", " yes."]
|
4 |
+
[2, " \n#instruction: answer the question based on the region with yes or no.\n#context: \"human: is it electronic?\"\n#region: <bin_781> <bin_309> <bin_988> <bin_677>", " no.", " no."]
|
5 |
+
[3, " \n#instruction: answer the question based on the region with yes or no.\n#context: \"human: is it electronic? agent: no. human: is it a person?\"\n#region: <bin_781> <bin_309> <bin_988> <bin_677>", " yes.", " yes."]
|
6 |
+
[4, " \n#instruction: answer the question based on the region with yes or no.\n#context: \"human: is it considered food?\"\n#region: <bin_176> <bin_135> <bin_452> <bin_516>", " yes.", " yes."]
|
7 |
+
[5, " \n#instruction: answer the question based on the region with yes or no.\n#context: \"human: is it considered food? agent: yes. human: is it yellow?\"\n#region: <bin_176> <bin_135> <bin_452> <bin_516>", " no.", " no."]
|
8 |
+
[6, " \n#instruction: answer the question based on the region with yes or no.\n#context: \"human: is it in the sky?\"\n#region: <bin_837> <bin_877> <bin_959> <bin_962>", " no.", " no."]
|
9 |
+
[7, " \n#instruction: answer the question based on the region with yes or no.\n#context: \"human: is it in the sky? agent: no. human: is it the umbrella?\"\n#region: <bin_837> <bin_877> <bin_959> <bin_962>", " no.", " no."]
|
10 |
+
[8, " \n#instruction: answer the question based on the region with yes or no.\n#context: \"human: is it in the sky? agent: no. human: is it the umbrella? agent: no. human: is it the ocean?\"\n#region: <bin_837> <bin_877> <bin_959> <bin_962>", " no.", " no."]
|
11 |
+
[9, " \n#instruction: answer the question based on the region with yes or no.\n#context: \"human: is it food?\"\n#region: <bin_589> <bin_434> <bin_998> <bin_779>", " no.", " no."]
|
12 |
+
[10, " \n#instruction: answer the question based on the region with yes or no.\n#context: \"human: is it food? agent: no. human: is it a drink?\"\n#region: <bin_589> <bin_434> <bin_998> <bin_779>", " no.", " no."]
|
13 |
+
[11, " \n#instruction: answer the question based on the region with yes or no.\n#context: \"human: is it the boat?\"\n#region: <bin_499> <bin_297> <bin_525> <bin_331>", " no.", " no."]
|
14 |
+
[12, " \n#instruction: answer the question based on the region with yes or no.\n#context: \"human: is it the boat? agent: no. human: is it a goat?\"\n#region: <bin_499> <bin_297> <bin_525> <bin_331>", " no.", " no."]
|
15 |
+
[13, " \n#instruction: answer the question based on the region with yes or no.\n#context: \"human: is it the boat? agent: no. human: is it a goat? agent: no. human: is it a person?\"\n#region: <bin_499> <bin_297> <bin_525> <bin_331>", " yes.", " yes."]
|
16 |
+
[14, " \n#instruction: answer the question based on the region with yes or no.\n#context: \"human: is it the boat? agent: no. human: is it a goat? agent: no. human: is it a person? agent: yes. human: is it on the left?\"\n#region: <bin_499> <bin_297> <bin_525> <bin_331>", " no.", " no."]
|
17 |
+
[15, " \n#instruction: answer the question based on the region with yes or no.\n#context: \"human: is it the boat? agent: no. human: is it a goat? agent: no. human: is it a person? agent: yes. human: is it on the left? agent: no. human: is it black?\"\n#region: <bin_499> <bin_297> <bin_525> <bin_331>", " no.", " no."]
|
benchmark/guesswhat_oracle/large-o-guesswhat/20230512-175119.json
ADDED
Binary file (92 MB). View file
|
|
benchmark/guesswhat_oracle/large-o-invig/20230515-213052.json
ADDED
Binary file (92.3 MB). View file
|
|
benchmark/guesswhat_oracle/xvlm/20230517-022316.json
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
["index", "text_input", "text_output_gt", "text_gen", "image_path"]
|
2 |
+
[1, [["", "is it a guy?"]], "no.", "no", "coco/train2014/COCO_train2014_000000579680.jpg"]
|
3 |
+
[2, [["", "is it a guy?"], ["No.", "is it a racket?"]], "yes.", "yes", "coco/train2014/COCO_train2014_000000579680.jpg"]
|
4 |
+
[3, [["", "is it a guy?"], ["No.", "is it a racket?"], ["Yes.", "is it on the right?"]], "no.", "no", "coco/train2014/COCO_train2014_000000579680.jpg"]
|
benchmark/human_eval/ours_20230515-194448.json
ADDED
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[13, [["ride the bikes on the bottom left.", "which one do you want to ride?"], ["the closest one.", "is it the first one from the left?"], ["yeah.", "[0, 432, 96, 479]"]], [35, 422, 108, 479], [0, 432, 96, 479], "openimages_v1.2/imgs/2b240f8c7b095972_Bicycle_Bicycle wheel_Land vehicle_House_Vehicle_Building_Tire_4.jpg"]
|
2 |
+
[51, [["catch the duck on the far right.", "is it the first one from the right?"], ["yes.", "[499, 175, 611, 240]"]], [500, 172, 618, 238], [499, 175, 611, 240], "openimages_v1.2/imgs/0391aa378a4094b3_Bird_Duck_12.jpg"]
|
3 |
+
[54, [["mark the tallest penguin.", "is it the one in the middle?"], ["yes.", "[188, 115, 415, 545]"]], [187, 115, 420, 548], [188, 115, 415, 545], "openimages_v1.2/imgs/5cc33103845e5519_Bird_Penguin_Animal_4.jpg"]
|
4 |
+
[209, [["This tea set is so pretty.", "which one?"], ["It is in the middle.", "is that the teapot?"], ["yes", "[167, 39, 381, 217]"]], [168, 39, 378, 215], [167, 39, 381, 217], "openimages_v1.2/imgs/76005037c3e150f7_Teapot_Snack_Dessert_Food_Baked goods_Pastry_6.jpg"]
|
5 |
+
[228, [["I want to know its name.", "which one are you talking about?"], ["The bigger one.", "is it the first one from the right?"], ["yes", "[301, 0, 508, 523]"]], [0, 0, 509, 497], [301, 0, 508, 523], "openimages_v1.2/imgs/31d3d97132fbb2f2_Bird_Penguin_Animal_3.jpg"]
|
6 |
+
[255, [["It does not know if it is made by machine.", "is it a cookie?"], ["yes", "is it in the front row?"], ["seconed row", "is it on the left?"], ["yes", "is it the first one on the left?"], ["no", "is it the second one on the left?"], ["yes", "[0, 244, 202, 329]"]], [0, 241, 202, 330], [0, 244, 202, 329], "openimages_v1.2/imgs/467d1abed2e32295_Snack_Cookie_Food_Baked goods_17.jpg"]
|
7 |
+
[285, [["It does not look very strong", "is it a rooster?"], ["yes", "is it the rooster on the right?"], ["no, it\u2019s in the middle.", "[228, 213, 400, 400]"]], [231, 210, 402, 400], [228, 213, 400, 400], "openimages_v1.2/imgs/1797f4e85e52bc5a_Building_Bird_Chicken_Animal_4.jpg"]
|
8 |
+
[318, [["Give me that coffee cup.", "which one do you want?"], ["white one", "is it the one on the plate?"], ["yes", "[67, 9, 254, 165]"]], [68, 6, 252, 167], [67, 9, 254, 165], "openimages_v1.2/imgs/67c02c17c9a759c8_Drink_Coffee cup_Food_Saucer_Tableware_Vegetable_Salad_Broccoli_2.jpg"]
|
9 |
+
[326, [["Give me that action figures", "which one do you want"], ["It is in the middle.", "is it the first one from the left?"], ["no", "is it the second one?"], ["yes", "[343, 352, 394, 410]"]], [234, 228, 275, 288], [343, 352, 394, 410], "openimages_v1.2/imgs/bcc0fa02fdce3af0_Toy_Land vehicle_Vehicle_7.jpg"]
|
10 |
+
[333, [["This flower is interesting", "which one?"], ["The purple one", "is it the biggest one?"], ["no", "is it white?"], ["no", "is it next to the big one?"], ["yes", "[399, 259, 469, 348]"]], [397, 261, 468, 345], [399, 259, 469, 348], "openimages_v1.2/imgs/921e76975c6f820c_Plant_Rose_Flower_3.jpg"]
|
11 |
+
[393, [["The bird is cute", "which one?"], ["Bottom of the page", "what color is it?"], ["black and white", "is it on the right?"], ["no", "is it on the left?"], ["no", "is it in the middle?"], ["yes", "[226, 181, 315, 293]"]], [222, 279, 301, 394], [226, 181, 315, 293], "openimages_v1.2/imgs/ef4a0d60192428c1_Bird_Penguin_Animal_13.jpg"]
|
12 |
+
[407, [["It is so cool", "is it a train?"], ["yes", "is it the one in the middle?"], ["yes", "[94, 133, 608, 322]"]], [85, 121, 601, 337], [94, 133, 608, 322], "openimages_v1.2/imgs/91826655a7d34abd_Land vehicle_Train_Vehicle_Wheel_3.jpg"]
|
13 |
+
[429, [["It is really strong.", "which one are you talking about?"], ["the left one", "is it the first one from the left?"], ["yes", "[26, 184, 288, 361]"]], [28, 188, 282, 364], [26, 184, 288, 361], "openimages_v1.2/imgs/02c91a8515336047_Cattle_10.jpg"]
|
14 |
+
[440, [["What is the brand of this computer", "what brand is it"], ["what", "is it a laptop?"], ["yes", "is it the one on the left?"], ["yes", "[0, 0, 229, 257]"]], [0, 0, 230, 256], [0, 0, 229, 257], "openimages_v1.2/imgs/9b031924d17fc0d4_Beer_Laptop_Drink_Office supplies_Bottle_Wine_Book_2.jpg"]
|
15 |
+
[447, [["It\u2019s samll", "is it the front one?"], ["yes", "[321, 152, 602, 356]"]], [319, 156, 550, 331], [321, 152, 602, 356], "openimages_v1.2/imgs/008ede8abd811226_Squirrel_Animal_3.jpg"]
|
16 |
+
[451, [["It is in full bloom", "which one are you talking about"], ["the middle one", "is it the top one?"], ["no", "is it the bottom one?"], ["no", "is it the middle one?"], ["yes", "[268, 317, 352, 399]"]], [247, 244, 297, 333], [268, 317, 352, 399], "openimages_v1.2/imgs/f7f3a10c836226b2_Plant_Flower_Common sunflower_7.jpg"]
|
17 |
+
[457, [["It is strange", "is it a toy?"], ["yes", "is it yellow?"], ["no, the green one", "[76, 33, 258, 122]"]], [29, 348, 266, 440], [76, 33, 258, 122], "openimages_v1.2/imgs/15f3aeb130d0d03d_Toy_Land vehicle_Vehicle_Wheel_8.jpg"]
|
18 |
+
[466, [["What kind of animal is this?", "which one?"], ["the seconed one", "what color is it?"], ["black and white", "is it the second one from the right?"], ["yes", "[262, 27, 367, 307]"]], [277, 30, 354, 219], [262, 27, 367, 307], "openimages_v1.2/imgs/7fe6972c3f562011_Horse_Cattle_Animal_6.jpg"]
|
19 |
+
[476, [["Please pick one flower for me.", "which one do you want?"], ["Second from left.", "is it the first one from the left?"], ["No. It is the second from the left.", "[149, 175, 188, 207]"]], [154, 175, 190, 206], [149, 175, 188, 207], "openimages_v1.2/imgs/46ceefe0e0ecff39_Chair_Houseplant_Table_Plant_Flower_Furniture_Flowerpot_1.jpg"]
|
20 |
+
[501, [["Pass me the first carton.", "is it the first carton from the left?"], ["Yes.", "is it the first carton from the right?"], ["No. It is the first carton from the left.", "[58, 252, 329, 425]"]], [62, 250, 327, 425], [58, 252, 329, 425], "openimages_v1.2/imgs/2a1552207182c802_Box_4.jpg"]
|
21 |
+
[541, [["Pass me the plant pot.", "which pot are you talking about?"], ["The one in the middle.", "is it the one closest to us?"], ["Yes, it is.", "[276, 343, 346, 399]"]], [276, 344, 348, 401], [276, 343, 346, 399], "openimages_v1.2/imgs/b539ca908aece624_Fountain_Houseplant_Plant_Tree_Flower_Flowerpot_5.jpg"]
|
22 |
+
[546, [["Pass me the axe.", "which one do you want?"], ["I want the first one from the left.", "is it the first one from the left?"], ["Yes, it is.", "[137, 167, 249, 513]"]], [145, 169, 250, 514], [137, 167, 249, 513], "openimages_v1.2/imgs/5c7e5c0ac6b63b5e_Tool_Axe_4.jpg"]
|
23 |
+
[563, [["The flag is beautiful.", "which one are you talking about?"], ["The one in red, white and blue.", "is it the first one from the left?"], ["Yes, it is.", "[94, 171, 315, 307]"]], [97, 177, 315, 304], [94, 171, 315, 307], "openimages_v1.2/imgs/5d170945d5549dea_Flag_3.jpg"]
|
24 |
+
[569, [["The flag is beautiful.", "which one are you talking about?"], ["The one farthest away from us.", "is it the first one from the right?"], ["Yes, it is.", "[380, 228, 499, 343]"]], [378, 233, 493, 343], [380, 228, 499, 343], "openimages_v1.2/imgs/3bd5795f8469318e_Flag_3.jpg"]
|
25 |
+
[592, [["The polar bear is so cute.", "which one are you talking about?"], ["The sleeping one.", "is it the one on the right?"], ["No. It is the one on the left.", "is it the first one from the right?"], ["No. It is the first one from the left.", "[12, 208, 176, 256]"]], [38, 206, 183, 254], [12, 208, 176, 256], "openimages_v1.2/imgs/392f8102414cc81e_Polar bear_4.jpg"]
|
26 |
+
[592, [["The polar bear is so cute.", "which one are you talking about?"], ["The sleeping one.", "is it the one on the right?"], ["No. It is the one on the left.", "is it the first one from the right?"], ["No. It is the first one from the left.", "[12, 208, 176, 256]"]], [38, 206, 183, 254], [12, 208, 176, 256], "openimages_v1.2/imgs/392f8102414cc81e_Polar bear_4.jpg"]
|
27 |
+
[600, [["Pass me the cherry tomato.", "which one do you want?"], ["The one in the middle.", "is it the one next to the cucumber?"], ["No. It is the one on the cabbage.", "[128, 164, 256, 299]"]], [131, 168, 255, 290], [128, 164, 256, 299], "openimages_v1.2/imgs/0353b5f2eb7bd145_Cucumber_Zucchini_Food_Tomato_Vegetable_Cabbage_Salad_13.jpg"]
|
28 |
+
[689, [["The cake looks really nice.", "which one are you talking about?"], ["The red cake closest to us.", "is it the first one from the right?"], ["No. It is the first one from the left.", "[135, 446, 285, 610]"]], [128, 445, 288, 608], [135, 446, 285, 610], "openimages_v1.2/imgs/ffcce9c7edd86436_Dessert_Food_Baked goods_Cake_Pastry_18.jpg"]
|
29 |
+
[696, [["Pass me the vass.", "which one are you talking about?"], ["The white vass with a red flower on it.", "there are two white vases with red flowers, which one is it?"], ["It is the one in the left.", "is it the first one from the right?"], ["No. It is the first one from the left.", "[268, 353, 344, 478]"]], [267, 353, 343, 478], [268, 353, 344, 478], "openimages_v1.2/imgs/a0abaa5bf4b660d8_Teapot_Vase_Tableware_Jug_Furniture_8.jpg"]
|
30 |
+
[704, [["The latte art is so beautiful.", "which one?"], ["The one in the right.", "is there a spoon next to it?"], ["Yes, it is.", "[264, 33, 409, 195]"]], [273, 41, 400, 120], [264, 33, 409, 195], "openimages_v1.2/imgs/0e4bf084d6b8b360_Croissant_Coffee_Saucer_4.jpg"]
|
31 |
+
[727, [["Pass me the plant pot.", "which pot do you want?"], ["The one with purple flower.", "is it the first one from the right?"], ["Yes, it is.", "[508, 368, 547, 435]"]], [510, 371, 547, 436], [508, 368, 547, 435], "openimages_v1.2/imgs/4bd65f1121e94380_Chair_Houseplant_House_Plant_Flower_Furniture_Flowerpot_5.jpg"]
|
32 |
+
[735, [["The dustbin is full.", "which one?"], ["The one in the left.", "what color is it?"], ["It is green.", "is it the first one from the left?"], ["Yes, it is.", "[48, 274, 199, 464]"]], [49, 278, 198, 462], [48, 274, 199, 464], "openimages_v1.2/imgs/7ee6b53585ead951_Waste container_9.jpg"]
|
33 |
+
[740, [["I want to take a boat.", "which one do you want?"], ["The one in yellow and blue.", "is it the first one from the left?"], ["Yes, it is.", "[96, 171, 502, 304]"]], [93, 157, 521, 309], [96, 171, 502, 304], "openimages_v1.2/imgs/c22e23202c31e85e_Boat_Barge_Watercraft_Vehicle_4.jpg"]
|
34 |
+
[747, [["The balloon is flying so high.", "which one are you talking about?"], ["The first one from the left.", "is it the first one from the left?"], ["Yes, it is.", "[26, 32, 186, 215]"]], [25, 33, 187, 214], [26, 32, 186, 215], "openimages_v1.2/imgs/23a0eb2232fe9e66_Balloon_Vehicle_4.jpg"]
|
35 |
+
[758, [["can you pass me the tree in middle?", "which one are you talking about?"], ["it is in the middle.", "is it the tallest one?"], ["yes.", "[210, 206, 325, 357]"]], [191, 210, 338, 363], [210, 206, 325, 357], "openimages_v1.2/imgs/f2246253917f75b5_Christmas tree_Tree_4.jpg"]
|
36 |
+
[758, [["can you pass me the tree in middle?", "which one are you talking about?"], ["it is in the middle.", "is it the tallest one?"], ["yes.", "[210, 206, 325, 357]"]], [191, 210, 338, 363], [210, 206, 325, 357], "openimages_v1.2/imgs/f2246253917f75b5_Christmas tree_Tree_4.jpg"]
|
37 |
+
[778, [["the fish is so cute.", "which one are you talking about?"], ["it is in the middle.", "is it yellow?"], ["yes.", "is it the first one from the right?"], ["no, it is in the middle.", "[99, 164, 356, 292]"]], [97, 161, 357, 292], [99, 164, 356, 292], "openimages_v1.2/imgs/5d97d9b7b9995160_Goldfish_Fish_Animal_14.jpg"]
|
38 |
+
[778, [["the fish is so cute.", "which one are you talking about?"], ["it is in the middle.", "is it yellow?"], ["yes.", "is it the first one from the right?"], ["no, it is in the middle.", "[99, 164, 356, 292]"]], [97, 161, 357, 292], [99, 164, 356, 292], "openimages_v1.2/imgs/5d97d9b7b9995160_Goldfish_Fish_Animal_14.jpg"]
|
39 |
+
[859, [["Can you pass me the biggest pumpkin?", "is it the one on the left?"], ["yes.", "[4, 44, 245, 268]"]], [7, 44, 245, 267], [4, 44, 245, 268], "openimages_v1.2/imgs/6708c917862153ec_Squash (Plant)_Pumpkin_Fruit_4.jpg"]
|
40 |
+
[859, [["Can you pass me the biggest pumpkin?", "is it the one on the left?"], ["yes.", "[4, 44, 245, 268]"]], [7, 44, 245, 267], [4, 44, 245, 268], "openimages_v1.2/imgs/6708c917862153ec_Squash (Plant)_Pumpkin_Fruit_4.jpg"]
|
41 |
+
[864, [["Can you pass me the taxi?", "which one are you talking about?"], ["it is in the middle.", "is it the first one from the right?"], ["no, it is in the middle.", "is it the first one from the left?"], ["no, is it the first second one from the left?", "[19, 196, 259, 367]"], ["no, it is in the middle.", "[200, 197, 371, 324]"]], [199, 195, 367, 331], [200, 197, 371, 324], "openimages_v1.2/imgs/13399741ea286868_Vehicle_Car_Taxi_4.jpg"]
|
42 |
+
[864, [["Can you pass me the taxi?", "which one are you talking about?"], ["it is in the middle.", "is it the first one from the right?"], ["no, it is in the middle.", "is it the first one from the left?"], ["no, is it the first second one from the left?", "[19, 196, 259, 367]"], ["no, it is in the middle.", "[200, 197, 371, 324]"]], [199, 195, 367, 331], [200, 197, 371, 324], "openimages_v1.2/imgs/13399741ea286868_Vehicle_Car_Taxi_4.jpg"]
|
43 |
+
[865, [["the cake is so cute.", "which one are you talking about?"], ["it is in top middle.", "is it brown?"], ["yes.", "is it the first one from the right?"], ["No, it\u2019s in the middle of the top.", "[339, 15, 469, 102]"], ["the left beside it.", "[342, 46, 469, 101]"], ["the left beside it.", "[342, 46, 469, 101]"], ["And to the left.", "[343, 44, 469, 101]"]], [266, 55, 360, 123], [343, 44, 469, 101], "openimages_v1.2/imgs/008b40159ed22bdf_Food_Baked goods_Pastry_3.jpg"]
|
44 |
+
[865, [["the cake is so cute.", "which one are you talking about?"], ["it is in top middle.", "is it brown?"], ["yes.", "is it the first one from the right?"], ["No, it\u2019s in the middle of the top.", "[339, 15, 469, 102]"], ["the left beside it.", "[342, 46, 469, 101]"], ["the left beside it.", "[342, 46, 469, 101]"], ["And to the left.", "[343, 44, 469, 101]"]], [266, 55, 360, 123], [343, 44, 469, 101], "openimages_v1.2/imgs/008b40159ed22bdf_Food_Baked goods_Pastry_3.jpg"]
|
45 |
+
[865, [["the cake is so cute.", "which one are you talking about?"], ["it is in top middle.", "is it brown?"], ["yes.", "is it the first one from the right?"], ["No, it\u2019s in the middle of the top.", "[339, 15, 469, 102]"], ["the left beside it.", "[342, 46, 469, 101]"], ["the left beside it.", "[342, 46, 469, 101]"], ["And to the left.", "[343, 44, 469, 101]"]], [266, 55, 360, 123], [343, 44, 469, 101], "openimages_v1.2/imgs/008b40159ed22bdf_Food_Baked goods_Pastry_3.jpg"]
|
46 |
+
[919, [["the deer is so cute.", "which one are you talking about?"], ["Behind the doll", "is it the first one from the right?"], ["yes.", "[320, 0, 490, 173]"]], [328, 1, 479, 161], [320, 0, 490, 173], "openimages_v1.2/imgs/9263a5fd06a1b5ca_Snowman_Toy_6.jpg"]
|
47 |
+
[928, [["the flower is so lovely.", "which one are you talking about?"], ["the left one.", "is it the first one from the left?"], ["yes.", "[0, 127, 185, 311]"]], [3, 128, 184, 310], [0, 127, 185, 311], "openimages_v1.2/imgs/0fffd5362226f8d3_Flower_Plant_Rose_3.jpg"]
|
48 |
+
[928, [["the flower is so lovely.", "which one are you talking about?"], ["the left one.", "is it the first one from the left?"], ["yes.", "[0, 127, 185, 311]"]], [3, 128, 184, 310], [0, 127, 185, 311], "openimages_v1.2/imgs/0fffd5362226f8d3_Flower_Plant_Rose_3.jpg"]
|
49 |
+
[928, [["the flower is so lovely.", "which one are you talking about?"], ["the left one.", "is it the first one from the left?"], ["yes.", "[0, 127, 185, 311]"]], [3, 128, 184, 310], [0, 127, 185, 311], "openimages_v1.2/imgs/0fffd5362226f8d3_Flower_Plant_Rose_3.jpg"]
|
50 |
+
[940, [["pass me the blue flag.", "which one are you talking about?"], ["the blue one.", "is it the first one from the left?"], ["yes.", "[97, 141, 129, 250]"]], [100, 142, 130, 251], [97, 141, 129, 250], "openimages_v1.2/imgs/009c68ec28f32e34_Flag_3.jpg"]
|
51 |
+
[940, [["pass me the blue flag.", "which one are you talking about?"], ["the blue one.", "is it the first one from the left?"], ["yes.", "[97, 141, 129, 250]"]], [100, 142, 130, 251], [97, 141, 129, 250], "openimages_v1.2/imgs/009c68ec28f32e34_Flag_3.jpg"]
|
52 |
+
[940, [["pass me the blue flag.", "which one are you talking about?"], ["the blue one.", "is it the first one from the left?"], ["yes.", "[97, 141, 129, 250]"]], [100, 142, 130, 251], [97, 141, 129, 250], "openimages_v1.2/imgs/009c68ec28f32e34_Flag_3.jpg"]
|
53 |
+
[1034, [["the doll is so cute.", "which one?"], ["the left one.", "is it green?"], ["yes.", "[124, 96, 282, 379]"]], [125, 96, 280, 381], [124, 96, 282, 379], "openimages_v1.2/imgs/d6d4e0e131db09c9_Toy_4.jpg"]
|
54 |
+
[1034, [["the doll is so cute.", "which one?"], ["the left one.", "is it green?"], ["yes.", "[124, 96, 282, 379]"]], [125, 96, 280, 381], [124, 96, 282, 379], "openimages_v1.2/imgs/d6d4e0e131db09c9_Toy_4.jpg"]
|
55 |
+
[1098, [["pass me the flower pot.", "which one do you want?"], ["The second one on the right.", "is it the second one from the right?"], ["yes.", "[521, 187, 575, 235]"]], [527, 187, 576, 233], [521, 187, 575, 235], "openimages_v1.2/imgs/2dbb4a546d107596_Flower_Street light_Houseplant_Plant_Tree_Flowerpot_4.jpg"]
|
56 |
+
[1098, [["pass me the flower pot.", "which one do you want?"], ["The second one on the right.", "is it the second one from the right?"], ["yes.", "[521, 187, 575, 235]"]], [527, 187, 576, 233], [521, 187, 575, 235], "openimages_v1.2/imgs/2dbb4a546d107596_Flower_Street light_Houseplant_Plant_Tree_Flowerpot_4.jpg"]
|
57 |
+
[1116, [["pass me the Squirrel doll.", "which one do you want?"], ["the cloest one.", "is it the first one from the right?"], ["no, the nearest one.", "[195, 350, 399, 564]"]], [194, 362, 408, 556], [195, 350, 399, 564], "openimages_v1.2/imgs/5cdad082e579d5cd_Toy_5.jpg"]
|
58 |
+
[1116, [["pass me the Squirrel doll.", "which one do you want?"], ["the cloest one.", "is it the first one from the right?"], ["no, the nearest one.", "[195, 350, 399, 564]"]], [194, 362, 408, 556], [195, 350, 399, 564], "openimages_v1.2/imgs/5cdad082e579d5cd_Toy_5.jpg"]
|
59 |
+
[1130, [["pass me the flower.", "which one do you want?"], ["the right one.", "is it the first one from the right?"], ["yes.", "[455, 83, 543, 180]"], ["you are wrong, i need the right one.", "[453, 83, 543, 180]"]], [546, 246, 608, 305], [453, 83, 543, 180], "openimages_v1.2/imgs/e47152d5fc91ef57_Plant_Rose_Flower_3.jpg"]
|
60 |
+
[1130, [["pass me the flower.", "which one do you want?"], ["the right one.", "is it the first one from the right?"], ["yes.", "[455, 83, 543, 180]"], ["you are wrong, i need the right one.", "[453, 83, 543, 180]"]], [546, 246, 608, 305], [453, 83, 543, 180], "openimages_v1.2/imgs/e47152d5fc91ef57_Plant_Rose_Flower_3.jpg"]
|
61 |
+
[1130, [["pass me the flower.", "which one do you want?"], ["the right one.", "is it the first one from the right?"], ["yes.", "[455, 83, 543, 180]"], ["you are wrong, i need the right one.", "[453, 83, 543, 180]"]], [546, 246, 608, 305], [453, 83, 543, 180], "openimages_v1.2/imgs/e47152d5fc91ef57_Plant_Rose_Flower_3.jpg"]
|
62 |
+
[1149, [["can you pass me the pumpkin?", "which one do you want?"], ["the small one on the flowerpot.", "is it the one on the far right?"], ["no, it is on the flowerpot.", "is it the one on the far left?"], ["no, it is on the flowerpot.", "[265, 296, 324, 329]"]], [264, 295, 323, 328], [265, 296, 324, 329], "openimages_v1.2/imgs/0190d205a5b14b41_Fruit_Houseplant_Plant_Pumpkin_Squash (Plant)_Vegetable_Flowerpot_2.jpg"]
|
63 |
+
[1149, [["can you pass me the pumpkin?", "which one do you want?"], ["the small one on the flowerpot.", "is it the one on the far right?"], ["no, it is on the flowerpot.", "is it the one on the far left?"], ["no, it is on the flowerpot.", "[265, 296, 324, 329]"]], [264, 295, 323, 328], [265, 296, 324, 329], "openimages_v1.2/imgs/0190d205a5b14b41_Fruit_Houseplant_Plant_Pumpkin_Squash (Plant)_Vegetable_Flowerpot_2.jpg"]
|
64 |
+
[1149, [["can you pass me the pumpkin?", "which one do you want?"], ["the small one on the flowerpot.", "is it the one on the far right?"], ["no, it is on the flowerpot.", "is it the one on the far left?"], ["no, it is on the flowerpot.", "[265, 296, 324, 329]"]], [264, 295, 323, 328], [265, 296, 324, 329], "openimages_v1.2/imgs/0190d205a5b14b41_Fruit_Houseplant_Plant_Pumpkin_Squash (Plant)_Vegetable_Flowerpot_2.jpg"]
|
65 |
+
[1149, [["can you pass me the pumpkin?", "which one do you want?"], ["the small one on the flowerpot.", "is it the one on the far right?"], ["no, it is on the flowerpot.", "is it the one on the far left?"], ["no, it is on the flowerpot.", "[265, 296, 324, 329]"]], [264, 295, 323, 328], [265, 296, 324, 329], "openimages_v1.2/imgs/0190d205a5b14b41_Fruit_Houseplant_Plant_Pumpkin_Squash (Plant)_Vegetable_Flowerpot_2.jpg"]
|
66 |
+
[1182, [["pass me the orange, please.", "which one do you want?"], ["The rightmost one in the middle.", "is it the one in the lower right corner?"], ["No, it is the one in the middle right.", "[501, 213, 566, 271]"]], [506, 213, 568, 267], [501, 213, 566, 271], "openimages_v1.2/imgs/88e245ce0e6fdd75_Food_Fruit_Orange_6.jpg"]
|
67 |
+
[1182, [["pass me the orange, please.", "which one do you want?"], ["The rightmost one in the middle.", "is it the one in the lower right corner?"], ["No, it is the one in the middle right.", "[501, 213, 566, 271]"]], [506, 213, 568, 267], [501, 213, 566, 271], "openimages_v1.2/imgs/88e245ce0e6fdd75_Food_Fruit_Orange_6.jpg"]
|
68 |
+
[1182, [["pass me the orange, please.", "which one do you want?"], ["The rightmost one in the middle.", "is it the one in the lower right corner?"], ["No, it is the one in the middle right.", "[501, 213, 566, 271]"]], [506, 213, 568, 267], [501, 213, 566, 271], "openimages_v1.2/imgs/88e245ce0e6fdd75_Food_Fruit_Orange_6.jpg"]
|
69 |
+
[1182, [["pass me the orange, please.", "which one do you want?"], ["The rightmost one in the middle.", "is it the one in the lower right corner?"], ["No, it is the one in the middle right.", "[501, 213, 566, 271]"]], [506, 213, 568, 267], [501, 213, 566, 271], "openimages_v1.2/imgs/88e245ce0e6fdd75_Food_Fruit_Orange_6.jpg"]
|
70 |
+
[1182, [["pass me the orange, please.", "which one do you want?"], ["The rightmost one in the middle.", "is it the one in the lower right corner?"], ["No, it is the one in the middle right.", "[501, 213, 566, 271]"]], [506, 213, 568, 267], [501, 213, 566, 271], "openimages_v1.2/imgs/88e245ce0e6fdd75_Food_Fruit_Orange_6.jpg"]
|
71 |
+
[775, [["can you pass me the plate?", "which one do you want?"], ["Upper left corner.", "is it the first one from the left?"], ["no, it is the second.", "[37, 0, 133, 71]"]], [39, 0, 134, 73], [37, 0, 133, 71], "openimages_v1.2/imgs/318a8bb8339dde66_Candle_Tableware_Plate_Platter_Serving tray_4.jpg"]
|
72 |
+
[206, [["No, I want the wine bottle.", "which one do you want?"], ["The nearer one.", "is it the one on the right?"], ["No", "[343, 0, 492, 405]"], ["No, not this one.", "[343, 0, 492, 405]"]], [151, 0, 328, 476], [343, 0, 492, 405], "openimages_v1.2/imgs/25b17c8df7b2400b_Beer_Drink_Bottle_Wine_5.jpg"]
|
73 |
+
[1206, [["pass me a flower", "which one do you want"], ["The upper one.", "is it in the upper right corner?"], ["yes.", "[593, 0, 638, 159]"]], [341, 0, 515, 192], [593, 0, 638, 159], "openimages_v1.2/imgs/a64572ad63e1c1ab_Rose_Flower_3.jpg"]
|
74 |
+
[1206, [["pass me a flower", "which one do you want"], ["The upper one.", "is it in the upper right corner?"], ["yes.", "[593, 0, 638, 159]"]], [341, 0, 515, 192], [593, 0, 638, 159], "openimages_v1.2/imgs/a64572ad63e1c1ab_Rose_Flower_3.jpg"]
|
75 |
+
[1206, [["pass me a flower", "which one do you want"], ["The upper one.", "is it in the upper right corner?"], ["yes.", "[593, 0, 638, 159]"]], [341, 0, 515, 192], [593, 0, 638, 159], "openimages_v1.2/imgs/a64572ad63e1c1ab_Rose_Flower_3.jpg"]
|
76 |
+
[1209, [["I want a cake", "which one do you want"], ["The right one.", "is it the first one from the right?"], ["Yes.", "is it the first one from the right?"], ["Yes.", "[413, 100, 595, 295]"]], [413, 102, 589, 269], [413, 100, 595, 295], "openimages_v1.2/imgs/5c7b9a2eec5ff0ae_Muffin_Dessert_Food_Baked goods_7.jpg"]
|
77 |
+
[1209, [["I want a cake", "which one do you want"], ["The right one.", "is it the first one from the right?"], ["Yes.", "is it the first one from the right?"], ["Yes.", "[413, 100, 595, 295]"]], [413, 102, 589, 269], [413, 100, 595, 295], "openimages_v1.2/imgs/5c7b9a2eec5ff0ae_Muffin_Dessert_Food_Baked goods_7.jpg"]
|
78 |
+
[1232, [["give me a bottle of wine", "which bottle do you want"], ["The upest, from left, the forth.", "is it the fourth bottle from the left?"], ["yes.", "[247, 10, 295, 214]"]], [244, 13, 293, 213], [247, 10, 295, 214], "openimages_v1.2/imgs/a211016f402b956b_Bottle_Building_Wine_7.jpg"]
|
79 |
+
[1232, [["give me a bottle of wine", "which bottle do you want"], ["The upest, from left, the forth.", "is it the fourth bottle from the left?"], ["yes.", "[247, 10, 295, 214]"]], [244, 13, 293, 213], [247, 10, 295, 214], "openimages_v1.2/imgs/a211016f402b956b_Bottle_Building_Wine_7.jpg"]
|
80 |
+
[1236, [["I want a pillow", "which one do you want"], ["The frontmost one.", "is it black and white?"], ["Yes.", "is it the one on the right?"], ["Yes.", "[335, 208, 476, 267]"]], [339, 210, 471, 275], [335, 208, 476, 267], "openimages_v1.2/imgs/2992bde1d85a7162_Nightstand_Pillow_6.jpg"]
|
81 |
+
[1266, [["I want the strawberry", "which one do you want"], ["The center one.", "is it the one in the middle?"], ["yes", "[256, 147, 335, 289]"]], [185, 64, 287, 222], [256, 147, 335, 289], "openimages_v1.2/imgs/8a61c3b92dffeb3e_Dessert_Dairy Product_Food_Fruit_Baked goods_Strawberry_Cake_10.jpg"]
|
82 |
+
[1287, [["The green pants", "is it a man?"], ["I don\u2019t think so.", "is it a woman?"], ["Yes, I guess", "is she wearing green pants?"], ["Yes", "is she wearing a white top?"], ["Yes", "is she wearing black pants?"], ["No, the green pants", "[0, 0, 639, 479]"]], [527, 276, 580, 346], [0, 0, 639, 479], "openimages_v1.2/imgs/0c22601eb7370faa_Trousers_4.jpg"]
|
83 |
+
[1301, [["goblet", "is it a glass?"], ["yes", "[139, 132, 228, 378]"]], [139, 171, 231, 272], [139, 132, 228, 378], "openimages_v1.2/imgs/b132c5bc9f77cb7f_Cocktail_Drink_Food_Fruit_Tableware_Strawberry_Wine_Wine glass_3.jpg"]
|
84 |
+
[1309, [["The picture", "is it a person?"], ["Yes", "is it on the left?"], ["Yes", "is it on the right?"], ["No", "is it in the middle?"], ["No", "is it on the wall?"], ["yes", "is it on the left?"], ["yes", "is it the first one on the left?"], ["yes", "[131, 236, 152, 261]"]], [0, 100, 63, 289], [131, 236, 152, 261], "openimages_v1.2/imgs/00225def00d94d8b_Picture frame_Furniture_13.jpg"]
|
85 |
+
[1330, [["Tomato", "is it a tomato?"], ["of course", "is it green?"], ["No\uff0cthe red", "is it the top one?"], ["yes", "[41, 28, 196, 311]"]], [37, 145, 196, 309], [41, 28, 196, 311], "openimages_v1.2/imgs/4e5506fb9ab3dc08_Fruit_Plant_Tomato_Vegetable_8.jpg"]
|
86 |
+
[1330, [["Tomato", "is it a tomato?"], ["of course", "is it green?"], ["No\uff0cthe red", "is it the top one?"], ["yes", "[41, 28, 196, 311]"]], [37, 145, 196, 309], [41, 28, 196, 311], "openimages_v1.2/imgs/4e5506fb9ab3dc08_Fruit_Plant_Tomato_Vegetable_8.jpg"]
|
87 |
+
[1354, [["The purple flowerpot", "is it big?"], ["No", "is it on the right?"], ["No\uff0cleft", "is it on the ground?"], ["yes", "is it the big one?"], ["No", "[414, 246, 548, 435]"]], [85, 154, 162, 234], [414, 246, 548, 435], "openimages_v1.2/imgs/08e6bf832016e29e_Houseplant_Plant_Flower_Flowerpot_11.jpg"]
|
88 |
+
[1372, [["I want a red apple", "which one do you want"], ["The red in rightmost", "is it next to the green apple?"], ["No", "is it next to the price sign?"], ["No", "[492, 344, 587, 422]"]], [582, 342, 639, 414], [492, 344, 587, 422], "openimages_v1.2/imgs/7d5be279c905b3fa_Apple_Food_Fruit_Orange_Grapefruit_11.jpg"]
|
89 |
+
[1385, [["the cow", "is it standing?"], ["the one on the right side of the tree", "is it black and white?"], ["yes", "[131, 257, 250, 325]"]], [126, 257, 245, 330], [131, 257, 250, 325], "openimages_v1.2/imgs/0e9f655b0a190ab1_Cattle_Plant_Tree_Animal_13.jpg"]
|
90 |
+
[1329, [["give me the hot air balloon on the leftmost", "is it the green one?"], ["yes", "[0, 221, 142, 426]"]], [0, 219, 131, 426], [0, 221, 142, 426], "openimages_v1.2/imgs/521d913963c3b47c_Balloon_Vehicle_Aircraft_3.jpg"]
|
91 |
+
[1429, [["the pink towel ", "all of it?"], ["No\uff0cthe pink towel ", "all of it?"], ["No\uff0cthe bottom towel", "[0, 296, 638, 423]"]], [0, 294, 639, 423], [0, 296, 638, 423], "openimages_v1.2/imgs/613305c0a39b4c72_Towel_3.jpg"]
|
92 |
+
[1436, [["give me the rightmost penguin", "is it the first one from the right?"], ["yes", "[425, 133, 510, 432]"]], [440, 142, 511, 420], [425, 133, 510, 432], "openimages_v1.2/imgs/3868829d6119e6ea_Bird_Penguin_Animal_3.jpg"]
|
93 |
+
[1437, [["give me the flowerpot", "is it red?"], ["yes", "is it the whole pot?"], ["yes", "[147, 447, 375, 638]"]], [148, 438, 376, 639], [147, 447, 375, 638], "openimages_v1.2/imgs/23bd2f5d5d520cd1_Houseplant_Plant_Flowerpot_3.jpg"]
|
94 |
+
[1992, [["give me the left black flower pot.", "is it the first one from the left?"], ["the first black one from the left.", "is it the first black flower pot from the left?"], ["yeah", "[0, 567, 68, 638]"]], [60, 565, 105, 619], [0, 567, 68, 638], "openimages_v1.2/imgs/02a344aac1eead08_House_Tree_Building_Houseplant_Plant_Flower_Flowerpot_3.jpg"]
|
95 |
+
[1960, [["pass me the most right flower pot.", "is it the first one from the right?"], ["yes", "[442, 355, 520, 432]"]], [438, 353, 518, 434], [442, 355, 520, 432], "openimages_v1.2/imgs/6baf9ff6fd56c7c6_Houseplant_Plant_Flowerpot_4.jpg"]
|
96 |
+
[1918, [["the red cake with 2 eyes.", "which one are you talking about?"], ["the right one.", "is it the first one from the right?"], ["yes", "[338, 311, 591, 535]"]], [332, 308, 592, 546], [338, 311, 591, 535], "openimages_v1.2/imgs/fcc713fe37b82bdc_Toy_Snack_Dessert_Food_Baked goods_12.jpg"]
|
97 |
+
[1888, [["Give me the bottom slice of cucumber", "is it the one in the lower right corner?"], ["yes", "[359, 376, 492, 479]"]], [358, 376, 493, 479], [359, 376, 492, 479], "openimages_v1.2/imgs/81303291b41e898d_Cucumber_3.jpg"]
|
98 |
+
[1827, [["give me the notebook", "which one do you want?"], ["the left one.", "is it the one in your hand?"], ["yes", "[0, 69, 281, 374]"]], [0, 75, 279, 374], [0, 69, 281, 374], "openimages_v1.2/imgs/7d930e70cd3b11a7_Laptop_Computer keyboard_Book_1.jpg"]
|
99 |
+
[1813, [["Give me the largest West Blue Flower", "is it the broccoli?"], ["yes", "[250, 224, 496, 390]"]], [251, 223, 495, 403], [250, 224, 496, 390], "openimages_v1.2/imgs/2f515c715d155747_Toy_Food_Broccoli_2.jpg"]
|
100 |
+
[1780, [["Give me the leftmost tape", "is it pink?"], ["yes", "[31, 54, 120, 205]"]], [31, 53, 117, 207], [31, 54, 120, 205], "openimages_v1.2/imgs/638c7052acd8ca03_Adhesive tape_4.jpg"]
|
101 |
+
[1774, [["give me the nearest boxes.", "ok, which one do you want?"], ["the nearest one.", "is it the first one from the right?"], ["yes, it is in the middle", "[249, 229, 502, 367]"]], [249, 231, 497, 370], [249, 229, 502, 367], "openimages_v1.2/imgs/0ae1f7df4aeca2f9_Box_Table_8.jpg"]
|
102 |
+
[1764, [["Pick the flower bud at the bottom for me.", "which one are you talking about?"], ["the flower bud at the bottom", "is it the first one from the right?"], ["yes", "[373, 254, 443, 351]"]], [376, 259, 443, 350], [373, 254, 443, 351], "openimages_v1.2/imgs/0c875a717a0c0515_Pomegranate_3.jpg"]
|
103 |
+
[1751, [["the most left cat.", "is it black?"], ["no", "is it the first one from the left?"], ["yes", "[96, 25, 305, 141]"]], [94, 28, 301, 140], [96, 25, 305, 141], "openimages_v1.2/imgs/2b7a258daaeb3636_Carnivore_Cat_Animal_4.jpg"]
|
104 |
+
[1751, [["which cat is closer to me?", "which one are you talking about?"], ["right cat.", "is it the first one from the right?"], ["yes.", "[298, 184, 568, 326]"]], [94, 28, 301, 140], [298, 184, 568, 326], "openimages_v1.2/imgs/2b7a258daaeb3636_Carnivore_Cat_Animal_4.jpg"]
|
105 |
+
[1764, [["what is in the picture?", "is it a pomegranate?"], ["yes, give me the biggest one.", "is it the one in the middle?"], ["yes.", "[262, 153, 366, 253]"]], [376, 259, 443, 350], [262, 153, 366, 253], "openimages_v1.2/imgs/0c875a717a0c0515_Pomegranate_3.jpg"]
|
106 |
+
[1764, [["what is in the picture?", "is it a pomegranate?"], ["yes, give me the biggest one.", "is it the one in the middle?"], ["yes.", "[262, 153, 366, 253]"]], [376, 259, 443, 350], [262, 153, 366, 253], "openimages_v1.2/imgs/0c875a717a0c0515_Pomegranate_3.jpg"]
|
107 |
+
[1774, [["what is in the picture?", "is it a donut?"], ["yes, give me the blue one.", "is it the first one from the left?"], ["yes.", "[23, 158, 156, 274]"]], [249, 231, 497, 370], [23, 158, 156, 274], "openimages_v1.2/imgs/0ae1f7df4aeca2f9_Box_Table_8.jpg"]
|
108 |
+
[1780, [["what are these?", "is it yellow?"], ["what is in the picture?", "is it a flower?"], ["no.", "is it pink?"], ["what is in the picture?", "is it a cup?"], ["no.", "[0, 0, 637, 389]"]], [31, 53, 117, 207], [0, 0, 637, 389], "openimages_v1.2/imgs/638c7052acd8ca03_Adhesive tape_4.jpg"]
|
109 |
+
[1813, [["what is in the picture?", "is it food?"], ["i don\u2019t like broccoli, pick it out.", "is it orange?"], ["no, it is green.", "is it blue?"], ["no, it is green.", "[0, 76, 638, 479]"]], [251, 223, 495, 403], [0, 76, 638, 479], "openimages_v1.2/imgs/2f515c715d155747_Toy_Food_Broccoli_2.jpg"]
|
110 |
+
[1827, [["give me the notebook.", "which one do you want?"], ["left one.", "is it the one in your hand?"], ["yes.", "[0, 69, 281, 374]"]], [0, 75, 279, 374], [0, 69, 281, 374], "openimages_v1.2/imgs/7d930e70cd3b11a7_Laptop_Computer keyboard_Book_1.jpg"]
|
111 |
+
[1888, [["give me the bowl.", "which one do you want?"], ["top right.", "is there a carrot in it?"], ["yes.", "[364, 0, 638, 299]"]], [358, 376, 493, 479], [364, 0, 638, 299], "openimages_v1.2/imgs/81303291b41e898d_Cucumber_3.jpg"]
|
112 |
+
[1918, [["what is in the picture?", "is it red?"], ["give me the one-eye one.", "is it in the front row?"], ["no, it is in the middle.", "is it the one on the left?"], ["no, it is in the middle.", "[124, 140, 273, 281]"]], [332, 308, 592, 546], [124, 140, 273, 281], "openimages_v1.2/imgs/fcc713fe37b82bdc_Toy_Snack_Dessert_Food_Baked goods_12.jpg"]
|
113 |
+
[1960, [["give the tallest flower.", "is it the first one from the left?"], ["no.", "is it the second one?"], ["no.", "is it the third one?"], ["yes.", "[314, 361, 397, 442]"]], [438, 353, 518, 434], [314, 361, 397, 442], "openimages_v1.2/imgs/6baf9ff6fd56c7c6_Houseplant_Plant_Flowerpot_4.jpg"]
|
114 |
+
[1992, [["what is in the picture?", "is it a plant?"], ["i think so, give it to me.", "is it a flower pot?"], ["yes.", "is it pink?"], ["no, it is green.", "is it on the left?"], ["no, it is right one.", "[259, 380, 292, 414]"]], [60, 565, 105, 619], [259, 380, 292, 414], "openimages_v1.2/imgs/02a344aac1eead08_House_Tree_Building_Houseplant_Plant_Flower_Flowerpot_3.jpg"]
|
115 |
+
[61, [["drive the green car away.", "which one are you talking about?"], ["the middle one.", "is it the first one from the left?"], ["no, the middle one.", "[190, 310, 265, 380]"]], [187, 308, 264, 382], [190, 310, 265, 380], "openimages_v1.2/imgs/8b240c08cb38382e_Toy_Bus_Land vehicle_Vehicle_13.jpg"]
|
116 |
+
[65, [["close the laptop on the left.", "is it the one on the left?"], ["yeah.", "[0, 73, 348, 426]"]], [0, 72, 346, 426], [0, 73, 348, 426], "openimages_v1.2/imgs/5b39c13a7271eed4_Laptop_Computer keyboard_3.jpg"]
|
117 |
+
[88, [["pass me the far left glass.", "is it the first glass from the left?"], ["yes.", "[56, 165, 127, 376]"]], [64, 238, 121, 290], [56, 165, 127, 376], "openimages_v1.2/imgs/efd13a8dcab6d7f4_Drink_Table_Tableware_Bottle_Wine_Wine glass_Furniture_5.jpg"]
|
118 |
+
[93, [["feed the second panda on the left.", "is it the second panda from the left?"], ["yes.", "[149, 103, 268, 219]"]], [148, 100, 267, 217], [149, 103, 268, 219], "openimages_v1.2/imgs/329035dece1a9746_Panda_4.jpg"]
|
119 |
+
[142, [["eat the broccoli on the right.", "is it the top one?"], ["yes.", "[233, 149, 460, 342]"]], [229, 152, 460, 354], [233, 149, 460, 342], "openimages_v1.2/imgs/e0b81ffd382fa5f5_Food_Vegetable_Broccoli_3.jpg"]
|
120 |
+
[161, [["mark the stone flower bed.", "which one do you mean?"], ["the right one.", "is it the first one from the right?"], ["yes.", "[273, 516, 346, 582]"]], [284, 517, 342, 615], [273, 516, 346, 582], "openimages_v1.2/imgs/0cb491551929a2ee_Stairs_Houseplant_Porch_Plant_Tree_Flower_Building_Flowerpot_4.jpg"]
|
121 |
+
[163, [["eat the tomato on the top left.", "is it red?"], ["yes.", "is it the one on the left?"], ["yes.", "[44, 5, 209, 146]"]], [46, 2, 208, 167], [44, 5, 209, 146], "openimages_v1.2/imgs/987761ccf847341d_Tomato_21.jpg"]
|
122 |
+
[178, [["kill the meerkat on the right.", "which one are you talking about?"], ["the right one sitting one the stone.", "is it the first one from the right?"], ["yeah.", "[252, 69, 445, 419]"]], [206, 68, 450, 529], [252, 69, 445, 419], "openimages_v1.2/imgs/921f182a889503b5_Carnivore_Animal_3.jpg"]
|
123 |
+
[189, [["throw the spoon handle on the table.", "which one?"], ["the right one.", "is it in the blue bowl?"], ["no, it is on the table.", "is it the one in the bowl?"], ["no.", "[302, 0, 446, 29]"]], [410, 360, 456, 397], [302, 0, 446, 29], "openimages_v1.2/imgs/ee784978eea58567_Drink_Food_Tableware_Spoon_3.jpg"]
|
124 |
+
[191, [["pick the flower in the middle.", "is it the pink one?"], ["yeah.", "[195, 144, 381, 331]"]], [192, 146, 381, 331], [195, 144, 381, 331], "openimages_v1.2/imgs/e26ff9e7c08822f9_Plant_Rose_Flower_6.jpg"]
|
125 |
+
[198, [["drive the white van on the left away.", "there are two white vans, which one do you mean?"], ["the right one.", "is it the one next to the red car?"], ["no, the smaller one.", "is it the first one from the right?"], ["no, the second on the left.", "[35, 176, 169, 254]"]], [32, 183, 150, 255], [35, 176, 169, 254], "openimages_v1.2/imgs/88af919148def218_Land vehicle_Truck_Vehicle_Wheel_Van_Car_2.jpg"]
|
126 |
+
[1442, [["The bird is so cute.", "which one are you talking about?"], ["the black one on the left. ", "is it the first one from the left?"], ["yes.", "[37, 115, 58, 135]"]], [558, 0, 587, 32], [37, 115, 58, 135], "openimages_v1.2/imgs/4e1503114e8c097f_Bird_Duck_Animal_4.jpg"]
|
127 |
+
[1466, [["the girl is so lovely.", "which one are you talking about?"], ["The one who shows me her face.", "is it the first one from the left?"], ["yes.", "[142, 279, 240, 419]"]], [234, 272, 352, 425], [142, 279, 240, 419], "openimages_v1.2/imgs/d357157808acbaa9_Toy_Monkey_3.jpg"]
|
128 |
+
[1494, [["the donut looks delicious.", "which one are you talking about?"], ["the chocolate one.", "is it the top one?"], ["yes.", "[264, 31, 466, 219]"]], [364, 165, 579, 393], [264, 31, 466, 219], "openimages_v1.2/imgs/6b68fc10e440a1a8_Food_Bagel_Snack_Dessert_Baked goods_Doughnut_3.jpg"]
|
129 |
+
[1508, [["give me the persimmon.", "which one do you want?"], ["the biggest one.", "is it the middle one?"], ["no.", "is it the one on the right?"], ["yes.", "[422, 152, 611, 335]"]], [169, 138, 354, 327], [422, 152, 611, 335], "openimages_v1.2/imgs/bdc3d2983ffc0a46_Food_Tomato_Vegetable_19.jpg"]
|
130 |
+
[1516, [["the cake looks delicious.<br>", "which piece do you want?"], ["The one on which the strawberry is.<br>", "is it the first piece from the left?"], ["no.", "is it the second piece from the left?"], ["no.", "is it the third piece from the left?"], ["no.", "is it the fourth piece from the left?"], ["no.", "is it the fifth piece from the left?"], ["no, it is on the second row from the left.", "is it the fifth piece from the left?"], ["yes.", "[360, 202, 407, 232]"]], [236, 278, 325, 377], [360, 202, 407, 232], "openimages_v1.2/imgs/17da62a9aa6e99f6_Food_Fast food_Teapot_Person_Dessert_Kettle_Tableware_Human head_Baked goods_Cake_Pastry_5.jpg"]
|
benchmark/human_eval/xvlm_20230515-192335.json
ADDED
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[13, [["ride the red bikes.", "which one do you want?"], ["the middle one.", "is it the front wheel?"], ["no, the back wheel.", "[35, 408, 91, 442]"]], [35, 422, 108, 479], [35, 408, 91, 442], "openimages_v1.2/imgs/2b240f8c7b095972_Bicycle_Bicycle wheel_Land vehicle_House_Vehicle_Building_Tire_4.jpg"]
|
2 |
+
[51, [["catch the duck on the right.", "which one?"], ["the one on the far right and is swimming to the left.", "is it the one on the far right?"], ["yes.", "[500, 172, 618, 238]"]], [500, 172, 618, 238], [500, 172, 618, 238], "openimages_v1.2/imgs/0391aa378a4094b3_Bird_Duck_12.jpg"]
|
3 |
+
[54, [["please mark the adult penguin.", "is it the first one from the left?"], ["the closest one.", "[273, 253, 437, 547]"]], [187, 115, 420, 548], [273, 253, 437, 547], "openimages_v1.2/imgs/5cc33103845e5519_Bird_Penguin_Animal_4.jpg"]
|
4 |
+
[61, [["mark the green lego car.", "is it the one in the middle?"], ["yes.", "[187, 308, 264, 382]"]], [187, 308, 264, 382], [187, 308, 264, 382], "openimages_v1.2/imgs/8b240c08cb38382e_Toy_Bus_Land vehicle_Vehicle_13.jpg"]
|
5 |
+
[65, [["bring me the laptop on the left.", "is it the one with the black screen?"], ["yeah.", "[0, 72, 346, 426]"]], [0, 72, 346, 426], [0, 72, 346, 426], "openimages_v1.2/imgs/5b39c13a7271eed4_Laptop_Computer keyboard_3.jpg"]
|
6 |
+
[88, [["pour the champagne in the far left glass.", "ok, i see."], ["thanks.", "[64, 238, 121, 290]"]], [64, 238, 121, 290], [64, 238, 121, 290], "openimages_v1.2/imgs/efd13a8dcab6d7f4_Drink_Table_Tableware_Bottle_Wine_Wine glass_Furniture_5.jpg"]
|
7 |
+
[88, [["pour the champagne in the far left glass.", "ok, i see."], ["thanks.", "[64, 238, 121, 290]"]], [64, 238, 121, 290], [64, 238, 121, 290], "openimages_v1.2/imgs/efd13a8dcab6d7f4_Drink_Table_Tableware_Bottle_Wine_Wine glass_Furniture_5.jpg"]
|
8 |
+
[93, [["feed the panda in the mid rear.", "is it the one with the head down?"], ["no.", "is it the one on the far right?"], ["no, the furthest one.", "[148, 100, 266, 216]"]], [148, 100, 267, 217], [148, 100, 266, 216], "openimages_v1.2/imgs/329035dece1a9746_Panda_4.jpg"]
|
9 |
+
[142, [["pick the broccoli on the top for me.", "is it the one on the far right?"], ["yes.", "[228, 151, 460, 353]"]], [229, 152, 460, 354], [228, 151, 460, 353], "openimages_v1.2/imgs/e0b81ffd382fa5f5_Food_Vegetable_Broccoli_3.jpg"]
|
10 |
+
[161, [["mark the flower bed on the right.", "which one?"], ["the right one.", "is it the one on the far right?"], ["yes.", "[270, 525, 299, 564]"]], [284, 517, 342, 615], [270, 525, 299, 564], "openimages_v1.2/imgs/0cb491551929a2ee_Stairs_Houseplant_Porch_Plant_Tree_Flower_Building_Flowerpot_4.jpg"]
|
11 |
+
[163, [["wash the tomato on the top left.", "is it the one in the upper left corner?"], ["yes.", "[46, 2, 208, 167]"]], [46, 2, 208, 167], [46, 2, 208, 167], "openimages_v1.2/imgs/987761ccf847341d_Tomato_21.jpg"]
|
12 |
+
[178, [["feed the animal sitting on the right.", "is it the one with the head down?"], ["yes", "[206, 68, 450, 529]"]], [206, 68, 450, 529], [206, 68, 450, 529], "openimages_v1.2/imgs/921f182a889503b5_Carnivore_Animal_3.jpg"]
|
13 |
+
[189, [["wash the fork on the table.", "which bowl?"], ["the fork.", "is it the one on the far right?"], ["yes.", "[397, 90, 622, 233]"]], [410, 360, 456, 397], [397, 90, 622, 233], "openimages_v1.2/imgs/ee784978eea58567_Drink_Food_Tableware_Spoon_3.jpg"]
|
14 |
+
[189, [["wash the fork on the table.", "which bowl?"], ["the fork.", "is it the one on the far right?"], ["yes.", "[397, 90, 622, 233]"]], [410, 360, 456, 397], [397, 90, 622, 233], "openimages_v1.2/imgs/ee784978eea58567_Drink_Food_Tableware_Spoon_3.jpg"]
|
15 |
+
[191, [["bring me the pink flower on the left.", "which one?"], ["the left one.", "the one on the far left?"], ["the pink flower on the left.", "[56, 181, 244, 323]"]], [192, 146, 381, 331], [56, 181, 244, 323], "openimages_v1.2/imgs/e26ff9e7c08822f9_Plant_Rose_Flower_6.jpg"]
|
16 |
+
[198, [["drive the van on the far left.", "is it the first one from the left?"], ["yes.", "[32, 182, 150, 255]"]], [32, 183, 150, 255], [32, 182, 150, 255], "openimages_v1.2/imgs/88af919148def218_Land vehicle_Truck_Vehicle_Wheel_Van_Car_2.jpg"]
|
17 |
+
[198, [["drive the van on the far left.", "is it the first one from the left?"], ["yes.", "[32, 182, 150, 255]"]], [32, 183, 150, 255], [32, 182, 150, 255], "openimages_v1.2/imgs/88af919148def218_Land vehicle_Truck_Vehicle_Wheel_Van_Car_2.jpg"]
|
18 |
+
[206, [["open the black bottle of champagne.", "which bottle is it?"], ["the left one.", "[76, 0, 158, 172]"]], [151, 0, 328, 476], [76, 0, 158, 172], "openimages_v1.2/imgs/25b17c8df7b2400b_Beer_Drink_Bottle_Wine_5.jpg"]
|
19 |
+
[209, [["This tea set is so pretty.", "yes, which one do you like?"], ["It is in the middle.", "[168, 38, 378, 215]"]], [168, 39, 378, 215], [168, 38, 378, 215], "openimages_v1.2/imgs/76005037c3e150f7_Teapot_Snack_Dessert_Food_Baked goods_Pastry_6.jpg"]
|
20 |
+
[228, [["I want to know its name.", "which one?"], ["The bigger one.", "is it the one on the far right?"], ["yes", "[0, 0, 508, 497]"]], [0, 0, 509, 497], [0, 0, 508, 497], "openimages_v1.2/imgs/31d3d97132fbb2f2_Bird_Penguin_Animal_3.jpg"]
|
21 |
+
[255, [["It does not know if it is made by machine.", "what do you want to take?"], ["it is a cookie.", "is it the one on the left?"], ["yes", "[0, 241, 202, 330]"]], [0, 241, 202, 330], [0, 241, 202, 330], "openimages_v1.2/imgs/467d1abed2e32295_Snack_Cookie_Food_Baked goods_17.jpg"]
|
22 |
+
[285, [["It does not look very strong", "is it the one on the far left?"], ["no, it\u2019s in the middle.", "is it the one with the black head down?"], ["no", "[231, 210, 402, 400]"]], [231, 210, 402, 400], [231, 210, 402, 400], "openimages_v1.2/imgs/1797f4e85e52bc5a_Building_Bird_Chicken_Animal_4.jpg"]
|
23 |
+
[318, [["Give me that coffee cup.", "which one?"], ["white one", "is it the one on the far right?"], ["no", "[68, 6, 252, 167]"]], [68, 6, 252, 167], [68, 6, 252, 167], "openimages_v1.2/imgs/67c02c17c9a759c8_Drink_Coffee cup_Food_Saucer_Tableware_Vegetable_Salad_Broccoli_2.jpg"]
|
24 |
+
[326, [["Give me that action figures", "which one do you want?"], ["It is in the middle.", "is it the one with the red head?"], ["no", "ok, here you go."], ["where", "[234, 228, 275, 288]"]], [234, 228, 275, 288], [234, 228, 275, 288], "openimages_v1.2/imgs/bcc0fa02fdce3af0_Toy_Land vehicle_Vehicle_7.jpg"]
|
25 |
+
[333, [["This flower is interesting", "which one are you talking about?"], ["The purple one", "is it the one on the far right?"], ["yes", "[397, 261, 467, 344]"]], [397, 261, 468, 345], [397, 261, 467, 344], "openimages_v1.2/imgs/921e76975c6f820c_Plant_Rose_Flower_3.jpg"]
|
26 |
+
[393, [["The bird is cute", "which one?"], ["Bottom of the page", "is it the one on the far right?"], ["no", "[222, 279, 301, 394]"]], [222, 279, 301, 394], [222, 279, 301, 394], "openimages_v1.2/imgs/ef4a0d60192428c1_Bird_Penguin_Animal_13.jpg"]
|
27 |
+
[407, [["It is so cool", "yes, it\u2019s beautiful."], ["which one", "[85, 121, 601, 337]"]], [85, 121, 601, 337], [85, 121, 601, 337], "openimages_v1.2/imgs/91826655a7d34abd_Land vehicle_Train_Vehicle_Wheel_3.jpg"]
|
28 |
+
[429, [["It is really strong.", "which one are you talking about?"], ["the left one", "[28, 188, 282, 363]"]], [28, 188, 282, 364], [28, 188, 282, 363], "openimages_v1.2/imgs/02c91a8515336047_Cattle_10.jpg"]
|
29 |
+
[440, [["What is the brand of this computer", "which one are you talking about?"], ["left", "is it the one on the far right?"], ["no", "[207, 0, 334, 177]"]], [0, 0, 230, 256], [207, 0, 334, 177], "openimages_v1.2/imgs/9b031924d17fc0d4_Beer_Laptop_Drink_Office supplies_Bottle_Wine_Book_2.jpg"]
|
30 |
+
[447, [["It\u2019s a little samll", "yes."], ["which one", "is it the one on the far right?"], ["no", "[318, 156, 550, 330]"]], [319, 156, 550, 331], [318, 156, 550, 330], "openimages_v1.2/imgs/008ede8abd811226_Squirrel_Animal_3.jpg"]
|
31 |
+
[451, [["It is in full bloom", "yes, it\u2019s beautiful."], ["the middle one?", "[228, 75, 411, 257]"]], [247, 244, 297, 333], [228, 75, 411, 257], "openimages_v1.2/imgs/f7f3a10c836226b2_Plant_Flower_Common sunflower_7.jpg"]
|
32 |
+
[457, [["It is strange", "yes, do you want to buy one?"], ["yes, the green one", "[205, 50, 232, 63]"]], [29, 348, 266, 440], [205, 50, 232, 63], "openimages_v1.2/imgs/15f3aeb130d0d03d_Toy_Land vehicle_Vehicle_Wheel_8.jpg"]
|
33 |
+
[466, [["What kind of animal is this?", "which one are you talking about?"], ["the seconed one", "[200, 30, 321, 307]"]], [277, 30, 354, 219], [200, 30, 321, 307], "openimages_v1.2/imgs/7fe6972c3f562011_Horse_Cattle_Animal_6.jpg"]
|
34 |
+
[476, [["Please pick one flower for me.", "which one do you want?"], ["The second one from the left.", "ok, here you go."], ["Please make sure it is the second one from the left.", "[205, 183, 224, 201]"]], [154, 175, 190, 206], [205, 183, 224, 201], "openimages_v1.2/imgs/46ceefe0e0ecff39_Chair_Houseplant_Table_Plant_Flower_Furniture_Flowerpot_1.jpg"]
|
35 |
+
[501, [["Pass me the first carton.", "is it the one in the middle?"], ["No. It is the first carton from the left.", "[2, 271, 105, 337]"]], [62, 250, 327, 425], [2, 271, 105, 337], "openimages_v1.2/imgs/2a1552207182c802_Box_4.jpg"]
|
36 |
+
[541, [["Pass me the plant pot.", "which one?"], ["The one in the middle.", "is it the one with the flower pot on the left?"], ["No. It is the one closest to us.", "[276, 344, 347, 400]"]], [276, 344, 348, 401], [276, 344, 347, 400], "openimages_v1.2/imgs/b539ca908aece624_Fountain_Houseplant_Plant_Tree_Flower_Flowerpot_5.jpg"]
|
37 |
+
[546, [["Pass me the axe.", "which one do you want?"], ["I want the first one from the left.", "ok, i see."], ["Please make sure it\u2019s the first one from the left.", "[145, 169, 250, 514]"]], [145, 169, 250, 514], [145, 169, 250, 514], "openimages_v1.2/imgs/5c7e5c0ac6b63b5e_Tool_Axe_4.jpg"]
|
38 |
+
[563, [["The flag is beautiful.", "which side are you talking about?"], ["The one in red, white and blue.", "[96, 177, 315, 303]"]], [97, 177, 315, 304], [96, 177, 315, 303], "openimages_v1.2/imgs/5d170945d5549dea_Flag_3.jpg"]
|
39 |
+
[569, [["The flag is beautiful.", "which side are you talking about?"], ["The one farthest away from us.", "[378, 233, 493, 343]"]], [378, 233, 493, 343], [378, 233, 493, 343], "openimages_v1.2/imgs/3bd5795f8469318e_Flag_3.jpg"]
|
40 |
+
[592, [["The polar bear is so cute.", "which one are you talking about?"], ["The sleeping one.", "is it the one on the far right?"], ["No. It is the one on the left.", "[463, 186, 573, 228]"]], [38, 206, 183, 254], [463, 186, 573, 228], "openimages_v1.2/imgs/392f8102414cc81e_Polar bear_4.jpg"]
|
41 |
+
[600, [["Pass me the cherry tomato.", "which one do you want?"], ["The one in the middle.", "ok, here you go."], ["Please make sure it is the one on the cabbage.", "[131, 168, 255, 290]"]], [131, 168, 255, 290], [131, 168, 255, 290], "openimages_v1.2/imgs/0353b5f2eb7bd145_Cucumber_Zucchini_Food_Tomato_Vegetable_Cabbage_Salad_13.jpg"]
|
42 |
+
[689, [["The cake looks really nice.", "yes."], ["Please pass me the red cake closest to us.", "[127, 444, 287, 607]"]], [128, 445, 288, 608], [127, 444, 287, 607], "openimages_v1.2/imgs/ffcce9c7edd86436_Dessert_Food_Baked goods_Cake_Pastry_18.jpg"]
|
43 |
+
[696, [["Please pass me the vass.", "which one do you want?"], ["The one in the middle of the second layer of the shelf.", "is it the one with the red cover?"], ["No. It is the white one with a red flower on it.", "is it the one on the far right?"], ["No. It is the one in the middle.", "okay, here you go."], ["Please make sure it is the one in the middle.", "[314, 335, 357, 387]"]], [267, 353, 343, 478], [314, 335, 357, 387], "openimages_v1.2/imgs/a0abaa5bf4b660d8_Teapot_Vase_Tableware_Jug_Furniture_8.jpg"]
|
44 |
+
[704, [["The latte art is so beautiful.", "yes."], ["I want the one in the right.", "[273, 41, 400, 120]"]], [273, 41, 400, 120], [273, 41, 400, 120], "openimages_v1.2/imgs/0e4bf084d6b8b360_Croissant_Coffee_Saucer_4.jpg"]
|
45 |
+
[727, [["Pass me the plant pot.", "which one?"], ["The one with purple flower.", "is it the one on the far left?"], ["No. It is the first one from the right", "[450, 365, 490, 397]"]], [510, 371, 547, 436], [450, 365, 490, 397], "openimages_v1.2/imgs/4bd65f1121e94380_Chair_Houseplant_House_Plant_Flower_Furniture_Flowerpot_5.jpg"]
|
46 |
+
[735, [["The dustbin is full.", "yes, there are many."], ["The one in the left is full.", "is it the one with the blue pattern?"], ["Yes, it is.", "[49, 278, 198, 462]"]], [49, 278, 198, 462], [49, 278, 198, 462], "openimages_v1.2/imgs/7ee6b53585ead951_Waste container_9.jpg"]
|
47 |
+
[740, [["I want to take a boat.", "which one do you want to rent?"], ["The one in yellow and blue.", "is it the one on the far right?"], ["No. It is the first one from the left.", "[93, 157, 521, 309]"]], [93, 157, 521, 309], [93, 157, 521, 309], "openimages_v1.2/imgs/c22e23202c31e85e_Boat_Barge_Watercraft_Vehicle_4.jpg"]
|
48 |
+
[747, [["The balloon is flying so high.", "which one are you talking about?"], ["The first one from the left.", "[25, 33, 187, 214]"]], [25, 33, 187, 214], [25, 33, 187, 214], "openimages_v1.2/imgs/23a0eb2232fe9e66_Balloon_Vehicle_4.jpg"]
|
49 |
+
[758, [["can you pass me the tree in middle?", "yes, which one do you want?"], ["it is in the middle.", "ok, here it is."], ["where?", "[191, 210, 337, 363]"]], [191, 210, 338, 363], [191, 210, 337, 363], "openimages_v1.2/imgs/f2246253917f75b5_Christmas tree_Tree_4.jpg"]
|
50 |
+
[758, [["can you pass me the tree in middle?", "yes, which one do you want?"], ["it is in the middle.", "ok, here it is."], ["where?", "[191, 210, 337, 363]"]], [191, 210, 338, 363], [191, 210, 337, 363], "openimages_v1.2/imgs/f2246253917f75b5_Christmas tree_Tree_4.jpg"]
|
51 |
+
[778, [["the fish is so cute.", "which one are you talking about?"], ["it is in the middle.", "is it the one with the head down?"], ["no, it is in the middle.", "[445, 181, 583, 234]"]], [97, 161, 357, 292], [445, 181, 583, 234], "openimages_v1.2/imgs/5d97d9b7b9995160_Goldfish_Fish_Animal_14.jpg"]
|
52 |
+
[778, [["the fish is so cute.", "which one are you talking about?"], ["it is in the middle.", "is it the one with the head down?"], ["no, it is in the middle.", "[445, 181, 583, 234]"]], [97, 161, 357, 292], [445, 181, 583, 234], "openimages_v1.2/imgs/5d97d9b7b9995160_Goldfish_Fish_Animal_14.jpg"]
|
53 |
+
[859, [["Can you pass me the biggest pumpkin?", "yes, which one do you want?"], ["the one on the left.", "ok, here you go."], ["show me.", "[6, 44, 245, 266]"]], [7, 44, 245, 267], [6, 44, 245, 266], "openimages_v1.2/imgs/6708c917862153ec_Squash (Plant)_Pumpkin_Fruit_4.jpg"]
|
54 |
+
[859, [["Can you pass me the biggest pumpkin?", "yes, which one do you want?"], ["the one on the left.", "ok, here you go."], ["show me.", "[6, 44, 245, 266]"]], [7, 44, 245, 267], [6, 44, 245, 266], "openimages_v1.2/imgs/6708c917862153ec_Squash (Plant)_Pumpkin_Fruit_4.jpg"]
|
55 |
+
[864, [["Can you pass me the taxi?", "which one are you talking about?"], ["it is in the middle.", "ok, here you go."], ["where?", "[199, 195, 367, 331]"]], [199, 195, 367, 331], [199, 195, 367, 331], "openimages_v1.2/imgs/13399741ea286868_Vehicle_Car_Taxi_4.jpg"]
|
56 |
+
[864, [["Can you pass me the taxi?", "which one are you talking about?"], ["it is in the middle.", "ok, here you go."], ["where?", "[199, 195, 367, 331]"]], [199, 195, 367, 331], [199, 195, 367, 331], "openimages_v1.2/imgs/13399741ea286868_Vehicle_Car_Taxi_4.jpg"]
|
57 |
+
[864, [["Can you pass me the taxi?", "which one are you talking about?"], ["it is in the middle.", "ok, here you go."], ["where?", "[199, 195, 367, 331]"]], [199, 195, 367, 331], [199, 195, 367, 331], "openimages_v1.2/imgs/13399741ea286868_Vehicle_Car_Taxi_4.jpg"]
|
58 |
+
[865, [["the cake is so cute.", "which one do you want?"], ["it is in top middle.", "is it the one with the cake on the left?"], ["it is in the middle.", "[189, 63, 274, 141]"], ["the right beside it.", "[266, 55, 360, 123]"]], [266, 55, 360, 123], [266, 55, 360, 123], "openimages_v1.2/imgs/008b40159ed22bdf_Food_Baked goods_Pastry_3.jpg"]
|
59 |
+
[865, [["the cake is so cute.", "which one do you want?"], ["it is in top middle.", "is it the one with the cake on the left?"], ["it is in the middle.", "[189, 63, 274, 141]"], ["the right beside it.", "[266, 55, 360, 123]"]], [266, 55, 360, 123], [266, 55, 360, 123], "openimages_v1.2/imgs/008b40159ed22bdf_Food_Baked goods_Pastry_3.jpg"]
|
60 |
+
[919, [["the deer is so cute.", "which one are you talking about?"], ["Behind the doll", "[327, 1, 479, 161]"]], [328, 1, 479, 161], [327, 1, 479, 161], "openimages_v1.2/imgs/9263a5fd06a1b5ca_Snowman_Toy_6.jpg"]
|
61 |
+
[919, [["the deer is so cute.", "which one are you talking about?"], ["Behind the doll", "[327, 1, 479, 161]"]], [328, 1, 479, 161], [327, 1, 479, 161], "openimages_v1.2/imgs/9263a5fd06a1b5ca_Snowman_Toy_6.jpg"]
|
62 |
+
[928, [["the flower is so lovely.", "which one are you talking about?"], ["the left one.", "[3, 128, 184, 310]"]], [3, 128, 184, 310], [3, 128, 184, 310], "openimages_v1.2/imgs/0fffd5362226f8d3_Flower_Plant_Rose_3.jpg"]
|
63 |
+
[928, [["the flower is so lovely.", "which one are you talking about?"], ["the left one.", "[3, 128, 184, 310]"]], [3, 128, 184, 310], [3, 128, 184, 310], "openimages_v1.2/imgs/0fffd5362226f8d3_Flower_Plant_Rose_3.jpg"]
|
64 |
+
[940, [["pass me the blue flag.", "which side?"], ["the blue one.", "is it the one on the far right?"], ["no, the first one from the left.", "[100, 141, 130, 251]"], ["no, the first one from the left.", "[100, 141, 130, 251]"]], [100, 142, 130, 251], [100, 141, 130, 251], "openimages_v1.2/imgs/009c68ec28f32e34_Flag_3.jpg"]
|
65 |
+
[940, [["pass me the blue flag.", "which side?"], ["the blue one.", "is it the one on the far right?"], ["no, the first one from the left.", "[100, 141, 130, 251]"], ["no, the first one from the left.", "[100, 141, 130, 251]"]], [100, 142, 130, 251], [100, 141, 130, 251], "openimages_v1.2/imgs/009c68ec28f32e34_Flag_3.jpg"]
|
66 |
+
[1034, [["the doll is so cute.", "which one?"], ["the left one.", "is it the one with the head down?"], [" it is green.", "[125, 96, 280, 381]"]], [125, 96, 280, 381], [125, 96, 280, 381], "openimages_v1.2/imgs/d6d4e0e131db09c9_Toy_4.jpg"]
|
67 |
+
[1034, [["the doll is so cute.", "which one?"], ["the left one.", "is it the one with the head down?"], [" it is green.", "[125, 96, 280, 381]"]], [125, 96, 280, 381], [125, 96, 280, 381], "openimages_v1.2/imgs/d6d4e0e131db09c9_Toy_4.jpg"]
|
68 |
+
[1098, [["pass me the flower pot.", "which one?"], ["The second one on the right.", "[527, 187, 575, 233]"]], [527, 187, 576, 233], [527, 187, 575, 233], "openimages_v1.2/imgs/2dbb4a546d107596_Flower_Street light_Houseplant_Plant_Tree_Flowerpot_4.jpg"]
|
69 |
+
[1098, [["pass me the flower pot.", "which one?"], ["The second one on the right.", "[527, 187, 575, 233]"]], [527, 187, 576, 233], [527, 187, 575, 233], "openimages_v1.2/imgs/2dbb4a546d107596_Flower_Street light_Houseplant_Plant_Tree_Flowerpot_4.jpg"]
|
70 |
+
[1116, [["pass me the Squirrel doll.", "which one do you want?"], ["the cloest one.", "is it the one on the far right?"], ["no, the nearest one.", "[193, 362, 407, 555]"]], [194, 362, 408, 556], [193, 362, 407, 555], "openimages_v1.2/imgs/5cdad082e579d5cd_Toy_5.jpg"]
|
71 |
+
[1116, [["pass me the Squirrel doll.", "which one do you want?"], ["the cloest one.", "is it the one on the far right?"], ["no, the nearest one.", "[193, 362, 407, 555]"]], [194, 362, 408, 556], [193, 362, 407, 555], "openimages_v1.2/imgs/5cdad082e579d5cd_Toy_5.jpg"]
|
72 |
+
[1130, [["pass me the flower.", "which one?"], ["the right one.", "the one on the far right?"], ["yes.", "[546, 246, 608, 305]"]], [546, 246, 608, 305], [546, 246, 608, 305], "openimages_v1.2/imgs/e47152d5fc91ef57_Plant_Rose_Flower_3.jpg"]
|
73 |
+
[1130, [["pass me the flower.", "which one?"], ["the right one.", "the one on the far right?"], ["yes.", "[546, 246, 608, 305]"]], [546, 246, 608, 305], [546, 246, 608, 305], "openimages_v1.2/imgs/e47152d5fc91ef57_Plant_Rose_Flower_3.jpg"]
|
74 |
+
[1130, [["pass me the flower.", "which one?"], ["the right one.", "the one on the far right?"], ["yes.", "[546, 246, 608, 305]"]], [546, 246, 608, 305], [546, 246, 608, 305], "openimages_v1.2/imgs/e47152d5fc91ef57_Plant_Rose_Flower_3.jpg"]
|
75 |
+
[1130, [["pass me the flower.", "which one?"], ["the right one.", "the one on the far right?"], ["yes.", "[546, 246, 608, 305]"]], [546, 246, 608, 305], [546, 246, 608, 305], "openimages_v1.2/imgs/e47152d5fc91ef57_Plant_Rose_Flower_3.jpg"]
|
76 |
+
[1149, [["can you pass me the pumpkin?", "which one do you want?"], ["the small one on the flowerpot.", "is it the one on the far right?"], ["no, it is on the flowerpot.", "[264, 295, 323, 328]"]], [264, 295, 323, 328], [264, 295, 323, 328], "openimages_v1.2/imgs/0190d205a5b14b41_Fruit_Houseplant_Plant_Pumpkin_Squash (Plant)_Vegetable_Flowerpot_2.jpg"]
|
77 |
+
[1149, [["can you pass me the pumpkin?", "which one do you want?"], ["the small one on the flowerpot.", "is it the one on the far right?"], ["no, it is on the flowerpot.", "[264, 295, 323, 328]"]], [264, 295, 323, 328], [264, 295, 323, 328], "openimages_v1.2/imgs/0190d205a5b14b41_Fruit_Houseplant_Plant_Pumpkin_Squash (Plant)_Vegetable_Flowerpot_2.jpg"]
|
78 |
+
[1149, [["can you pass me the pumpkin?", "which one do you want?"], ["the small one on the flowerpot.", "is it the one on the far right?"], ["no, it is on the flowerpot.", "[264, 295, 323, 328]"]], [264, 295, 323, 328], [264, 295, 323, 328], "openimages_v1.2/imgs/0190d205a5b14b41_Fruit_Houseplant_Plant_Pumpkin_Squash (Plant)_Vegetable_Flowerpot_2.jpg"]
|
79 |
+
[1149, [["can you pass me the pumpkin?", "which one do you want?"], ["the small one on the flowerpot.", "is it the one on the far right?"], ["no, it is on the flowerpot.", "[264, 295, 323, 328]"]], [264, 295, 323, 328], [264, 295, 323, 328], "openimages_v1.2/imgs/0190d205a5b14b41_Fruit_Houseplant_Plant_Pumpkin_Squash (Plant)_Vegetable_Flowerpot_2.jpg"]
|
80 |
+
[1149, [["can you pass me the pumpkin?", "which one do you want?"], ["the small one on the flowerpot.", "is it the one on the far right?"], ["no, it is on the flowerpot.", "[264, 295, 323, 328]"]], [264, 295, 323, 328], [264, 295, 323, 328], "openimages_v1.2/imgs/0190d205a5b14b41_Fruit_Houseplant_Plant_Pumpkin_Squash (Plant)_Vegetable_Flowerpot_2.jpg"]
|
81 |
+
[1182, [["pass me the orange, please.", "which one do you want?"], ["The rightmost one.", "is it the one on the far right?"], ["yes.", "[505, 213, 568, 267]"]], [506, 213, 568, 267], [505, 213, 568, 267], "openimages_v1.2/imgs/88e245ce0e6fdd75_Food_Fruit_Orange_6.jpg"]
|
82 |
+
[1182, [["pass me the orange, please.", "which one do you want?"], ["The rightmost one.", "is it the one on the far right?"], ["yes.", "[505, 213, 568, 267]"]], [506, 213, 568, 267], [505, 213, 568, 267], "openimages_v1.2/imgs/88e245ce0e6fdd75_Food_Fruit_Orange_6.jpg"]
|
83 |
+
[1182, [["pass me the orange, please.", "which one do you want?"], ["The rightmost one.", "is it the one on the far right?"], ["yes.", "[505, 213, 568, 267]"]], [506, 213, 568, 267], [505, 213, 568, 267], "openimages_v1.2/imgs/88e245ce0e6fdd75_Food_Fruit_Orange_6.jpg"]
|
84 |
+
[1182, [["pass me the orange, please.", "which one do you want?"], ["The rightmost one.", "is it the one on the far right?"], ["yes.", "[505, 213, 568, 267]"]], [506, 213, 568, 267], [505, 213, 568, 267], "openimages_v1.2/imgs/88e245ce0e6fdd75_Food_Fruit_Orange_6.jpg"]
|
85 |
+
[1182, [["pass me the orange, please.", "which one do you want?"], ["The rightmost one.", "is it the one on the far right?"], ["yes.", "[505, 213, 568, 267]"]], [506, 213, 568, 267], [505, 213, 568, 267], "openimages_v1.2/imgs/88e245ce0e6fdd75_Food_Fruit_Orange_6.jpg"]
|
86 |
+
[1182, [["pass me the orange, please.", "which one do you want?"], ["The rightmost one.", "is it the one on the far right?"], ["yes.", "[505, 213, 568, 267]"]], [506, 213, 568, 267], [505, 213, 568, 267], "openimages_v1.2/imgs/88e245ce0e6fdd75_Food_Fruit_Orange_6.jpg"]
|
87 |
+
[775, [["can you pass me the plate?", "which one do you want?"], ["Upper left corner.", "is it the one on the left?"], ["no, it is the second one.", "ok, here you go."], ["where?", "[33, 5, 132, 63]"]], [39, 0, 134, 73], [33, 5, 132, 63], "openimages_v1.2/imgs/318a8bb8339dde66_Candle_Tableware_Plate_Platter_Serving tray_4.jpg"]
|
88 |
+
[775, [["can you pass me the plate?", "which one do you want?"], ["Upper left corner.", "is it the one on the left?"], ["no, it is the second one.", "ok, here you go."], ["where?", "[33, 5, 132, 63]"]], [39, 0, 134, 73], [33, 5, 132, 63], "openimages_v1.2/imgs/318a8bb8339dde66_Candle_Tableware_Plate_Platter_Serving tray_4.jpg"]
|
89 |
+
[1206, [["pass me a flower", "which one do you want?"], ["The upper red one.", "is it the one on the far right?"], ["yes", "[341, 0, 515, 192]"]], [341, 0, 515, 192], [341, 0, 515, 192], "openimages_v1.2/imgs/a64572ad63e1c1ab_Rose_Flower_3.jpg"]
|
90 |
+
[1206, [["pass me a flower", "which one do you want?"], ["The upper red one.", "is it the one on the far right?"], ["yes", "[341, 0, 515, 192]"]], [341, 0, 515, 192], [341, 0, 515, 192], "openimages_v1.2/imgs/a64572ad63e1c1ab_Rose_Flower_3.jpg"]
|
91 |
+
[1209, [["Pass me a cake.", "which one do you want?"], ["The one with a \u201cR\u201d", "is it the one on the far right?"], ["Yes", "[404, 95, 597, 302]"]], [413, 102, 589, 269], [404, 95, 597, 302], "openimages_v1.2/imgs/5c7b9a2eec5ff0ae_Muffin_Dessert_Food_Baked goods_7.jpg"]
|
92 |
+
[1232, [["Pass me a cake.", "which one do you want?"], ["The one with a \u201cR\u201d", "is it the one on the far right?"], ["Yes", "[404, 95, 597, 302]"]], [413, 102, 589, 269], [404, 95, 597, 302], "openimages_v1.2/imgs/5c7b9a2eec5ff0ae_Muffin_Dessert_Food_Baked goods_7.jpg"]
|
93 |
+
[1232, [["Give me a bottle.", "which one do you want?"], ["The upest, from left, the forth.", "[162, 12, 204, 213]"]], [244, 13, 293, 213], [162, 12, 204, 213], "openimages_v1.2/imgs/a211016f402b956b_Bottle_Building_Wine_7.jpg"]
|
94 |
+
[1236, [["give me a pillow", "which one do you want?"], ["The frontmost one.", "is it the one on the left?"], ["No.", "[339, 210, 470, 275]"]], [339, 210, 471, 275], [339, 210, 470, 275], "openimages_v1.2/imgs/2992bde1d85a7162_Nightstand_Pillow_6.jpg"]
|
95 |
+
[1266, [["strawberry", "yes, which one do you want?"], ["The whole one.", "the one on the far right?"], ["No\uff0cthe middle", "[185, 64, 287, 222]"]], [185, 64, 287, 222], [185, 64, 287, 222], "openimages_v1.2/imgs/8a61c3b92dffeb3e_Dessert_Dairy Product_Food_Fruit_Baked goods_Strawberry_Cake_10.jpg"]
|
96 |
+
[1287, [["The green pants", "which one are you talking about?"], ["The one who wears green pants", "[527, 275, 580, 346]"]], [527, 276, 580, 346], [527, 275, 580, 346], "openimages_v1.2/imgs/0c22601eb7370faa_Trousers_4.jpg"]
|
97 |
+
[1301, [["goblet", "which glass?"], ["I think there is only one glass", "is it the first glass from the left?"], ["yes", "[138, 171, 231, 272]"]], [139, 171, 231, 272], [138, 171, 231, 272], "openimages_v1.2/imgs/b132c5bc9f77cb7f_Cocktail_Drink_Food_Fruit_Tableware_Strawberry_Wine_Wine glass_3.jpg"]
|
98 |
+
[1309, [["The painting", "which one?"], ["The leftmost", "is it the one on the far left?"], ["yes", "[0, 100, 63, 289]"]], [0, 100, 63, 289], [0, 100, 63, 289], "openimages_v1.2/imgs/00225def00d94d8b_Picture frame_Furniture_13.jpg"]
|
99 |
+
[1330, [["pass me a tomato", "which one do you want?"], ["The one in lefttop", "[37, 145, 196, 309]"]], [37, 145, 196, 309], [37, 145, 196, 309], "openimages_v1.2/imgs/4e5506fb9ab3dc08_Fruit_Plant_Tomato_Vegetable_8.jpg"]
|
100 |
+
[1354, [["The purple flowerpot", "yes, it\u2019s the one on the left of the flower pot"], ["yes", "[49, 274, 140, 360]"]], [85, 154, 162, 234], [49, 274, 140, 360], "openimages_v1.2/imgs/08e6bf832016e29e_Houseplant_Plant_Flower_Flowerpot_11.jpg"]
|
101 |
+
[1372, [["I want a red apple", "there are many here, which one do you want"], ["The red in rightmost", "[497, 339, 597, 419]"]], [582, 342, 639, 414], [497, 339, 597, 419], "openimages_v1.2/imgs/7d5be279c905b3fa_Apple_Food_Fruit_Orange_Grapefruit_11.jpg"]
|
102 |
+
[1385, [["the cow", "which one?"], ["the one on the right side of the tree", "is it the one on the far right?"], ["NO", "[537, 321, 620, 344]"]], [126, 257, 245, 330], [537, 321, 620, 344], "openimages_v1.2/imgs/0e9f655b0a190ab1_Cattle_Plant_Tree_Animal_13.jpg"]
|
103 |
+
[1329, [["give me the hot air balloon on the leftmost", "is it the first one from the left?"], ["yes", "[0, 219, 131, 426]"]], [0, 219, 131, 426], [0, 219, 131, 426], "openimages_v1.2/imgs/521d913963c3b47c_Balloon_Vehicle_Aircraft_3.jpg"]
|
104 |
+
[1429, [["the pink towel ", "yes."], ["give it to me", "ok, here you go."], ["label it", "[0, 0, 639, 115]"]], [0, 294, 639, 423], [0, 0, 639, 115], "openimages_v1.2/imgs/613305c0a39b4c72_Towel_3.jpg"]
|
105 |
+
[1436, [["give me the rightmost penguin", "is it the first one from the left?"], ["No", "[440, 141, 511, 420]"]], [440, 142, 511, 420], [440, 141, 511, 420], "openimages_v1.2/imgs/3868829d6119e6ea_Bird_Penguin_Animal_3.jpg"]
|
106 |
+
[1437, [["give me the flowerpot", "is it the one on the left?"], ["No", "is it the one on the far right?"], ["No", "[0, 372, 73, 639]"]], [148, 438, 376, 639], [0, 372, 73, 639], "openimages_v1.2/imgs/23bd2f5d5d520cd1_Houseplant_Plant_Flowerpot_3.jpg"]
|
107 |
+
[1992, [["give me the left black flower pot.", "is it the one on the left?"], ["yes", "[10, 519, 113, 612]"]], [60, 565, 105, 619], [10, 519, 113, 612], "openimages_v1.2/imgs/02a344aac1eead08_House_Tree_Building_Houseplant_Plant_Flower_Flowerpot_3.jpg"]
|
108 |
+
[1960, [["pass me the most right flower pot.", "is it the one on the far right?"], ["yes", "[438, 353, 518, 434]"]], [438, 353, 518, 434], [438, 353, 518, 434], "openimages_v1.2/imgs/6baf9ff6fd56c7c6_Houseplant_Plant_Flowerpot_4.jpg"]
|
109 |
+
[1918, [["the red cake with 2 eyes.", "which one?"], ["right one", "[332, 307, 592, 545]"]], [332, 308, 592, 546], [332, 307, 592, 545], "openimages_v1.2/imgs/fcc713fe37b82bdc_Toy_Snack_Dessert_Food_Baked goods_12.jpg"]
|
110 |
+
[1888, [["Give me the bottom slice of cucumber", "is it the first slice from the left in the first row"], ["the first slice from right", "[430, 300, 537, 376]"]], [358, 376, 493, 479], [430, 300, 537, 376], "openimages_v1.2/imgs/81303291b41e898d_Cucumber_3.jpg"]
|
111 |
+
[1827, [["give me the notebook", "is it the one on the far right?"], ["no. it is on the left.", "[0, 75, 279, 373]"]], [0, 75, 279, 374], [0, 75, 279, 373], "openimages_v1.2/imgs/7d930e70cd3b11a7_Laptop_Computer keyboard_Book_1.jpg"]
|
112 |
+
[1813, [["Give me the largest West Blue Flower", "is it the one on the far right?"], ["no, it is in the middle.", "[251, 223, 495, 403]"]], [251, 223, 495, 403], [251, 223, 495, 403], "openimages_v1.2/imgs/2f515c715d155747_Toy_Food_Broccoli_2.jpg"]
|
113 |
+
[1813, [["Give me the largest West Blue Flower", "is it the one on the far right?"], ["no, it is in the middle.", "[251, 223, 495, 403]"]], [251, 223, 495, 403], [251, 223, 495, 403], "openimages_v1.2/imgs/2f515c715d155747_Toy_Food_Broccoli_2.jpg"]
|
114 |
+
[1780, [["Give me the leftmost tape", "is it the first one from the left?"], ["yes", "[31, 53, 117, 207]"]], [31, 53, 117, 207], [31, 53, 117, 207], "openimages_v1.2/imgs/638c7052acd8ca03_Adhesive tape_4.jpg"]
|
115 |
+
[1774, [["give me the nearest boxes.", "is it the one on the left?"], ["no, it is in the middle.", "is it the one with the red box on the left?"], ["yes", "[146, 158, 350, 294]"]], [249, 231, 497, 370], [146, 158, 350, 294], "openimages_v1.2/imgs/0ae1f7df4aeca2f9_Box_Table_8.jpg"]
|
116 |
+
[1764, [["Pick the flower bud at the bottom for me.", "which one?"], ["the flower bud at the bottom", "is it the one on the far right?"], ["yes", "[376, 259, 443, 350]"]], [376, 259, 443, 350], [376, 259, 443, 350], "openimages_v1.2/imgs/0c875a717a0c0515_Pomegranate_3.jpg"]
|
117 |
+
[1751, [["the most left cat.", "ok, i see."], ["go", "[93, 27, 301, 140]"]], [94, 28, 301, 140], [93, 27, 301, 140], "openimages_v1.2/imgs/2b7a258daaeb3636_Carnivore_Cat_Animal_4.jpg"]
|
118 |
+
[1442, [["the bird is beautiful.", "which one are you talking about?"], ["in the middle.", "is it the one with the head facing us?"], ["yes.", "[179, 165, 206, 197]"]], [558, 0, 587, 32], [179, 165, 206, 197], "openimages_v1.2/imgs/4e1503114e8c097f_Bird_Duck_Animal_4.jpg"]
|
119 |
+
[1494, [["give me the donut.", "which one do you want?"], ["the yellow.", "is it the one on the left?"], ["yes.", "[199, 205, 410, 433]"]], [364, 165, 579, 393], [199, 205, 410, 433], "openimages_v1.2/imgs/6b68fc10e440a1a8_Food_Bagel_Snack_Dessert_Baked goods_Doughnut_3.jpg"]
|
120 |
+
[1508, [["wihch tomato is biggest?", "yes."], ["which tomato is biggest?", "[169, 138, 354, 327]"]], [169, 138, 354, 327], [169, 138, 354, 327], "openimages_v1.2/imgs/bdc3d2983ffc0a46_Food_Tomato_Vegetable_19.jpg"]
|
121 |
+
[1516, [["give me the red cake.", "is it the one on the left?"], ["no, on the right.", "is it the one on the right?"], ["yes.", "[334, 144, 364, 166]"]], [236, 278, 325, 377], [334, 144, 364, 166], "openimages_v1.2/imgs/17da62a9aa6e99f6_Food_Fast food_Teapot_Person_Dessert_Kettle_Tableware_Human head_Baked goods_Cake_Pastry_5.jpg"]
|
122 |
+
[1518, [["the cow is so big.", "which one are you talking about?"], ["in the middle.", "is it the one with the head facing us?"], ["yes.", "[154, 93, 582, 359]"]], [595, 122, 638, 232], [154, 93, 582, 359], "openimages_v1.2/imgs/8a5009673e492c9d_Cattle_Plant_Animal_8.jpg"]
|
123 |
+
[1554, [["give me the flag.", "which side do you want?"], ["right.", "is it the first side from the right?"], ["yes.", "[485, 8, 507, 166]"]], [156, 48, 218, 340], [485, 8, 507, 166], "openimages_v1.2/imgs/029bb5332af019f2_Flag_5.jpg"]
|
124 |
+
[1563, [["choose the biggest flag.", "is it the first flag from the left?"], ["no.", "is it the first flag from the left?"], ["no, in the middle.", "[290, 164, 343, 212]"]], [197, 252, 228, 284], [290, 164, 343, 212], "openimages_v1.2/imgs/c3441f8e9cda4470_Flag_43.jpg"]
|
125 |
+
[1583, [["it is delicious.", "which one do you want?"], ["right two.", "[259, 211, 381, 270]"]], [259, 211, 382, 270], [259, 211, 381, 270], "openimages_v1.2/imgs/9c3ca6d64e55c775_Dessert_Food_Fruit_Baked goods_Strawberry_Cake_5.jpg"]
|
126 |
+
[1650, [["give me the bowl.", "which one do you want?"], ["the blue.", "is it the one on the far right?"], ["yes.", "[580, 33, 639, 111]"]], [432, 38, 624, 186], [580, 33, 639, 111], "openimages_v1.2/imgs/6fd23a979a53f388_Chopsticks_Drink_Tableware_Bowl_8.jpg"]
|
127 |
+
[1652, [["give it to me.", "what do you want?"], ["the green one.", "the one on the far right?"], ["no, on the left.", "[0, 104, 311, 323]"]], [436, 147, 639, 429], [0, 104, 311, 323], "openimages_v1.2/imgs/8028f73fd4273898_Bus_Toy_Land vehicle_Vehicle_3.jpg"]
|
128 |
+
[1657, [["give me the dish.", "which one?"], ["right.", "is there a spoon on it?"], ["yes.", "[427, 182, 505, 261]"]], [272, 253, 428, 305], [427, 182, 505, 261], "openimages_v1.2/imgs/17b9486db8f43d5f_Vase_Houseplant_Mixing bowl_Table_Tableware_Bowl_Plate_Plant_Jug_Furniture_Flowerpot_5.jpg"]
|
129 |
+
[1708, [["what is in the picture?", "i see."], ["what is in the picture?", "[250, 194, 445, 359]"]], [66, 180, 179, 308], [250, 194, 445, 359], "openimages_v1.2/imgs/3d259b911bfc5069_Houseplant_Plant_Flowerpot_4.jpg"]
|
130 |
+
[1735, [["the dog is cute.", "which one are you talking about?"], ["on the right.", "is it the one with the head down?"], ["yes.", "[266, 6, 565, 484]"]], [40, 166, 274, 310], [266, 6, 565, 484], "openimages_v1.2/imgs/71da951cd1ea96c5_Carnivore_Dog_Animal_3.jpg"]
|
benchmark/invig_grounding/huge-main-0.7630/20230512-181257.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
benchmark/invig_grounding/huge-main-0.7740/20230512-182551.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
benchmark/invig_grounding/invig-large/20230514-043824.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
benchmark/invig_grounding/invig-large/20230516-090623.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
benchmark/invig_grounding/invig-large/20230516-091634.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
benchmark/invig_grounding/invig-large/20230516-113956.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
benchmark/invig_grounding/invig-large/20230516-141445.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
benchmark/invig_grounding/invig-large/20230516-165140.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
benchmark/invig_grounding/invig-m-dialog/20230513-033645.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|