File size: 4,771 Bytes
1aecc62
 
 
 
 
 
 
0982b74
1aecc62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28b8f0a
9145e51
1aecc62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c97e71d
 
 
 
 
1aecc62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9145e51
 
1aecc62
 
 
 
 
8d96e4a
 
1aecc62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import math
from datasets import load_dataset
import gradio as gr
import os

# Hugging Face access token taken from the environment (None when HF_TOKEN is unset).
auth_token = os.getenv("HF_TOKEN")

# Only the 'test' split of the dataset is used by this app.
# NOTE(review): trust_remote_code=True presumably allows the dataset repository's
# own loading script to run locally -- confirm the repository is trusted.
Visual_Riddles = load_dataset(
    "nitzanguetta/Visual_Riddles",
    token=auth_token,
    trust_remote_code=True,
)['test']

# Number of examples available for paging.
dataset_size = len(Visual_Riddles)

# Names of the dataset columns this app refers to (presumably matching the
# feature keys of the loaded dataset -- verify against its schema).

# Column holding the picture itself.
IMAGE = "Image"

# Riddle text and its supporting fields.
QUESTION = "Question"
ANSWER = "Answer"
CAPTION = "Image caption"
PROMPT = "Prompt"
MODEL_NAME = "Model name"
HINT = "Hint"
ATTRIBUTION = "Attribution"

# Descriptive metadata about each riddle.
DLI = "Difficulty Level Index"
CATEGORY = "Category"
DESIGNER = "Designer"


# Columns rendered on their own (just the image); all remaining dataset
# columns are collected separately, in the dataset's own column order.
left_side_columns = [IMAGE]
right_side_columns = list(
    filter(
        lambda name: name not in left_side_columns,
        Visual_Riddles.features.keys(),
    )
)
# The file-name column is never displayed; list.remove raises ValueError if
# the dataset does not contain it.
right_side_columns.remove('Image file name')
# Emoji decoration appended to a component's label, keyed by column name.
# NOTE(review): most of these literals had been stored as mis-decoded UTF-8
# (mojibake such as "πŸ’»"); they are restored here to the emoji their byte
# sequences encode.  The difficulty icon is assumed to be the thermometer
# (it appeared as a new-moon glyph followed by a variation selector) --
# confirm all icons against the intended design.
emoji_to_label = {
    IMAGE: '🎨, 🧑‍🎨, 💻',
    ANSWER: '💡, 🤔, 🧑‍🎨',
    QUESTION: '❓, 🤔, 💡',
    CATEGORY: '🤔, 📚, 💡',
    CAPTION: '📝, 👌, 💬',
    PROMPT: '📝, 💻',
    MODEL_NAME: '🎨, 💻',
    HINT: '🤔, 🔍',
    ATTRIBUTION: '🔍, 📄',
    DLI: "🌡️, 🤔, 🎯",
    DESIGNER: "🧑‍🎨",
}
# Number of examples shown per slider page (16 was tried previously).
batch_size = 8

# Size every image is resized to before it is displayed.
target_size = (
    1024,
    1024,
)


def func(index):
    """Return the component values for one page of the viewer.

    Args:
        index: Zero-based page number coming from the slider.  It is coerced
            with ``int`` so that a float such as ``2.0`` is accepted.

    Returns:
        A flat list with one value per output component: for each of the
        ``batch_size`` slots on the page, the values produced by
        ``get_instance_values`` in column order.  Slots that fall outside the
        dataset (a short last page, or a page past the end) are filled with
        ``None`` so the list always has the same length.
    """
    start_index = int(index) * batch_size
    # Every example contributes one value per displayed column.
    values_per_example = len(left_side_columns) + len(right_side_columns)

    values_lst = []
    for row in range(start_index, start_index + batch_size):
        if 0 <= row < dataset_size:
            values_lst += get_instance_values(Visual_Riddles[row])
        else:
            # No example for this slot: send None for each of its components
            # instead of indexing past the end of the dataset.
            values_lst += [None] * values_per_example
    return values_lst


def get_instance_values(example):
    """Collect the display values of one example, in column order.

    The order is ``left_side_columns`` followed by ``right_side_columns``.
    The image column is returned resized to ``target_size``; every other
    column is returned exactly as stored in ``example``.
    """

    def _display_value(column):
        # Only the image needs preprocessing before it is shown.
        if column == IMAGE:
            return example["Image"].resize(target_size)
        return example[column]

    ordered_columns = left_side_columns + right_side_columns
    return [_display_value(column) for column in ordered_columns]
def list_to_string(lst):
    """Render the items of *lst* as a 1-based numbered list, one item per line."""
    numbered_lines = []
    for position, item in enumerate(lst, start=1):
        numbered_lines.append(f"{position}. {item}")
    return '\n'.join(numbered_lines)

# Root Gradio Blocks container; the layout is declared under `with demo:`
# further down and the app is started with demo.launch().
demo = gr.Blocks()


def get_col(example):
    """Build the Gradio column that displays one dataset example.

    The column holds the components for ``left_side_columns`` (the image),
    followed by a collapsed "Click for details" accordion containing one
    text box per entry of ``right_side_columns``.

    Args:
        example: One dataset record, indexable by column name.

    Returns:
        A ``(inputs_left, text_inputs_right)`` tuple: the components created
        for the left-side columns and the text boxes created for the
        right-side columns, each in column order.
    """
    chars_per_line = 50  # rough line width used to size the text boxes

    instance_values = get_instance_values(example)
    split_at = len(left_side_columns)
    left_values = instance_values[:split_at]
    right_values = instance_values[split_at:]

    with gr.Column():
        inputs_left = []
        for key, value in zip(left_side_columns, left_values):
            if key == IMAGE:
                # get_instance_values already returned the image resized to
                # target_size, so it is reused instead of resizing again.
                input_k = gr.Image(value=value)
            else:
                label = key.capitalize().replace("_", " ")
                input_k = gr.Textbox(value=value, label=f"{label} {emoji_to_label[key]}")
            inputs_left.append(input_k)
        with gr.Accordion("Click for details", open=False):
            text_inputs_right = []
            for key, value in zip(right_side_columns, right_values):
                label = key.capitalize().replace("_", " ")
                # Measure the textual form so that None or non-string values
                # (e.g. numbers) do not break the height estimate.
                text = "" if value is None else str(value)
                # Ceiling division, with a minimum height of one line.
                num_lines = max(1, -(-len(text) // chars_per_line))
                text_input_k = gr.Textbox(value=value, label=f"{label} {emoji_to_label[key]}", lines=num_lines)
                text_inputs_right.append(text_input_k)
    return inputs_left, text_inputs_right


# Declare the page: a title, a page slider, and one column of components per
# example slot.  The components are created once, filled with the first page,
# and afterwards only their values are replaced by func().
with demo:
    gr.Markdown("# Slide to iterate Visual Riddles")

    with gr.Column():
        num_batches = math.ceil(dataset_size / batch_size)
        # Pages are zero-based, so the last valid page is num_batches - 1;
        # allowing num_batches itself would request rows past the end of the
        # dataset.
        last_page = max(num_batches - 1, 0)
        slider = gr.Slider(minimum=0, maximum=last_page, step=1, label=f'Page (out of {num_batches})')
        with gr.Row():
            # Flat list of every component, in the same order func() returns
            # its values.
            all_inputs_left_right = []
            for row in range(batch_size):
                inputs_left, text_inputs_right = get_col(Visual_Riddles[row])
                all_inputs_left_right += inputs_left + text_inputs_right

    # Moving the slider recomputes the values of all components for that page.
    slider.change(func, inputs=[slider], outputs=all_inputs_left_right)

demo.launch()