import gradio as gr
from transformers import AutoProcessor, Idefics2ForConditionalGeneration
import torch
import spaces
import subprocess
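# Install flash-attn at startup; FLASH_ATTENTION_SKIP_CUDA_BUILD skips compiling the CUDA kernels during install.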
subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)


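# Load the IDEFICS2-8B processor and model; weights are kept in bfloat16 on the GPU.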
processor = AutoProcessor.from_pretrained("HuggingFaceM4/idefics2-8b")

model = Idefics2ForConditionalGeneration.from_pretrained(
        "HuggingFaceM4/idefics2-8b",
        torch_dtype=torch.bfloat16,
        #_attn_implementation="flash_attention_2",
        trust_remote_code=True).to("cuda")

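# Request a ZeroGPU allocation of up to 180 seconds per call (Hugging Face Spaces).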
@spaces.GPU(duration=180)
def model_inference(
    image, text, decoding_strategy, temperature,
    max_new_tokens, repetition_penalty, top_p
):
    if text == "" and not image:
        raise gr.Error("Please input a query and optionally an image.")

    if text == "" and image:
        raise gr.Error("Please input a text query along with the image(s).")

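    # Wrap the query in a single-turn chat message: an image placeholder followed by the text.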
    resulting_messages = [
        {
            "role": "user",
            "content": [{"type": "image"}, {"type": "text", "text": text}],
        }
    ]


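    # Render the chat template into a prompt string, then tokenize the text and image together.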
    prompt = processor.apply_chat_template(resulting_messages, add_generation_prompt=True)
    inputs = processor(text=prompt, images=[image], return_tensors="pt")
    inputs = {k: v.to("cuda") for k, v in inputs.items()}

    generation_args = {
        "max_new_tokens": max_new_tokens,
        "repetition_penalty": repetition_penalty,
    }

    assert decoding_strategy in [
        "Greedy",
        "Top P Sampling",
    ]
    if decoding_strategy == "Greedy":
        generation_args["do_sample"] = False
    elif decoding_strategy == "Top P Sampling":
        generation_args["temperature"] = temperature
        generation_args["do_sample"] = True
        generation_args["top_p"] = top_p


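    # Merge the tokenized inputs into the generation kwargs.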
    generation_args.update(inputs)

    # Generate
    generated_ids = model.generate(**generation_args)

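    # Decode only the newly generated tokens, skipping the prompt portion.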
    generated_texts = processor.batch_decode(generated_ids[:, generation_args["input_ids"].size(1):], skip_special_tokens=True)
    print("INPUT:", prompt, "|OUTPUT:", generated_texts)
    return generated_texts[0]


with gr.Blocks(fill_height=True) as demo:
    gr.Markdown("## IDEFICS2 Instruction 🐶")
    gr.Markdown("Play with [IDEFICS2-8B](https://huggingface.co/HuggingFaceM4/idefics2-8b) in this demo. To get started, upload an image and text or try one of the examples.")
    gr.Markdown("**Important note**: This model is not made for chatting, the chatty IDEFICS2 will be released in the upcoming days. **This model is very strong on various tasks, including visual question answering, document retrieval and more, you can see it through the examples.**")
    gr.Markdown("Learn more about IDEFICS2 in this [blog post](https://huggingface.co/blog/idefics2).")


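    # Main input/output widgets.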
    with gr.Column():
        image_input = gr.Image(label="Upload your Image", type="pil")
        query_input = gr.Textbox(label="Prompt")
        submit_btn = gr.Button("Submit")
        output = gr.Textbox(label="Output")

    with gr.Accordion(label="Example Inputs and Advanced Generation Parameters"):
        examples = [["./example_images/docvqa_example.png", "How many items are sold?", "Greedy", 0.4, 512, 1.2, 0.8],
                    ["./example_images/example_images_travel_tips.jpg", "I want to go somewhere similar to the one in the photo. Give me destinations and travel tips.", "Greedy", 0.4, 512, 1.2, 0.8],
                    ["./example_images/baklava.png", "Where is this pastry from?", "Greedy", 0.4, 512, 1.2, 0.8],
                    ["./example_images/dummy_pdf.png", "What percentage is the order status?", "Greedy", 0.4, 512, 1.2, 0.8],
                    ["./example_images/art_critic.png", "As an art critic AI assistant, could you describe this painting in detail and write a thorough critique?", "Greedy", 0.4, 512, 1.2, 0.8],
                    ["./example_images/s2w_example.png", "What is this UI about?", "Greedy", 0.4, 512, 1.2, 0.8]]

        # Hyper-parameters for generation
        max_new_tokens = gr.Slider(
              minimum=8,
              maximum=1024,
              value=512,
              step=1,
              interactive=True,
              label="Maximum number of new tokens to generate",
          )
        repetition_penalty = gr.Slider(
              minimum=0.01,
              maximum=5.0,
              value=1.2,
              step=0.01,
              interactive=True,
              label="Repetition penalty",
              info="1.0 is equivalent to no penalty",
          )
        temperature = gr.Slider(
              minimum=0.0,
              maximum=5.0,
              value=0.4,
              step=0.1,
              interactive=True,
              label="Sampling temperature",
              info="Higher values will produce more diverse outputs.",
          )
        top_p = gr.Slider(
              minimum=0.01,
              maximum=0.99,
              value=0.8,
              step=0.01,
              interactive=True,
              label="Top P",
              info="Higher values is equivalent to sampling more low-probability tokens.",
          )
        decoding_strategy = gr.Radio(
              [
                  "Greedy",
                  "Top P Sampling",
              ],
              value="Greedy",
              label="Decoding strategy",
              interactive=True,
              info="Higher values is equivalent to sampling more low-probability tokens.",
          )
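        # Show or hide the sampling sliders depending on the selected decoding strategy.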
        decoding_strategy.change(
              fn=lambda selection: gr.Slider(visible=(selection in ["Top P Sampling"])),
              inputs=decoding_strategy,
              outputs=temperature,
          )

        decoding_strategy.change(
              fn=lambda selection: gr.Slider(visible=(selection in ["Top P Sampling"])),
              inputs=decoding_strategy,
              outputs=repetition_penalty,
          )
        decoding_strategy.change(
              fn=lambda selection: gr.Slider(visible=(selection in ["Top P Sampling"])),
              inputs=decoding_strategy,
              outputs=top_p,
          )
        gr.Examples(
            examples=examples,
            inputs=[image_input, query_input, decoding_strategy, temperature,
                    max_new_tokens, repetition_penalty, top_p],
            outputs=output,
            fn=model_inference,
        )

        submit_btn.click(model_inference,
                         inputs=[image_input, query_input, decoding_strategy, temperature,
                                 max_new_tokens, repetition_penalty, top_p],
                         outputs=output)


demo.launch(debug=True)