File size: 6,071 Bytes
3925278
 
 
b216236
11419b6
3925278
30ac0ed
f515034
6aefed5
 
f8578f6
6aefed5
f8578f6
 
 
 
 
 
 
 
 
6aefed5
f515034
f8578f6
f515034
f8578f6
 
 
 
f515034
 
f8578f6
30ac0ed
 
 
aaf5441
 
30ac0ed
3925278
 
 
2df78a7
3925278
 
 
aaf5441
 
3925278
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2df78a7
 
 
aaf5441
 
3925278
2df78a7
3925278
 
 
 
 
 
2df78a7
3925278
 
 
 
4bd93a2
2c500d4
ded9088
0d3baf4
ded9088
 
 
aaf5441
 
 
 
 
 
 
 
 
 
 
 
6aefed5
 
aaf5441
1f22496
 
 
 
ded9088
2df78a7
 
0d3baf4
ded9088
 
 
a816d07
ded9088
 
2df78a7
ded9088
3925278
 
 
2df78a7
3925278
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import requests
from PIL import Image
import gradio as gr
from transformers import AutoProcessor, Blip2ForConditionalGeneration
import torch 


# Custom CSS for the chat column and the pinned prompt box.
# NOTE(review): this `css` variable is never passed to gr.Blocks below — the demo
# is constructed with a separate inline css string instead; confirm whether this
# definition is intentionally unused or should be wired in via gr.Blocks(css=css).
css = """
#column_container {
  position: relative;
  height: 800px;
  max-width: 700px;
  display: flex;
  flex-direction: column;
  background-color: lightgray;
  border: 1px solid gray;
  border-radius: 5px;
  padding: 10px;
  box-shadow: 2px 2px 5px gray;
  margin-left: auto; 
  margin-right: auto;
}
#input_prompt {
  position: fixed;
  bottom: 0;
  max-width: 680px;
}
#chatbot-component {
  overflow: auto;
}
"""

# Load the BLIP-2 (OPT-2.7B) processor and model once at module import time.
processor = AutoProcessor.from_pretrained("Salesforce/blip2-opt-2.7b")
# float16 weights halve memory use; inference later casts inputs to float16 to match.
model = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-opt-2.7b", torch_dtype=torch.float16) 

# Prefer GPU when available. NOTE(review): with torch_dtype=float16, CPU fallback
# may be slow or unsupported for some ops — confirm the CPU path actually works.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

def upload_button_config():
    """Return a Gradio update that hides the upload button (used once a chat has started)."""
    hidden = gr.update(visible=False)
    return hidden

def update_textbox_config(text_in):
    """Return a Gradio update that reveals the question textbox.

    `text_in` is accepted to satisfy the event-listener signature but is not read.
    """
    visible = gr.update(visible=True)
    return visible

#takes input and generates the Response
def predict(btn_upload, counter, image_hid, input, history):
    """Chat handler for the BLIP-2 demo.

    On the very first call (counter == 0) it loads the uploaded image, downscales
    it to a 512px width, saves it to disk, and echoes it into the chat. On every
    later call it runs visual question answering against the stored image.

    Returns a 5-tuple: (chatbot history, state history, saved image filename,
    incremented counter, image for the hidden image component).
    """
    history = history or []

    if counter == 0:
        # First turn: ingest, resize and persist the uploaded image.
        pil_image = Image.open(btn_upload)
        basewidth = 512
        scale = basewidth / float(pil_image.size[0])
        new_height = int(float(pil_image.size[1]) * scale)
        pil_image = pil_image.resize((basewidth, new_height))
        img_name = "uploaded_image.png"
        pil_image.save(img_name)
        # Render the image back into the chat window as an inline <img> tag.
        reply = f'<img src="/file={img_name}">'
        history.append((input, reply))
        return history, history, img_name, counter + 1, pil_image

    # Subsequent turns: build the BLIP-2 Q&A prompt and generate an answer.
    print(f"prompt is :{input}")
    prompt = f"Question: {input} Answer: "
    model_inputs = processor(image_hid, text=prompt, return_tensors="pt").to(device, torch.float16)

    generated_ids = model.generate(**model_inputs, max_new_tokens=10)
    answer = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
    print(f"generated_text is : {answer}")

    history.append((input, answer))
    return history, history, "uploaded_image.png", counter + 1, image_hid

#Blocks Layout - leaving this here for moment - "#chatbot-component .overflow-y-auto{height:800px}"
# UI: two columns — left holds the header/description HTML, right holds the chat
# (upload button, hidden question textbox, chatbot, plus hidden state components).
with gr.Blocks(css="#chatbot-component {height: 600px} #input_prompt {position: absolute; bottom: 0;}") as demo:  
  with gr.Row():
    with gr.Column(scale=1):
        #with gr.Accordion("See details"):
        gr.HTML("""<div style="text-align: center; max-width: 700px; margin: 0 auto;">
                    <div
                style="
                    display: inline-flex;
                    align-items: center;
                    gap: 0.8rem;
                    font-size: 1.75rem;
                "
                >
                <h1 style="font-weight: 900; margin-bottom: 7px; margin-top: 5px;">
                    Bringing Visual Conversations to Life with BLIP2
                </h1>
                </div>
                <p style="margin-bottom: 10px; font-size: 94%">
                Blip2 is functioning as an <b>instructed zero-shot image-to-text generation</b> model using OPT-2.7B in this Space. 
                It shows a wide range of capabilities including visual conversation, visual knowledge reasoning, visual commensense reasoning, storytelling, 
                personalized image-to-text generation etc.<br>
                BLIP-2 by <a href="https://huggingface.co/Salesforce" target="_blank">Salesforce</a> is now available in🤗Transformers! 
                This model was contributed by <a href="https://twitter.com/NielsRogge" target="_blank">nielsr</a>. 
                The BLIP-2 model was proposed in <a href="https://arxiv.org/abs/2301.12597" target="_blank">BLIP-2: Bootstrapping Language-Image Pre-training with Frozen Image Encoders and Large Language Models</a> 
                by Junnan Li, Dongxu Li, Silvio Savarese, Steven Hoi.<br><br>
                </p></div>""")
        gr.HTML("""<a href="https://huggingface.co/spaces/ysharma/InstructPix2Pix_Chatbot?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>Duplicate Space with GPU Upgrade for fast Inference & no queue<br>""")
    
    with gr.Column(elem_id = "column_container", scale=2):
        #text_in = gr.Textbox(value='', placeholder="Type your questions here and press enter", elem_id = "input_prompt", visible=False, label='Great! Now you can ask questions to get more information about the image')
        btn_upload = gr.UploadButton("Upload image!", file_types=["image"], file_count="single", elem_id="upload_button")
        # Textbox starts hidden; it is revealed by update_textbox_config after the first upload.
        text_in = gr.Textbox(value='', placeholder="Type your questions here and press enter", elem_id = "input_prompt", visible=False, label='Great! Now you can ask questions to get more information about the image')
        chatbot = gr.Chatbot(elem_id = 'chatbot-component', label='Converse with Images')
        # Hidden plumbing: conversation state, call counter, saved-image filename, and the image itself.
        state_in = gr.State()
        counter_out = gr.Number(visible=False, value=0, precision=0)
        text_out = gr.Textbox(visible=False)  #getting image name out
        image_hid = gr.Image(visible=False) #, type='pil')

  #Using Event Listeners
  # Upload: run predict (first-turn branch) and then reveal the question textbox.
  btn_upload.upload(predict, [btn_upload, counter_out, image_hid, text_in, state_in], [chatbot, state_in, text_out, counter_out, image_hid])
  btn_upload.upload(fn = update_textbox_config, inputs=text_in, outputs = text_in)

  # Enter in the textbox: run predict's Q&A branch against the stored image.
  text_in.submit(predict, [btn_upload, counter_out, image_hid, text_in, state_in], [chatbot, state_in, text_out, counter_out, image_hid])
  
  # Any chatbot change hides the upload button so only one image is used per session.
  chatbot.change(fn = upload_button_config, outputs=btn_upload) #, scroll_to_output = True)
    
demo.queue(concurrency_count=10)
demo.launch(debug=True) #, width="80%", height=2000)