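"""Gradio Space: converse with an image using BLIP-2 (OPT-2.7B) via 🤗 Transformers.

Upload an image, then ask follow-up questions about it in the chat window.
"""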
from PIL import Image
import gradio as gr
from transformers import AutoProcessor, Blip2ForConditionalGeneration
import torch
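
# CSS for the chat column layout. Note: the gr.Blocks() call further down passes
# its own inline CSS string, so this block is currently kept for reference only.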
css = """
#column_container {
position: relative;
height: 800px;
max-width: 700px;
display: flex;
flex-direction: column;
background-color: lightgray;
border: 1px solid gray;
border-radius: 5px;
padding: 10px;
box-shadow: 2px 2px 5px gray;
margin-left: auto;
margin-right: auto;
}
#input_prompt {
position: fixed;
bottom: 0;
max-width: 680px;
}
#chatbot-component {
overflow: auto;
}
"""
processor = AutoProcessor.from_pretrained("Salesforce/blip2-opt-2.7b")
model = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-opt-2.7b", torch_dtype=torch.float16)
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
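
# Small helpers that toggle component visibility once the conversation starts:
# the upload button is hidden and the question textbox is revealed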
def upload_button_config():
return gr.update(visible=False)
def update_textbox_config(text_in):
return gr.update(visible=True)
# Takes the user input and generates the chatbot response. The first call handles
# the uploaded image; subsequent calls answer questions about it with BLIP-2.
def predict(btn_upload, counter, image_hid, input, history):
    if counter == 0:
        image_in = Image.open(btn_upload)
        # Resize the image to a 512px width, preserving the aspect ratio
        basewidth = 512
        wpercent = basewidth / float(image_in.size[0])
        hsize = int(float(image_in.size[1]) * wpercent)
        image_in = image_in.resize((basewidth, hsize))  # optionally pass Image.Resampling.LANCZOS
        # Save the image to disk so it can be shown inside the chat window
        img_name = "uploaded_image.png"
        image_in.save(img_name)
        # Record the image in the conversation state as an inline <img> tag
        history = history or []
        response = '<img src="/file=' + img_name + '">'
        history.append((input, response))
        counter += 1
        return history, history, img_name, counter, image_in
    # Subsequent calls: put the question into the prompt format the model expects,
    # e.g. "Question: Is this photo unusual? Answer: "
    print(f"prompt is: {input}")
    prompt = f"Question: {input} Answer: "
inputs = processor(image_hid, text=prompt, return_tensors="pt").to(device, torch.float16)
    # Generate a short answer (at most 10 new tokens) and decode it
    generated_ids = model.generate(**inputs, max_new_tokens=10)
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
    print(f"generated_text is: {generated_text}")
    # Append the answer to the conversation state
history = history or []
response = generated_text
history.append((input, response))
counter += 1
return history, history, "uploaded_image.png", counter, image_hid
# Blocks layout. An alternative chatbot height rule is kept above in `css`:
# "#chatbot-component .overflow-y-auto{height:800px}"
with gr.Blocks(css="#chatbot-component {height: 600px} #input_prompt {position: absolute; bottom: 0;}") as demo:
with gr.Row():
with gr.Column(scale=1):
gr.HTML("""<div style="text-align: center; max-width: 700px; margin: 0 auto;">
<div
style="
display: inline-flex;
align-items: center;
gap: 0.8rem;
font-size: 1.75rem;
"
>
<h1 style="font-weight: 900; margin-bottom: 7px; margin-top: 5px;">
Bringing Visual Conversations to Life with BLIP2
</h1>
</div>
            <p style="margin-bottom: 10px; font-size: 94%">
            BLIP-2 functions as an <b>instructed zero-shot image-to-text generation</b> model using OPT-2.7B in this Space.
            It shows a wide range of capabilities, including visual conversation, visual knowledge reasoning, visual commonsense reasoning, storytelling,
            and personalized image-to-text generation.<br>
            BLIP-2 by <a href="https://huggingface.co/Salesforce" target="_blank">Salesforce</a> is now available in 🤗 Transformers!
            This model was contributed by <a href="https://twitter.com/NielsRogge" target="_blank">nielsr</a>.
            The BLIP-2 model was proposed in <a href="https://arxiv.org/abs/2301.12597" target="_blank">BLIP-2: Bootstrapping Language-Image Pre-training with Frozen Image Encoders and Large Language Models</a>
            by Junnan Li, Dongxu Li, Silvio Savarese, and Steven Hoi.<br><br>
            </p></div>""")
gr.HTML("""<a href="https://huggingface.co/spaces/ysharma/InstructPix2Pix_Chatbot?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>Duplicate Space with GPU Upgrade for fast Inference & no queue<br>""")
with gr.Column(elem_id = "column_container", scale=2):
btn_upload = gr.UploadButton("Upload image!", file_types=["image"], file_count="single", elem_id="upload_button")
text_in = gr.Textbox(value='', placeholder="Type your questions here and press enter", elem_id = "input_prompt", visible=False, label='Great! Now you can ask questions to get more information about the image')
chatbot = gr.Chatbot(elem_id = 'chatbot-component', label='Converse with Images')
            state_in = gr.State()  # conversation history
            counter_out = gr.Number(visible=False, value=0, precision=0)  # 0 until an image has been processed
            text_out = gr.Textbox(visible=False)  # carries the saved image filename between calls
            image_hid = gr.Image(visible=False)  # holds the uploaded image for follow-up questions
            # Event listeners: wire the upload button, textbox, and chatbot together
            btn_upload.upload(predict, [btn_upload, counter_out, image_hid, text_in, state_in], [chatbot, state_in, text_out, counter_out, image_hid])
            btn_upload.upload(fn=update_textbox_config, inputs=text_in, outputs=text_in)
            text_in.submit(predict, [btn_upload, counter_out, image_hid, text_in, state_in], [chatbot, state_in, text_out, counter_out, image_hid])
            chatbot.change(fn=upload_button_config, outputs=btn_upload)
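
# Process up to 10 queued requests concurrently, then launch the app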
demo.queue(concurrency_count=10)
demo.launch(debug=True)
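
# For reference, the core BLIP-2 call wired up above boils down to the following
# minimal sketch (assuming `processor`, `model`, and `device` are loaded as at the
# top of this file; "some_image.png" is a hypothetical local image):
#
#   image = Image.open("some_image.png")
#   inputs = processor(image, text="Question: What is shown here? Answer: ", return_tensors="pt").to(device, torch.float16)
#   generated_ids = model.generate(**inputs, max_new_tokens=10)
#   print(processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip())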