from PIL import Image
import gradio as gr
from transformers import AutoProcessor, Blip2ForConditionalGeneration
import torch
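
# Gradio Space: converse with an image using BLIP-2 (OPT-2.7B).
# Flow: upload an image once, then ask free-form questions about it in the chat.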
| css = """ | |
| #column_container { | |
| position: relative; | |
| height: 800px; | |
| max-width: 700px; | |
| display: flex; | |
| flex-direction: column; | |
| background-color: lightgray; | |
| border: 1px solid gray; | |
| border-radius: 5px; | |
| padding: 10px; | |
| box-shadow: 2px 2px 5px gray; | |
| margin-left: auto; | |
| margin-right: auto; | |
| } | |
| #input_prompt { | |
| position: fixed; | |
| bottom: 0; | |
| max-width: 680px; | |
| } | |
| #chatbot-component { | |
| overflow: auto; | |
| } | |
| """ | |

device = "cuda" if torch.cuda.is_available() else "cpu"
# Half precision is only reliably supported on GPU; fall back to float32 on CPU
dtype = torch.float16 if device == "cuda" else torch.float32

processor = AutoProcessor.from_pretrained("Salesforce/blip2-opt-2.7b")
model = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-opt-2.7b", torch_dtype=dtype)
model.to(device)
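
# If GPU memory is tight, the checkpoint can alternatively be loaded in 8-bit
# precision (a sketch, assuming the optional bitsandbytes and accelerate
# packages are installed in the runtime):
#   model = Blip2ForConditionalGeneration.from_pretrained(
#       "Salesforce/blip2-opt-2.7b", load_in_8bit=True, device_map="auto")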

def upload_button_config():
    # Hide the upload button once an image has been uploaded
    return gr.update(visible=False)

def update_textbox_config(text_in):
    # Reveal the question textbox after the first upload
    return gr.update(visible=True)

# Takes the user input and generates a response
def predict(btn_upload, counter, image_hid, input, history):
    # First call: an image was just uploaded, so resize it, save it, and echo it back
    if counter == 0:
        image_in = Image.open(btn_upload)
        # Resize to a fixed width, preserving the aspect ratio
        basewidth = 512
        wpercent = basewidth / float(image_in.size[0])
        hsize = int(float(image_in.size[1]) * wpercent)
        image_in = image_in.resize((basewidth, hsize))
        # Save the resized image so it can be served back into the chat
        img_name = "uploaded_image.png"
        image_in.save(img_name)
        # Add the image to the chat history as an inline <img> tag
        history = history or []
        response = '<img src="/file=' + img_name + '">'
        history.append((input, response))
        counter += 1
        return history, history, img_name, counter, image_in

    # Subsequent calls: run visual question answering over the stored image
    print(f"prompt is: {input}")
    # Format the prompt as - Question: <question> Answer:
    prompt = f"Question: {input} Answer: "
    inputs = processor(image_hid, text=prompt, return_tensors="pt").to(device, dtype)
    # Generate the response
    generated_ids = model.generate(**inputs, max_new_tokens=10)
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
    print(f"generated_text is: {generated_text}")
    # Append the question/answer pair to the chat history
    history = history or []
    history.append((input, generated_text))
    counter += 1
    return history, history, "uploaded_image.png", counter, image_hid
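
# A minimal sketch of the raw BLIP-2 call that predict() wraps, handy for
# testing outside the UI. It is not called anywhere in the app; the default
# image path is a hypothetical local file.
def _raw_blip2_answer(image_path="photo.jpg", question="What is in this photo?"):
    image = Image.open(image_path).convert("RGB")
    inputs = processor(image, text=f"Question: {question} Answer: ", return_tensors="pt").to(device, dtype)
    generated_ids = model.generate(**inputs, max_new_tokens=10)
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()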

# Blocks layout
with gr.Blocks(css="#chatbot-component {height: 600px} #input_prompt {position: absolute; bottom: 0;}") as demo:
    with gr.Row():
        with gr.Column(scale=1):
| gr.HTML("""<div style="text-align: center; max-width: 700px; margin: 0 auto;"> | |
| <div | |
| style=" | |
| display: inline-flex; | |
| align-items: center; | |
| gap: 0.8rem; | |
| font-size: 1.75rem; | |
| " | |
| > | |
| <h1 style="font-weight: 900; margin-bottom: 7px; margin-top: 5px;"> | |
| Bringing Visual Conversations to Life with BLIP2 | |
| </h1> | |
| </div> | |
| <p style="margin-bottom: 10px; font-size: 94%"> | |
| Blip2 is functioning as an <b>instructed zero-shot image-to-text generation</b> model using OPT-2.7B in this Space. | |
| It shows a wide range of capabilities including visual conversation, visual knowledge reasoning, visual commensense reasoning, storytelling, | |
| personalized image-to-text generation etc.<br> | |
| BLIP-2 by <a href="https://huggingface.co/Salesforce" target="_blank">Salesforce</a> is now available in🤗Transformers! | |
| This model was contributed by <a href="https://twitter.com/NielsRogge" target="_blank">nielsr</a>. | |
| The BLIP-2 model was proposed in <a href="https://arxiv.org/abs/2301.12597" target="_blank">BLIP-2: Bootstrapping Language-Image Pre-training with Frozen Image Encoders and Large Language Models</a> | |
| by Junnan Li, Dongxu Li, Silvio Savarese, Steven Hoi.<br><br> | |
| </p></div>""") | |
| gr.HTML("""<a href="https://huggingface.co/spaces/ysharma/InstructPix2Pix_Chatbot?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>Duplicate Space with GPU Upgrade for fast Inference & no queue<br>""") | |
        with gr.Column(elem_id="column_container", scale=2):
            btn_upload = gr.UploadButton("Upload image!", file_types=["image"], file_count="single", elem_id="upload_button")
            text_in = gr.Textbox(value='', placeholder="Type your questions here and press enter", elem_id="input_prompt", visible=False, label='Great! Now you can ask questions to get more information about the image')
            chatbot = gr.Chatbot(elem_id='chatbot-component', label='Converse with Images')
            state_in = gr.State()
            counter_out = gr.Number(visible=False, value=0, precision=0)
            text_out = gr.Textbox(visible=False)  # carries the saved image filename between calls
            image_hid = gr.Image(visible=False)  # holds the uploaded image for follow-up questions

    # Event listeners
    # Uploading an image runs predict() (echoes the image) and reveals the question textbox
    btn_upload.upload(predict, [btn_upload, counter_out, image_hid, text_in, state_in], [chatbot, state_in, text_out, counter_out, image_hid])
    btn_upload.upload(fn=update_textbox_config, inputs=text_in, outputs=text_in)
    # Submitting a question runs visual question answering over the stored image
    text_in.submit(predict, [btn_upload, counter_out, image_hid, text_in, state_in], [chatbot, state_in, text_out, counter_out, image_hid])
    # Once the chat has content, hide the upload button
    chatbot.change(fn=upload_button_config, outputs=btn_upload)
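
    # Note: the wiring above and the queue() call below follow Gradio 3.x
    # semantics; Gradio 4 replaced queue(concurrency_count=...) with
    # queue(default_concurrency_limit=...) and prefers returning updated
    # components (e.g. gr.Textbox(visible=True)) over gr.update(...).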

demo.queue(concurrency_count=10)
demo.launch(debug=True)