# Chat_With_Blip2 / app.py
import requests
from PIL import Image
import gradio as gr
from transformers import AutoProcessor, Blip2ForConditionalGeneration
import torch
css = """
#column_container {
position: relative;
height: 800px;
max-width: 700px;
display: flex;
flex-direction: column;
background-color: lightgray;
border: 1px solid gray;
border-radius: 5px;
padding: 10px;
box-shadow: 2px 2px 5px gray;
margin-left: auto;
margin-right: auto;
}
#input_prompt {
position: fixed;
bottom: 0;
max-width: 680px;
}
#chatbot-component {
overflow: auto;
}
"""
processor = AutoProcessor.from_pretrained("Salesforce/blip2-opt-2.7b")
model = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-opt-2.7b", torch_dtype=torch.float16)
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
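# The checkpoint is loaded in float16, so inference is intended to run on a CUDA device;
# on a CPU-only machine the half-precision inputs built in predict() may not be supported.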
# Event-handler helpers: hide the upload button once an image has been uploaded,
# and reveal the question textbox so the user can start the conversation
def upload_button_config():
    return gr.update(visible=False)
def update_textbox_config(text_in):
    return gr.update(visible=True)
# Takes the user question (and, on the first call, the uploaded image) and generates a response
def predict(btn_upload, counter, image_hid, input, history):
    if counter == 0:
        image_in = Image.open(btn_upload)
        # Resize the image to a fixed width while preserving the aspect ratio
        basewidth = 512
        wpercent = (basewidth / float(image_in.size[0]))
        hsize = int((float(image_in.size[1]) * float(wpercent)))
        image_in = image_in.resize((basewidth, hsize))  #, Image.Resampling.LANCZOS)
        # Save the resized image to disk so it can be rendered inside the chatbot
        #seed = random.randint(0, 1000000)
        img_name = "uploaded_image.png"  #f"./edited_image_{seed}.png"
        image_in.save(img_name)
        # Add the image to the conversation state
        history = history or []
        response = '<img src="/file=' + img_name + '">'
        history.append((input, response))
        counter += 1
        return history, history, img_name, counter, image_in
    # Process the prompt
    print(f"prompt is :{input}")
    # Format the prompt as - Question: Is this photo unusual? Answer:
    prompt = f"Question: {input} Answer: "
    inputs = processor(image_hid, text=prompt, return_tensors="pt").to(device, torch.float16)
    # Generate the response
    generated_ids = model.generate(**inputs, max_new_tokens=10)
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
    print(f"generated_text is : {generated_text}")
    # Add the answer to the conversation state
    history = history or []
    response = generated_text
    history.append((input, response))
    counter += 1
    return history, history, "uploaded_image.png", counter, image_hid
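# For reference, a minimal standalone sketch of the same BLIP-2 Q&A call that predict() makes above
# (not used by the app; "example.jpg" is a hypothetical local file):
#
#   image = Image.open("example.jpg")
#   prompt = "Question: what is shown in this picture? Answer: "
#   inputs = processor(image, text=prompt, return_tensors="pt").to(device, torch.float16)
#   generated_ids = model.generate(**inputs, max_new_tokens=10)
#   print(processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip())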
# Blocks layout - keeping this CSS note here for the moment - "#chatbot-component .overflow-y-auto{height:800px}"
with gr.Blocks(css="#chatbot-component {height: 600px} #input_prompt {position: absolute; bottom: 0;}") as demo:
    with gr.Row():
        with gr.Column(scale=1):
            #with gr.Accordion("See details"):
gr.HTML("""<div style="text-align: center; max-width: 700px; margin: 0 auto;">
<div
style="
display: inline-flex;
align-items: center;
gap: 0.8rem;
font-size: 1.75rem;
"
>
<h1 style="font-weight: 900; margin-bottom: 7px; margin-top: 5px;">
Bringing Visual Conversations to Life with BLIP2
</h1>
</div>
<p style="margin-bottom: 10px; font-size: 94%">
Blip2 is functioning as an <b>instructed zero-shot image-to-text generation</b> model using OPT-2.7B in this Space.
It shows a wide range of capabilities including visual conversation, visual knowledge reasoning, visual commensense reasoning, storytelling,
personalized image-to-text generation etc.<br>
BLIP-2 by <a href="https://huggingface.co/Salesforce" target="_blank">Salesforce</a> is now available in🤗Transformers!
This model was contributed by <a href="https://twitter.com/NielsRogge" target="_blank">nielsr</a>.
The BLIP-2 model was proposed in <a href="https://arxiv.org/abs/2301.12597" target="_blank">BLIP-2: Bootstrapping Language-Image Pre-training with Frozen Image Encoders and Large Language Models</a>
by Junnan Li, Dongxu Li, Silvio Savarese, Steven Hoi.<br><br>
</p></div>""")
gr.HTML("""<a href="https://huggingface.co/spaces/ysharma/InstructPix2Pix_Chatbot?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>Duplicate Space with GPU Upgrade for fast Inference & no queue<br>""")
        with gr.Column(elem_id="column_container", scale=2):
            btn_upload = gr.UploadButton("Upload image!", file_types=["image"], file_count="single", elem_id="upload_button")
            text_in = gr.Textbox(value='', placeholder="Type your questions here and press enter", elem_id="input_prompt", visible=False, label='Great! Now you can ask questions to get more information about the image')
            chatbot = gr.Chatbot(elem_id='chatbot-component', label='Converse with Images')
            # Hidden state/value holders shared across the event handlers
            state_in = gr.State()
            counter_out = gr.Number(visible=False, value=0, precision=0)
            text_out = gr.Textbox(visible=False)  # holds the saved image filename
            image_hid = gr.Image(visible=False)  #, type='pil')
    # Event listeners
    # On image upload: run predict() to register the image, then reveal the question textbox
    btn_upload.upload(predict, [btn_upload, counter_out, image_hid, text_in, state_in], [chatbot, state_in, text_out, counter_out, image_hid])
    btn_upload.upload(fn=update_textbox_config, inputs=text_in, outputs=text_in)
    # On question submit: run predict() to answer; once the chat updates, hide the upload button
    text_in.submit(predict, [btn_upload, counter_out, image_hid, text_in, state_in], [chatbot, state_in, text_out, counter_out, image_hid])
    chatbot.change(fn=upload_button_config, outputs=btn_upload)  #, scroll_to_output = True)
demo.queue(concurrency_count=10)
demo.launch(debug=True) #, width="80%", height=2000)