Spaces:

Upyaya
/

Fashion-Image-Captioning-using-BLIP-2

Paused

App Files Files

Fashion-Image-Captioning-using-BLIP-2 / app.py

Upyaya

Fixed the issue to load the model for inferance in CPU device

37db1ce over 1 year ago

raw

history blame

2.52 kB

	from transformers import Blip2ForConditionalGeneration
	from transformers import Blip2Processor
	from peft import PeftModel
	import streamlit as st
	from PIL import Image
	import torch
	import os

	preprocess_ckp = "Salesforce/blip2-opt-2.7b" #Checkpoint path used for perprocess image
	base_model_ckp = "./model/blip2-opt-2.7b-fp16-sharded" #Base model checkpoint path
	peft_model_ckp = "./model/blip2_peft" #PEFT model checkpoint path
	sample_img_path = "./sample_images/"

	#init_model_required = True
	#processor = None
	#model = None

	#def init_model():

	#if init_model_required:

	#Preprocess input
	processor = Blip2Processor.from_pretrained(preprocess_ckp)

	#Model
	#Inferance on GPU device. Will give error in CPU system, as "load_in_8bit" is an setting of bitsandbytes library and only works for GPU
	#model = Blip2ForConditionalGeneration.from_pretrained(base_model_ckp, load_in_8bit = True, device_map = "auto")

	#Inferance on CPU device
	model = Blip2ForConditionalGeneration.from_pretrained(base_model_ckp)

	model = PeftModel.from_pretrained(model, peft_model_ckp)

	#init_model_required = False



	def main():

	st.title("Fashion Image Caption using BLIP2")

	#init_model()

	#Select few sample images for the catagory of cloths
	option = st.selectbox('Sample images ?', ('cap', 'tee', 'dress'))
	file_name = st.file_uploader("Upload image")

	if file_name is None and option is not None:

	file_name = os.join.path(sample_img_path, option)

	if file_name is not None:

	image_col, caption_text = st.columns(2)

	image_col.header("Image")
	image = Image.open(file_name)
	image_col.image(image, use_column_width = True)

	#Preprocess the image
	#Inferance on GPU. When used this on GPU will get errors like: "slow_conv2d_cpu" not implemented for 'Half'" , " Input type (float) and bias type (struct c10::Half)"
	#inputs = processor(images = image, return_tensors = "pt").to('cuda', torch.float16)

	#Inferance on CPU
	inputs = processor(images = image, return_tensors = "pt")

	pixel_values = inputs.pixel_values

	#Predict the caption for the imahe
	generated_ids = model.generate(pixel_values = pixel_values, max_length = 25)
	generated_caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

	#Output the predict text
	caption_text.header("Generated Caption")
	caption_text.text(generated_caption)

	if __name__ == "__main__":
	main()