import re

from PIL import Image
from transformers import InstructBlipForConditionalGeneration, InstructBlipProcessor


class VQA:
    def __init__(self, gpu_number=0):
        # gpu_number is unused here; device placement is handled by device_map="auto".
        self.model = InstructBlipForConditionalGeneration.from_pretrained(
            "Salesforce/instructblip-vicuna-7b", device_map="auto"
        )
        self.processor = InstructBlipProcessor.from_pretrained("Salesforce/instructblip-vicuna-7b")
        self.model.eval()
        # Prompt templates for question answering and captioning.
        self.qa_prompt = "Question: {} Short answer:"
        self.caption_prompt = "\n<image>\na photo of"
        self.max_words = 50
    def pre_question(self, question):
        # from LAVIS blip_processors
        question = re.sub(
            r"([.!\"()*#:;~])",
            "",
            question.lower(),
        )
        question = question.rstrip(" ")
        # truncate question
        question_words = question.split(" ")
        if len(question_words) > self.max_words:
            question = " ".join(question_words[: self.max_words])
        return question
    def qa(self, image_path, question):
        image = Image.open(image_path)
        question = self.pre_question(question)
        inputs = self.processor(
            images=image, text=question, return_tensors="pt", padding="longest"
        ).to(self.model.device)
        # Beam-search decoding (do_sample=False) with a short answer budget.
        generated_ids = self.model.generate(
            **inputs, length_penalty=-1, num_beams=5, max_length=30, min_length=1,
            do_sample=False, top_p=0.9, repetition_penalty=1.0,
            num_return_sequences=1, temperature=1,
        )
        generated_text = self.processor.batch_decode(generated_ids, skip_special_tokens=True)
        return generated_text[0]
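

# Minimal usage sketch (not part of the original module): the image path and
# question below are placeholders for illustration only.
if __name__ == "__main__":
    vqa = VQA()
    answer = vqa.qa("path/to/image.jpg", "What color is the car?")
    print(answer)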