Spaces:

alexbuz
/

ocr

Runtime error

App Files Files Community

ocr / _app.py

alex buz

test

e1cddb8 3 months ago

raw

history blame contribute delete

No virus

2.07 kB

	from transformers import AutoProcessor, AutoModelForCausalLM
	from PIL import Image
	import gradio as gr

	# Hugging Face Hub identifier of the checkpoint this app serves.
	model_id = 'microsoft/Florence-2-large'

	# trust_remote_code=True allows transformers to execute the modelling code
	# bundled with the checkpoint; weights are cached under ./cache.
	# device_map="auto" and attn_implementation="flash_attention_2" are
	# intentionally left disabled.
	model = AutoModelForCausalLM.from_pretrained(
	    model_id,
	    trust_remote_code=True,
	    torch_dtype="auto",
	    cache_dir="./cache",
	)
	# Inference only: put the module in evaluation mode (eval() returns the
	# same module, so `model` keeps pointing at it).
	model.eval()

	# The processor is loaded with the same options as the model.
	processor = AutoProcessor.from_pretrained(
	    model_id,
	    trust_remote_code=True,
	    torch_dtype="auto",
	    cache_dir="./cache",
	)

	def run_example(task_prompt, image, text_input=None):
	    """Run one prompt against an image and return the post-processed answer.

	    Args:
	        task_prompt: Prompt string for the model; it is also passed as
	            ``task`` to the processor's post-processing step.
	        image: PIL image (its ``width`` and ``height`` are read for
	            post-processing).
	        text_input: Optional extra text appended directly to
	            ``task_prompt``. ``None`` means "no extra text".

	    Returns:
	        Whatever ``processor.post_process_generation`` returns for the
	        generated text.
	    """
	    prompt = task_prompt if text_input is None else task_prompt + text_input

	    inputs = processor(text=prompt, images=image, return_tensors="pt")
	    # The model is loaded with torch_dtype="auto", so its weights may not be
	    # float32 (and it may not live on the CPU). Align the inputs with the
	    # model; only floating-point tensors are cast, input_ids stay integer.
	    inputs = inputs.to(model.device, model.dtype)

	    generated_ids = model.generate(
	        input_ids=inputs["input_ids"],
	        pixel_values=inputs["pixel_values"],
	        max_new_tokens=1024,
	        early_stopping=False,
	        do_sample=False,
	        num_beams=3,
	    )
	    # Special tokens are kept because the decoded text is handed to
	    # post_process_generation rather than shown directly.
	    generated_text = processor.batch_decode(
	        generated_ids, skip_special_tokens=False
	    )[0]
	    parsed_answer = processor.post_process_generation(
	        generated_text,
	        task=task_prompt,
	        image_size=(image.width, image.height),
	    )

	    return parsed_answer

	def inference(image, task_prompt, text_input):
	    """Gradio callback: forward the UI inputs to ``run_example``.

	    The interface supplies the image first, whereas ``run_example`` takes
	    the task prompt first, so the arguments are passed by keyword.
	    """
	    return run_example(
	        task_prompt=task_prompt,
	        image=image,
	        text_input=text_input,
	    )

	# Web UI: one image plus two text boxes in, the result rendered as text.
	interface = gr.Interface(
	    fn=inference,
	    inputs=[
	        gr.Image(type="pil"),
	        gr.Textbox(label="Task Prompt", placeholder="Enter task prompt here"),
	        # The former `optional=True` keyword was removed: current Gradio
	        # components reject it with a TypeError at construction time, and
	        # a text box may be left blank without it.
	        gr.Textbox(
	            label="Additional Text Input",
	            placeholder="Enter additional text input here (optional)",
	        ),
	    ],
	    outputs="text",
	    title="Hugging Face Model Inference",
	    description="Generate text based on an image and a prompt using a Hugging Face model",
	)

	# Start the server only when executed as a script, not when imported.
	if __name__ == "__main__":
	    interface.launch()