tanshaohui
feat: deploy demo
54a6ed7
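"""Gradio demo for microsoft/Phi-3-vision-128k-instruct.

Takes an image URL and a text instruction, and returns the model's response
(by default, converting the text in the image to markdown).
"""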
import gradio as gr
import spaces
import torch
import os
# os.system("pip install git+https://github.com/huggingface/transformers")
from PIL import Image
import requests
from transformers import AutoModelForCausalLM
from transformers import AutoProcessor
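# Load the model and processor once at startup; trust_remote_code is required
# because Phi-3 Vision ships custom modeling code with the checkpoint.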
model_id = "microsoft/Phi-3-vision-128k-instruct"
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    torch_dtype="auto",
    _attn_implementation="eager",  # use eager attention to avoid requiring flash-attn
).cuda()
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
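# On Hugging Face Spaces, @spaces.GPU requests a GPU for the duration of each call.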
@spaces.GPU
def infer(u, t):
    # Fall back to a sample image and a default OCR-to-markdown instruction
    # when either input is left empty.
    if not u:
        u = "https://lf3-static.bytednsdoc.com/obj/eden-cn/pbovhozuha/output.png"
    if not t:
        t = "Convert the text in the image to markdown"
    # The <|image_1|> placeholder tells the processor where to splice in the image.
    messages = [
        {"role": "user", "content": "<|image_1|>\n" + t},
    ]
    image = Image.open(requests.get(u, stream=True).raw)
    prompt = processor.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = processor(prompt, [image], return_tensors="pt").to(model.device)
    generation_args = {
        "max_new_tokens": 512,
        "temperature": 0.7,
        "do_sample": True,
    }
    generate_ids = model.generate(**inputs, eos_token_id=processor.tokenizer.eos_token_id, **generation_args)
    # Drop the prompt tokens so only the newly generated text is decoded.
    generate_ids = generate_ids[:, inputs['input_ids'].shape[1]:]
    response = processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
    return response
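# Minimal UI: two text inputs (image URL and instruction) mapped straight to infer().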
demo = gr.Interface(
    fn=infer,
    inputs=[
        gr.Text(label="Image URL"),
        gr.Text(label="Instruction"),
    ],
    outputs=gr.Text(label="Response"),
)
demo.launch()