import os

import gradio as gr
import spaces
import torch
from transformers import AutoProcessor, AutoModelForCausalLM

from examples import examples

# Model ID and access token come from the Space's environment secrets.
token = os.environ["TOKEN"]
model_id = os.environ["MODEL"]

processor = AutoProcessor.from_pretrained(model_id, token=token, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    token=token,
    torch_dtype=torch.float16,
    device_map={"": 0},
)

# Stop generation at the model's end-of-turn token.
eos_token_id = processor.tokenizer.convert_tokens_to_ids("<|end|>")


@spaces.GPU
def compute(image, instruction, output_format):
    # Combine the instruction and the requested output format into a single user turn.
    instruction_str = f"{instruction}\n<|output_format|>\n{output_format}"
    messages = [
        {"role": "user", "content": f"<|image_1|>\n{instruction_str}"}
    ]
    prompt = processor.tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, tokenize=False
    )
    inputs = processor(
        prompt, [image], return_tensors="pt", max_length=4096 * 4, truncation=False, padding=False
    )
    inputs = {k: v.to("cuda") for k, v in inputs.items()}
    # torch.autocast replaces the deprecated torch.cuda.amp.autocast().
    with torch.no_grad(), torch.autocast("cuda"):
        outputs = model.generate(**inputs, max_new_tokens=256, eos_token_id=eos_token_id)
    # Keep only the newly generated tokens, dropping the echoed prompt.
    input_length = inputs["input_ids"].shape[-1]
    generated = outputs[0][input_length:]
    return processor.tokenizer.decode(generated, skip_special_tokens=True)


with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            image = gr.Image(type="pil", label="Image")
            instruction = gr.Text(label="Instruction")
            output_format = gr.TextArea(label="Output Format", placeholder="Specify the output format.")
            sub_btn = gr.Button(value="Run")
        with gr.Column():
            out = gr.Code(label="Output")
    sub_btn.click(
        compute, inputs=[image, instruction, output_format], outputs=out, api_name=False
    )
    with gr.Row():
        gr.Examples(examples, inputs=[image, instruction, output_format], outputs=out, fn=compute)

demo.launch(show_api=False)
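
# A minimal local smoke test of compute(), kept commented out so the Space's
# behavior is unchanged. This is a hypothetical sketch: "sample.png" and the
# instruction/output-format strings are illustrative placeholders, not values
# the model requires.
#
#   from PIL import Image
#   print(compute(
#       Image.open("sample.png"),
#       "Extract the text from this document.",
#       "plain text",
#   ))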