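# Gradio app: visual question answering with Llama 3.2 11B Vision Instruct plus a
# LoRA adapter, intended to run as a Hugging Face Space (ZeroGPU via the `spaces` package).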
import os
import gradio as gr
import torch
from PIL import Image
from transformers import MllamaForConditionalGeneration, AutoProcessor
from peft import PeftModel
from huggingface_hub import login
import spaces
# Login to Hugging Face
if "HF_TOKEN" not in os.environ:
    raise ValueError("Please set the HF_TOKEN environment variable with your Hugging Face token")
login(token=os.environ["HF_TOKEN"])
# Load model and processor (do this outside the inference function to avoid reloading)
base_model_path = "meta-llama/Llama-3.2-11B-Vision-Instruct"
lora_weights_path = "taesiri/BunsBunny-LLama-3.2-11B-Vision-Instruct-DummyTask2"
processor = AutoProcessor.from_pretrained(base_model_path)
model = MllamaForConditionalGeneration.from_pretrained(
    base_model_path,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
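# Apply the fine-tuned LoRA adapter weights on top of the base model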
model = PeftModel.from_pretrained(model, lora_weights_path)
# ZeroGPU: a GPU is allocated for the duration of each call to this function
@spaces.GPU
def inference(image, question):
    # Prepare the chat-formatted prompt: an image placeholder plus the user's question
    messages = [
        {"role": "user", "content": [{"type": "image"}, {"type": "text", "text": question}]}
    ]
    input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
    inputs = processor(image, input_text, add_special_tokens=False, return_tensors="pt").to(model.device)

    # Run inference
    with torch.no_grad():
        output = model.generate(**inputs, max_new_tokens=2048)

    # Decode only the newly generated tokens, skipping the echoed prompt
    generated_tokens = output[0][inputs["input_ids"].shape[-1]:]
    result = processor.decode(generated_tokens, skip_special_tokens=True)
    return result
# Create Gradio interface
demo = gr.Interface(
    fn=inference,
    inputs=[
        gr.Image(type="pil", label="Upload Image"),
        gr.Textbox(label="Enter your question"),
    ],
    outputs=gr.Textbox(label="Response"),
    title="Image Analysis AI",
    description="Upload an image and ask a question about it. The AI will analyze and respond.",
)
if __name__ == "__main__":
    demo.launch()