Spaces:

remyxai
/

VQASynth

Running on Zero

VQASynth / app.py

smellslikeml

update app

1afbcbd 6 months ago

3.89 kB

	"""SpaceLlama3.1 demo gradio app."""

	import datetime
	import logging
	import os

	import gradio as gr
	import torch
	import PIL.Image
	from prismatic import load
	from huggingface_hub import login

	# Hugging Face Hub authentication
	def authenticate_huggingface():
	    """Log in to the Hugging Face Hub with the token from the environment.

	    Reads the ``HF_TOKEN`` environment variable and passes it to
	    ``huggingface_hub.login``.

	    Raises:
	        ValueError: If ``HF_TOKEN`` is unset or empty.
	    """
	    token = os.getenv("HF_TOKEN")
	    # Guard clause: nothing to log in with when the variable is missing/empty.
	    if not token:
	        raise ValueError("Hugging Face API token not found. Please set it as an environment variable named 'HF_TOKEN'.")
	    login(token=token)

	# Authenticate once, at import time, before the model is loaded.
	# authenticate_huggingface() raises ValueError when HF_TOKEN is unset or
	# empty, so a missing token stops the app right here.
	authenticate_huggingface()

	# Markdown banner rendered at the top of the UI. Each link line starts with a
	# Markdown-escaped pipe (backslash + "|"); the backslash is doubled in the
	# source so Python keeps it literally instead of warning about an invalid
	# escape sequence. The runtime string is unchanged.
	INTRO_TEXT = """SpaceLlama3.1 demo\n\n
	\\| [Model](https://huggingface.co/remyxai/SpaceLlama3.1)
	\\| [GitHub](https://github.com/remyxai/VQASynth/tree/main)
	\\| [Demo](https://huggingface.co/spaces/remyxai/SpaceLlama3.1)
	\\| [Discord](https://discord.gg/DAy3P5wYJk)
	\n\n
	This is an experimental research model. Make sure to add appropriate guardrails when using the model for applications.
	"""

	# Model identifier handed to prismatic's load(); kept as a module constant so
	# it can be changed in one place.
	MODEL_LOCATION = "remyxai/SpaceLlama3.1"  # Update as needed

	# Shared model instance. Stays None until load_model() assigns it.
	global_model = None

	def load_model():
	    """Load the model from ``MODEL_LOCATION`` into the module-level ``global_model``.

	    The model is moved to CUDA when ``torch.cuda.is_available()`` reports a
	    GPU, otherwise to the CPU, and is cast to ``torch.bfloat16`` either way.
	    """
	    global global_model
	    # Decide the target device before the load starts.
	    device_name = "cuda" if torch.cuda.is_available() else "cpu"
	    target_device = torch.device(device_name)
	    global_model = load(MODEL_LOCATION)
	    global_model.to(target_device, dtype=torch.bfloat16)
	    logging.info("Model loaded successfully.")

	def compute(image, prompt):
	    """Run one round of model inference for an image/prompt pair.

	    Args:
	        image: Path to an image file, which is opened and converted to RGB.
	            Any non-string value is handed to the model unchanged.
	        prompt: The user's text prompt about the image.

	    Returns:
	        The generated text as a string, cut at the first ``</s>`` marker.

	    Raises:
	        gr.Error: If no image was supplied, or if the model has not been
	            loaded yet.
	    """
	    if image is None:
	        raise gr.Error("Image required")

	    # global_model is None until load_model() has run; report that clearly
	    # instead of failing later with an AttributeError on None.
	    vlm = global_model
	    if vlm is None:
	        raise gr.Error("Model not loaded")

	    logging.info('prompt="%s"', prompt)

	    # A string is treated as a file path. The context manager closes the
	    # underlying file once the RGB copy has been produced.
	    if isinstance(image, str):
	        with PIL.Image.open(image) as source_image:
	            image = source_image.convert("RGB")

	    # Wrap the raw prompt in the model's own chat format.
	    prompt_builder = vlm.get_prompt_builder()
	    prompt_builder.add_turn(role="human", message=prompt)
	    prompt_text = prompt_builder.get_prompt()

	    # Generate the text based on image and prompt.
	    generated_text = vlm.generate(
	        image,
	        prompt_text,
	        do_sample=True,
	        temperature=0.1,
	        max_new_tokens=512,
	        min_length=1,
	    )
	    # Keep only what precedes the first end-of-sequence marker.
	    output = generated_text.partition("</s>")[0]

	    logging.info('output="%s"', output)

	    return output

	def reset():
	    """Return cleared values for the prompt box and the image input.

	    Returns:
	        A ``(prompt, image)`` pair: an empty string and ``None``.
	    """
	    blank_prompt = ""
	    no_image = None
	    return blank_prompt, no_image

	def create_app():
	    """Build the demo's Gradio UI.

	    Returns:
	        The assembled ``gr.Blocks`` app. It is not queued or launched here.
	    """

	    def run_inference(image_path, prompt_text):
	        """Adapt compute()'s plain-string result for gr.HighlightedText."""
	        # HighlightedText takes (text, label) pairs rather than a bare string,
	        # so the answer is wrapped as a single unlabeled span.
	        return [(compute(image_path, prompt_text), None)]

	    with gr.Blocks() as demo:
	        # Main UI structure
	        # NOTE(review): the Row/Column nesting below is an assumption (image on
	        # the left, controls and output in a column on the right) - confirm
	        # against the intended layout.
	        gr.Markdown(INTRO_TEXT)
	        with gr.Row():
	            image = gr.Image(value=None, label="Image", type="filepath", visible=True)  # input
	            with gr.Column():
	                prompt = gr.Textbox(value="", label="Prompt", visible=True)
	                model_info = gr.Markdown(label="Model Info")
	                run = gr.Button("Run", variant="primary")
	                clear = gr.Button("Clear")
	                highlighted_text = gr.HighlightedText(value="", label="Output", visible=True)

	        # Button event handlers
	        run.click(
	            fn=run_inference,
	            inputs=[image, prompt],
	            outputs=highlighted_text,
	        )
	        # reset() returns one value per output: prompt text, then image.
	        clear.click(fn=reset, inputs=None, outputs=[prompt, image])

	        # Status lines; created only for display, so no handles are kept.
	        gr.Markdown(f"Startup: {datetime.datetime.now()}")
	        gr.Markdown("GPU=?")
	        # Fill in the model info text when the page loads.
	        demo.load(
	            fn=lambda: f"Model `{MODEL_LOCATION}` loaded.",
	            inputs=None,
	            outputs=model_info,
	        )

	    return demo

	def loggable_env_value(name, value):
	    """Return an environment value that is safe to write to the log.

	    Args:
	        name: Environment variable name.
	        value: The variable's value.

	    Returns:
	        ``"<redacted>"`` when ``name`` contains a credential-like marker
	        (matched case-insensitively), otherwise ``value`` unchanged.
	    """
	    secret_markers = ("TOKEN", "SECRET", "KEY", "PASSWORD", "PASSWD", "CREDENTIAL")
	    upper_name = name.upper()
	    if any(marker in upper_name for marker in secret_markers):
	        return "<redacted>"
	    return value


	if __name__ == "__main__":

	    logging.basicConfig(
	        level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
	    )

	    # Dump the environment for debugging, masking credentials such as
	    # HF_TOKEN so they never reach the logs in clear text.
	    for k, v in os.environ.items():
	        logging.info('environ["%s"] = %r', k, loggable_env_value(k, v))

	    # Load the model once globally
	    load_model()

	    create_app().queue().launch()