Spaces:

jat-project
/

image-captioning

Runtime error

image-captioning / app.py

Quentin GALLOUÉDEC

app

e80cdb3 over 1 year ago

2.12 kB

	import gradio as gr
	import torch
	from gradio.components import Dropdown, Image, Textbox
	from huggingface_hub import HfApi, ModelFilter
	from transformers import AutoModelForCausalLM, AutoProcessor

	# List the jat-project models tagged "text-generation" on the Hugging Face Hub.
	# The tag is passed as a plain string filter rather than through the
	# deprecated ModelFilter helper.
	api = HfApi()
	models_infos = api.list_models(author="jat-project", filter="text-generation")
	models_names = [model.modelId for model in models_infos]

	# Cache of already-loaded models: repo id -> (model, processor)
	models = {}

	# Model pre-selected in the dropdown (nothing is loaded until first use)
	default_model_name = "jat-project/jat2-small-untrained"

	def generate_text(model_name, input_image):
	    """Stream status messages, then the caption generated for ``input_image``.

	    This is a generator so the UI can show progress: it yields
	    "Loading model..." the first time a model is requested, then
	    "Generating caption...", and finally the decoded caption.

	    Args:
	        model_name: Hub repo id of the model to use. Must be the default
	            model or one of the ids listed in ``models_names``.
	        input_image: Image to caption, as passed by the Gradio image input.
	            ``None`` is treated as "no image provided".

	    Yields:
	        Status strings followed by the generated caption, or a single
	        explanatory message when the request cannot be served.
	    """
	    # Nothing to caption: bail out before touching any model.
	    if input_image is None:
	        yield "Please provide an input image."
	        return

	    # Models are loaded with trust_remote_code=True, which runs code shipped
	    # in the repo. Only accept ids we listed ourselves, never arbitrary input.
	    if model_name != default_model_name and model_name not in models_names:
	        yield f"Unknown model: {model_name!r}"
	        return

	    # Load the model and its processor on first use, then reuse the cached pair
	    if model_name not in models:
	        # Inform the user that the model is loading
	        yield "Loading model..."

	        processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True, padding_side="left")
	        model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
	        models[model_name] = model, processor

	    model, processor = models[model_name]

	    # Inform the user that the text is being generated
	    yield "Generating caption..."

	    # Convert the input image to a tensor
	    pixel_values = processor(images=input_image, return_tensors="pt").pixel_values

	    # Generate token ids and decode the first (only) sequence
	    generated_ids = model.generate(pixel_values=pixel_values, max_length=100, early_stopping=True)
	    generated_text = processor.decode(generated_ids[0], skip_special_tokens=True)

	    # Final value shown in the output textbox
	    yield generated_text

	# Define the Gradio interface
	iface = gr.Interface(
	    fn=generate_text,  # Generator: streams status messages, then the caption
	    inputs=[
	        Dropdown(models_names, label="Select Model", value=default_model_name),  # Select model
	        Image(label="Input Image"),  # Image input
	    ],
	    outputs=Textbox(label="Generated Caption"),  # Textbox to display the generated text
	    title="JAT Image Captioning",  # Title of the interface
	)

	# generate_text is a generator, so requests must go through the queue.
	# Enable it with .queue(): the enable_queue argument of launch() is
	# deprecated in Gradio 3.x and no longer accepted by Gradio 4.
	iface.queue()
	iface.launch()