"""Gradio app that captions an image with a Hugging Face image-to-text pipeline.

Configuration is read from environment variables:

* ``LOG_LEVEL``: logging level name (default ``DEBUG``).
* ``MAX_NEW_TOKENS``: value passed to the pipeline as ``max_new_tokens``
  (default ``200``).
* ``MODEL``: model identifier passed to the pipeline
  (default ``Salesforce/blip-image-captioning-large``).
"""

import logging
import os

import gradio as gr
from pillow_heif import register_heif_opener
from transformers import pipeline

# Register the HEIF opener with Pillow before any image is loaded.
register_heif_opener()

# Level names are upper-case in the logging module; normalise so that values
# such as "debug" do not make logging.basicConfig raise ValueError.
LOG_LEVEL = os.getenv("LOG_LEVEL", "DEBUG").upper()
MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", "200"))
# https://huggingface.co/models?pipeline_tag=image-to-text&sort=likes
MODEL = os.getenv("MODEL", "Salesforce/blip-image-captioning-large")

logging.basicConfig(level=LOG_LEVEL)
logger = logging.getLogger(__name__)

logger.info("Loading model...")
# simpler model: "ydshieh/vit-gpt2-coco-en"
captioner = pipeline(
    "image-to-text",
    model=MODEL,
    max_new_tokens=MAX_NEW_TOKENS,
)
logger.info("Done loading model.")


def graptioner(image_url: str) -> str:
    """Return the generated caption for the image referenced by *image_url*.

    The value is stripped of surrounding whitespace and handed to the
    module-level ``captioner`` pipeline; the ``"generated_text"`` field of the
    first result is returned.

    Args:
        image_url: Image reference typed into the Gradio text box.

    Returns:
        The caption text produced by the pipeline.

    Raises:
        gr.Error: If *image_url* is empty or contains only whitespace.
    """
    source = (image_url or "").strip()
    if not source:
        # gr.Error shows this message in the UI instead of a generic failure.
        raise gr.Error("Please provide an image URL.")

    # NOTE(review): the pipeline's image loader may accept more than http(s)
    # URLs (e.g. local paths) -- confirm whether input should be restricted
    # when this app is exposed publicly.
    result = captioner(source)
    return result[0]["generated_text"]


# add gradio interface
iface = gr.Interface(
    fn=graptioner,
    inputs="text",
    outputs=["text"],
    allow_flagging="never",
)
iface.launch()