"""Gradio web app that captions an uploaded image with a Hugging Face model.

Configuration is read from environment variables:

* ``LOG_LEVEL`` -- logging level name, case-insensitive (default ``DEBUG``).
* ``MAX_NEW_TOKENS`` -- forwarded to the pipeline as ``max_new_tokens``
  (default ``200``).
* ``MODEL`` -- model identifier handed to ``transformers.pipeline``
  (default ``Salesforce/blip-image-captioning-large``).
"""
import os

# Set before gradio is imported (same order as the original script) so the
# analytics opt-out is already in the environment when gradio loads.
# ``setdefault`` keeps any value the operator exported explicitly.
os.environ.setdefault("GRADIO_ANALYTICS_ENABLED", "False")

import logging

import gradio as gr
from pillow_heif import register_heif_opener

# Register the HEIF plugin with Pillow before any image is handled.
register_heif_opener()

from transformers import pipeline

# ``logging`` only recognises upper-case level names; normalise so that
# e.g. LOG_LEVEL=debug does not abort startup with a ValueError.
LOG_LEVEL = os.getenv("LOG_LEVEL", "DEBUG").upper()
MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", "200"))
# https://huggingface.co/models?pipeline_tag=image-to-text&sort=likes
MODEL = os.getenv("MODEL", "Salesforce/blip-image-captioning-large")

logging.basicConfig(level=LOG_LEVEL)
logger = logging.getLogger(__name__)

logger.info("Loading model...")
# simpler model: "ydshieh/vit-gpt2-coco-en"
captioner = pipeline(
    "image-to-text",
    model=MODEL,
    max_new_tokens=MAX_NEW_TOKENS,
)
logger.info("Done loading model.")


def graptioner(img):
    """Return the generated caption for *img*.

    Args:
        img: The image supplied by the Gradio ``Image`` input (configured
            with ``type="pil"``), or ``None`` when the form is submitted
            without an image.

    Returns:
        The ``generated_text`` of the first result produced by the
        captioning pipeline.

    Raises:
        gr.Error: If no image was provided, so the UI shows a readable
            message instead of an opaque pipeline failure.
    """
    if img is None:
        raise gr.Error("Please provide an image to caption.")
    result = captioner(img)
    return result[0]["generated_text"]


iface = gr.Interface(
    fn=graptioner,
    inputs=gr.components.Image(type="pil"),
    outputs=["text"],
    allow_flagging="never",
    # Analytics are disabled through GRADIO_ANALYTICS_ENABLED above.
)

iface.launch()