Spaces:

mrm8488
/

idefics-9b-ft-describe-diffusion-mj

Runtime error

File size: 2,198 Bytes

486f4bf
 
ce0ce67
486f4bf
 
ce0ce67
486f4bf
 
ce0ce67
 
 
 
 
 
 
486f4bf
 
 
 
 
8d07585
486f4bf
 
 
 
 
 
8d07585
b390d84
458c57f
 
 
 
 
 
1651e3c
486f4bf
 
8f189a9
 
 
9fd6fc9
8f189a9
9fd6fc9
486f4bf
232d5c8

import torch
from transformers import IdeficsForVisionText2Text, AutoProcessor
from peft import PeftModel, PeftConfig
import gradio as gr

# Hub id of the PEFT adapter; its config names the base checkpoint to load.
peft_model_id = "mrm8488/idefics-9b-ft-describe-diffusion-bf16-adapter"
# Use the GPU when torch can see one, otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"


# The adapter config is read first because both the base model and the
# processor are loaded from the checkpoint name stored in it.
config = PeftConfig.from_pretrained(peft_model_id)
base_model = IdeficsForVisionText2Text.from_pretrained(
    config.base_model_name_or_path,
    torch_dtype=torch.bfloat16,
)
# Wrap the base model with the fine-tuned adapter weights.
model = PeftModel.from_pretrained(base_model, peft_model_id)
processor = AutoProcessor.from_pretrained(config.base_model_name_or_path)
model = model.to(device)
# Inference only: switch the module to evaluation mode.
model.eval()

def predict(prompt, image_url, max_length):
    """Generate a text description for the image found at ``image_url``.

    Args:
        prompt: Instruction text placed after the image in the model input.
        image_url: URL of the image to describe. For a string, surrounding
            whitespace is ignored.
        max_length: Forwarded to ``model.generate`` as ``max_length`` after
            conversion to ``int``.

    Returns:
        The first decoded sequence, with special tokens skipped.

    Raises:
        gr.Error: If ``image_url`` is empty or blank, so the UI shows an
            actionable message instead of a download failure.
    """
    if isinstance(image_url, str):
        image_url = image_url.strip()
    if not image_url:
        raise gr.Error("Please provide the URL of the image to be described.")

    image = processor.image_processor.fetch_images(image_url)
    # A single prompt made of the image followed by the instruction text.
    inputs = processor([image, prompt], return_tensors="pt").to(device)
    # UI widgets may deliver numbers as floats; generate expects an integer.
    generated_ids = model.generate(**inputs, max_length=int(max_length))
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]



# Text shown at the top of the demo page.
title = "Midjourney-like Image Captioning with IDEFICS"
description = "Gradio Demo for generating *Midjourney* like captions (describe functionality) with **IDEFICS**"

# Each example row is (prompt, image URL, max tokens), matching the order of
# the ``inputs`` components below.
examples = [
    ["Describe the following image:", "https://cdn.arstechnica.net/wp-content/uploads/2023/06/zoomout_2-1440x807.jpg", 64],
    ["Describe the following image:", "https://framerusercontent.com/images/inZdRVn7eafZNvaVre2iW1a538.png", 64],
    ["Describe the following image:", "https://hips.hearstapps.com/hmg-prod/images/cute-photos-of-cats-in-grass-1593184777.jpg", 64],
]

io = gr.Interface(
    fn=predict,
    inputs=[
        gr.Textbox(label="Prompt", value="Describe the following image:"),
        gr.Textbox(label="image URL", placeholder="Insert the URL of the image to be described"),
        # Slider bounds are named ``minimum``/``maximum``; ``min``/``max`` are
        # not Slider parameters, so the 16-128 range was never applied.
        gr.Slider(label="Max tokens", value=64, minimum=16, maximum=128, step=8),
    ],
    outputs=gr.Textbox(label="IDEFICS Description"),
    title=title,
    description=description,
    examples=examples,
    # Flagging is disabled with the string option rather than a boolean.
    allow_flagging="never",
)
io.launch(debug=True)