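# Gradio demo for Co-Instruct (preview): low-level visual question answering
# over one or two images with the q-future/co-instruct-preview checkpoint.
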
import gradio as gr
import torch
from transformers import AutoModelForCausalLM

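# Load the Co-Instruct preview checkpoint (mPLUG-Owl-2 based) in fp16 with
# FlashAttention 2. trust_remote_code=True is required because the custom
# chat() method ships with the model repository, not with transformers itself.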
model = AutoModelForCausalLM.from_pretrained("q-future/co-instruct-preview", 
                                             trust_remote_code=True, 
                                             torch_dtype=torch.float16, 
                                             use_flash_attention_2=True,
                                             device_map={"":"cuda:0"})

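# Prompt format expected by the model, reconstructed from the strings below
# (single-image case; each completed turn is closed with "</s>"):
#   "USER: The image: <|image|> {q1} ASSISTANT:{a1}</s>USER:{q2} ASSISTANT:"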
def chat(message, history, image_1, image_2):
    if image_1 is None:
        return "Please upload at least one image first (use Image 1 for a single image)."

    # Build the image preamble and collect the images in prompt order.
    if image_2 is None:
        prefix = "USER: The image: <|image|> "
        images = [image_1]
    else:
        prefix = "USER: The first image: <|image|>\nThe second image: <|image|> "
        images = [image_1, image_2]

    if history:
        # Replay earlier turns, closing each completed turn with "</s>".
        # (The original loop re-appended history[0] and dropped the last turn;
        # iterating over history[1:] fixes that.)
        message_so_far = prefix + history[0][0] + " ASSISTANT:" + history[0][1] + "</s>"
        for user_turn, assistant_turn in history[1:]:
            message_so_far += "USER:" + user_turn + " ASSISTANT:" + assistant_turn + "</s>"
        message = message_so_far + "USER:" + message + " ASSISTANT:"
    else:
        message = prefix + message + " ASSISTANT:"

    print(message)  # debug: full prompt sent to the model

    # model.chat() returns generated token ids; clamp out negative placeholder
    # ids, decode, and keep only the text after the final "ASSISTANT:" marker.
    return model.tokenizer.batch_decode(
        model.chat(message, images, max_new_tokens=150).clamp(0, 100000)
    )[0].split("ASSISTANT:")[-1]

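# A minimal sanity check outside the UI (hypothetical file path; assumes a
# CUDA device and the dependencies above are installed):
#
#   from PIL import Image
#   img = Image.open("sample.jpg")  # hypothetical example image
#   print(chat("Which distortions are visible in this image?", [], img, None))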

with gr.Blocks(title="Q-Instruct") as demo:
    title_markdown = ("""
    <div align="center">*Preview Version (v1)! Now we support two images as inputs! Try it now!*</div>
<h1 align="center"><a href="https://github.com/Q-Future/Q-Instruct"><img src="https://github.com/Q-Future/Q-Instruct/blob/main/q_instruct_logo.png?raw=true" alt="Q-Instruct (mPLUG-Owl-2)" border="0" style="margin: 0 auto; height: 85px;" /></a></h1>
<h2 align="center">Q-Instruct: Improving Low-level Visual Abilities for Multi-modality Foundation Models</h2>
<h5 align="center">Please find our more accurate visual scoring demo on <a href='https://huggingface.co/spaces/teowu/OneScorer'>[OneScorer]</a>!</h5>
<div align="center">
    <div style="display:flex; gap: 0.25rem;" align="center">
        <a href='https://github.com/Q-Future/Q-Instruct'><img src='https://img.shields.io/badge/Github-Code-blue'></a>
        <a href="https://Q-Instruct.github.io/Q-Instruct/fig/Q_Instruct_v0_1_preview.pdf"><img src="https://img.shields.io/badge/Technical-Report-red"></a>
        <a href='https://github.com/Q-Future/Q-Instruct/stargazers'><img src='https://img.shields.io/github/stars/Q-Future/Q-Instruct.svg?style=social'></a>
    </div>
</div>
""")
    gr.Markdown(title_markdown)
    with gr.Row():
        input_img_1 = gr.Image(type='pil', label="Image 1 (The first image)")
        input_img_2 = gr.Image(type='pil', label="Image 2 (The second image)")
    gr.ChatInterface(fn=chat, additional_inputs=[input_img_1, input_img_2])

demo.launch(share=True)