Spaces:

Nishgop
/

cogVLM

Runtime error

File size: 9,157 Bytes

c1e62b8
 
76d42da
c1e62b8
76d42da
c1e62b8

#!/usr/bin/env python

import gradio as gr
import os
import json
import requests
import time
from concurrent.futures import ThreadPoolExecutor
from utils import is_chinese, process_image_without_resize, parse_response, templates_agent_cogagent, template_grounding_cogvlm, postprocess_text

# HTML heading passed to gr.Markdown at the top of the page.
DESCRIPTION = '''<h2 style='text-align: center'> <a href="https://github.com/THUDM/CogVLM"> CogVLM & CogAgent Chat Demo</a> </h2>'''

# Notice passed to gr.Markdown right below the heading. The Chinese part says the
# demo is for testing only, does not support bulk requests, and currently only
# supports English (with a link to a backup page that supports Chinese).
NOTES = 'This app is adapted from <a href="https://github.com/THUDM/CogVLM">https://github.com/THUDM/CogVLM</a>. It would be recommended to check out the repo if you want to see the detail of our model.\n\n该demo仅作为测试使用，不支持批量请求。如有大批量需求，欢迎联系[智谱AI](mailto:business@zhipuai.cn)。\n\n请注意该Demo目前仅支持英文，<a href="http://36.103.203.44:7861/">备用网页</a>支持中文。'

# Troubleshooting hints passed to gr.Markdown below the examples.
MAINTENANCE_NOTICE1 = 'Hint 1: If the app report "Something went wrong, connection error out", please turn off your proxy and retry.<br>Hint 2: If you upload a large size of image like 10MB, it may take some time to upload and process. Please be patient and wait.'

# Usage hint for the "Grounding" checkbox; no active reference is visible in this file.
GROUNDING_NOTICE = 'Hint: When you check "Grounding", please use the <a href="https://github.com/THUDM/CogVLM/blob/main/utils/utils/template.py#L344">corresponding prompt</a> or the examples below.'

# Usage hint for the "CogAgent" checkbox; no active reference is visible in this file.
AGENT_NOTICE = 'Hint: When you check "CogAgent", please use the <a href="https://github.com/THUDM/CogVLM/blob/main/utils/utils/template.py#L761C1-L761C17">corresponding prompt</a> or the examples below.'


# Chat history holding only the bot greeting; returned by clear_fn and clear_fn2.
default_chatbox = [("", "Hi, What do you want to know about this image?")]

# Endpoint that post() sends requests to; None when the URL environment variable is unset.
URL = os.environ.get("URL")


def make_request(URL, headers, data):
    """POST ``data`` to ``URL`` and return the decoded JSON body.

    Args:
        URL: Endpoint to send the request to.
        headers: HTTP headers for the request.
        data: Request body, sent as given.

    Returns:
        Whatever ``response.json()`` yields for the reply.

    Any exception raised by ``requests`` or by JSON decoding propagates to
    the caller.
    """
    # (connect timeout, read timeout) in seconds.
    timeouts = (60, 100)
    reply = requests.post(URL, headers=headers, data=data, timeout=timeouts)
    return reply.json()

def post(
        input_text,
        temperature,
        top_p,
        top_k,
        image_prompt,
        result_previous,
        hidden_image,
        grounding,
        cogagent,
        grounding_template,
        agent_template
        ):
    """Send one chat turn to the remote model and return the new UI state.

    Args:
        input_text: Prompt typed by the user.
        temperature: Sampling temperature, forwarded unchanged in the payload.
        top_p: Top-p value, forwarded unchanged in the payload.
        top_k: Top-k value, forwarded unchanged in the payload.
        image_prompt: Image value from the UI, handed to
            ``process_image_without_resize``. A falsy value means "no image".
        result_previous: Current chat history as (user, bot) pairs; may be None.
        hidden_image: Image hash stored by the previous call, if any.
        grounding: Whether the "Grounding" option is enabled.
        cogagent: Whether the "CogAgent" option is enabled.
        grounding_template: Template applied to the prompt in grounding mode.
        agent_template: Template applied to the prompt in agent mode, unless
            it is None or "do not use template".

    Returns:
        A ``(textbox_value, chat_history, image_hash)`` tuple. The textbox
        value is "" except when no image was supplied, in which case the
        typed text is returned so it is kept.
    """
    # Keep only turns that carry user text; this drops the greeting row and the
    # image-only rows appended after grounding answers.
    result_text = [
        (ele[0], ele[1])
        for ele in (result_previous or [])
        if ele[0] is not None and ele[0] != ""
    ]
    print(f"history {result_text}")

    is_zh = is_chinese(input_text)

    # Reject any falsy image (None or ""), so the image-derived variables used
    # further down are always defined.
    if not image_prompt:
        print("Image empty")
        if is_zh:
            result_text.append((input_text, '图片为空！请上传图片并重试。'))
        else:
            result_text.append((input_text, 'Image empty! Please upload a image and retry.'))
        return input_text, result_text, hidden_image
    if input_text == "":
        print("Text empty")
        result_text.append((input_text, 'Text empty! Please enter text and retry.'))
        return "", result_text, hidden_image

    headers = {
            "Content-Type": "application/json; charset=UTF-8",
            "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36",
        }

    pil_img, encoded_img, image_hash, image_path_grounding = process_image_without_resize(image_prompt)
    print(f"image_hash:{image_hash}, hidden_image_hash:{hidden_image}")

    # A different image starts a new conversation: drop the old history.
    if hidden_image is not None and image_hash != hidden_image:
        print("image has been update")
        result_text = []
    hidden_image = image_hash

    # CogAgent takes precedence over grounding when both boxes are checked.
    model_use = "vlm_chat"
    if cogagent:
        model_use = "agent_chat"
        if agent_template is not None and agent_template != "do not use template":
            input_text = postprocess_text(agent_template, input_text)
    elif grounding:
        model_use = "vlm_grounding"
        if grounding_template:
            input_text = postprocess_text(grounding_template, input_text)

    prompt = input_text
    if grounding:
        prompt += "(with grounding)"

    print(f'request {model_use} model... with prompt {prompt}, grounding_template {grounding_template}, agent_template {agent_template}')
    data = json.dumps({
        'model_use': model_use,
        'is_grounding': grounding,
        'text': prompt,
        'history': result_text,
        'image': encoded_img,
        'temperature': temperature,
        'top_p': top_p,
        'top_k': top_k,
        'do_sample': True,
        'max_new_tokens': 2048
    })
    try:
        # Run the request on a worker thread and block until it completes.
        with ThreadPoolExecutor(max_workers=1) as executor:
            future = executor.submit(make_request, URL, headers, data)
            response = future.result()
    except Exception as e:
        # Any failure while talking to the backend is reported in the chat
        # instead of being raised into the UI framework.
        print("error message", e)
        if is_zh:
            result_text.append((input_text, '超时！请稍等几分钟再重试。'))
        else:
            result_text.append((input_text, 'Timeout! Please wait a few minutes and retry.'))
        return "", result_text, hidden_image
    print('request done...')

    # The reply is expected to be a mapping with a 'result' entry; anything
    # else (error payload, list, null) is reported instead of crashing.
    try:
        answer = str(response['result'])
    except (KeyError, TypeError, IndexError) as e:
        print("malformed response", e, response)
        if is_zh:
            result_text.append((input_text, '服务返回了异常结果！请稍后重试。'))
        else:
            result_text.append((input_text, 'Unexpected response from the server! Please retry later.'))
        return "", result_text, hidden_image

    if grounding:
        parse_response(pil_img, answer, image_path_grounding)
        # Strip the echoed prompt from the answer before showing it.
        new_answer = answer.replace(input_text, "")
        result_text.append((input_text, new_answer))
        # Extra bot-only row that shows the image file at image_path_grounding.
        result_text.append((None, (image_path_grounding,)))
    else:
        result_text.append((input_text, answer))
    print(result_text)
    print('finished')
    return "", result_text, hidden_image


def clear_fn(value):
    """Return the reset state for the "Clear" button.

    ``value`` is ignored. The result is a 3-tuple: an empty string, the
    module-level ``default_chatbox`` history, and ``None``.
    """
    cleared_text = ""
    cleared_image = None
    return cleared_text, default_chatbox, cleared_image

def clear_fn2(value):
    """Return the module-level ``default_chatbox`` history; ``value`` is ignored."""
    fresh_history = default_chatbox
    return fresh_history


def main():
    """Build the Gradio UI, wire its event handlers, and launch the app.

    Reads example inputs from ``./examples/example_inputs.jsonl`` (one JSON
    object per line with the keys ``text``, ``image``, ``grounding`` and
    ``cogagent``), then blocks in ``demo.launch()``.
    """
    gr.close_all()

    # Decode explicitly as UTF-8 so the result does not depend on the platform
    # default encoding, and skip blank lines (e.g. a trailing newline), which
    # json.loads would reject.
    with open("./examples/example_inputs.jsonl", encoding="utf-8") as f:
        examples = [json.loads(line) for line in f if line.strip()]

    with gr.Blocks(css='style.css') as demo:

        gr.Markdown(DESCRIPTION)
        gr.Markdown(NOTES)

        with gr.Row():
            # Integer scales in the same 9:11 ratio as the former 4.5:5.5.
            with gr.Column(scale=9):
                with gr.Group():
                    input_text = gr.Textbox(label='Input Text', placeholder='Please enter text prompt below and press ENTER.')
                    with gr.Row():
                        run_button = gr.Button('Generate')
                        clear_button = gr.Button('Clear')

                    image_prompt = gr.Image(type="filepath", label="Image Prompt", value=None)
                with gr.Row():
                    grounding = gr.Checkbox(label="Grounding")
                    cogagent = gr.Checkbox(label="CogAgent")
                with gr.Row():
                    # grounding_notice = gr.Markdown(GROUNDING_NOTICE)
                    grounding_template = gr.Dropdown(choices=template_grounding_cogvlm, label="Grounding Template", value=template_grounding_cogvlm[0])
                    # agent_notice = gr.Markdown(AGENT_NOTICE)
                    agent_template = gr.Dropdown(choices=templates_agent_cogagent, label="Agent Template", value=templates_agent_cogagent[0])

                with gr.Row():
                    temperature = gr.Slider(maximum=1, value=0.9, minimum=0, label='Temperature')
                    top_p = gr.Slider(maximum=1, value=0.8, minimum=0, label='Top P')
                    top_k = gr.Slider(maximum=50, value=5, minimum=1, step=1, label='Top K')

            with gr.Column(scale=11):
                # Start from a copy of the greeting history that the clear handlers return.
                result_text = gr.components.Chatbot(label='Multi-round conversation History', value=list(default_chatbox), height=550)
                # Invisible textbox that carries the image hash between post() calls.
                hidden_image_hash = gr.Textbox(visible=False)

        gr.Examples(examples=[[example["text"], example["image"], example["grounding"], example["cogagent"]] for example in examples],
                    inputs=[input_text, image_prompt, grounding, cogagent],
                    label="Example Inputs (Click to insert an examplet into the input box)",
                    examples_per_page=6)

        gr.Markdown(MAINTENANCE_NOTICE1)

        print(gr.__version__)

        # The button click and the textbox ENTER key trigger the same handler
        # with the same components; the order matches post()'s parameters.
        post_inputs = [input_text, temperature, top_p, top_k, image_prompt, result_text, hidden_image_hash, grounding, cogagent, grounding_template, agent_template]
        post_outputs = [input_text, result_text, hidden_image_hash]
        run_button.click(fn=post, inputs=post_inputs, outputs=post_outputs)
        input_text.submit(fn=post, inputs=post_inputs, outputs=post_outputs)

        # The clear handlers ignore their argument; clear_button is passed only
        # because they are declared with one parameter.
        clear_button.click(fn=clear_fn, inputs=clear_button, outputs=[input_text, result_text, image_prompt])
        image_prompt.upload(fn=clear_fn2, inputs=clear_button, outputs=[result_text])
        image_prompt.clear(fn=clear_fn2, inputs=clear_button, outputs=[result_text])

    # NOTE(review): `concurrency_count` is believed to be rejected by
    # Blocks.queue() in Gradio 4+ (replaced by `default_concurrency_limit`) —
    # confirm against the pinned Gradio version before changing it.
    demo.queue(concurrency_count=10)
    demo.launch()

# Launch the demo only when this file is run as a script, not when it is imported.
if __name__ == '__main__':
    main()