File size: 5,231 Bytes
f27a827
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
import os
import threading
import gradio as gr
from transcrib3d_main import gen_prompt, get_gpt_response, get_openai_config, extract_answer_id_from_last_line
from code_interpreter import CodeInterpreter
from display_model import *

# Fixed demo scene (ScanNet scene0132_00) and its on-disk assets.
scan_id = "scene0132_00"
# Original aligned scene mesh, in PLY and GLB form.
ply_file = os.path.join("scenes", f"{scan_id}_vh_clean_2_aligned.ply")
glb_file = os.path.join("scenes", f"{scan_id}_vh_clean_2_aligned.glb")
# Outputs with the answer object's bounding box drawn in ("AddBox" variants).
new_ply_file = os.path.join("scenes", f"{scan_id}_vh_clean_2_aligned_AddBox.ply")
new_glb_file = os.path.join("scenes", f"{scan_id}_vh_clean_2_aligned_AddBox.glb")
# Per-object metadata array (.npy), indexed by object id — assumed to hold
# dicts with an 'extension' bounding-box entry (see generate_answer_glb).
objects_info_file = os.path.join("objects_info", f"objects_info_{scan_id}.npy")

def insert_user_none_between_assistant(messages):
    """Return a copy of *messages* where every pair of consecutive
    "assistant" entries is separated by a ``{"role": "user", "content": None}``
    placeholder, so the sequence can be paired into (user, assistant)
    turns for the Gradio chat widget.
    """
    padded = []
    # Seed with "user" so a leading "assistant" message passes through
    # unchanged (the placeholder is only inserted between two assistants).
    prev_role = "user"

    for message in messages:
        role = message["role"]
        # Two assistant turns in a row: wedge an empty user turn between them.
        if prev_role == role == "assistant":
            padded.append({"role": "user", "content": None})
        padded.append(message)
        prev_role = role

    return padded

def generate_answer_glb(answer_content):
    """Extract the answer object id from the LLM reply text and write a
    GLB of the scene with that object's bounding box added.

    Side effects: writes ``new_ply_file`` and ``new_glb_file`` on disk.
    """
    # The answer id is encoded in the reply's final line.
    if len(answer_content) > 0:
        final_line = answer_content.splitlines()[-1]
    else:
        final_line = ''
    answer_id, _ = extract_answer_id_from_last_line(final_line)
    print("extracted answer id:", answer_id)

    # Look up that object's bounding box ('extension') in the scene metadata.
    objects_info = np.load(objects_info_file, allow_pickle=True)
    box = objects_info[answer_id]['extension']
    print("box extension:", box)

    # Draw the box into a copy of the mesh, then convert PLY -> GLB.
    add_1box_to_ply(box, ply_file, new_ply_file)
    ply_to_glb(new_ply_file, new_glb_file)

def run_inferring(instruction, model3d, dialogue):
    """Run the full Transcrib3D pipeline for one user instruction.

    Builds the prompt for the fixed demo scene, queries the LLM through the
    code interpreter, renders the answer object's bounding box into the
    scene GLB, and pushes the conversation into the Gradio widgets.

    Args:
        instruction: natural-language description of the target object.
        model3d: the gr.Model3D component to refresh with the boxed scene.
        dialogue: the gr.Chatbot component to fill with the chat history.
    """
    # Build the full referring-expression prompt for the demo scene.
    prompt = gen_prompt(instruction, scan_id)

    # Query the LLM; the interpreter accumulates the dialogue in .pretext.
    openai_config = get_openai_config()
    code_interpreter = CodeInterpreter(**openai_config)
    get_gpt_response(prompt, code_interpreter)
    messages = code_interpreter.pretext

    # Render the answer object's bounding box into the scene GLB files.
    generate_answer_glb(messages[-1]['content'])

    # Convert to Gradio chat history: drop the system message, pad so roles
    # alternate, then pair each user turn with the following assistant turn.
    messages = insert_user_none_between_assistant(messages[1:])
    gradio_messages = []
    # Step by 2 over complete pairs (floor-division pairing; a trailing
    # unpaired message is dropped, matching the previous behavior).
    for i in range(0, len(messages) - 1, 2):
        gradio_messages.append([messages[i]['content'], messages[i + 1]['content']])

    # NOTE(review): .update() on component instances from a worker thread may
    # not refresh the browser UI in all Gradio versions — confirm; returning
    # values from the click handler is the documented path.
    model3d.update(value=new_glb_file)
    dialogue.update(gradio_messages)



def process_instruction_callback(user_instruction, model3d, dialogue):
    """Gradio click handler: launch inference on a background thread so the
    UI stays responsive while the LLM call runs."""
    worker = threading.Thread(
        target=run_inferring,
        args=(user_instruction, model3d, dialogue),
    )
    worker.start()

# Demo UI layout: 3D scene viewer on the left; instruction box, submit
# button and chat history stacked on the right.
with gr.Blocks() as demo:
    gr.Markdown("## Transcrib3D-Demo")
    with gr.Row():
        model3d = gr.Model3D(
            value="scenes/scene0132_00_vh_clean_2_aligned.glb",
            # value="scenes/scene0132_00_vh_clean_2_aligned_AddBox.glb",
            # value="scenes/scene0132_00_vh_clean_2_aligned.ply",
            # value="scenes/scene0132_00_vh_clean_2_aligned.obj",
            # value="scenes/scene0132_00_gt_bboxes_aligned.ply",
            # value="scenes/cube.ply",
            label="ScanNet-scene0132_00",
            camera_position=(90,120,8),
            zoom_speed=0.25,
            height=635
        )
        # print("Type1:",type(model3d))
        
        with gr.Column():
            # with gr.Row():
            user_instruction_textbox = gr.Textbox(
                label="Instruction",
                placeholder="Describe an object in the scene with its attributes and its relation with other objects.",
                # scale=4
            )
            bt = gr.Button(
                value="Submit",
                # scale=1
            )
            
            dialogue = gr.Chatbot(
                height=470
                # value = [["1","2"], [None, '3']]
            )
        
    # print("Type2:",type(model3d))
    # Putting model3d directly in the inputs list makes Gradio pass the
    # component's *value* (a str) to the callback instead of the component
    # itself — hence the gr.State wrappers below.
    # bt.click(fn=process_instruction_callback, inputs=user_instruction_textbox, outputs=dialogue)
    bt.click(fn=process_instruction_callback, inputs=[user_instruction_textbox, gr.State(model3d), gr.State(dialogue)])#, outputs=[model3d,dialogue])

    # Alternative approach: define the mapping inline with a lambda.
    # type(user_instruction_textbox.value)
    # user_instruction_textbox.
    # user_instruction_textbox.submit(fn=lambda: process_instruction_callback(user_instruction_textbox, model3d), inputs=[], outputs=dialogue)
    # user_instruction_textbox.
    # bt.click(fn=lambda: process_instruction_callback(user_instruction_textbox, model3d), inputs=[], outputs=dialogue)
    

demo.launch()