"""Gradio demo for Transcrib3D on a single ScanNet scene.

The user types a natural-language description of an object in the scene;
an LLM pipeline (via CodeInterpreter) resolves it to an object id, and the
demo re-renders the scene GLB with the answer object's bounding box drawn in.
"""
import os
import threading

import gradio as gr
import numpy as np  # explicit: previously came in implicitly via `from display_model import *`

from transcrib3d_main import (
    extract_answer_id_from_last_line,
    gen_prompt,
    get_gpt_response,
    get_openai_config,
)
from code_interpreter import CodeInterpreter
from display_model import *  # provides add_1box_to_ply, ply_to_glb

# The demo is hard-wired to one ScanNet scene; all assets derive from scan_id.
scan_id = "scene0132_00"
ply_file = os.path.join("scenes", f"{scan_id}_vh_clean_2_aligned.ply")
glb_file = os.path.join("scenes", f"{scan_id}_vh_clean_2_aligned.glb")
new_ply_file = os.path.join("scenes", f"{scan_id}_vh_clean_2_aligned_AddBox.ply")
new_glb_file = os.path.join("scenes", f"{scan_id}_vh_clean_2_aligned_AddBox.glb")
objects_info_file = os.path.join("objects_info", f"objects_info_{scan_id}.npy")


def insert_user_none_between_assistant(messages):
    """Insert a ``{"role": "user", "content": None}`` entry between two
    consecutive "assistant" messages so the list alternates user/assistant.

    gr.Chatbot renders (user, assistant) pairs, so back-to-back assistant
    messages must be padded with a None user turn.

    NOTE(review): ``last_role`` starts as "user", so a list that *begins*
    with an assistant message gets no padding in front of it; the caller
    (run_inferring) always passes a list that starts with the user prompt.
    """
    result = []
    last_role = "user"
    for msg in messages:
        current_role = msg["role"]
        # Pad only when two assistant messages would be adjacent.
        if last_role == "assistant" and current_role == "assistant":
            result.append({"role": "user", "content": None})
        result.append(msg)
        last_role = current_role
    return result


def generate_answer_glb(answer_content):
    """Extract the answer object id from the LLM's final reply and write a
    GLB of the scene with that object's bounding box added.

    answer_content: text of the last assistant message; its last line is
    expected to name the answer object id.
    """
    last_line = answer_content.splitlines()[-1] if answer_content else ''
    answer_id, _ = extract_answer_id_from_last_line(last_line)
    print("extracted answer id:", answer_id)
    # Axis-aligned bounding box ("extension") of the answer object,
    # looked up in the precomputed per-scene objects-info array.
    box = np.load(objects_info_file, allow_pickle=True)[answer_id]['extension']
    print("box extension:", box)
    # Draw the box into a copy of the scene mesh, then convert to GLB
    # (the format gr.Model3D displays).
    add_1box_to_ply(box, ply_file, new_ply_file)
    ply_to_glb(new_ply_file, new_glb_file)


def run_inferring(instruction, model3d, dialogue):
    """Full inference pipeline; intended to run in a worker thread.

    Builds the prompt for the fixed scene, queries the LLM through the code
    interpreter, renders the answer bounding box, and pushes the results
    into the UI components.
    """
    prompt = gen_prompt(instruction, scan_id)
    openai_config = get_openai_config()
    code_interpreter = CodeInterpreter(**openai_config)
    get_gpt_response(prompt, code_interpreter)
    messages = code_interpreter.pretext
    # Draw the answer bounding box into the scene GLB.
    generate_answer_glb(messages[-1]['content'])
    # Drop the system message, then normalize to alternating user/assistant
    # and pair the turns up as [user, assistant] rows for gr.Chatbot.
    messages = insert_user_none_between_assistant(messages[1:])
    gradio_messages = [
        [messages[i]['content'], messages[i + 1]['content']]
        for i in range(0, 2 * (len(messages) // 2), 2)
    ]
    # NOTE(review): calling .update() on components from a background thread
    # does not refresh the browser in current Gradio; the supported pattern is
    # returning values from the event handler via `outputs=` — confirm against
    # the Gradio version this demo targets.
    model3d.update(value=new_glb_file)
    dialogue.update(gradio_messages)


def process_instruction_callback(user_instruction, model3d, dialogue):
    """Button callback: run inference off the main thread so the UI stays responsive."""
    threading.Thread(target=run_inferring, args=(user_instruction, model3d, dialogue)).start()


with gr.Blocks() as demo:
    gr.Markdown("## Transcrib3D-Demo")
    with gr.Row():
        model3d = gr.Model3D(
            value="scenes/scene0132_00_vh_clean_2_aligned.glb",
            label="ScanNet-scene0132_00",
            camera_position=(90, 120, 8),
            zoom_speed=0.25,
            height=635,
        )
        with gr.Column():
            user_instruction_textbox = gr.Textbox(
                label="Instruction",
                placeholder="Describe an object in the scene with its attributes and its relation with other objects.",
            )
            bt = gr.Button(value="Submit")
            dialogue = gr.Chatbot(height=470)
    # Wrap the components in gr.State so the callback receives the component
    # objects themselves; listing them directly in `inputs` would pass their
    # current *values* (e.g. a str) instead.
    bt.click(
        fn=process_instruction_callback,
        inputs=[user_instruction_textbox, gr.State(model3d), gr.State(dialogue)],
    )

demo.launch()