Spaces (Sleeping)
kudo1026 committed
Commit f27a827 · 1 Parent(s): fbfbf30
initial
Files changed:
- .gitattributes +2 -0
- Dockerfile +52 -0
- README.md +11 -10
- app.py +189 -0
- appyibu.py +137 -0
- code_interpreter.py +132 -0
- display_model.py +167 -0
- gpt_dialogue.py +186 -0
- object_filter_gpt4.py +154 -0
- objects_info/objects_info_scene0132_00.npy +3 -0
- prompt_text.py +53 -0
- requirements.txt +6 -0
- scenes/scene0132_00_vh_clean_2_aligned.glb +3 -0
- scenes/scene0132_00_vh_clean_2_aligned.ply +3 -0
- sources.list +4 -0
- transcrib3d_main.py +285 -0
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+scenes/scene0132_00_vh_clean_2_aligned.glb filter=lfs diff=lfs merge=lfs -text
+scenes/scene0132_00_vh_clean_2_aligned.ply filter=lfs diff=lfs merge=lfs -text
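The two added lines route the large scene meshes (the aligned .glb and .ply for scene0132_00) through Git LFS, following the same filter=lfs pattern as the existing rules, so the binary scene files are stored as LFS pointers rather than committed directly.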
Dockerfile ADDED
@@ -0,0 +1,52 @@
# Use ubuntu:22.04 as the base image
FROM ubuntu:22.04

# Set the working directory
WORKDIR /code

COPY ./requirements.txt /code/requirements.txt

# Switch to mirror sources when building without a VPN:
# COPY ./sources.list /etc/apt/
# RUN mkdir -p ~/.pip
# RUN echo "[global]" >> ~/.pip/pip.conf
# RUN echo "index-url = https://pypi.tuna.tsinghua.edu.cn/simple" >> ~/.pip/pip.conf
# RUN cat ~/.pip/pip.conf

RUN apt-get update
RUN apt-get install -y wget
RUN apt-get install -y python3-pip
RUN pip3 config list
RUN apt-get install -y sudo
RUN apt-get install -y vim

# Download the libssl package into /root/Downloads and install it
RUN mkdir -p /root/Downloads && \
    wget -P /root/Downloads http://archive.ubuntu.com/ubuntu/pool/main/o/openssl1.0/libssl1.0.0_1.0.2n-1ubuntu5_amd64.deb && \
    dpkg -i /root/Downloads/libssl1.0.0_1.0.2n-1ubuntu5_amd64.deb

# Install the Python dependencies listed in requirements.txt
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

# Set up a new user named "user" with user ID 1000
RUN useradd -m -u 1000 user

# Switch to the "user" user
USER user

# Set home to the user's home directory
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# Set the working directory to the user's home directory
WORKDIR $HOME/app

# Copy the current directory contents into the container at $HOME/app, setting the owner to the user
COPY --chown=user . $HOME/app

# Default command to run when the container starts
# CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
CMD [ "python3", "app.py" ]
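Note that Ubuntu 22.04 no longer ships libssl1.0.0, which is why the Dockerfile fetches the .deb directly from the Ubuntu archive; presumably one of the pinned Python dependencies still links against the old OpenSSL 1.0 ABI.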
README.md CHANGED
@@ -1,13 +1,14 @@
 ---
-title: Transcrib3D
-emoji:
-colorFrom:
-colorTo:
-sdk:
-
-
+title: Transcrib3D-Demo
+emoji: 🎡
+colorFrom: blue
+colorTo: red
+sdk: docker
+app_port: 7860
+# sdk: gradio
+# sdk_version: 4.25.0
+# app_file: app.py
 pinned: false
-license:
+license: apache-2.0
 ---
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,189 @@
import os, time, threading
import gradio as gr

from display_model import *  # also brings in np (numpy) via display_model's imports

scan_id = "scene0132_00"
ply_file = os.path.join("scenes", f"{scan_id}_vh_clean_2_aligned.ply")
glb_file = os.path.join("scenes", f"{scan_id}_vh_clean_2_aligned.glb")
new_ply_file = os.path.join("scenes", f"{scan_id}_vh_clean_2_aligned_AddBox.ply")
new_glb_file = os.path.join("scenes", f"{scan_id}_vh_clean_2_aligned_AddBox.glb")
objects_info_file = os.path.join("objects_info", f"objects_info_{scan_id}.npy")

def insert_user_none_between_assistant(messages):
    # Initialize the result list
    result = []
    # Start with "user" so that a leading "assistant" entry also triggers an insertion
    last_role = "user"

    for msg in messages:
        # Check the role of the current message
        current_role = msg["role"]

        # If the previous and current messages are both "assistant",
        # insert a "user" message whose content is None between them
        if last_role == "assistant" and current_role == "assistant":
            result.append({"role": "user", "content": None})

        # Append the current message to the result list
        result.append(msg)

        # Remember the role of the last message
        last_role = current_role

    return result

def timer_check_update(code_interpreter, update_interval, stop_event):
    """
    Periodically check whether code_interpreter.has_update is True.
    If it is, trigger the UI refresh logic and reset the flag.
    Args:
    - code_interpreter: a CodeInterpreter instance, expected to expose a has_update attribute.
    - update_interval: polling interval in seconds.
    - stop_event: a threading.Event() instance used to stop the timer thread.
    """
    while not stop_event.is_set():
        if code_interpreter.has_update:
            # UI refresh logic goes here
            print("Detected update, triggering UI refresh...")
            # ...
            # Reset the has_update flag
            code_interpreter.has_update = False

        # Wait until the next check
        time.sleep(update_interval)

def process_instruction_callback(inp_api_key, instruction, llm_name):

    if not inp_api_key:
        print("Please input OpenAI API Key.")
        return
    else:
        os.environ["OPENAI_API_KEY"] = inp_api_key
        from transcrib3d_main import gen_prompt, get_gpt_response, get_openai_config
        from code_interpreter import CodeInterpreter

    print("llm_name:", llm_name)
    # generate the prompt from the user instruction
    # scan_id = "scene0132_00"
    prompt = gen_prompt(instruction, scan_id)

    # get the openai config
    openai_config = get_openai_config(llm_name)

    # get the LLM response
    code_interpreter = CodeInterpreter(**openai_config)
    get_gpt_response(prompt, code_interpreter)
    messages = code_interpreter.pretext

    # draw the answer bounding box in the scene
    generate_answer_glb(messages[-1]['content'])
    # model3d.update(value=new_glb_file)

    # build the gradio chat history
    messages = insert_user_none_between_assistant(messages[1:])
    gradio_messages = []
    for idx in range(int(len(messages)/2)):
        gradio_message = [messages[idx*2]['content'], messages[idx*2+1]['content']]
        gradio_messages.append(gradio_message)

    # return gradio_messages
    return new_glb_file, gradio_messages

def generate_answer_glb(answer_content):
    from transcrib3d_main import extract_answer_id_from_last_line
    last_line = answer_content.splitlines()[-1] if len(answer_content) > 0 else ''
    answer_id, _ = extract_answer_id_from_last_line(last_line)
    print("extracted answer id:", answer_id)

    # get the bounding box of the answer object
    box = np.load(objects_info_file, allow_pickle=True)[answer_id]['extension']
    print("box extension:", box)

    # add the box to the ply and convert it to glb
    add_1box_to_ply(box, ply_file, new_ply_file)
    ply_to_glb(new_ply_file, new_glb_file)

def llm_dropdown_callback(llm_name):
    print("type in callback:", type(llm_name))
    llm_name = str(llm_name)
    print("llm_name in callback:", llm_name)
    return llm_name

with gr.Blocks() as demo:
    gr.Markdown("## Transcrib3D-Demo")
    with gr.Row():
        with gr.Column():
            model3d = gr.Model3D(
                value="scenes/scene0132_00_vh_clean_2_aligned.glb",
                label="ScanNet-scene0132_00",
                camera_position=(90, 120, 8),
                zoom_speed=0.25,
                # height=635,
                height=725
            )

            html_content = """
            <div style='text-align: center;'>
                🖱️🔼🔽: SCROLL to zoom in/out. 🖱️🔁: DRAG to rotate. [CTRL]+🖱️🔁: Press CTRL and DRAG to pan.
            </div>
            """
            gr.HTML(value=html_content)

        with gr.Column():

            inp_api_key = gr.Textbox(label='OpenAI API Key (this is not stored anywhere)', lines=1)

            llm_dropdown = gr.Dropdown(
                # choices=['gpt-4-turbo', 'gpt-4', 'gpt-3.5-turbo'],
                choices=['gpt-4-0125-preview', 'gpt-4-1106-preview', 'gpt-3.5-turbo-0125'],
                label="LLM Selection",
                type='value'
            )
            # llm_name = "gpt-4-turbo"
            llm_name_text = gr.Text(visible=False)
            llm_dropdown.select(fn=llm_dropdown_callback, inputs=llm_dropdown, outputs=llm_name_text)

            user_instruction_textbox = gr.Textbox(
                label="Instruction",
                placeholder="Describe an object in the scene with its attributes and its relation with other objects, e.g. 'The largest table in the scene.'",
            )
            bt = gr.Button(
                value="Submit",
            )

            dialogue = gr.Chatbot(
                height=470
            )

    # Passing model3d directly in the inputs list would hand the callback a str instead of the
    # component, so the callback returns new values for the outputs instead.
    # bt.click(fn=process_instruction_callback, inputs=user_instruction_textbox, outputs=dialogue)
    bt.click(fn=process_instruction_callback, inputs=[inp_api_key, user_instruction_textbox, llm_name_text], outputs=[model3d, dialogue])
    user_instruction_textbox.submit(fn=process_instruction_callback, inputs=[inp_api_key, user_instruction_textbox, llm_name_text], outputs=[model3d, dialogue])

    # Earlier attempt: define the mapping directly with a lambda:
    # user_instruction_textbox.submit(fn=lambda: process_instruction_callback(user_instruction_textbox, model3d), inputs=[], outputs=dialogue)
    # bt.click(fn=lambda: process_instruction_callback(user_instruction_textbox, model3d), inputs=[], outputs=dialogue)

    # os.system('uname -a')  # print full system info
    # demo.launch()

if __name__ == "__main__":
    # demo.launch(share=True, server_name="0.0.0.0", server_port=7860)
    demo.launch(server_name="0.0.0.0", server_port=7860)
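A quick way to sanity-check the message-pairing logic in app.py is to run the helper on a synthetic history: gr.Chatbot expects a flat list of [user, assistant] pairs, so the consecutive assistant turns produced by the code interpreter must be padded with empty user turns. A standalone sketch, with the function body mirrored from app.py and a made-up history:

def insert_user_none_between_assistant(messages):
    result = []
    last_role = "user"  # a leading "assistant" message also gets a None user turn
    for msg in messages:
        current_role = msg["role"]
        if last_role == "assistant" and current_role == "assistant":
            result.append({"role": "user", "content": None})
        result.append(msg)
        last_role = current_role
    return result

history = [
    {"role": "user", "content": "find the largest table"},            # hypothetical turns
    {"role": "assistant", "content": "```python\n# some code\n```"},
    {"role": "assistant", "content": "Now the answer is complete."},
]
padded = insert_user_none_between_assistant(history)
# The padded list has even length, so it zips cleanly into chatbot pairs:
pairs = [[padded[i]["content"], padded[i + 1]["content"]] for i in range(0, len(padded), 2)]
print(pairs)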
appyibu.py ADDED
@@ -0,0 +1,137 @@
import os
import threading
import gradio as gr
from transcrib3d_main import gen_prompt, get_gpt_response, get_openai_config, extract_answer_id_from_last_line
from code_interpreter import CodeInterpreter
from display_model import *

scan_id = "scene0132_00"
ply_file = os.path.join("scenes", f"{scan_id}_vh_clean_2_aligned.ply")
glb_file = os.path.join("scenes", f"{scan_id}_vh_clean_2_aligned.glb")
new_ply_file = os.path.join("scenes", f"{scan_id}_vh_clean_2_aligned_AddBox.ply")
new_glb_file = os.path.join("scenes", f"{scan_id}_vh_clean_2_aligned_AddBox.glb")
objects_info_file = os.path.join("objects_info", f"objects_info_{scan_id}.npy")

def insert_user_none_between_assistant(messages):
    # Initialize the result list
    result = []
    # Start with "user" so that a leading "assistant" entry also triggers an insertion
    last_role = "user"

    for msg in messages:
        # Check the role of the current message
        current_role = msg["role"]

        # If the previous and current messages are both "assistant",
        # insert a "user" message whose content is None between them
        if last_role == "assistant" and current_role == "assistant":
            result.append({"role": "user", "content": None})

        # Append the current message to the result list
        result.append(msg)

        # Remember the role of the last message
        last_role = current_role

    return result

def generate_answer_glb(answer_content):
    last_line = answer_content.splitlines()[-1] if len(answer_content) > 0 else ''
    answer_id, _ = extract_answer_id_from_last_line(last_line)
    print("extracted answer id:", answer_id)

    # get the bounding box of the answer object
    box = np.load(objects_info_file, allow_pickle=True)[answer_id]['extension']
    print("box extension:", box)

    # add the box to the ply and convert it to glb
    add_1box_to_ply(box, ply_file, new_ply_file)
    ply_to_glb(new_ply_file, new_glb_file)

def run_inferring(instruction, model3d, dialogue):
    # generate the prompt from the user instruction
    prompt = gen_prompt(instruction, scan_id)

    # get the openai config
    openai_config = get_openai_config()

    # get the LLM response
    code_interpreter = CodeInterpreter(**openai_config)
    get_gpt_response(prompt, code_interpreter)
    messages = code_interpreter.pretext

    # draw the answer bounding box in the scene
    generate_answer_glb(messages[-1]['content'])
    # global model3d
    # model3d.postprocess(new_glb_file)

    # build the gradio chat history
    messages = insert_user_none_between_assistant(messages[1:])
    gradio_messages = []
    for idx in range(int(len(messages)/2)):
        gradio_message = [messages[idx*2]['content'], messages[idx*2+1]['content']]
        gradio_messages.append(gradio_message)

    # return new_glb_file, gradio_messages
    model3d.update(value=new_glb_file)
    dialogue.update(gradio_messages)


def process_instruction_callback(user_instruction, model3d, dialogue):
    threading.Thread(target=run_inferring, args=(user_instruction, model3d, dialogue)).start()
    # return "Processing your instruction, please wait...",

with gr.Blocks() as demo:
    gr.Markdown("## Transcrib3D-Demo")
    with gr.Row():
        model3d = gr.Model3D(
            value="scenes/scene0132_00_vh_clean_2_aligned.glb",
            # value="scenes/scene0132_00_vh_clean_2_aligned_AddBox.glb",
            # value="scenes/scene0132_00_vh_clean_2_aligned.ply",
            # value="scenes/scene0132_00_vh_clean_2_aligned.obj",
            # value="scenes/scene0132_00_gt_bboxes_aligned.ply",
            # value="scenes/cube.ply",
            label="ScanNet-scene0132_00",
            camera_position=(90, 120, 8),
            zoom_speed=0.25,
            height=635
        )

        with gr.Column():
            user_instruction_textbox = gr.Textbox(
                label="Instruction",
                placeholder="Describe an object in the scene with its attributes and its relation with other objects.",
            )
            bt = gr.Button(
                value="Submit",
            )

            dialogue = gr.Chatbot(
                height=470
            )

    # Passing model3d directly in the inputs list would hand the callback a str instead of the component
    # bt.click(fn=process_instruction_callback, inputs=user_instruction_textbox, outputs=dialogue)
    bt.click(fn=process_instruction_callback, inputs=[user_instruction_textbox, gr.State(model3d), gr.State(dialogue)])  # , outputs=[model3d, dialogue])

    # Earlier attempt: define the mapping directly with a lambda:
    # user_instruction_textbox.submit(fn=lambda: process_instruction_callback(user_instruction_textbox, model3d), inputs=[], outputs=dialogue)
    # bt.click(fn=lambda: process_instruction_callback(user_instruction_textbox, model3d), inputs=[], outputs=dialogue)

demo.launch()
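A caveat on the threading approach in appyibu.py: calling model3d.update(...) and dialogue.update(...) from a worker thread does not, in recent Gradio versions, push anything to the browser, which likely explains why app.py abandons the thread and simply returns the new values from the event handler. A minimal sketch of the return-value pattern (the component names and handler body are illustrative):

import gradio as gr

def on_submit(text):
    # Do the slow work inside the handler and return the new component values;
    # Gradio applies them to the components listed in outputs=[...].
    return "scenes/updated.glb", [[text, "done"]]

with gr.Blocks() as sketch:
    box = gr.Textbox(label="Instruction")
    model = gr.Model3D()
    chat = gr.Chatbot()
    box.submit(fn=on_submit, inputs=box, outputs=[model, chat])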
code_interpreter.py ADDED
@@ -0,0 +1,132 @@
import os, datetime, sys
from io import StringIO
from contextlib import redirect_stdout
import traceback
# import openai
from gpt_dialogue import Dialogue
# openai.api_key = os.getenv("OPENAI_API_KEY")

class CodeInterpreter(Dialogue):

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def call_openai_with_code_interpreter(self, user_prompt, namespace_for_exec={}, token_usage_total=0):
        # If the GPT reply contains Python code, execute it and send the execution
        # result back to GPT, then keep waiting for the next reply.
        # If the reply contains no Python code, return the full result.
        # Token usage is accumulated across the recursive calls and returned as a total.
        assistant_response, token_usage = self.call_openai(user_prompt)
        token_usage_total += token_usage

        # check whether the response contains a code snippet
        response_content = assistant_response['content']
        if self.debug:
            print('response_content: ', response_content)
        response_splits = response_content.split('```python')
        if len(response_splits) <= 1:
            # no code snippet found, return the raw response
            if self.debug:
                print('no code snippet found, return the raw response')
            return assistant_response, token_usage_total
        else:
            # code snippet(s) found: concatenate them and execute
            code_snippet = ""
            for split in response_splits:
                if '```' in split:
                    code_snippet += split.split('```')[0]
            f = StringIO()
            code_exec_success = True

            with redirect_stdout(f):
                try:
                    exec(code_snippet, namespace_for_exec)
                    code_exe_result = f.getvalue()
                except Exception:
                    code_exec_success = False
                    traceback_message_lines = traceback.format_exc().splitlines()
                    code_exe_result = '\n'.join(traceback_message_lines[-4:])

            # Alternative approach: save the snippet to a file and run it in a subprocess
            # with open("code_snippet.py", "w") as file:
            #     file.write(code_snippet)
            # os.system("python code_snippet.py > output.txt")
            # with open("output.txt", "r") as file:
            #     code_exe_result = file.read()

            if code_exec_success:
                code_exe_msg = 'code execution result:\n' + str(code_exe_result)
            else:
                code_exe_msg = "An error was raised when executing the code you wrote: %s" % code_exe_result
            print(code_exe_msg)
            return self.call_openai_with_code_interpreter(code_exe_msg, namespace_for_exec, token_usage_total)

if __name__ == '__main__':

    config = {
        'model': 'gpt-4',
        # 'model': 'gpt-3.5-turbo',
        'temperature': 0,
        'top_p': 0.0,
        'max_tokens': 'inf',
        'system_message': "Imagine you are an artificial intelligence assistant with a python interpreter. So when answering questions, you can choose to generate python code (for example, when there is need to do quantitative evaluation). The generated code should always print out the result. The code should be written in python and should be able to run in the python environment with the following packages installed: numpy, math. The generated code should be complete and always include proper imports. Each generated code piece should be independent and NOT rely on previously generated code. When answering step by step, stop whenever you feel there is need to generate python code (for example, where there is need to do quantitative evaluation) and wait for the result from the code execution. When the answer is complete, add 'Now the answer is complete.' to the end of your answer.",
        # 'load_path': '',
        'save_path': 'chats',
        'debug': False
    }

    dialogue = CodeInterpreter(**config)
    print('======================Instructions======================')
    print('Type "exit" to exit the dialogue')
    print('Type "reset" to reset the dialogue')
    print('Type "pretext" to see the current dialogue history')
    print('Type "config" to see the current config')
    print('Type "save" to save the current dialogue history')
    print('====GPT Dialogue Initialized, start asking your questions====')

    while True:
        user_prompt = input('You: ')
        if user_prompt == 'exit':
            break
        elif user_prompt == 'reset':
            dialogue = CodeInterpreter(**config)
            print('====GPT Dialogue Initialized, start asking your questions====')
            continue
        elif user_prompt == 'pretext':
            print('===Pretext===')
            for message in dialogue.get_pretext():
                print(message)
            print('===Pretext===')
            continue
        elif user_prompt == 'config':
            print('===Config===')
            print(config)
            print('===Config===')
            continue
        elif user_prompt == 'save':
            timestamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
            dialogue.save_pretext(config['save_path'], 'dialogue_' + timestamp + '.json')
            print('Pretext saved to', os.path.join(
                config['save_path'], 'dialogue_' + timestamp + '.json'))
            continue
        else:
            # call_openai_with_code_interpreter returns a (message, token_total) tuple,
            # so unpack before indexing ['content']
            assistant_response, _ = dialogue.call_openai_with_code_interpreter(user_prompt)
            response = assistant_response['content']
            print('Bot:', response)
            counter = 0
            while not response.endswith('Now the answer is complete.') and counter < 10:
                assistant_response, _ = dialogue.call_openai_with_code_interpreter('')
                response = assistant_response['content']
                print('Bot:', response)
                counter += 1
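The fenced-code extraction above concatenates every ```python block in the model's reply before executing the combined snippet. That parsing step is easy to exercise on its own; a standalone sketch with a made-up response string:

response_content = (
    "Let me compute that.\n"
    "```python\nprint(1 + 1)\n```\n"
    "and also\n"
    "```python\nprint('done')\n```"
)
code_snippet = ""
for split in response_content.split("```python"):
    # any split that still contains a closing fence held code before that fence
    if "```" in split:
        code_snippet += split.split("```")[0]
print(code_snippet)  # the two print statements, concatenated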
display_model.py ADDED
@@ -0,0 +1,167 @@
import numpy as np
from plyfile import PlyData, PlyElement

def ply_to_glb(ply_file, glb_file):
    print("Converting PLY to GLB...")
    # Import trimesh here so it is only required when this function is called
    import trimesh
    # Load the PLY file with trimesh
    try:
        mesh = trimesh.load(ply_file)
        # Export to GLB format
        mesh.export(glb_file, file_type='glb')
        print("Conversion finished.")
        return "PLY to GLB conversion complete."
    except Exception as e:
        # In case of any issue, print the error
        print(f"Error during conversion: {e}")
        return "Conversion failed."


def merge_box_to_ply(ply_file, box_ply_file):
    pass

def add_1box_to_ply(box, ply_file, new_ply_file, line_width=0.05, obj_id=1):
    print("adding 1 box to ply...")
    print("ply_file:", ply_file)
    print("new_ply_file:", new_ply_file)
    # box format: [xmin, ymin, zmin, xmax, ymax, zmax]
    xmin, ymin, zmin, xmax, ymax, zmax = box
    box_coords = np.array(
        [[xmin, ymin, zmin],                        # 0
         [xmin-line_width, ymin-line_width, zmin],  # 1
         [xmax, ymin, zmin],                        # 2
         [xmax+line_width, ymin-line_width, zmin],  # 3
         [xmax, ymax, zmin],                        # 4
         [xmax+line_width, ymax+line_width, zmin],  # 5
         [xmin, ymax, zmin],                        # 6
         [xmin-line_width, ymax+line_width, zmin],  # 7
         [xmin, ymin, zmax],                        # 8
         [xmin-line_width, ymin-line_width, zmax],  # 9
         [xmax, ymin, zmax],                        # 10
         [xmax+line_width, ymin-line_width, zmax],  # 11
         [xmax, ymax, zmax],                        # 12
         [xmax+line_width, ymax+line_width, zmax],  # 13
         [xmin, ymax, zmax],                        # 14
         [xmin-line_width, ymax+line_width, zmax]   # 15
        ])

    # read in the ply
    with open(ply_file, 'rb') as f:
        ply_data = PlyData.read(f)
    vertices = ply_data['vertex'].data

    # build the new vertices
    color = [255, 0, 0]
    box_vertices = np.zeros(len(box_coords), dtype=vertices.dtype)
    box_vertices['x'] = [coord[0] for coord in box_coords]
    box_vertices['y'] = [coord[1] for coord in box_coords]
    box_vertices['z'] = [coord[2] for coord in box_coords]
    box_vertices['red'] = [color[0]] * 16
    box_vertices['green'] = [color[1]] * 16
    box_vertices['blue'] = [color[2]] * 16
    box_vertices['alpha'] = [obj_id] * 16

    # Append the new vertices to the original vertex data
    updated_vertices = np.concatenate((vertices, box_vertices))

    # Create a PlyElement holding the updated vertices
    updated_vertex_element = PlyElement.describe(updated_vertices, 'vertex')

    # get the number of original vertices
    num_origin_vertices = len(vertices)

    # define the connectivity of the new faces
    base = num_origin_vertices
    box_connections = [
        [base+0,  base+1,  base+3],  [base+0,  base+3,  base+2],
        [base+2,  base+3,  base+5],  [base+2,  base+5,  base+4],
        [base+4,  base+5,  base+7],  [base+4,  base+7,  base+6],
        [base+0,  base+1,  base+7],  [base+0,  base+7,  base+6],
        [base+0,  base+1,  base+9],  [base+0,  base+9,  base+8],
        [base+2,  base+3,  base+11], [base+2,  base+11, base+10],
        [base+4,  base+5,  base+13], [base+4,  base+13, base+12],
        [base+6,  base+7,  base+15], [base+6,  base+15, base+14],
        [base+8,  base+9,  base+11], [base+8,  base+11, base+10],
        [base+10, base+11, base+13], [base+10, base+13, base+12],
        [base+12, base+13, base+15], [base+12, base+15, base+14],
        [base+8,  base+9,  base+15], [base+8,  base+15, base+14],
    ]

    # build the new faces
    faces = ply_data['face'].data
    box_faces = np.zeros(len(box_connections), dtype=faces.dtype)
    box_faces['vertex_indices'] = box_connections

    # Append the new faces to the original face data
    updated_faces = np.concatenate((faces, box_faces))

    # Create a PlyElement holding the updated faces
    updated_face_element = PlyElement.describe(updated_faces, 'face')

    new_ply_data = PlyData([updated_vertex_element, updated_face_element])

    # Write the updated PlyData back to a ply file
    with open(new_ply_file, 'wb') as f:
        new_ply_data.write(f)

    print("add 1 box to ply finished.")


def ply_to_obj(ply_file, obj_file, mtl_file):
    # Read the PLY file
    with open(ply_file, 'rb') as f:
        plydata = PlyData.read(f)

    # Get the vertex and face data
    vertices = np.vstack([plydata['vertex'][prop] for prop in ['x', 'y', 'z']]).T
    colors = np.vstack([plydata['vertex'][prop] for prop in ['red', 'green', 'blue', 'alpha']]).T / 255.0
    faces = plydata['face']['vertex_indices']

    # Write the OBJ file
    with open(obj_file, 'w') as f:
        # Reference the companion mtl file (colors)
        f.write("mtllib %s\n" % mtl_file.split('/')[-1])

        # Write vertex positions
        for vertex in vertices:
            f.write(f"v {' '.join(map(str, vertex))}\n")

        # Write per-vertex material references
        for idx in range(len(vertices)):
            f.write("usemtl mat%d\n" % (idx+1))

        # Write faces
        for face in faces:
            f.write("f")
            for vertex_index in face:
                f.write(f" {vertex_index + 1}")  # OBJ indices start at 1
            f.write("\n")

    # Write the mtl file
    with open(mtl_file, 'w') as f:
        for idx, color in enumerate(colors):
            f.write("newmtl mat%d\n" % (idx+1))
            f.write("Kd %f %f %f\n\n" % (color[0], color[1], color[2]))

if __name__ == "__main__":
    # ply_to_obj("./scenes/scene0132_00_vh_clean_2_aligned.ply", "./scenes/scene0132_00_vh_clean_2_aligned.obj", "./scenes/scene0132_00_vh_clean_2_aligned_colors.mtl")
    add_1box_to_ply([0, 0, 0, 1, 1, 1], "scenes/scene0132_00_vh_clean_2_aligned.ply", "scenes/scene0132_00_add1box.ply")
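For orientation: the 16 vertices above form, roughly, an inner rectangle (the exact box corners) and an outer rectangle (offset by line_width) at each of the two z-levels, and the 24 triangles stitch them into thin red strips that read as a wireframe box. A minimal usage sketch, assuming the aligned ScanNet mesh is present locally:

from display_model import add_1box_to_ply, ply_to_glb

# Axis-aligned box in scene coordinates: [xmin, ymin, zmin, xmax, ymax, zmax]
box = [0.0, 0.0, 0.0, 1.0, 1.0, 1.0]
add_1box_to_ply(box, "scenes/scene0132_00_vh_clean_2_aligned.ply",
                "scenes/scene0132_00_add1box.ply")
ply_to_glb("scenes/scene0132_00_add1box.ply", "scenes/scene0132_00_add1box.glb")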
gpt_dialogue.py ADDED
@@ -0,0 +1,186 @@
import os
import json
import datetime
# import openai
# openai.api_key = os.getenv("OPENAI_API_KEY")
from openai import OpenAI
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))


# HUGGINGFACE_MODELS = {
#     'meta-llama/Llama-2-7b-chat-hf',
#     'meta-llama/Llama-2-13b-chat-hf',
#     'meta-llama/Llama-2-70b-chat-hf',
#     'codellama/CodeLlama-7b-Instruct-hf',
#     'codellama/CodeLlama-13b-Instruct-hf',
#     'codellama/CodeLlama-34b-Instruct-hf',
#     'mistralai/Mistral-7B-Instruct-v0.1',
# }


class Dialogue:
    def __init__(self, model='gpt-4', temperature=0, top_p=0.0, max_tokens=10, system_message='', load_path=None, save_path='chats', debug=False):
        self.model = model
        self.temperature = temperature
        self.top_p = top_p
        self.max_tokens = max_tokens
        self.system_message = system_message
        self.save_path = save_path
        self.debug = debug
        self.has_update = False
        if load_path is not None:
            self.load_pretext(load_path)
        else:
            self.pretext = [{"role": "system", "content": self.system_message}]

        if 'llama' in self.model:
            from hf_conversational import HuggingfaceConversational
            from transformers import Conversation
            self.conversational = HuggingfaceConversational(
                model_name=self.model,
                temperature=self.temperature,
                top_p=self.top_p,
                max_length=self.max_tokens
            )

    def load_pretext(self, load_path):

        def load_json(load_path):
            with open(load_path) as json_file:
                return json.load(json_file)

        self.pretext = []
        if isinstance(load_path, list):
            for path in load_path:
                self.pretext += load_json(path)
        elif isinstance(load_path, str):
            self.pretext = load_json(load_path)
        else:
            raise Exception('load_path must be a list of strings or a string')

    def get_pretext(self):
        return self.pretext

    def save_pretext(self, save_folder_path, file_name):
        if not os.path.exists(save_folder_path):
            os.makedirs(save_folder_path)
        json_path = os.path.join(save_folder_path, file_name)
        json_object = json.dumps(self.get_pretext(), indent=4)
        with open(json_path, 'w') as f:
            f.write(json_object)

    def print_pretext(self, print_system_and_user_first_prompt=True, to_print_out=True):
        # determine whether to print the system message and the user's first prompt
        from copy import deepcopy
        pretext = deepcopy(self.pretext)
        if not print_system_and_user_first_prompt:
            pretext = pretext[2:]
        printed_pretext = ''
        # print the pretext
        for piece in pretext:
            if to_print_out:
                print('----------------->ROLE: ' + piece['role'] + '\t<-----------------')
                print('CONTENT: ' + piece['content'])
            printed_pretext = printed_pretext + '----------------->\tROLE: ' + piece['role'] + '\t<-----------------\n'
            printed_pretext = printed_pretext + 'CONTENT: ' + piece['content'] + '\n'
        self.printed_pretext = printed_pretext

    def call_openai(self, user_prompt):
        user_message = [{"role": "user", "content": user_prompt}]
        messages = self.pretext + user_message
        if 'gpt' in self.model:
            completion = client.chat.completions.create(
                model=self.model,
                messages=messages,
                temperature=self.temperature,
                top_p=self.top_p,
                seed=42,
            )
            # Legacy (pre-1.0) client equivalent:
            # completion = openai.ChatCompletion.create(
            #     model=self.model,
            #     messages=self.pretext + user_message,
            #     temperature=self.temperature,
            #     top_p=self.top_p,
            # )
            raw_response_message = completion.choices[0].message
            assistant_response_message = {'role': raw_response_message.role, 'content': raw_response_message.content}
            token_usage = completion.usage.total_tokens
        elif 'llama' in self.model:
            chat_completion_messages, token_usage = self.conversational(messages)
            assistant_response_message = chat_completion_messages.messages[-1]
        else:
            raise Exception('model name {} not supported'.format(self.model))

        self.pretext = self.pretext + user_message + [assistant_response_message]
        self.has_update = True

        return assistant_response_message, token_usage


if __name__ == '__main__':

    config = {
        # 'model': 'gpt-4-1106-preview',
        # 'model': 'gpt-4',
        'model': 'gpt-3.5-turbo-0125',
        # 'model': 'meta-llama/Llama-2-7b-chat-hf',
        'temperature': 0,
        'top_p': 0.0,
        'max_tokens': 8192,
        'system_message': '',
        # 'load_path': 'chats/dialogue_an apple.json',
        'save_path': 'chats',
        'debug': False
    }

    dialogue = Dialogue(**config)
    print('======================Instructions======================')
    print('Type "exit" to exit the dialogue')
    print('Type "reset" to reset the dialogue')
    print('Type "pretext" to see the current dialogue history')
    print('Type "config" to see the current config')
    print('Type "save" to save the current dialogue history')
    print('====GPT Dialogue Initialized, start asking your questions====')

    while True:
        user_prompt = input('You: ')
        if user_prompt == 'exit':
            break
        elif user_prompt == 'reset':
            dialogue = Dialogue(**config)
            print('====GPT Dialogue Initialized, start asking your questions====')
            continue
        elif user_prompt == 'pretext':
            print('===Pretext===')
            for message in dialogue.get_pretext():
                print(message)
            # dialogue.print_pretext()
            print('===Pretext===')
            continue
        elif user_prompt == 'config':
            print('===Config===')
            print(config)
            print('===Config===')
            continue
        elif user_prompt == 'save':
            timestamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
            # save_pretext takes (folder, file_name), so pass the full file name
            dialogue.save_pretext(config['save_path'], 'dialogue_' + timestamp + '.json')
            print('Pretext saved to', os.path.join(
                config['save_path'], 'dialogue_' + timestamp + '.json'))
            continue
        else:
            assistant_response_message, token_usage = dialogue.call_openai(user_prompt)
            response = assistant_response_message['content']
            print('Bot:', response)
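Besides the interactive REPL above, the class can be driven programmatically. A minimal sketch, assuming OPENAI_API_KEY is set in the environment:

from gpt_dialogue import Dialogue

dialogue = Dialogue(model='gpt-3.5-turbo-0125', temperature=0, top_p=0.0,
                    system_message='You are a helpful assistant.')
reply, tokens = dialogue.call_openai('Say hello in one word.')
print(reply['content'], '| total tokens:', tokens)
# The exchange is appended to dialogue.pretext, so a second call_openai()
# continues the same conversation.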
object_filter_gpt4.py ADDED
@@ -0,0 +1,154 @@
import numpy as np
import os, json
import re
import logging
from gpt_dialogue import Dialogue
import openai
from tenacity import (
    retry,
    before_sleep_log,
    stop_after_attempt,
    wait_random_exponential,
    wait_exponential,
    wait_exponential_jitter,
    RetryError
)  # for exponential backoff

openai.api_key = os.getenv("OPENAI_API_KEY")

logger = logging.getLogger(__name__ + 'logger')
logger.setLevel(logging.ERROR)
console_handler = logging.StreamHandler()
console_handler.setLevel(logging.ERROR)
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
console_handler.setFormatter(formatter)
logger.addHandler(console_handler)

class ObjectFilter(Dialogue):
    def __init__(self, model='gpt-4'):
        config = {
            # 'model': 'gpt-4',
            # 'model': 'gpt-4-1106-preview',
            'model': model,
            'temperature': 0,
            'top_p': 0.0,
            'max_tokens': 8192,
            # 'load_path': './object_filter_pretext.json',
            'load_path': './object_filter_pretext_new.json',
            'debug': False
        }
        super().__init__(**config)

    def extract_all_int_lists_from_text(self, text) -> list:
        # Match the content inside square brackets
        pattern = r'\[([^\[\]]+)\]'
        matches = re.findall(pattern, text)

        int_lists = []

        for match in matches:
            elements = match.split(',')
            int_list = []

            for element in elements:
                element = element.strip()
                try:
                    int_value = int(element)
                    int_list.append(int_value)
                except ValueError:
                    pass

            # keep the match only if every element parsed as an int
            if len(int_list) == len(elements):
                int_lists = int_lists + int_list

        return int_lists

    def extract_dict_from_text(self, text) -> dict:
        # Use a regular expression to match the dictionary in the text
        match = re.search(r'{\s*(.*?)\s*}', text)
        if match:
            # Get the matched dictionary content
            dict_str = match.group(1)
            # Convert the dictionary string to an actual dictionary object
            try:
                result_dict = eval('{' + dict_str + '}')
                return result_dict
            except Exception as e:
                print(f"Error converting string to dictionary: {e}")
                return None
        else:
            print("No dictionary found in the given text.")
            return None

    @retry(wait=wait_exponential_jitter(initial=20, max=120, jitter=20), stop=stop_after_attempt(5), before_sleep=before_sleep_log(logger, logging.ERROR))  # 20s, 40s, 80s, 120s + random.uniform(0, 20)
    def filter_objects_by_description(self, description, use_npy_file, objects_info_path=None, object_info_list=None, to_print=True):
        # first, create the prompt
        print("looking for relevant objects based on description:\n'%s'" % description)
        prompt = ""
        prompt = prompt + "description:\n'%s'\nobject list:\n" % description
        # load the object info data and add it to the prompt
        if use_npy_file:
            data = np.load(objects_info_path, allow_pickle=True)
            for obj in data:
                if obj['label'] == 'object':
                    continue
                line = "name=%s,id=%d; " % (obj['label'], obj['id'])
                prompt = prompt + line
        else:  # an object info list is given directly; used for the robot demo
            data = object_info_list
            for obj in data:
                label = obj.get('cls')
                if label is None:
                    label = obj.get('label')
                if label in ['object', 'otherfurniture', 'other', 'others']:
                    continue
                line = "name=%s,id=%d; " % (label, obj['id'])
                prompt = prompt + line

        # get the response from gpt
        response, token_usage = self.call_openai(prompt)
        response = response['content']
        last_line = response.splitlines()[-1] if len(response) > 0 else ''

        # extract the answer (a list/dict) from the last line of the response
        # answer = self.extract_all_int_lists_from_text(last_line)
        answer = self.extract_dict_from_text(last_line)
        if to_print:
            self.print_pretext()
            print("answer:", answer)
            print("\n\n")
        # treat None and an empty dict alike
        if not answer:
            answer = None
        return answer, token_usage


if __name__ == "__main__":
    # scanrefer_path = "/share/data/ripl/vincenttann/sr3d/data/scanrefer/ScanRefer_filtered_sampled50.json"
    scanrefer_path = "/share/data/ripl/vincenttann/sr3d/data/scanrefer/ScanRefer_filtered_train_sampled1000.json"
    with open(scanrefer_path, 'r') as json_file:
        scanrefer_data = json.load(json_file)

    from datetime import datetime
    # use the current time as the folder name
    current_time = datetime.now()
    formatted_time = current_time.strftime("%Y-%m-%d-%H-%M-%S")
    print("formatted_time:", formatted_time)
    folder_path = "/share/data/ripl/vincenttann/sr3d/object_filter_dialogue/%s/" % formatted_time
    os.makedirs(folder_path)

    for idx, data in enumerate(scanrefer_data):
        print("processing %d/%d..." % (idx + 1, len(scanrefer_data)))
        description = data['description']
        scan_id = data['scene_id']
        target_id = data['object_id']
        # path = "/share/data/ripl/scannet_raw/train/objects_info_gf/objects_info_gf_%s.npy" % scan_id
        path = "/share/data/ripl/scannet_raw/train/objects_info/objects_info_%s.npy" % scan_id
        of = ObjectFilter()
        # match the method signature: (description, use_npy_file, objects_info_path=...)
        of.filter_objects_by_description(description, use_npy_file=True, objects_info_path=path)
        object_filter_json_name = "%d_%s_%s_object_filter.json" % (idx, scan_id, target_id)
        of.save_pretext(folder_path, object_filter_json_name)
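The two extractor helpers operate only on the model's final answer line, so they can be exercised offline. A sketch of the dictionary extractor's behavior on a made-up final line (logic mirrored from the method above):

import re

def extract_dict_from_text(text):
    # Grab the first {...} span and eval it into a dict; None on failure.
    match = re.search(r'{\s*(.*?)\s*}', text)
    if match:
        try:
            return eval('{' + match.group(1) + '}')
        except Exception:
            return None
    return None

last_line = "Relevant candidates: {'table': [3, 7], 'chair': [12]}"
print(extract_dict_from_text(last_line))  # {'table': [3, 7], 'chair': [12]}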
objects_info/objects_info_scene0132_00.npy ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e872475b1e7cc419364fbd619131214b60579dc415b61db2963f5844d41e98c8
+size 17246
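The .npy itself is stored in LFS; judging from how it is consumed elsewhere in this commit (np.load(..., allow_pickle=True) followed by ['label'], ['id'], and ['extension'] lookups), it holds an array of per-object records carrying at least each object's label, id, and axis-aligned bounding-box extent.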
prompt_text.py
ADDED
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# def get_principle(utterance, use_priority=False):
|
2 |
+
def get_principle(use_priority=False):
|
3 |
+
prompt = ''
|
4 |
+
prompt = prompt + "Tips: while multiple objs may appear within the description, it points to only 1 focal object, with the other objects serving to aid in locating or contextualizing it. For instance, spatial relation with other objects might be employed to establish the position or orientation of this focal object. Examples:"
|
5 |
+
prompt = prompt + "\n1.'The brown cabinet covers the entire back wall. There is a door with a blue sign located between the brown cabinet.' The first sentence is actually a noun phrase starting with 'the,' indicating that the focal object being described is 'the brown cabinet.' The second sentence describes the spatial relationship between the door and the brown cabinet, providing supplementary details about the described brown cabinet."
|
6 |
+
prompt = prompt + "\n2.'This is a big exercise ball. The ball is under the table.' The first sentence starts with 'this is,' indicating the object being described, which is a 'big exercise ball.' The second sentence is used to provide additional information about the ball's location."
|
7 |
+
prompt = prompt + "\n3.'In the corner of the kitchen, there are three trash cans. Beside the third trash can from the left, there's a white stool.' The description first sets up a scene with three trash cans, and then move on to describe the location of the white stool in relation to the trash cans. Therefore, the white stool is the target object."
|
8 |
+
if use_priority:
|
9 |
+
prompt = prompt + "\nConsider different constraints in order (1 to 7) & priority (1 highest, 7 lowest):"
|
10 |
+
prompt = prompt + "\n1: Obj name(category). Names in description & obj list may differ (e.g. similar names such as 'table' and 'desk', 'trash can' and 'recycling bin', 'coffee table' and 'end table'), so use common sense to find all possible candidate objects, ensure no missing, don't write code. If only 1 object in list has the same/similar category with the one described object, answer it directly, discard other constraints. For instance, with description 'the black bag left to the couch' and only 1 bag in the scene, answer it directly, discard 'black' and 'left' constrains."
|
11 |
+
prompt = prompt + "\n2: Horizontal relation like 'next to''farthest''closest''nearest''between''in the middle''at the center'(if given)(not include 'behind''in front of'). Consider only center x,y,z coords of objs, disregard sizes."
|
12 |
+
prompt = prompt + "\n3: Color (if given). Be lenient with color. First convert RGB to HSL. For grayscale, use lightness to compute the difference between objects' color and the specified color as a metric. For other colors, use hue instead. When computing the hue difference, be careful that it is a circular value."
|
13 |
+
prompt = prompt + "\n4: Size & shape(if given). Be cautious not to make overly absolute judgments about obj size. E.g., 'a tiny trash can' doesn't necessarily refer to smallest one in terms of volume."
|
14 |
+
prompt = prompt + "\n5: Direction relation 'left''right'(if given). To judge A on 'left' or 'right' of B, calc vec observer-A & observer-B(both projected to x-y plane). If cross product of vec observer-A & vector observer-B(in this order) has positive z, A on right of B. If z neg, A on left of B. Note that order of cross product matters, put vec observer-A at first. Consider which two objs' left-right relation needs to be determined in sentence, that is, which is A & which is B. DON'T determine left & right relation by compare x or y coords."
|
15 |
+
prompt = prompt + "\n6: Direction relation 'in front of' and 'behind'(if given). Use 'spatially closer' to replace them. To determine which object, P1 or P2, is behind Q, calculate their distances from Q. The one with the smaller distance is behind Q. It is the same for judging 'in front of': also smaller distance. DON'T determine front & behind relation by compare x or y coords."
|
16 |
+
prompt = prompt + "\n7: Vertical relation like 'above'and'under''on''sits on'(if given). Consider only center coords of objs, disregard sizes. Be more lenient with this."
|
17 |
+
prompt = prompt + "\nExplicitly go through these 7 constraints. For every constraint, if it is not mentioned in description, tell me and skip; if mentioned, apply this constraint and record the results of each candidates. For constraint 1, use common sense, no code. For others, write code, which should print the metrics of each candidate objects, instead of only print the most possible object id. After going through all constriants, evaluate all results comprehensively basing on 1-7 priority, and choose the unique target object."
    else:
        prompt = prompt + "\nSo first you should identify this focal object (that is, its category name) from the description."
        prompt = prompt + "\nNext, you can identify potential objects from the object list based on the category name of the focal object. You should rely on your common sense to comprehensively identify all relevant candidates without writing code. For example, for the category name 'table', objects such as 'table', 'desk', 'end table', 'coffee table', and so on from the object list should all be considered as potential candidates."
        prompt = prompt + "\nThen, count (do not write code) and tell me the number of candidate objects. If it is 1, which means there is only one candidate object, you must directly choose it as the answer, then stop your response. For example, if the description is 'the white bathtub on the left of the toilet' and there is only one 'bathtub'-like object in the list, answer it directly, ignoring the 'white' and 'left of the toilet' constraints."
        prompt = prompt + "\nIf there are multiple candidate objects, you can continue. Identify the constraints in the description. There might be multiple constraints to help find the unique target object among the candidates. For each constraint, you can define a quantitative metric to assess the degree to which each candidate object satisfies it."
        prompt = prompt + "\nYou can write code to calculate the metrics, printing out the metrics of each candidate object instead of only printing the most likely object id."
        prompt = prompt + "\nSome special tips for certain constraints:"

        prompt = prompt + "\n- Color (if given). Be lenient with color. Given the HSL value of objects, to determine black and white, use the difference of L (lightness) as a metric. To determine other colors, use the difference of H (hue) as a metric, and be careful that H has a circular value range, so H values of 360 and 0 are equal. Do not use conditions like 'if difference(color1, color2) < threshold' to determine a color."
        # prompt = prompt + "\n- Color (if given). Be lenient with color, because different shades of a color mentioned in the description can have different RGB values. You might use RGB-space distance as a quantitative metric."
        prompt = prompt + "\n- Direction relation 'left'/'right' (if given). To judge obj A on 'left' or 'right' of B, calc vectors observer-A & observer-B (both projected to x-y plane). If the cross product of vector observer-A & vector observer-B (in this order) has positive z, A is on the right of B. If z is negative, A is on the left of B. Note that the order of the cross product matters: put vec observer-A first. DON'T determine the left & right relation by comparing x or y coords."
        prompt = prompt + "\n- Direction relation 'in front of' and 'behind' (if given). Use 'spatially closer' to replace them. To determine which object, P1 or P2, is behind Q, calculate their distances from Q. The one with the smaller distance is behind Q. It is the same for judging 'in front of': also the smaller distance. DON'T determine the front & behind relation by comparing x or y coords."
        prompt = prompt + "\n- Vertical relation such as 'on'/'above'/'under' (if given). If obj M has a vertical relation with obj N, the x,y coords of the ctr of M should be inside the x,y range of obj N, while the z of M and the z of N should satisfy the corresponding order."
        prompt = prompt + "\nAfter going through all constraints in the description, double-check the given description, and evaluate all results and metrics comprehensively, then choose the unique target object."

    prompt = prompt + "\nPerceive wall as plane. Distance from obj to wall = vert dist to plane, not to wall center. Wall front = side of plane where objs exist."

    return prompt
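

# Illustrative sketch (hypothetical helper, not called anywhere in this app): the
# left/right test that the principle above asks the model to derive. Inputs are
# assumed to be [x, y] positions already projected onto the ground plane.
def _example_left_or_right(observer_xy, a_xy, b_xy):
    # vectors observer->A and observer->B on the ground plane
    ax, ay = a_xy[0] - observer_xy[0], a_xy[1] - observer_xy[1]
    bx, by = b_xy[0] - observer_xy[0], b_xy[1] - observer_xy[1]
    z = ax * by - ay * bx  # z component of cross(observer->A, observer->B); 0 means collinear
    return 'A is on the right of B' if z > 0 else 'A is on the left of B'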


def get_principle_sr3d():
    prompt = ""
    # prompt = prompt + "\nYou must comprehensively consider the x, y, z coordinates, not only one of them (for example, you should consider both greater z and similar x, y coordinates when judging a vertical relation). "
    # prompt = prompt + "\nWhen determining vertical relations such as 'above'/'under'/'on'/'on top of'/'support' (if given): if obj M has a vertical relation with obj N, the x,y coords of the ctr of M should be inside the x,y range of obj N, while the z of M and the z of N should satisfy the corresponding order. You can ignore the size of objects and only consider ctr coords here."
    prompt = prompt + "\nWhen determining vertical relations such as 'above'/'under'/'on'/'on top of'/'support' (if given): for example, if obj M is on top of / supported by obj N, the x,y coords of the ctr of M should be inside the x,y range of obj N, while the z of M is greater than the z of N. You can ignore the size of objects in the z direction here. If you cannot find the obj M after several tries, you can choose the one which is closest to N."
    prompt = prompt + "\nWhen determining the orientation of object B relative to object A, you should calculate the angle between the x-y plane vector from A to B (projected onto the x-y plane) and one of the direction vectors of A (the one that corresponds to the direction mentioned in the problem). The smaller the angle, the more it indicates that B is in the corresponding direction of A."
    return prompt
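

# Illustrative sketch (hypothetical helper, not called anywhere in this app): the
# orientation metric described above -- the angle between the x-y vector from A to B
# and one of A's direction vectors. Inputs are assumed [x, y, ...] sequences.
def _example_orientation_angle(a_center, b_center, a_direction_vector):
    import math
    vx, vy = b_center[0] - a_center[0], b_center[1] - a_center[1]  # A -> B on x-y plane
    dx, dy = a_direction_vector[0], a_direction_vector[1]
    cos = (vx * dx + vy * dy) / (math.hypot(vx, vy) * math.hypot(dx, dy))
    return math.degrees(math.acos(max(-1.0, min(1.0, cos))))  # smaller angle = better aligned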


def get_system_message():
    system_message = "Imagine you are an artificial intelligence assistant with a python interpreter. So when answering questions, you can choose to generate python code (for example, when there is a need to do quantitative evaluation). The generated code should always use the print() function to print out the result and keep two decimal places for numbers. The code should be written in python, start with '```python\nimport numpy as np\nimport math\n' and end with '```'. Keep your code and comments concise. When answering step by step, stop whenever you feel there is a need to generate python code (for example, where there is a need to do quantitative evaluation) and wait for the result from the code execution. Make sure your code will print out something (including failure info like 'nothing found'), especially when you use if logic.\n"
    # Before generating code, say 'Let's write some python code to get the results.', then stop. You'll receive an empty message from user, then you start to generate code. If you are printing things like 'metric: value', make it clear what the metric is."

    system_message += "You will receive an information list of some objects from a 3D indoor scene, which might include their center positions, sizes, colors and so on. You will also be presented with a description of one certain object in that scene, and your job is to find that object from the object list according to this description. Below are some tips to help you reason and find the object:\n"

    return system_message
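
# For reference, a response code block in the format the system message above requires
# would look like this (illustrative):
# ```python
# import numpy as np
# import math
# # ... compute and print() each candidate's metric, rounded to two decimals ...
# ```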

requirements.txt
ADDED
@@ -0,0 +1,6 @@
gradio
numpy
plyfile
openai
tenacity
trimesh
scenes/scene0132_00_vh_clean_2_aligned.glb
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:fc75b866ab2436dfbcd9f23da1c2e3191cdbb4ce299e5b2ce074e55a7e19af27
size 26828880
scenes/scene0132_00_vh_clean_2_aligned.ply
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:22caa4c712bb89fe8316cbaf829bc81d1a5ee8a33a0c9f5bc20f9c5f2dded606
size 6833502
sources.list
ADDED
@@ -0,0 +1,4 @@
deb http://mirrors.aliyun.com/ubuntu/ jammy main restricted universe multiverse
deb http://mirrors.aliyun.com/ubuntu/ jammy-security main restricted universe multiverse
deb http://mirrors.aliyun.com/ubuntu/ jammy-updates main restricted universe multiverse
deb http://mirrors.aliyun.com/ubuntu/ jammy-backports main restricted universe multiverse
transcrib3d_main.py
ADDED
@@ -0,0 +1,285 @@
# encoding:utf-8
import ast
import csv
import json
import logging
import os
import random
import re
import time
from copy import deepcopy
from datetime import datetime

import numpy as np
from tenacity import RetryError, before_sleep_log, retry, stop_after_attempt, wait_exponential_jitter  # for exponential backoff

from code_interpreter import CodeInterpreter
# from config import confs_nr3d, confs_scanrefer, confs_sr3d
# from gpt_dialogue import Dialogue
# from object_filter_gpt4 import ObjectFilter
from prompt_text import get_principle, get_principle_sr3d, get_system_message

logger = logging.getLogger(__name__ + 'logger')
logger.setLevel(logging.ERROR)
console_handler = logging.StreamHandler()
console_handler.setLevel(logging.ERROR)
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
console_handler.setFormatter(formatter)
logger.addHandler(console_handler)

def round_list(lst, length):
    # round every element in lst
    for idx, num in enumerate(lst):
        lst[idx] = round(num, length)
    return list(lst)

def remove_spaces(s: str):
    return s.replace(' ', '')

def rgb_to_hsl(rgb):
    # Normalize RGB values to the range [0, 1]
    r, g, b = [x / 255.0 for x in rgb]
    # Calculate min and max values of RGB to find chroma
    c_max = max(r, g, b)
    c_min = min(r, g, b)
    chroma = c_max - c_min
    # Calculate lightness
    lightness = (c_max + c_min) / 2
    # Calculate hue and saturation
    hue = 0
    saturation = 0
    if chroma != 0:
        if c_max == r:
            hue = ((g - b) / chroma) % 6
        elif c_max == g:
            hue = ((b - r) / chroma) + 2
        elif c_max == b:
            hue = ((r - g) / chroma) + 4
        hue *= 60
        # Calculate saturation
        if lightness <= 0.5:
            saturation = chroma / (2 * lightness)
        else:
            saturation = chroma / (2 - 2 * lightness)
    return [hue, saturation, lightness]
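
# Illustrative sanity check (assumed [R, G, B] input in the 0-255 range):
# rgb_to_hsl([255, 0, 0]) -> [0.0, 1.0, 0.5]  (pure red: hue 0, full saturation, mid lightness)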

def get_scene_center(objects):
    xmin, ymin, zmin = float('inf'), float('inf'), float('inf')
    xmax, ymax, zmax = float('-inf'), float('-inf'), float('-inf')
    for obj in objects:
        x, y, z = obj['center_position']
        if x < xmin:
            xmin = x
        if x > xmax:
            xmax = x
        if y < ymin:
            ymin = y
        if y > ymax:
            ymax = y
        if z < zmin:
            zmin = z
        if z > zmax:
            zmax = z
    return round_list([(xmin + xmax) / 2, (ymin + ymax) / 2, (zmin + zmax) / 2], 2)
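
# Illustrative example (assumed input format):
# get_scene_center([{'center_position': [0.0, 0.0, 0.0]},
#                   {'center_position': [2.0, 4.0, 1.0]}]) -> [1.0, 2.0, 0.5]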

def find_relevant_objects(user_instruction, scan_id):
    pass

def gen_prompt(user_instruction, scan_id):

    npy_path = os.path.join("objects_info", f"objects_info_{scan_id}.npy")
    objects_info = np.load(npy_path, allow_pickle=True)

    # objects_related = find_relevant_objects(user_instruction, scan_id)
    objects_related = objects_info

    # Get the center coordinates of the scene
    # scene_center = get_scene_center(objects_related)
    scene_center = get_scene_center(objects_info)  # note: this should use the info of all objects, not just the relevant ones
    # Generate the background-information part of the prompt
    prompt = scan_id + ":objects with quantitative description based on right-hand Cartesian coordinate system with x-y-z axes, x-y plane=ground, z-axis=up/down. Coords format [x, y, z].\n\n"
    # if dataset == 'nr3d':
    #     prompt = prompt + "Scene center:%s. If no direction vector, observer at center for obj orientation.\n" % remove_spaces(str(scene_center))
    # elif dataset == 'scanrefer':
    #     if use_camera_position:
    #         prompt = prompt + "Scene center:%s.\n" % remove_spaces(str(scene_center))
    #         prompt = prompt + "Observer position:%s.\n" % remove_spaces(str(round_list(camera_info_aligned['position'], 2)))
    #     else:
    #         prompt = prompt + "Scene center:%s. If no direction vector, observer at center for obj orientation.\n" % remove_spaces(str(scene_center))
    prompt = prompt + "Scene center:%s. If no direction vector, observer at center for obj orientation.\n\n" % remove_spaces(str(scene_center))
    prompt = prompt + "objs list:\n"
    lines = []
    # Generate the quantitative object descriptions in the prompt (iterate over all relevant objects)
    for obj in objects_related:
        # Position info, rounded to 2 decimal places
        center_position = obj['center_position']
        center_position = round_list(center_position, 2)
        # Size info, rounded to 2 decimal places
        size = obj['size']
        size = round_list(size, 2)
        # Extension info, rounded to 2 decimal places
        extension = obj['extension']
        extension = round_list(extension, 2)
        # Orientation info, expressed as direction vectors. Note: since ScanRefer does not use the original ScanNet obj ids, orientation info cannot be used there.
        if obj['has_front']:
            front_point = np.array(obj['front_point'])
            center = np.array(obj['obb'][0:3])
            direction_vector = front_point - center
            direction_vector_normalized = direction_vector / np.linalg.norm(direction_vector)
            # Then compute the left and right direction vectors, all rounded to 2 decimal places
            front_vector = round_list(direction_vector_normalized, 2)
            up_vector = np.array([0, 0, 1])
            left_vector = round_list(np.cross(direction_vector_normalized, up_vector), 2)
            right_vector = round_list(np.cross(up_vector, direction_vector_normalized), 2)
            behind_vector = round_list(-np.array(front_vector), 2)
            # Generate the orientation info
            direction_info = ";direction vectors:front=%s,left=%s,right=%s,behind=%s\n" % (front_vector, left_vector, right_vector, behind_vector)
        else:
            direction_info = "\n"  # if the direction vector is unknown, write nothing

        # sr3d: give center and size
        # if dataset == 'sr3d':
        if False:
            line = f'{obj["label"]},id={obj["id"]},ctr={remove_spaces(str(center_position))},size={remove_spaces(str(size))}'
        # nr3d and scanrefer: give center, size and color
        else:
            rgb = obj['avg_rgba'][0:3]
            hsl = round_list(rgb_to_hsl(rgb), 2)
            # line = "%s,id=%s,ctr=%s,size=%s,RGB=%s" % (obj['label'], obj['id'], remove_spaces(str(center_position)), remove_spaces(str(size)), remove_spaces(str(rgb)))  # original RGB version
            line = "%s,id=%s,ctr=%s,size=%s,HSL=%s" % (obj['label'], obj['id'], remove_spaces(str(center_position)), remove_spaces(str(size)), remove_spaces(str(hsl)))  # RGB replaced with HSL
            # line = "%s(relevant to %s),id=%s,ctr=%s,size=%s,HSL=%s" % (obj['label'], id_to_name_in_description[obj['id']], obj['id'], remove_spaces(str(center_position)), remove_spaces(str(size)), remove_spaces(str(hsl)))  # format: name = original name (name used in the description)
            # if id_to_name_in_description[obj['id']] == 'room':
            #     name = obj['label']
            # else:
            #     name = id_to_name_in_description[obj['id']]
            # line = "%s,id=%s,ctr=%s,size=%s,HSL=%s" % (name, obj['id'], remove_spaces(str(center_position)), remove_spaces(str(size)), remove_spaces(str(hsl)))  # format: name = the name used in the description
        lines.append(line + direction_info)
    # if self.obj_info_ablation_type == 4:
    #     random.seed(0)
    #     random.shuffle(lines)
    prompt += ''.join(lines)
    # The instruction part of the prompt
    line = "\nInstruction:find the one described object in the description: \n\"%s\"\n" % user_instruction
    prompt = prompt + line

    prompt = prompt + "\n\nThere is exactly one answer, so if you receive multiple answers, consider other constraints; if you get no answers, loosen the constraints."
    prompt = prompt + "\n\nWork this out step by step to ensure the right answer."
    prompt = prompt + "\n\nIf the answer is complete, add \"Now the answer is complete -- {'ID':id}\" to the end of your answer (that is, your completion, not your code), where id is the id of the referred obj. Do not add anything after."

    return prompt
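
# Example of a single generated object line (illustrative; values invented):
# "chair,id=12,ctr=[1.2,-0.75,0.45],size=[0.55,0.6,0.9],HSL=[30.0,0.25,0.4]"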


@retry(wait=wait_exponential_jitter(initial=20, max=120, jitter=20), stop=stop_after_attempt(5), before_sleep=before_sleep_log(logger, logging.ERROR))  # 20s, 40s, 80s, 120s + random.uniform(0, 20)
def get_gpt_response(prompt: str, code_interpreter: CodeInterpreter):
    print("llm_name:", code_interpreter.model)
    # Get a response from GPT (using the code interpreter), retrying with tenacity.
    # Count the token usage and time as well.
    # If the response does not include "Now the answer is complete", the answer is not done:
    # attach an empty user message to let GPT keep going.
    # start timing
    call_start_time = time.time()
    # the first call with the original prompt
    response, token_usage_total = code_interpreter.call_openai_with_code_interpreter(prompt)
    response = response['content']
    # loop until "Now the answer is complete" is in the response, or we have looped more than 10 times
    count_response = 0
    while "Now the answer is complete" not in response:
        if count_response >= 10:
            print("Response does not end with 'Now the answer is complete.' !")
            break
        response, token_usage_add = code_interpreter.call_openai_with_code_interpreter('')
        response = response['content']
        token_usage_total += token_usage_add
        count_response += 1
        print("count_response:", count_response)
    # stop timing
    call_end_time = time.time()
    time_consumed = call_end_time - call_start_time
    # self.token_usage_this_ques += token_usage_total
    # self.token_usage_whole_run += token_usage_total
    # self.time_consumed_this_ques += time_consumed
    # self.time_consumed_whole_run += time_consumed
    # print("\n*** Refer model: token usage=%d, time consumed=%ds, TPM=%.2f ***" % (token_usage_total, time_consumed, token_usage_total / time_consumed * 60))
    return response
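
# Note: the stop condition above relies on the model appending the literal marker
# requested in gen_prompt, e.g. a reply ending with "Now the answer is complete -- {'ID': 12}".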

def extract_answer_id_from_last_line(last_line, random_choice_list=[0,]):
    # If the reply does not follow the expected format, choose randomly (Sr3d) or default to 0 (Nr3d and ScanRefer);
    # if it does follow the expected format, extract the answer.
    wrong_return_format = False
    last_line_split = last_line.split('--')
    # Use a regular expression to extract the dict part from the string
    pattern = r"\{[^\}]*\}"
    match = re.search(pattern, last_line_split[-1])
    if match:
        # Get the matched dict string
        matched_dict_str = match.group()
        try:
            # Parse the dict string into a dict object
            extracted_dict = ast.literal_eval(matched_dict_str)
            print(extracted_dict)
            answer_id = extracted_dict['ID']
            # Even if the reply does follow the "Now the answer is complete -- {'ID': xxx}" format,
            # xxx may not be a number (e.g. None); fall back to a random choice in that case too.
            if not isinstance(answer_id, int):
                if isinstance(answer_id, list) and all([isinstance(e, int) for e in answer_id]):
                    print("Wrong answer format: %s. Random choice from this list." % str(answer_id))
                    answer_id = random.choice(answer_id)
                else:
                    print("Wrong answer format: %s. Random choice from relevant objects." % str(answer_id))
                    answer_id = random.choice(random_choice_list)
                wrong_return_format = True
        except BaseException:
            print("Wrong answer format!! No dict found. Random choice.")
            answer_id = random.choice(random_choice_list)
            wrong_return_format = True
    else:
        print("Wrong answer format!! No dict found. Random choice.")
        answer_id = random.choice(random_choice_list)
        wrong_return_format = True
    return answer_id, wrong_return_format
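
# Illustrative behavior (assumed inputs):
# extract_answer_id_from_last_line("Now the answer is complete -- {'ID': 7}") -> (7, False)
# extract_answer_id_from_last_line("no dict here", random_choice_list=[3, 5]) -> (3 or 5, True)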

def get_openai_config(llm_name='gpt-3.5-turbo-0125'):
    system_message = ""
    system_message += get_system_message()
    system_message += get_principle()
    openai_config = {
        # 'model': 'gpt-4-turbo-preview',
        'model': llm_name,
        'temperature': 1e-7,
        'top_p': 1e-7,
        # 'max_tokens': 4096,
        'max_tokens': 8192,
        'system_message': system_message,
        # 'load_path': '',
        'save_path': 'chats',
        'debug': True
    }
    return openai_config
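
# Typical usage (mirrors the __main__ block below):
# openai_config = get_openai_config('gpt-4')
# code_interpreter = CodeInterpreter(**openai_config)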

if __name__ == "__main__":

    # system_message = 'Imagine you are an artificial intelligence assistant. Your job is to do 3D referring reasoning, namely to find the object for a given utterance from a 3d scene presented as object-centric semantic information.\n'
    system_message = ""
    system_message += get_system_message()
    system_message += get_principle()
    openai_config = {
        'model': 'gpt-4',
        'temperature': 1e-7,
        'top_p': 1e-7,
        # 'max_tokens': 4096,
        'max_tokens': 8192,
        'system_message': system_message,
        # 'load_path': '',
        'save_path': 'chats',
        'debug': True
    }
    code_interpreter = CodeInterpreter(**openai_config)
    prompt = gen_prompt("Find the chair next to the table.", "scene0132_00")
    print(prompt)

    response = get_gpt_response(prompt, code_interpreter)
    # print(response)
    print("-------pretext--------")
    print(code_interpreter.pretext)