blanchon commited on
Commit
08d80be
1 Parent(s): f9f1617

🔥 First commit

Browse files
.devcontainer/devcontainer.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // For format details, see https://aka.ms/devcontainer.json. For config options, see the
2
+ // README at: https://github.com/devcontainers/templates/tree/main/src/docker-existing-dockerfile
3
+ {
4
+ "name": "Cuda",
5
+
6
+ "image": "test",
7
+
8
+ // Features to add to the dev container. More info: https://containers.dev/features.
9
+ // "features": {},
10
+
11
+ // Use 'forwardPorts' to make a list of ports inside the container available locally.
12
+ "forwardPorts": [7860],
13
+
14
+ // Uncomment the next line to run commands after the container is created.
15
+ // "postCreateCommand": "cat /etc/os-release",
16
+
17
+ // Configure tool-specific properties.
18
+ // "customizations": {},
19
+
20
+ "containerEnv": {
21
+ "NVIDIA_VISIBLE_DEVICES": "0"
22
+ },
23
+
24
+ "runArgs": [
25
+ "--gpus","all",
26
+ "--runtime=nvidia"
27
+ ],
28
+
29
+ // Uncomment to connect as an existing user other than the container default. More info: https://aka.ms/dev-containers-non-root.
30
+ // "remoteUser": "devcontainer"
31
+ }
.gitignore CHANGED
@@ -1,3 +1,10 @@
 
 
 
 
 
 
 
1
  # Byte-compiled / optimized / DLL files
2
  __pycache__/
3
  *.py[cod]
 
1
+ ### Others
2
+ deps
3
+ debug
4
+ build
5
+ parameter
6
+
7
+ ### Python
8
  # Byte-compiled / optimized / DLL files
9
  __pycache__/
10
  *.py[cod]
.gitmodules ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [submodule "deps/gaussian-splatting-cuda"]
2
+ path = deps/gaussian-splatting-cuda
3
+ url = git@github.com:MrNeRF/gaussian-splatting-cuda.git
4
+ branch = master
5
+ [submodule "deps/colmap"]
6
+ path = deps/colmap
7
+ url = git@github.com:colmap/colmap.git
8
+ branch = main
9
+ [submodule "deps/rerun"]
10
+ path = deps/rerun
11
+ url = git@github.com:rerun-io/rerun.git
12
+ branch = release-0.8.2
13
+ [submodule "deps/splat"]
14
+ path = deps/splat
15
+ url = git@github.com:antimatter15/splat.git
16
+ branch = main
Dockerfile ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # --- `colmap` Builder Stage ---
2
+ FROM nvidia/cuda:11.7.1-devel-ubuntu20.04 AS colmap_builder
3
+
4
+ ARG COLMAP_GIT_COMMIT=main
5
+ ARG CUDA_ARCHITECTURES=native
6
+ ENV QT_XCB_GL_INTEGRATION=xcb_egl
7
+
8
+ WORKDIR /workdir
9
+
10
+ # Prepare and empty machine for building.
11
+ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
12
+ git \
13
+ cmake \
14
+ ninja-build \
15
+ build-essential \
16
+ libboost-program-options-dev \
17
+ libboost-filesystem-dev \
18
+ libboost-graph-dev \
19
+ libboost-system-dev \
20
+ libeigen3-dev \
21
+ libflann-dev \
22
+ libfreeimage-dev \
23
+ libmetis-dev \
24
+ libgoogle-glog-dev \
25
+ libgtest-dev \
26
+ libsqlite3-dev \
27
+ libglew-dev \
28
+ qtbase5-dev \
29
+ libqt5opengl5-dev \
30
+ libcgal-dev \
31
+ libceres-dev \
32
+ && rm -rf /var/lib/apt/lists/*
33
+
34
+ # Build and install COLMAP.
35
+ COPY deps/colmap /colmap
36
+ RUN cd /colmap && \
37
+ mkdir build && \
38
+ cd build && \
39
+ cmake .. -GNinja -DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCHITECTURES} && \
40
+ ninja && \
41
+ ninja install && \
42
+ cd .. && rm -rf colmap
43
+
44
+ # # --- `gaussian-splatting-cuda` Builder Stage ---
45
+ FROM nvidia/cuda:11.7.1-devel-ubuntu20.04 AS gs_builder
46
+
47
+ WORKDIR /workdir
48
+
49
+ # Install dependencies
50
+ # we could pin them to specific versions to be extra sure
51
+ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
52
+ git \
53
+ python3-dev \
54
+ libtbb-dev \
55
+ libeigen3-dev \
56
+ unzip \
57
+ g++ \
58
+ libssl-dev \
59
+ build-essential \
60
+ checkinstall \
61
+ wget \
62
+ cmake \
63
+ protobuf-compiler \
64
+ && rm -rf /var/lib/apt/lists/*
65
+
66
+ # Install cmake 3.25
67
+ # RUN apt-get update && apt-get -y install
68
+ RUN wget https://github.com/Kitware/CMake/releases/download/v3.25.0/cmake-3.25.0.tar.gz \
69
+ && tar -zvxf cmake-3.25.0.tar.gz \
70
+ && cd cmake-3.25.0 \
71
+ && ./bootstrap \
72
+ && make -j8 \
73
+ && checkinstall --pkgname=cmake --pkgversion="3.25-custom" --default
74
+
75
+ # Copy necessary files
76
+ COPY deps/gaussian-splatting-cuda/cuda_rasterizer ./cuda_rasterizer
77
+ COPY deps/gaussian-splatting-cuda/external ./external
78
+ COPY deps/gaussian-splatting-cuda/includes ./includes
79
+ COPY deps/gaussian-splatting-cuda/parameter ./parameter
80
+ COPY deps/gaussian-splatting-cuda/src ./src
81
+ COPY deps/gaussian-splatting-cuda/CMakeLists.txt ./CMakeLists.txt
82
+
83
+ # Download and extract libtorch
84
+ RUN wget https://download.pytorch.org/libtorch/cu118/libtorch-cxx11-abi-shared-with-deps-2.0.1%2Bcu118.zip \
85
+ && unzip -o libtorch-cxx11-abi-shared-with-deps-2.0.1+cu118.zip -d external/ \
86
+ && rm libtorch-cxx11-abi-shared-with-deps-2.0.1+cu118.zip
87
+
88
+ # Build (on CPU, this will add compute_35 as build target, which we do not want)
89
+ ENV PATH /usr/local/cuda/bin:$PATH
90
+ ENV LD_LIBRARY_PATH /usr/local/cuda/lib64:$LD_LIBRARY_PATH
91
+ RUN cmake -B build -D CMAKE_BUILD_TYPE=Release -D CUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda/ -D CUDA_VERSION=11.7 \
92
+ && cmake --build build -- -j8
93
+
94
+ # --- Runner Stage ---
95
+ FROM nvidia/cuda:11.7.1-devel-ubuntu20.04 AS runner
96
+
97
+ WORKDIR /app
98
+
99
+ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
100
+ libboost-program-options-dev \
101
+ libboost-filesystem-dev \
102
+ libboost-graph-dev \
103
+ libboost-system-dev \
104
+ libeigen3-dev \
105
+ libflann-dev \
106
+ libfreeimage-dev \
107
+ libmetis-dev \
108
+ libgoogle-glog-dev \
109
+ libgtest-dev \
110
+ libsqlite3-dev \
111
+ libglew-dev \
112
+ qtbase5-dev \
113
+ libqt5opengl5-dev \
114
+ libcgal-dev \
115
+ libceres-dev \
116
+ imagemagick \
117
+ ffmpeg \
118
+ python3-pip \
119
+ && rm -rf /var/lib/apt/lists/*
120
+
121
+ # Copy built artifact from colmap_builder stage
122
+ COPY --from=colmap_builder /usr/local/bin/colmap /usr/local/bin/colmap
123
+
124
+ # Copy built artifact from builder stage
125
+ COPY --from=gs_builder /workdir/build/gaussian_splatting_cuda /usr/local/bin/gaussian_splatting_cuda
126
+ COPY --from=gs_builder /workdir/external/libtorch /usr/local/libtorch
127
+ COPY --from=gs_builder /workdir/parameter /usr/local/bin/parameter
128
+
129
+ # Setup environment
130
+ ENV PATH /usr/local/libtorch/bin:/usr/local/cuda/bin:$PATH
131
+ ENV LD_LIBRARY_PATH /usr/local/libtorch/lib:/usr/local/cuda/lib64:$LD_LIBRARY_PATH
132
+
133
+ # Install python dependencies
134
+ COPY requirements.txt /app/requirements.txt
135
+ RUN python3 -m pip install --upgrade pip
136
+ RUN python3 -m pip install -r /app/requirements.txt
137
+
138
+ COPY services /app/services
139
+ COPY server.py /app/server.py
140
+
141
+ # Fix bug
142
+ RUN mkdir /parameter && cp /usr/local/bin/parameter/optimization_params.json /parameter/optimization_params.json
143
+
144
+ EXPOSE 7860
145
+ CMD [ "python3", "-u", "/app/server.py" ]
deps/colmap ADDED
@@ -0,0 +1 @@
 
 
1
+ Subproject commit c04629017e7378b3046c6e8961277fbe98b56a32
deps/gaussian-splatting-cuda ADDED
@@ -0,0 +1 @@
 
 
1
+ Subproject commit b3aced9a3c80bed0e072f31540bcf9919cf1eb1d
deps/rerun ADDED
@@ -0,0 +1 @@
 
 
1
+ Subproject commit 9b76b7027b5cc34bf86b07c41968eb0988b383a3
deps/splat ADDED
@@ -0,0 +1 @@
 
 
1
+ Subproject commit db27473c34b21e9294bd80848380660808b21a4e
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ requests
2
+ numpy
3
+ typing_extensions
4
+ rich
5
+ fastapi
6
+ uvicorn[standard]
7
+ gradio
8
+ # rerun-sdk==0.8.2 # if you want to use the rerun
server.py ADDED
@@ -0,0 +1,422 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from pathlib import Path
import shutil
import tempfile
import gradio as gr
import uuid
from typing_extensions import TypedDict, Tuple

from fastapi import FastAPI
from fastapi.staticfiles import StaticFiles
import uvicorn

# FastAPI application that hosts both the static results and the Gradio UI.
app = FastAPI()

# Per-host scratch directory where every session's artifacts are written.
gs_dir = Path(tempfile.gettempdir()) / "gaussian_splatting_gradio"
gs_dir.mkdir(parents=True, exist_ok=True)

# Serve the scratch directory so results are downloadable at /static/<uuid>/...
app.mount("/static", StaticFiles(directory=gs_dir), name="static")

# Shape of the per-session state carried by the Gradio UI.
StateDict = TypedDict("StateDict", {"uuid": str})
24
+
25
def getHTML():
    """Return an <iframe> snippet embedding a standalone 3D Gaussian Splatting viewer.

    The iframe's ``srcdoc`` carries a complete HTML document: a progress bar,
    an animated cube spinner, a render canvas, the page CSS, and a <script>
    tag loading the viewer code from an external host.

    NOTE(review): currently only referenced from commented-out code at the
    bottom of this file; the script URL is plain http on a personal host —
    confirm before relying on it.
    """
    # Body of the embedded page (markup + all styling).
    html_body = """
    <body>
    <div id="progress"></div>
    <div id="message"></div>
    <div class="scene" id="spinner">
    <div class="cube-wrapper">
    <div class="cube">
    <div class="cube-faces">
    <div class="cube-face bottom"></div>
    <div class="cube-face top"></div>
    <div class="cube-face left"></div>
    <div class="cube-face right"></div>
    <div class="cube-face back"></div>
    <div class="cube-face front"></div>
    </div>
    </div>
    </div>
    </div>
    <canvas id="canvas"></canvas>

    <div id="quality">
    <span id="fps"></span>
    </div>

    <style>
    .cube-wrapper {
    transform-style: preserve-3d;
    }

    .cube {
    transform-style: preserve-3d;
    transform: rotateX(45deg) rotateZ(45deg);
    animation: rotation 2s infinite;
    }

    .cube-faces {
    transform-style: preserve-3d;
    height: 80px;
    width: 80px;
    position: relative;
    transform-origin: 0 0;
    transform: translateX(0) translateY(0) translateZ(-40px);
    }

    .cube-face {
    position: absolute;
    inset: 0;
    background: #0017ff;
    border: solid 1px #ffffff;
    }
    .cube-face.top {
    transform: translateZ(80px);
    }
    .cube-face.front {
    transform-origin: 0 50%;
    transform: rotateY(-90deg);
    }
    .cube-face.back {
    transform-origin: 0 50%;
    transform: rotateY(-90deg) translateZ(-80px);
    }
    .cube-face.right {
    transform-origin: 50% 0;
    transform: rotateX(-90deg) translateY(-80px);
    }
    .cube-face.left {
    transform-origin: 50% 0;
    transform: rotateX(-90deg) translateY(-80px) translateZ(80px);
    }

    @keyframes rotation {
    0% {
    transform: rotateX(45deg) rotateY(0) rotateZ(45deg);
    animation-timing-function: cubic-bezier(
    0.17,
    0.84,
    0.44,
    1
    );
    }
    50% {
    transform: rotateX(45deg) rotateY(0) rotateZ(225deg);
    animation-timing-function: cubic-bezier(
    0.76,
    0.05,
    0.86,
    0.06
    );
    }
    100% {
    transform: rotateX(45deg) rotateY(0) rotateZ(405deg);
    animation-timing-function: cubic-bezier(
    0.17,
    0.84,
    0.44,
    1
    );
    }
    }

    .scene,
    #message {
    position: absolute;
    display: flex;
    top: 0;
    right: 0;
    left: 0;
    bottom: 0;
    z-index: 2;
    height: 100%;
    width: 100%;
    align-items: center;
    justify-content: center;
    }
    #message {
    font-weight: bold;
    font-size: large;
    color: red;
    pointer-events: none;
    }

    #progress {
    position: absolute;
    top: 0;
    height: 5px;
    background: blue;
    z-index: 99;
    transition: width 0.1s ease-in-out;
    }

    #quality {
    position: absolute;
    bottom: 10px;
    z-index: 999;
    right: 10px;
    }

    #canvas {
    display: block;
    position: absolute;
    top: 0;
    left: 0;
    width: 100%;
    height: 100%;
    touch-action: none;
    }

    #instructions {
    background: rgba(0,0,0,0.6);
    white-space: pre-wrap;
    padding: 10px;
    border-radius: 10px;
    font-size: x-small;
    }
    </style>
    </body>
    """

    # Full document: external viewer script in <head>, body from above.
    html = f"""
    <head>
    <title>3D Gaussian Splatting Viewer</title>
    <script src="http://zeus.blanchon.cc/dropshare/main.js"></script>
    </head>

    {html_body}
    """
    # srcdoc is single-quoted, so the embedded document must not contain "'".
    return f"""<iframe style="width: 100%; height: 900px" srcdoc='{html}'></iframe>"""
193
+
194
def createStateSession() -> StateDict:
    """Start a new session keyed by a freshly generated UUID."""
    new_id = str(uuid.uuid4())
    print("createStateSession")
    print(new_id)
    return StateDict(uuid=new_id)
202
+
203
def removeStateSession(session_state_value: StateDict):
    """Discard the given session and return a blank (uuid=None) state."""
    return StateDict(uuid=None)
208
+
209
def makeButtonVisible() -> Tuple[gr.Button, gr.Button]:
    """Show the Process button after an upload; keep Reset hidden.

    TODO: reset stays hidden until process cancellation works reliably.
    """
    show_process = gr.Button(visible=True)
    hide_reset = gr.Button(visible=False)
    return show_process, hide_reset
213
+
214
def resetSession(state: StateDict) -> Tuple[StateDict, gr.Button, gr.Button]:
    """Clear the session state and hide both action buttons."""
    print("resetSession")
    cleared = removeStateSession(state)
    hidden_process = gr.Button(visible=False)
    hidden_reset = gr.Button(visible=False)
    return cleared, hidden_process, hidden_reset
220
+
221
def process(
    session_state_value: StateDict,
    filepath: str,
    ffmpeg_fps: int,
    ffmpeg_qscale: int,
    colmap_camera: str,
):
    """Run the full video -> frames -> COLMAP -> gaussian-splatting pipeline.

    All steps write their output under ``gs_dir/<session uuid>`` and append
    their subprocess output to ``log.txt`` there (polled by updateLog).

    Args:
        session_state_value: per-session state; a None "uuid" means no active
            session, in which case this is a no-op.
        filepath: path of the uploaded video file.
        ffmpeg_fps: frame-extraction rate passed to ffmpeg.
        ffmpeg_qscale: JPEG quality scale passed to ffmpeg.
        colmap_camera: COLMAP camera model name (e.g. "OPENCV").
    """
    if session_state_value["uuid"] is None:
        return
    print("process")
    print(session_state_value)
    print(f"Processing {filepath}")

    try:
        session_tmpdirname = gs_dir / str(session_state_value['uuid'])
        session_tmpdirname.mkdir(parents=True, exist_ok=True)
        print('Created temporary directory', session_tmpdirname)

        gs_dir_path = Path(session_tmpdirname)
        logfile_path = gs_dir_path / "log.txt"
        # Create the log file once (it was previously touched twice).
        logfile_path.touch()
        with logfile_path.open("w") as log_file:
            from services.ffmpeg import ffmpeg_run
            ffmpeg_run(
                video_path=Path(filepath),
                output_path=gs_dir_path,
                fps=int(ffmpeg_fps),
                qscale=int(ffmpeg_qscale),
                stream_file=log_file,
            )

            from services.colmap import colmap
            colmap(
                source_path=gs_dir_path,
                camera=str(colmap_camera),
                stream_file=log_file,
            )

            print("Done with colmap")

            # Zip the session folder. Pass the folder *name* as base_dir so the
            # archive entries are relative to gs_dir (an absolute base_dir would
            # bake absolute paths into the zip).
            print(gs_dir, gs_dir_path)
            print(gs_dir_path.name)
            archive = shutil.make_archive("result", 'zip', gs_dir, gs_dir_path.name)
            print('Created zip file', archive)

            # Move the zip into the session folder so it is served by /static.
            shutil.move(archive, gs_dir_path)

            from services.gaussian_splatting_cuda import gaussian_splatting_cuda
            gaussian_splatting_cuda(
                data_path=gs_dir_path,
                output_path=gs_dir_path / "output",
                gs_command=str(Path(__file__).parent.absolute() / "build" / 'gaussian_splatting_cuda'),
                iterations=100,
                convergence_rate=0.01,
                resolution=512,
                enable_cr_monitoring=False,
                force=False,
                empty_gpu_cache=False,
                stream_file=log_file,
            )

    except Exception:
        # Best-effort UI handler: don't crash the server, but no longer swallow
        # failures silently — surface them in the server log.
        import traceback
        traceback.print_exc()
        # print('Error - Removing temporary directory', session_tmpdirname)
        # shutil.rmtree(session_tmpdirname)
295
+
296
def updateLog(session_state_value: StateDict) -> str:
    """Return the current contents of the session's log file, or "" if absent."""
    session_id = session_state_value["uuid"]
    if session_id is None:
        return ""

    log_path = gs_dir / str(session_id) / "log.txt"
    if not log_path.exists():
        return ""

    with log_path.open("r") as handle:
        return handle.read()
308
+
309
# --- Gradio UI wiring (runs at import time) ---
with gr.Blocks() as demo:
    # Per-browser-session state; "uuid" stays None until a video is uploaded.
    session_state = gr.State({
        "uuid": None,
    })

    with gr.Row():

        with gr.Column():
            video_input = gr.PlayableVideo(
                format="mp4",
                source="upload",
                label="Upload a video",
                include_audio=False
            )
            with gr.Row(variant="panel"):
                ffmpeg_fps = gr.Number(
                    label="FFMPEG FPS",  # fixed label typo (was "FFMPEG FPE")
                    value=1,
                    minimum=1,
                    maximum=5,
                    step=0.10,
                )
                ffmpeg_qscale = gr.Number(
                    label="FFMPEG QSCALE",
                    value=1,
                    minimum=1,
                    maximum=5,
                    step=1,
                )
                colmap_camera = gr.Dropdown(
                    label="COLMAP Camera",
                    value="OPENCV",
                    choices=["OPENCV", "SIMPLE_PINHOLE", "PINHOLE", "SIMPLE_RADIAL", "RADIAL"],
                )

        text_log = gr.Textbox(
            label="Logs",
            info="Logs",
            interactive=False,
            show_copy_button=True
        )
        # text_log = gr.Code(
        #     label="Logs",
        #     language=None,
        #     interactive=False,
        # )

    process_button = gr.Button("Process", visible=False)
    reset_button = gr.ClearButton(
        components=[video_input, text_log, ffmpeg_fps, ffmpeg_qscale, colmap_camera],
        label="Reset",
        visible=False,
    )

    # Kick off the full pipeline; kept as a handle so it can be cancelled.
    process_event = process_button.click(
        fn=process,
        inputs=[session_state, video_input, ffmpeg_fps, ffmpeg_qscale, colmap_camera],
        outputs=[],
    )

    # On upload: reveal buttons, create a session, then poll the log every 2s.
    upload_event = video_input.upload(
        fn=makeButtonVisible,
        inputs=[],
        outputs=[process_button, reset_button]
    ).then(
        fn=createStateSession,
        inputs=[],
        outputs=[session_state],
    ).then(
        fn=updateLog,
        inputs=[session_state],
        outputs=[text_log],
        every=2,
    )

    reset_button.click(
        fn=resetSession,
        inputs=[session_state],
        outputs=[session_state, process_button, reset_button],
        cancels=[process_event]
    )

    video_input.clear(
        fn=resetSession,
        inputs=[session_state],
        outputs=[session_state, process_button, reset_button],
        cancels=[process_event]
    )

    # NOTE(review): the original had a bare `demo.close` statement here — an
    # attribute access that calls nothing — so it was removed as a no-op.

# Ideas kept from the original: gr.LoginButton, gr.LogoutButton,
# gr.HuggingFaceDatasetSaver, gr.OAuthProfile

# with gr.Tab("jsdn"):
#     input_mic = gr.HTML(getHTML())

demo.queue()
# demo.launch()

# mount Gradio app to FastAPI app
app = gr.mount_gradio_app(app, demo, path="/")


if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)
services/colmap.py ADDED
@@ -0,0 +1,244 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Literal, Optional
2
+ from io import IOBase
3
+ import os
4
+ from pathlib import Path
5
+ import shutil
6
+ import subprocess
7
+ from rich.progress import Progress
8
+ from rich.console import Console
9
+
10
+ console = Console()
11
+
12
class FailedProcess(Exception):
    """Raised when an external COLMAP subprocess exits with a non-zero status."""
14
+
15
def colmap_feature_extraction(
    database_path: Path,
    image_path: Path,
    camera: Literal["OPENCV"],
    colmap_command: str = "colmap",
    use_gpu: bool = True,
    stream_file: Optional[IOBase] = None
):
    """Run COLMAP SIFT feature extraction over all *.jpg images in image_path.

    Args:
        database_path: COLMAP database file to create/fill (parent dirs are created).
        image_path: directory containing the input .jpg frames.
        camera: COLMAP camera model name passed to --ImageReader.camera_model.
        colmap_command: name/path of the colmap executable.
        use_gpu: toggles --SiftExtraction.use_gpu.
        stream_file: when given, subprocess output is written there instead of
            being piped back — which also disables progress-bar parsing below.

    Raises:
        FailedProcess: if the colmap process exits with a non-zero status.
    """
    total = len(list(image_path.glob("*.jpg")))
    with Progress(console=console) as progress:
        task = progress.add_task("Feature Extraction", total=total)

        database_path.parent.mkdir(parents=True, exist_ok=True)
        cmd = [
            colmap_command,
            "feature_extractor",
            "--database_path", database_path.as_posix(),
            "--image_path", image_path.as_posix(),
            "--ImageReader.single_camera", "1",
            "--ImageReader.camera_model", camera,
            "--SiftExtraction.use_gpu", "1" if use_gpu else "0",
            # "--SiftExtraction.domain_size_pooling", "1",
            # "--SiftExtraction.estimate_affine_shape", "1"
        ]
        console.log(f"💻 Executing command: {' '.join(cmd)}")

        # If a stream file was provided, stdout goes there and process.stdout
        # is None, so the parsing loop below is skipped entirely.
        _stdout = stream_file if stream_file else subprocess.PIPE
        with subprocess.Popen(cmd, stdout=_stdout, stderr=subprocess.STDOUT, text=True) as process:
            if process.stdout:
                for line in process.stdout:
                    # Lines look like "Processed file [current/total]"; parse
                    # them to drive the progress bar.
                    if line.startswith("Processed file "):
                        line_process = line\
                            .replace("Processed file [", "")\
                            .replace("]", "")\
                            .replace("\n", "")
                        # NOTE(review): `total` is rebound to a str here; the
                        # int() casts below rely on that.
                        current, total = line_process.split("/")
                        progress.update(task, completed=int(current), total=int(total), refresh=True)

        progress.update(task, completed=int(total), refresh=True)

        # The with-block has waited for the process, so returncode is set.
        return_code = process.returncode

        if return_code == 0:

            console.log(f'Feature stored in {database_path.as_posix()}.')
            console.log('✅ Feature extraction completed.')
        else:
            raise FailedProcess("Feature extraction failed.")
63
+
64
def colmap_feature_matching(
    database_path: Path,
    image_path: Path,
    colmap_command: str = "colmap",
    use_gpu: bool = True,
    stream_file: Optional[IOBase] = None
):
    """Run COLMAP exhaustive feature matching against an existing database.

    Args:
        database_path: database produced by colmap_feature_extraction.
        image_path: input image directory (only used to size the progress bar).
        colmap_command: name/path of the colmap executable.
        use_gpu: toggles --SiftMatching.use_gpu.
        stream_file: when given, subprocess output is written there; otherwise
            it is piped and drained.

    Raises:
        FailedProcess: if the colmap process exits with a non-zero status.
    """
    total = len(list(image_path.glob("*.jpg")))
    with Progress(console=console) as progress:
        task = progress.add_task("Feature Matching", total=total)

        # (Removed a stray no-op expression statement `database_path` that was here.)
        cmd = [
            colmap_command,
            "exhaustive_matcher",
            "--database_path", database_path.as_posix(),
            "--SiftMatching.use_gpu", "1" if use_gpu else "0"
        ]
        console.log(f"💻 Executing command: {' '.join(cmd)}")

        _stdout = stream_file if stream_file else subprocess.PIPE
        with subprocess.Popen(cmd, stdout=_stdout, stderr=subprocess.STDOUT, text=True) as process:
            if process.stdout:
                # Drain the pipe so the child cannot block on a full buffer.
                for _ in process.stdout:
                    pass

        progress.update(task, completed=int(total), refresh=True)

        return_code = process.returncode

        if return_code == 0:

            console.log('✅ Feature matching completed.')
        else:
            raise FailedProcess("Feature matching failed.")
99
+
100
def colmap_bundle_adjustment(
    database_path: Path,
    image_path: Path,
    sparse_path: Path,
    colmap_command: str = "colmap",
    stream_file: Optional[IOBase] = None
):
    """Run the COLMAP mapper (sparse reconstruction / bundle adjustment).

    Args:
        database_path: matched feature database.
        image_path: input image directory.
        sparse_path: output directory for the sparse model(s) (created below).
        colmap_command: name/path of the colmap executable.
        stream_file: when given, subprocess output is written there, which
            disables the progress parsing below.

    Raises:
        FailedProcess: if the colmap process exits with a non-zero status.
    """
    total = len(list(image_path.glob("*.jpg")))
    with Progress(console=console) as progress:
        task = progress.add_task("Bundle Adjustment", total=total)

        cmd = [
            colmap_command,
            "mapper",
            "--database_path", database_path.as_posix(),
            "--image_path", image_path.as_posix(),
            "--output_path", sparse_path.as_posix(),
            "--Mapper.ba_global_function_tolerance=0.000001"
            # "--Mapper.ba_local_max_num_iterations", "40",
            # "--Mapper.ba_global_max_num_iterations", "100",
            # "--Mapper.ba_local_max_refinements", "3",
            # "--Mapper.ba_global_max_refinements", "5"
        ]
        console.log(f"💻 Executing command: {' '.join(cmd)}")

        sparse_path.mkdir(parents=True, exist_ok=True)

        _stdout = stream_file if stream_file else subprocess.PIPE
        with subprocess.Popen(cmd, stdout=_stdout, stderr=subprocess.STDOUT, text=True) as process:
            if process.stdout:
                for line in process.stdout:
                    # NOTE(review): echoes every mapper line to stdout — looks
                    # like a debug leftover; confirm before removing.
                    print(line)
                    # Lines look like "Registering image #id (current)"; pull
                    # the parenthesized counter to drive the progress bar.
                    if line.startswith("Registering image #"):
                        line_process = line\
                            .replace("Registering image #", "")\
                            .replace("\n", "")
                        *_, current = line_process.split("(")
                        current, *_ = current.split(")")
                        progress.update(task, completed=int(current), refresh=True)

        progress.update(task, completed=int(total), refresh=True)

        return_code = process.returncode

        if return_code == 0:
            console.log('✅ Bundle adjustment completed.')
        else:
            raise FailedProcess("Bundle adjustment failed.")
148
+
149
def colmap_image_undistortion(
    image_path: Path,
    sparse0_path: Path,
    source_path: Path,
    colmap_command: str = "colmap",
    stream_file: Optional[IOBase] = None
):
    """Run COLMAP image undistortion on the reconstructed sparse model.

    Args:
        image_path: original (distorted) input images.
        sparse0_path: path to the sparse model, typically .../sparse/0.
        source_path: output root; undistorted images and model land here.
        colmap_command: name/path of the colmap executable.
        stream_file: when given, subprocess output is written there, which
            disables the progress parsing below.

    Raises:
        FailedProcess: if the colmap process exits with a non-zero status.
    """
    total = len(list(image_path.glob("*.jpg")))
    with Progress(console=console) as progress:
        task = progress.add_task("Image Undistortion", total=total)
        cmd = [
            colmap_command,
            "image_undistorter",
            "--image_path", image_path.as_posix(),
            "--input_path", sparse0_path.as_posix(),
            "--output_path", source_path.as_posix(),
            "--output_type", "COLMAP"
        ]
        console.log(f"💻 Executing command: {' '.join(cmd)}")

        _stdout = stream_file if stream_file else subprocess.PIPE
        with subprocess.Popen(cmd, stdout=_stdout, stderr=subprocess.STDOUT, text=True) as process:
            if process.stdout:
                for line in process.stdout:
                    # Lines look like "Undistorting image [current/total]".
                    if line.startswith("Undistorting image ["):
                        line_process = line\
                            .replace("Undistorting image [", "")\
                            .replace("]", "")\
                            .replace("\n", "")
                        # NOTE: rebinds `total` to a str; int() below handles it.
                        current, total = line_process.split("/")
                        progress.update(task, completed=int(current), total=int(total), refresh=True)

        progress.update(task, completed=int(total), refresh=True)

        return_code = process.returncode

        if return_code == 0:
            console.log('✅ Image undistortion completed.')
        else:
            raise FailedProcess("Image undistortion failed.")
189
+
190
def colmap(
    source_path: Path,
    camera: Literal["OPENCV"] = "OPENCV",
    colmap_command: str = "colmap",
    use_gpu: bool = True,
    skip_matching: bool = False,
    stream_file: Optional[IOBase] = None
):
    """Full COLMAP pipeline: feature extraction, matching, mapping, undistortion.

    Expects input frames as ``source_path/"input"/*.jpg`` and produces the
    conventional ``source_path/"sparse"/"0"`` layout consumed by the
    gaussian-splatting trainer.

    Args:
        source_path: session working directory.
        camera: COLMAP camera model name.
        colmap_command: name/path of the colmap executable.
        use_gpu: toggles GPU use for SIFT extraction/matching.
        skip_matching: skip extraction/matching/mapping and go straight to
            undistortion (useful when re-running on an existing model).
        stream_file: optional file receiving all subprocess output.

    Raises:
        Exception: if the input folder is missing or holds no .jpg images.
        FailedProcess: propagated from any failing COLMAP step.
    """
    image_path = source_path / "input"
    if not image_path.exists():
        raise Exception(f"Image path {image_path} does not exist. Exiting.")

    total = len(list(image_path.glob("*.jpg")))
    if total == 0:
        raise Exception(f"No images found in {image_path}. Exiting.")

    database_path = source_path / "distorted" / "database.db"

    sparse_path = source_path / "distorted" / "sparse"

    if not skip_matching:
        colmap_feature_extraction(database_path, image_path, camera, colmap_command, use_gpu, stream_file)
        colmap_feature_matching(database_path, image_path, colmap_command, use_gpu, stream_file)
        colmap_bundle_adjustment(database_path, image_path, sparse_path, colmap_command, stream_file)

    colmap_image_undistortion(image_path, sparse_path / "0", source_path, colmap_command, stream_file)

    # Re-arrange the undistorted model files into the conventional sparse/0
    # layout. Files are *copied* (originals kept) — the previous log message
    # said "Moving", which was misleading.
    origin_path = source_path / "sparse"
    destination_path = source_path / "sparse" / "0"
    destination_path.mkdir(exist_ok=True)
    console.log(f"🌟 Copying files from {origin_path} to {destination_path}")
    for entry in origin_path.iterdir():
        if entry.name == '0':
            continue
        shutil.copy(entry, destination_path / entry.name)
227
+
228
if __name__ == "__main__":
    # Ad-hoc manual test: run the full pipeline against a local dataset.
    import tempfile
    with tempfile.NamedTemporaryFile(mode='w+t') as temp_file:
        print(f"Using temp file: {temp_file.name}")
        try:
            colmap(
                source_path = Path("/home/europe/Desktop/gaussian-splatting-kit/test/"),
                camera = "OPENCV",
                colmap_command = "colmap",
                use_gpu = True,
                skip_matching = False,
                # NOTE(review): this handle is never closed, and the log is
                # written here rather than to temp_file — so the dump below
                # always prints an empty temp file. Looks like a leftover.
                stream_file = open("/home/europe/Desktop/gaussian-splatting-kit/test.log", "w+t")
            )
        except FailedProcess:
            console.log("🚨 Error executing colmap.")
            temp_file.seek(0)
            print(temp_file.read())
services/ffmpeg.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from io import IOBase
2
+ import os
3
+ import subprocess
4
+ from typing import Optional
5
+ from pathlib import Path
6
+ from rich.console import Console
7
+
8
+ console = Console()
9
+
10
class FailedProcess(Exception):
    """Raised when the ffmpeg subprocess exits with a non-zero status."""
12
+
13
def ffmpeg_extract_frames(
    video_path: Path,
    frames_path: Path,
    # TODO: Enable these options
    # start_time: Optional[str] = None,
    # duration: Optional[float] = None,
    # end_time: Optional[str] = None,
    fps: float = 1,
    qscale: int = 1,
    stream_file: Optional[IOBase] = None
) -> Path:
    """Extract JPEG frames from a video into ``frames_path/"input"``.

    Frames are numbered 0001.jpg, 0002.jpg, ... at the requested rate.

    Args:
        video_path: source video file.
        frames_path: output root; frames land in its "input" subfolder.
        fps: frames per second to extract.
        qscale: JPEG quality scale (lower is better).
        stream_file: when given, ffmpeg output is written there instead of
            being piped back and echoed.

    Returns:
        frames_path (the output root, not the "input" subfolder).

    Raises:
        FailedProcess: if ffmpeg exits with a non-zero status.
    """
    frame_destination = frames_path / "input"
    # Fixed: the original log line was missing the closing parenthesis.
    console.log(f"🎞️ Extracting Images from {video_path} to {frame_destination} (fps: {fps}, qscale: {qscale})")
    # Create the directory to store the frames
    frames_path.mkdir(parents=True, exist_ok=True)
    frame_destination.mkdir(parents=True, exist_ok=True)
    # ffmpeg writes its numbered outputs relative to the CWD, so hop into the
    # destination for the duration of the run — and always hop back, even if
    # Popen raises (the original lacked this try/finally).
    cwd = os.getcwd()
    os.chdir(frame_destination)
    try:
        # Construct the ffmpeg command as a list of strings
        cmd = [
            'ffmpeg',
            '-i', str(video_path),
            '-qscale:v', str(qscale),
            '-qmin', '1',
            '-vf', f"fps={fps}",
            '%04d.jpg'
        ]

        console.log(f"💻 Executing command: {' '.join(cmd)}")

        _stdout = stream_file if stream_file else subprocess.PIPE
        with subprocess.Popen(cmd, stdout=_stdout, stderr=subprocess.STDOUT, text=True) as process:
            if process.stdout:
                for line in process.stdout:
                    print(line)
    finally:
        # Change the current working directory back to the original
        os.chdir(cwd)

    return_code = process.returncode

    if return_code == 0:
        console.log(f"✅ Images Successfully Extracted! Path: {frames_path}")
    else:
        raise FailedProcess("Error extracting frames.")

    return frames_path
63
+
64
def ffmpeg_run(
    video_path: Path,
    output_path: Path,
    ffmpeg_command: str = "ffmpeg",
    # TODO: Enable these options
    # start_time: Optional[str] = None,
    # duration: Optional[float] = None,
    # end_time: Optional[str] = None,
    fps: float = 1,
    qscale: int = 1,
    stream_file: Optional[IOBase] = None
) -> str:
    """Top-level frame-extraction entry point; wraps ffmpeg_extract_frames.

    NOTE(review): `ffmpeg_command` is accepted but not forwarded — the helper
    always invokes the literal 'ffmpeg'; verify whether that is intentional.
    """
    console.log("🌟 Starting the Frames Extraction...")
    extracted_path = ffmpeg_extract_frames(
        video_path,
        output_path,
        fps=fps,
        qscale=qscale,
        stream_file=stream_file,
    )
    console.log(f"🎉 Frames Extraction Complete! Path: {extracted_path}")
    return extracted_path
85
+
86
if __name__ == "__main__":
    # Ad-hoc manual test: extract frames from a local video, streaming ffmpeg
    # output into a temp file that gets dumped on failure.
    import tempfile
    with tempfile.NamedTemporaryFile(mode='w+t') as temp_file:
        print(f"Using temp file: {temp_file.name}")
        try:
            ffmpeg_run(
                Path("/home/europe/Desktop/gaussian-splatting-kit/test/test.mov"),
                Path("/home/europe/Desktop/gaussian-splatting-kit/test"),
                stream_file=temp_file
            )
        except FailedProcess:
            console.log("🚨 Error extracting frames.")
            temp_file.seek(0)
            print(temp_file.read())
+
services/gaussian_splatting_cuda.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from io import IOBase
2
+ from pathlib import Path
3
+ import subprocess
4
+ from typing import Optional
5
+ from rich.console import Console
6
+
7
+ console = Console()
8
+
9
def gaussian_splatting_cuda_training(
    data_path: Path,
    output_path: Path,
    gs_command: str,
    iterations: int = 10_000,
    convergence_rate: float = 0.01,
    resolution: int = 512,
    enable_cr_monitoring: bool = False,
    force: bool = False,
    empty_gpu_cache: bool = False,
    stream_file: Optional[IOBase] = None
) -> None:
    """Run the gaussian-splatting-cuda training executable and echo its output.

    :param data_path: path to the COLMAP training data.
    :param output_path: path where the trained model is saved.
    :param gs_command: name/path of the gaussian-splatting-cuda executable.
    :param iterations: number of training iterations (passed as --iter).
    :param convergence_rate: custom average convergence rate (currently unused; see TODO).
    :param resolution: training resolution (currently unused; see TODO).
    :param enable_cr_monitoring: enable convergence-rate monitoring (currently unused).
    :param force: force overwriting of the output folder (currently unused here).
    :param empty_gpu_cache: empty CUDA memory every 100 iterations (currently unused).
    :param stream_file: optional file object that receives the process output;
        when None, output is piped and echoed to stdout.
    :raises Exception: if the process exits with a non-zero return code.

    Underlying CLI options (from the tool's --help):
        -d, --data_path [PATH]    Path to the training data.
        -f, --force               Force overwriting of the output folder.
        -o, --output_path [PATH]  Path to save the trained model.
        -i, --iter [NUM]          Number of training iterations.
        --empty-gpu-cache         Empty CUDA memory after every 100 iterations
                                  (considerable performance impact).
        --enable-cr-monitoring    Stop early once the average convergence rate
                                  drops below the threshold.
        -c, --convergence_rate    Custom average convergence rate; requires
                                  --enable-cr-monitoring.
    """

    # BUG FIX: the original list was missing commas after the --data-path and
    # --output-path entries, so Python's implicit string concatenation fused
    # the three flags into a single malformed argument.
    cmd = [
        gs_command,
        f"--data-path={data_path.as_posix()}",
        f"--output-path={output_path.as_posix()}",
        f"--iter={iterations}",
        # TODO: Enable these options and put the right defaults in the function signature
        # f"--convergence-rate={convergence_rate}",
        # f"--resolution={resolution}",
        # "--enable-cr-monitoring" if enable_cr_monitoring else "",
        # "--force" if force else "",
        # "--empty-gpu-cache" if empty_gpu_cache else ""
    ]

    console.log(f"💻 Executing command: {' '.join(cmd)}")

    # Send child output either to the caller's file or to a pipe we echo.
    _stdout = stream_file if stream_file else subprocess.PIPE
    with subprocess.Popen(cmd, stdout=_stdout, stderr=subprocess.STDOUT, text=True) as process:
        if process.stdout:
            # Echo the child's output line-by-line as it is produced.
            for line in process.stdout:
                print(line)

    # Check if the command was successful
    return_code = process.returncode
    if return_code == 0:
        console.log('✅ Successfully splatted frames.')
    else:
        raise Exception('Error splatting frames.')
76
+
77
def gaussian_splatting_cuda(
    data_path: Path,
    output_path: Path,
    gs_command: str,
    iterations: int = 10_000,
    convergence_rate: float = 0.01,
    resolution: int = 512,
    enable_cr_monitoring: bool = False,
    force: bool = False,
    empty_gpu_cache: bool = False,
    stream_file: Optional[IOBase] = None
) -> None:
    """Prepare the output folder and launch gaussian-splatting-cuda training.

    FIX: the return annotation was `-> str` although the function returns
    nothing; corrected to `-> None`.

    :param data_path: path to the COLMAP training data.
    :param output_path: destination folder for the trained model.
    :param gs_command: name/path of the gaussian-splatting-cuda executable.
    :param force: when False, refuse to reuse an existing output folder.
    :param stream_file: optional file object receiving the process output.
    :raises Exception: if `output_path` already exists and `force` is False,
        or if the training process exits with a non-zero return code.
    """
    # Refuse to clobber an existing output folder unless explicitly forced.
    if output_path.exists() and not force:
        raise Exception(f"Output folder already exists. Path: {output_path}, use --force to overwrite.")

    # Create the output path if it doesn't exist
    output_path.mkdir(parents=True, exist_ok=True)

    # Execute the training run (raises on failure).
    gaussian_splatting_cuda_training(
        data_path,
        output_path,
        gs_command,
        iterations,
        convergence_rate,
        resolution,
        enable_cr_monitoring,
        force,
        empty_gpu_cache,
        stream_file
    )
services/http.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ import requests
3
+ from rich.console import Console
4
+
5
+ console = Console()
6
+
7
def download_file(url: str, file_path: Path) -> Path:
    """Stream the resource at `url` into `file_path` in 1 KiB chunks.

    Returns `file_path` in all cases. NOTE(review): on a non-200 response
    this only logs an error — the returned path may not exist or may be
    incomplete; confirm callers handle that.
    """
    console.log(f"📥 Downloading File from URL: {url}")
    response = requests.get(url, stream=True)
    if response.status_code != 200:
        console.log(f"🚨 Error downloading file from {url}.")
    else:
        with file_path.open('wb') as file:
            for chunk in response.iter_content(chunk_size=1024):
                # Skip keep-alive chunks, which arrive empty.
                if chunk:
                    file.write(chunk)
        console.log(f"✅ File Successfully Downloaded! Path: {file_path}")
    return file_path
19
+
20
def download_api(url: str, file_path: Path) -> Path:
    """Download the video at `url` into `file_path / "video.mp4"`.

    :param url: source URL of the video.
    :param file_path: destination directory (a pathlib.Path).
    :return: the path of the downloaded video file.
    """
    # BUG FIX: the original used `file_path + '/video.mp4'`, which raises
    # TypeError because pathlib.Path does not support `+` with str; the
    # correct way to join path components is the `/` operator.
    video_path = file_path / 'video.mp4'
    console.log("🌟 Starting the Video Download...")
    video_path = download_file(url, video_path)
    console.log(f"🎉 Video Download Complete! Path: {video_path}")
    return video_path
services/rerun.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ from pathlib import Path
4
+
5
+ import numpy as np
6
+ import rerun as rr # pip install rerun-sdk
7
+ from utils.read_write_model import read_model
8
+
9
+ # From https://github.com/rerun-io/rerun/tree/main/examples/python/structure_from_motion
10
def read_and_log_sparse_reconstruction(
    exp_name: str,
    dataset_path: Path,
    output_path: Path,
    filter_output: bool = False,
    filter_min_visible: int = 2_000
) -> None:
    """Load a COLMAP sparse reconstruction and log it to a Rerun recording.

    Per video frame, logs: the visible 3D points, the camera pose and
    pinhole intrinsics, the source image file, and its 2D keypoints.
    The recording is saved to `<output_path>/rerun/recording.rrd`.

    :param exp_name: name used for the Rerun recording.
    :param dataset_path: COLMAP workspace containing `sparse/` and `images/`.
    :param output_path: directory under which `rerun/recording.rrd` is written.
    :param filter_output: if True, drop noisy points and frames with too few
        visible points.
    :param filter_min_visible: minimum visible 3D points for a frame to be
        logged (only applied when `filter_output` is True).
    """
    rr.init(exp_name)

    cameras, images, points3D = read_model(dataset_path / "sparse", ext=".bin")

    if filter_output:
        # Filter out noisy points: keep only colored points observed in more than 4 images.
        points3D = {id: point for id, point in points3D.items() if point.rgb.any() and len(point.image_ids) > 4}

    rr.log_view_coordinates("/", up="-Y", timeless=True)

    # Iterate through images (video frames) logging data related to each frame.
    for image in sorted(images.values(), key=lambda im: im.name):  # type: ignore[no-any-return]
        image_file = dataset_path / "images" / image.name

        # Skip reconstruction entries whose image file is missing on disk.
        if not os.path.exists(image_file):
            continue

        # COLMAP sets image ids that don't match the original video frame,
        # so recover the frame index from the digits in the file name.
        idx_match = re.search(r"\d+", image.name)
        assert idx_match is not None
        frame_idx = int(idx_match.group(0))

        quat_xyzw = image.qvec[[1, 2, 3, 0]]  # COLMAP uses wxyz quaternions
        camera = cameras[image.camera_id]
        np.array([1.0, 1.0])  # NOTE(review): dead statement — result unused; leftover from the upstream example?

        # Boolean mask over this image's keypoints: True where the keypoint
        # has a surviving 3D point (id -1 means unmatched).
        visible = [id != -1 and points3D.get(id) is not None for id in image.point3D_ids]
        visible_ids = image.point3D_ids[visible]

        if filter_output and len(visible_ids) < filter_min_visible:
            continue

        visible_xyzs = [points3D[id] for id in visible_ids]
        visible_xys = image.xys[visible]

        rr.set_time_sequence("frame", frame_idx)

        points = [point.xyz for point in visible_xyzs]
        point_colors = [point.rgb for point in visible_xyzs]
        point_errors = [point.error for point in visible_xyzs]

        rr.log_scalar("plot/avg_reproj_err", np.mean(point_errors), color=[240, 45, 58])

        rr.log_points("points", points, colors=point_colors, ext={"error": point_errors})

        # COLMAP's camera transform is "camera from world"
        rr.log_transform3d(
            "camera", rr.TranslationRotationScale3D(image.tvec, rr.Quaternion(xyzw=quat_xyzw)), from_parent=True
        )
        rr.log_view_coordinates("camera", xyz="RDF")  # X=Right, Y=Down, Z=Forward

        # Log camera intrinsics
        assert camera.model == "PINHOLE"
        rr.log_pinhole(
            "camera/image",
            width=camera.width,
            height=camera.height,
            focal_length_px=camera.params[:2],
            principal_point_px=camera.params[2:],
        )

        rr.log_image_file("camera/image", img_path=dataset_path / "images" / image.name)
        rr.log_points("camera/image/keypoints", visible_xys, colors=[34, 138, 167])

    # Persist the recording next to the other pipeline outputs.
    rerun_output_directory = output_path / "rerun"
    rerun_output_directory.mkdir(parents=True, exist_ok=True)
    rerun_output_file = rerun_output_directory / "recording.rrd"
    rr.save(rerun_output_file.as_posix())
85
+
services/utils/read_write_model.py ADDED
@@ -0,0 +1,514 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This file is adapted from
2
+ # https://github.com/colmap/colmap/blob/bf3e19140f491c3042bfd85b7192ef7d249808ec/scripts/python/read_write_model.py
3
+ # Copyright (c) 2023, ETH Zurich and UNC Chapel Hill.
4
+ # All rights reserved.
5
+ #
6
+ # Redistribution and use in source and binary forms, with or without
7
+ # modification, are permitted provided that the following conditions are met:
8
+ #
9
+ # * Redistributions of source code must retain the above copyright
10
+ # notice, this list of conditions and the following disclaimer.
11
+ #
12
+ # * Redistributions in binary form must reproduce the above copyright
13
+ # notice, this list of conditions and the following disclaimer in the
14
+ # documentation and/or other materials provided with the distribution.
15
+ #
16
+ # * Neither the name of ETH Zurich and UNC Chapel Hill nor the names of
17
+ # its contributors may be used to endorse or promote products derived
18
+ # from this software without specific prior written permission.
19
+ #
20
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21
+ # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22
+ # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
24
+ # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25
+ # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26
+ # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29
+ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30
+ # POSSIBILITY OF SUCH DAMAGE.
31
+ #
32
+ # Author: Johannes L. Schoenberger (jsch-at-demuc-dot-de)
33
+ # type: ignore
34
+ from __future__ import annotations
35
+
36
+ import argparse
37
+ import collections
38
+ import os
39
+ import struct
40
+ from pathlib import Path
41
+ from typing import Mapping
42
+
43
+ import numpy as np
44
+
45
# Lightweight record types mirroring COLMAP's reconstruction entities.
CameraModel = collections.namedtuple("CameraModel", ["model_id", "model_name", "num_params"])
# A camera: intrinsic model name, image size, and parameter vector.
Camera = collections.namedtuple("Camera", ["id", "qvec", "tvec", "camera_id", "name", "xys", "point3D_ids"]) if False else collections.namedtuple("Camera", ["id", "model", "width", "height", "params"])
BaseImage = collections.namedtuple("Image", ["id", "qvec", "tvec", "camera_id", "name", "xys", "point3D_ids"])
# A triangulated 3D point: position, color, reprojection error, and its track.
Point3D = collections.namedtuple("Point3D", ["id", "xyz", "rgb", "error", "image_ids", "point2D_idxs"])
49
+
50
+
51
class Image(BaseImage):
    # Extends the BaseImage record with a pose-conversion helper.
    def qvec2rotmat(self):
        # 3x3 rotation matrix built from this image's wxyz quaternion.
        return qvec2rotmat(self.qvec)
54
+
55
+
56
# All camera models COLMAP can store. `num_params` fixes how many doubles
# the binary reader/writer consumes or emits per camera of that model.
CAMERA_MODELS = {
    CameraModel(model_id=0, model_name="SIMPLE_PINHOLE", num_params=3),
    CameraModel(model_id=1, model_name="PINHOLE", num_params=4),
    CameraModel(model_id=2, model_name="SIMPLE_RADIAL", num_params=4),
    CameraModel(model_id=3, model_name="RADIAL", num_params=5),
    CameraModel(model_id=4, model_name="OPENCV", num_params=8),
    CameraModel(model_id=5, model_name="OPENCV_FISHEYE", num_params=8),
    CameraModel(model_id=6, model_name="FULL_OPENCV", num_params=12),
    CameraModel(model_id=7, model_name="FOV", num_params=5),
    CameraModel(model_id=8, model_name="SIMPLE_RADIAL_FISHEYE", num_params=4),
    CameraModel(model_id=9, model_name="RADIAL_FISHEYE", num_params=5),
    CameraModel(model_id=10, model_name="THIN_PRISM_FISHEYE", num_params=12),
}
# Lookup tables: numeric model id -> CameraModel and model name -> CameraModel.
CAMERA_MODEL_IDS = {camera_model.model_id: camera_model for camera_model in CAMERA_MODELS}
CAMERA_MODEL_NAMES = {camera_model.model_name: camera_model for camera_model in CAMERA_MODELS}
71
+
72
+
73
def read_next_bytes(fid, num_bytes, format_char_sequence, endian_character="<"):
    """Read `num_bytes` from a binary file and unpack them.

    :param fid: open binary file object.
    :param num_bytes: sum of combination of {2, 4, 8}, e.g. 2, 6, 16, 30, etc.
    :param format_char_sequence: struct format chars from {c, e, f, d, h, H, i, I, l, L, q, Q}.
    :param endian_character: byte-order prefix, any of {@, =, <, >, !}.
    :return: tuple of the unpacked values.
    """
    raw = fid.read(num_bytes)
    fmt = endian_character + format_char_sequence
    return struct.unpack(fmt, raw)
85
+
86
+
87
def write_next_bytes(fid, data, format_char_sequence, endian_character="<"):
    """Pack `data` and append it to a binary file.

    :param fid: open binary file object.
    :param data: value to write; multiple values must be wrapped in a list
        or tuple matching `format_char_sequence`.
    :param format_char_sequence: struct format chars from {c, e, f, d, h, H, i, I, l, L, q, Q}.
    :param endian_character: byte-order prefix, any of {@, =, <, >, !}.
    """
    fmt = endian_character + format_char_sequence
    # Sequences are splatted into struct.pack; scalars are passed directly.
    if isinstance(data, (list, tuple)):
        packed = struct.pack(fmt, *data)
    else:
        packed = struct.pack(fmt, data)
    fid.write(packed)
103
+
104
+
105
def read_cameras_text(path: Path):
    """Parse a COLMAP cameras.txt file into a dict {camera_id: Camera}.

    see: src/base/reconstruction.cc
    void Reconstruction::WriteCamerasText(const std::string& path)
    void Reconstruction::ReadCamerasText(const std::string& path)
    """
    cameras = {}
    with open(path) as fid:
        for raw_line in fid:
            line = raw_line.strip()
            # Skip blank lines and '#' comment/header lines.
            if not line or line.startswith("#"):
                continue
            elems = line.split()
            camera_id = int(elems[0])
            cameras[camera_id] = Camera(
                id=camera_id,
                model=elems[1],
                width=int(elems[2]),
                height=int(elems[3]),
                # Everything after HEIGHT is the model-specific parameter vector.
                params=np.array(tuple(map(float, elems[4:]))),
            )
    return cameras
127
+
128
+
129
def read_cameras_binary(path_to_model_file: Path) -> Mapping[int, Camera]:
    """
    Read cameras from a COLMAP binary model file (cameras.bin).

    see: src/base/reconstruction.cc
    void Reconstruction::WriteCamerasBinary(const std::string& path)
    void Reconstruction::ReadCamerasBinary(const std::string& path)
    """
    cameras = {}
    with path_to_model_file.open("rb") as fid:
        # Leading uint64: number of camera records in the file.
        num_cameras = read_next_bytes(fid, 8, "Q")[0]
        for _ in range(num_cameras):
            # Fixed-size record: camera_id (i), model_id (i), width (Q), height (Q).
            camera_properties = read_next_bytes(fid, num_bytes=24, format_char_sequence="iiQQ")
            camera_id = camera_properties[0]
            model_id = camera_properties[1]
            model_name = CAMERA_MODEL_IDS[camera_properties[1]].model_name
            width = camera_properties[2]
            height = camera_properties[3]
            # Parameter count depends on the camera model (see CAMERA_MODELS).
            num_params = CAMERA_MODEL_IDS[model_id].num_params
            params = read_next_bytes(fid, num_bytes=8 * num_params, format_char_sequence="d" * num_params)
            cameras[camera_id] = Camera(
                id=camera_id, model=model_name, width=width, height=height, params=np.array(params)
            )
        assert len(cameras) == num_cameras
    return cameras
152
+
153
+
154
def write_cameras_text(cameras, path):
    """Write cameras to a COLMAP text model file (cameras.txt).

    see: src/base/reconstruction.cc
    void Reconstruction::WriteCamerasText(const std::string& path)
    void Reconstruction::ReadCamerasText(const std::string& path)
    """
    header = (
        "# Camera list with one line of data per camera:\n"
        "# CAMERA_ID, MODEL, WIDTH, HEIGHT, PARAMS[]\n"
        f"# Number of cameras: {len(cameras)}\n"
    )
    with open(path, "w") as fid:
        fid.write(header)
        # One space-separated line per camera: id, model, size, params.
        for cam in cameras.values():
            fields = [cam.id, cam.model, cam.width, cam.height, *cam.params]
            fid.write(" ".join(str(f) for f in fields) + "\n")
171
+
172
+
173
def write_cameras_binary(cameras, path_to_model_file):
    """
    Write cameras to a COLMAP binary model file (cameras.bin).

    see: src/base/reconstruction.cc
    void Reconstruction::WriteCamerasBinary(const std::string& path)
    void Reconstruction::ReadCamerasBinary(const std::string& path)
    """
    with open(path_to_model_file, "wb") as fid:
        # Leading uint64: number of camera records.
        write_next_bytes(fid, len(cameras), "Q")
        for _, cam in cameras.items():
            # The binary format stores the numeric model id, not the model name.
            model_id = CAMERA_MODEL_NAMES[cam.model].model_id
            camera_properties = [cam.id, model_id, cam.width, cam.height]
            write_next_bytes(fid, camera_properties, "iiQQ")
            # Model-specific parameters, one double each.
            for p in cam.params:
                write_next_bytes(fid, float(p), "d")
    return cameras
188
+
189
+
190
def read_images_text(path: Path):
    """
    Read images from a COLMAP text model file (images.txt).

    Each image occupies two consecutive lines: the first holds pose and
    metadata (IMAGE_ID, QW QX QY QZ, TX TY TZ, CAMERA_ID, NAME), the second
    holds the 2D keypoints as a flat (X, Y, POINT3D_ID) triplet sequence.

    see: src/base/reconstruction.cc
    void Reconstruction::ReadImagesText(const std::string& path)
    void Reconstruction::WriteImagesText(const std::string& path)
    """
    images = {}
    with open(path) as fid:
        while True:
            line = fid.readline()
            if not line:
                break
            line = line.strip()
            if len(line) > 0 and line[0] != "#":
                elems = line.split()
                image_id = int(elems[0])
                # Quaternion is stored wxyz (COLMAP convention).
                qvec = np.array(tuple(map(float, elems[1:5])))
                tvec = np.array(tuple(map(float, elems[5:8])))
                camera_id = int(elems[8])
                image_name = elems[9]
                # Second line of the record: x1 y1 id1 x2 y2 id2 ...
                elems = fid.readline().split()
                xys = np.column_stack([tuple(map(float, elems[0::3])), tuple(map(float, elems[1::3]))])
                point3D_ids = np.array(tuple(map(int, elems[2::3])))
                images[image_id] = Image(
                    id=image_id,
                    qvec=qvec,
                    tvec=tvec,
                    camera_id=camera_id,
                    name=image_name,
                    xys=xys,
                    point3D_ids=point3D_ids,
                )
    return images
223
+
224
+
225
def read_images_binary(path_to_model_file: Path) -> Mapping[int, Image]:
    """
    Read images from a COLMAP binary model file (images.bin).

    see: src/base/reconstruction.cc
    void Reconstruction::ReadImagesBinary(const std::string& path)
    void Reconstruction::WriteImagesBinary(const std::string& path)
    """
    images = {}
    with open(path_to_model_file, "rb") as fid:
        # Leading uint64: number of registered images.
        num_reg_images = read_next_bytes(fid, 8, "Q")[0]
        for _ in range(num_reg_images):
            # Fixed part: image_id (i), qvec wxyz (4 doubles), tvec (3 doubles), camera_id (i).
            binary_image_properties = read_next_bytes(fid, num_bytes=64, format_char_sequence="idddddddi")
            image_id = binary_image_properties[0]
            qvec = np.array(binary_image_properties[1:5])
            tvec = np.array(binary_image_properties[5:8])
            camera_id = binary_image_properties[8]
            # Image name is a NUL-terminated byte string, read one char at a time.
            image_name = ""
            current_char = read_next_bytes(fid, 1, "c")[0]
            while current_char != b"\x00":  # look for the ASCII 0 entry
                image_name += current_char.decode("utf-8")
                current_char = read_next_bytes(fid, 1, "c")[0]
            num_points2D = read_next_bytes(fid, num_bytes=8, format_char_sequence="Q")[0]
            # Keypoints: num_points2D records of (x: double, y: double, point3D_id: int64).
            x_y_id_s = read_next_bytes(fid, num_bytes=24 * num_points2D, format_char_sequence="ddq" * num_points2D)
            xys = np.column_stack([tuple(map(float, x_y_id_s[0::3])), tuple(map(float, x_y_id_s[1::3]))])
            point3D_ids = np.array(tuple(map(int, x_y_id_s[2::3])))
            images[image_id] = Image(
                id=image_id,
                qvec=qvec,
                tvec=tvec,
                camera_id=camera_id,
                name=image_name,
                xys=xys,
                point3D_ids=point3D_ids,
            )
    return images
259
+
260
+
261
def write_images_text(images, path):
    """Write images to a COLMAP text model file (images.txt), two lines per image.

    see: src/base/reconstruction.cc
    void Reconstruction::ReadImagesText(const std::string& path)
    void Reconstruction::WriteImagesText(const std::string& path)
    """
    if images:
        mean_observations = sum(len(img.point3D_ids) for img in images.values()) / len(images)
    else:
        mean_observations = 0
    header = (
        "# Image list with two lines of data per image:\n"
        "# IMAGE_ID, QW, QX, QY, QZ, TX, TY, TZ, CAMERA_ID, NAME\n"
        "# POINTS2D[] as (X, Y, POINT3D_ID)\n"
        f"# Number of images: {len(images)}, mean observations per image: {mean_observations}\n"
    )

    with open(path, "w") as fid:
        fid.write(header)
        for img in images.values():
            # Line 1: pose and metadata.
            pose_fields = [img.id, *img.qvec, *img.tvec, img.camera_id, img.name]
            fid.write(" ".join(str(f) for f in pose_fields) + "\n")
            # Line 2: flat (x, y, point3D_id) triplets for every keypoint.
            point_entries = [
                " ".join(str(v) for v in [*xy, p3d_id])
                for xy, p3d_id in zip(img.xys, img.point3D_ids)
            ]
            fid.write(" ".join(point_entries) + "\n")
289
+
290
+
291
def write_images_binary(images, path_to_model_file):
    """
    Write images to a COLMAP binary model file (images.bin).

    see: src/base/reconstruction.cc
    void Reconstruction::ReadImagesBinary(const std::string& path)
    void Reconstruction::WriteImagesBinary(const std::string& path)
    """
    with open(path_to_model_file, "wb") as fid:
        # Leading uint64: number of image records.
        write_next_bytes(fid, len(images), "Q")
        for _, img in images.items():
            write_next_bytes(fid, img.id, "i")
            write_next_bytes(fid, img.qvec.tolist(), "dddd")
            write_next_bytes(fid, img.tvec.tolist(), "ddd")
            write_next_bytes(fid, img.camera_id, "i")
            # Name is written as individual chars followed by a NUL terminator.
            for char in img.name:
                write_next_bytes(fid, char.encode("utf-8"), "c")
            write_next_bytes(fid, b"\x00", "c")
            write_next_bytes(fid, len(img.point3D_ids), "Q")
            # Keypoints: (x, y, point3D_id) per entry.
            for xy, p3d_id in zip(img.xys, img.point3D_ids):
                write_next_bytes(fid, [*xy, p3d_id], "ddq")
310
+
311
+
312
def read_points3D_text(path):
    """Parse a COLMAP points3D.txt file into a dict {point3D_id: Point3D}.

    see: src/base/reconstruction.cc
    void Reconstruction::ReadPoints3DText(const std::string& path)
    void Reconstruction::WritePoints3DText(const std::string& path)
    """
    points3D = {}
    with open(path) as fid:
        for raw_line in fid:
            line = raw_line.strip()
            # Skip blank lines and '#' comment/header lines.
            if not line or line.startswith("#"):
                continue
            elems = line.split()
            point3D_id = int(elems[0])
            points3D[point3D_id] = Point3D(
                id=point3D_id,
                xyz=np.array(tuple(map(float, elems[1:4]))),
                rgb=np.array(tuple(map(int, elems[4:7]))),
                error=float(elems[7]),
                # Track: alternating (IMAGE_ID, POINT2D_IDX) pairs after ERROR.
                image_ids=np.array(tuple(map(int, elems[8::2]))),
                point2D_idxs=np.array(tuple(map(int, elems[9::2]))),
            )
    return points3D
337
+
338
+
339
def read_points3D_binary(path_to_model_file: Path) -> Mapping[int, Point3D]:
    """
    Read 3D points from a COLMAP binary model file (points3D.bin).

    see: src/base/reconstruction.cc
    void Reconstruction::ReadPoints3DBinary(const std::string& path)
    void Reconstruction::WritePoints3DBinary(const std::string& path)
    """
    points3D = {}
    with open(path_to_model_file, "rb") as fid:
        # Leading uint64: number of 3D points.
        num_points = read_next_bytes(fid, 8, "Q")[0]
        for _ in range(num_points):
            # Fixed part: id (Q), xyz (3 doubles), rgb (3 bytes), reprojection error (d) = 43 bytes.
            binary_point_line_properties = read_next_bytes(fid, num_bytes=43, format_char_sequence="QdddBBBd")
            point3D_id = binary_point_line_properties[0]
            xyz = np.array(binary_point_line_properties[1:4])
            rgb = np.array(binary_point_line_properties[4:7])
            error = np.array(binary_point_line_properties[7])
            # Track: pairs of (image_id, point2D_idx), each int32.
            track_length = read_next_bytes(fid, num_bytes=8, format_char_sequence="Q")[0]
            track_elems = read_next_bytes(fid, num_bytes=8 * track_length, format_char_sequence="ii" * track_length)
            image_ids = np.array(tuple(map(int, track_elems[0::2])))
            point2D_idxs = np.array(tuple(map(int, track_elems[1::2])))
            points3D[point3D_id] = Point3D(
                id=point3D_id, xyz=xyz, rgb=rgb, error=error, image_ids=image_ids, point2D_idxs=point2D_idxs
            )
    return points3D
362
+
363
+
364
def write_points3D_text(points3D, path):
    """Write 3D points to a COLMAP text model file (points3D.txt).

    see: src/base/reconstruction.cc
    void Reconstruction::ReadPoints3DText(const std::string& path)
    void Reconstruction::WritePoints3DText(const std::string& path)
    """
    if points3D:
        mean_track_length = sum(len(pt.image_ids) for pt in points3D.values()) / len(points3D)
    else:
        mean_track_length = 0
    header = (
        "# 3D point list with one line of data per point:\n"
        "# POINT3D_ID, X, Y, Z, R, G, B, ERROR, TRACK[] as (IMAGE_ID, POINT2D_IDX)\n"
        f"# Number of points: {len(points3D)}, mean track length: {mean_track_length}\n"
    )

    with open(path, "w") as fid:
        fid.write(header)
        for pt in points3D.values():
            # Fixed fields, then the track as (image_id, point2D_idx) pairs.
            fixed_fields = [pt.id, *pt.xyz, *pt.rgb, pt.error]
            fid.write(" ".join(str(f) for f in fixed_fields) + " ")
            track_entries = [
                f"{image_id} {point2D}"
                for image_id, point2D in zip(pt.image_ids, pt.point2D_idxs)
            ]
            fid.write(" ".join(track_entries) + "\n")
389
+
390
+
391
def write_points3D_binary(points3D, path_to_model_file):
    """
    Write 3D points to a COLMAP binary model file (points3D.bin).

    see: src/base/reconstruction.cc
    void Reconstruction::ReadPoints3DBinary(const std::string& path)
    void Reconstruction::WritePoints3DBinary(const std::string& path)
    """
    with open(path_to_model_file, "wb") as fid:
        # Leading uint64: number of 3D point records.
        write_next_bytes(fid, len(points3D), "Q")
        for _, pt in points3D.items():
            write_next_bytes(fid, pt.id, "Q")
            write_next_bytes(fid, pt.xyz.tolist(), "ddd")
            write_next_bytes(fid, pt.rgb.tolist(), "BBB")
            write_next_bytes(fid, pt.error, "d")
            # Track: (image_id, point2D_idx) int32 pairs, preceded by their count.
            track_length = pt.image_ids.shape[0]
            write_next_bytes(fid, track_length, "Q")
            for image_id, point2D_id in zip(pt.image_ids, pt.point2D_idxs):
                write_next_bytes(fid, [image_id, point2D_id], "ii")
408
+
409
+
410
def detect_model_format(path: Path, ext: str) -> bool:
    """Return True if cameras/images/points3D files with suffix `ext` all exist under `path`.

    :param path: model directory to inspect.
    :param ext: file suffix to probe, e.g. ".bin" or ".txt".
    :return: True (and prints the detected format) if all three files exist.
    """
    parts = ["cameras", "images", "points3D"]
    # BUG FIX: the original tested the truthiness of the Path objects
    # themselves (always True), so every format was "detected" even when
    # no files existed. We must check file existence explicitly.
    if all((path / p).with_suffix(ext).exists() for p in parts):
        print("Detected model format: '" + ext + "'")
        return True

    return False
417
+
418
+
419
def read_model(path: Path, ext: str = ""):
    """Load a COLMAP model as (cameras, images, points3D) dicts.

    When `ext` is empty the format is auto-detected, preferring ".bin"
    over ".txt". Prints a message and returns None when no format can
    be determined.
    """
    if ext == "":
        # try to detect the extension automatically: binary first, then text.
        if detect_model_format(path, ".bin"):
            ext = ".bin"
        elif detect_model_format(path, ".txt"):
            ext = ".txt"
        else:
            print("Provide model format: '.bin' or '.txt'")
            return

    # Pick the reader triple matching the detected/requested format.
    if ext == ".txt":
        load_cameras, load_images, load_points = (
            read_cameras_text,
            read_images_text,
            read_points3D_text,
        )
    else:
        load_cameras, load_images, load_points = (
            read_cameras_binary,
            read_images_binary,
            read_points3D_binary,
        )

    cameras = load_cameras((path / "cameras").with_suffix(ext))
    images = load_images((path / "images").with_suffix(ext))
    points3D = load_points((path / "points3D").with_suffix(ext))
    return cameras, images, points3D
439
+
440
+
441
def write_model(cameras, images, points3D, path, ext=".bin"):
    """Write a COLMAP model to `path` in text (".txt") or binary (any other ext) format.

    Returns the inputs unchanged for convenient chaining.
    """
    if ext == ".txt":
        write_cameras_text(cameras, os.path.join(path, "cameras" + ext))
        write_images_text(images, os.path.join(path, "images" + ext))
        write_points3D_text(points3D, os.path.join(path, "points3D" + ext))
    else:
        write_cameras_binary(cameras, os.path.join(path, "cameras" + ext))
        write_images_binary(images, os.path.join(path, "images" + ext))
        write_points3D_binary(points3D, os.path.join(path, "points3D" + ext))
    return cameras, images, points3D
451
+
452
+
453
def qvec2rotmat(qvec):
    """Convert a wxyz quaternion (COLMAP order) to a 3x3 rotation matrix."""
    w, x, y, z = qvec[0], qvec[1], qvec[2], qvec[3]
    return np.array(
        [
            [
                1 - 2 * y ** 2 - 2 * z ** 2,
                2 * x * y - 2 * w * z,
                2 * z * x + 2 * w * y,
            ],
            [
                2 * x * y + 2 * w * z,
                1 - 2 * x ** 2 - 2 * z ** 2,
                2 * y * z - 2 * w * x,
            ],
            [
                2 * z * x - 2 * w * y,
                2 * y * z + 2 * w * x,
                1 - 2 * x ** 2 - 2 * y ** 2,
            ],
        ]
    )
473
+
474
+
475
def rotmat2qvec(R):
    """Convert a 3x3 rotation matrix to a wxyz quaternion with w >= 0.

    Uses the eigen-decomposition of the symmetric K matrix; the quaternion
    is the eigenvector of the largest eigenvalue.
    """
    Rxx, Ryx, Rzx, Rxy, Ryy, Rzy, Rxz, Ryz, Rzz = R.flat
    K = np.array(
        [
            [Rxx - Ryy - Rzz, 0, 0, 0],
            [Ryx + Rxy, Ryy - Rxx - Rzz, 0, 0],
            [Rzx + Rxz, Rzy + Ryz, Rzz - Rxx - Ryy, 0],
            [Ryz - Rzy, Rzx - Rxz, Rxy - Ryx, Rxx + Ryy + Rzz],
        ]
    ) / 3.0
    eigvals, eigvecs = np.linalg.eigh(K)
    # eigh returns the quaternion as (x, y, z, w); reorder to (w, x, y, z).
    qvec = eigvecs[[3, 0, 1, 2], np.argmax(eigvals)]
    # Canonicalize the sign so w is non-negative (q and -q are the same rotation).
    return -qvec if qvec[0] < 0 else qvec
493
+
494
+
495
def main():
    """CLI entry point: read a COLMAP model, print its sizes, optionally re-write it."""
    parser = argparse.ArgumentParser(description="Read and write COLMAP binary and text models")
    parser.add_argument("--input_model", help="path to input model folder")
    parser.add_argument("--input_format", choices=[".bin", ".txt"], help="input model format", default="")
    parser.add_argument("--output_model", help="path to output model folder")
    parser.add_argument("--output_format", choices=[".bin", ".txt"], help="output model format", default=".txt")
    args = parser.parse_args()

    # BUG FIX: read_model builds paths with the `/` operator and with_suffix,
    # so it requires a pathlib.Path; passing the raw CLI string raised
    # TypeError. write_model uses os.path.join and accepts a plain string.
    cameras, images, points3D = read_model(path=Path(args.input_model), ext=args.input_format)

    print("num_cameras:", len(cameras))
    print("num_images:", len(images))
    print("num_points3D:", len(points3D))

    if args.output_model is not None:
        write_model(cameras, images, points3D, path=args.output_model, ext=args.output_format)


if __name__ == "__main__":
    main()